# Reading data
import os
import git
import shutil
import tempfile
# Refresh the local 'data' directory from a shallow clone of the data repository.
d = 'lwc/topics/covid19/covid-model'  # not referenced in the visible code; kept in case later cells use it
# The context manager removes the temporary clone even when a step below fails.
with tempfile.TemporaryDirectory() as t:
    # NOTE(review): the remote is fetched over plain HTTP - confirm this is acceptable.
    git.Repo.clone_from(
        'http://gmarx.jumpingcrab.com:8088/COVID-19/covid19-data.git',
        t, branch='master', depth=1)
    fresh_data = os.path.join(t, 'data')
    # Validate the clone before touching the existing copy, so a bad download
    # cannot leave the working directory without any data.
    if not os.path.isdir(fresh_data):
        raise FileNotFoundError("cloned repository has no 'data' directory")
    # The previous copy may be absent (e.g. on the first run); remove it only if present.
    if os.path.isdir('data'):
        shutil.rmtree('data')
    # Move the freshly cloned data into the current working directory.
    shutil.move(fresh_data, '.')
import pandas as pd
import numpy as np
import os
def loadData(path, file):
    """Read a CSV file into a pandas DataFrame.

    Args:
        path: Directory that contains the file.
        file: Name of the CSV file inside ``path``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` for the joined path.
    """
    csvPath = os.path.join(path, file)
    return pd.read_csv(csvPath)
# import jtplot submodule from jupyterthemes
from jupyterthemes import jtplot
# Folder, relative to the working directory, that holds the downloaded CSV files.
PATH = os.path.join("data")
# Load the combined time-series table.
covid_data = loadData(path=PATH, file="time-series-19-covid-combined.csv")
# Preview the first rows.
covid_data.head()
|
Date |
Country/Region |
Province/State |
Lat |
Long |
Confirmed |
Recovered |
Deaths |
0 |
2020-01-22 |
Afghanistan |
NaN |
33.0 |
65.0 |
0 |
0.0 |
0 |
1 |
2020-01-23 |
Afghanistan |
NaN |
33.0 |
65.0 |
0 |
0.0 |
0 |
2 |
2020-01-24 |
Afghanistan |
NaN |
33.0 |
65.0 |
0 |
0.0 |
0 |
3 |
2020-01-25 |
Afghanistan |
NaN |
33.0 |
65.0 |
0 |
0.0 |
0 |
4 |
2020-01-26 |
Afghanistan |
NaN |
33.0 |
65.0 |
0 |
0.0 |
0 |
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set, using a fixed random_state of 42.
train_set, test_set = train_test_split(
    covid_data,
    random_state=42,
    test_size=0.2,
)
# Work on a copy so the training split itself stays untouched.
train_cp = train_set.copy()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot histograms for the columns of the full dataset.
covid_data.hist()
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x11e46ca50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x116caea90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x11e65dd10>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11e6a16d0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x11e6d3ed0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11e716710>]],
dtype=object)
%matplotlib inline
import matplotlib.pyplot as plt
# Keep only the rows whose country is Mexico.
covid_mexico = covid_data[covid_data['Country/Region'].eq('Mexico')]
# Show (rows, columns) of the subset.
covid_mexico.shape
(63, 8)
# Scatter every record by its coordinates (longitude on x, latitude on y).
covid_data.plot.scatter(x="Long", y="Lat")
<matplotlib.axes._subplots.AxesSubplot at 0x12b7c9910>
from datetime import datetime
#covid_mexico['Date'] =pd.to_datetime(covid_mexico.Date, format="%Y-%m-%d")
# Order the Mexico rows by the Date column, earliest first (ascending is the default).
mexico_sort = covid_mexico.sort_values('Date')
mexico_sort
|
Date |
Country/Region |
Province/State |
Lat |
Long |
Confirmed |
Recovered |
Deaths |
9954 |
2020-01-22 |
Mexico |
NaN |
23.6345 |
-102.5528 |
0 |
0.0 |
0 |
9955 |
2020-01-23 |
Mexico |
NaN |
23.6345 |
-102.5528 |
0 |
0.0 |
0 |
9956 |
2020-01-24 |
Mexico |
NaN |
23.6345 |
-102.5528 |
0 |
0.0 |
0 |
9957 |
2020-01-25 |
Mexico |
NaN |
23.6345 |
-102.5528 |
0 |
0.0 |
0 |
9958 |
2020-01-26 |
Mexico |
NaN |
23.6345 |
-102.5528 |
0 |
0.0 |
0 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
10012 |
2020-03-20 |
Mexico |
NaN |
23.6345 |
-102.5528 |
164 |
4.0 |
1 |
10013 |
2020-03-21 |
Mexico |
NaN |
23.6345 |
-102.5528 |
203 |
4.0 |
2 |
10014 |
2020-03-22 |
Mexico |
NaN |
23.6345 |
-102.5528 |
251 |
4.0 |
2 |
10015 |
2020-03-23 |
Mexico |
NaN |
23.6345 |
-102.5528 |
316 |
4.0 |
3 |
10016 |
2020-03-24 |
Mexico |
NaN |
23.6345 |
-102.5528 |
367 |
NaN |
4 |
63 rows × 8 columns
# Drop the rows where no case has been confirmed yet.
mexico_filter = mexico_sort[mexico_sort['Confirmed'].ne(0)]
mexico_filter
|
Date |
Country/Region |
Province/State |
Lat |
Long |
Confirmed |
Recovered |
Deaths |
9991 |
2020-02-28 |
Mexico |
NaN |
23.6345 |
-102.5528 |
1 |
0.0 |
0 |
9992 |
2020-02-29 |
Mexico |
NaN |
23.6345 |
-102.5528 |
4 |
0.0 |
0 |
9993 |
2020-03-01 |
Mexico |
NaN |
23.6345 |
-102.5528 |
5 |
0.0 |
0 |
9994 |
2020-03-02 |
Mexico |
NaN |
23.6345 |
-102.5528 |
5 |
0.0 |
0 |
9995 |
2020-03-03 |
Mexico |
NaN |
23.6345 |
-102.5528 |
5 |
1.0 |
0 |
9996 |
2020-03-04 |
Mexico |
NaN |
23.6345 |
-102.5528 |
5 |
1.0 |
0 |
9997 |
2020-03-05 |
Mexico |
NaN |
23.6345 |
-102.5528 |
5 |
1.0 |
0 |
9998 |
2020-03-06 |
Mexico |
NaN |
23.6345 |
-102.5528 |
6 |
1.0 |
0 |
9999 |
2020-03-07 |
Mexico |
NaN |
23.6345 |
-102.5528 |
6 |
1.0 |
0 |
10000 |
2020-03-08 |
Mexico |
NaN |
23.6345 |
-102.5528 |
7 |
1.0 |
0 |
10001 |
2020-03-09 |
Mexico |
NaN |
23.6345 |
-102.5528 |
7 |
1.0 |
0 |
10002 |
2020-03-10 |
Mexico |
NaN |
23.6345 |
-102.5528 |
7 |
4.0 |
0 |
10003 |
2020-03-11 |
Mexico |
NaN |
23.6345 |
-102.5528 |
8 |
4.0 |
0 |
10004 |
2020-03-12 |
Mexico |
NaN |
23.6345 |
-102.5528 |
12 |
4.0 |
0 |
10005 |
2020-03-13 |
Mexico |
NaN |
23.6345 |
-102.5528 |
12 |
4.0 |
0 |
10006 |
2020-03-14 |
Mexico |
NaN |
23.6345 |
-102.5528 |
26 |
4.0 |
0 |
10007 |
2020-03-15 |
Mexico |
NaN |
23.6345 |
-102.5528 |
41 |
4.0 |
0 |
10008 |
2020-03-16 |
Mexico |
NaN |
23.6345 |
-102.5528 |
53 |
4.0 |
0 |
10009 |
2020-03-17 |
Mexico |
NaN |
23.6345 |
-102.5528 |
82 |
4.0 |
0 |
10010 |
2020-03-18 |
Mexico |
NaN |
23.6345 |
-102.5528 |
93 |
4.0 |
0 |
10011 |
2020-03-19 |
Mexico |
NaN |
23.6345 |
-102.5528 |
118 |
4.0 |
1 |
10012 |
2020-03-20 |
Mexico |
NaN |
23.6345 |
-102.5528 |
164 |
4.0 |
1 |
10013 |
2020-03-21 |
Mexico |
NaN |
23.6345 |
-102.5528 |
203 |
4.0 |
2 |
10014 |
2020-03-22 |
Mexico |
NaN |
23.6345 |
-102.5528 |
251 |
4.0 |
2 |
10015 |
2020-03-23 |
Mexico |
NaN |
23.6345 |
-102.5528 |
316 |
4.0 |
3 |
10016 |
2020-03-24 |
Mexico |
NaN |
23.6345 |
-102.5528 |
367 |
NaN |
4 |
# Number of rows with at least one confirmed case.
n = len(mexico_filter)
# Day index 1..n, one entry per remaining row.
days = np.arange(1, n + 1)
days
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26])
#mexico_filter = mexico_sort[mexico_sort['Confirmed']!=0]
# Confirmed cases against the day index.
plt.scatter(days, mexico_filter['Confirmed'])
<matplotlib.collections.PathCollection at 0x12acc8290>
from scipy.optimize import curve_fit
def exponential(x, a, k, b):
    """Evaluate the exponential model ``a * exp(k * x) + b``.

    Args:
        x: Point(s) at which to evaluate the model (scalar or NumPy array).
        a: Multiplicative scale of the exponential term.
        k: Rate constant inside the exponent.
        b: Constant offset added to the result.

    Returns:
        The model value(s) at ``x``.
    """
    return a * np.exp(x * k) + b
# Fit the exponential model to the confirmed counts; potp receives the fitted
# (a, k, b) and pcov their estimated covariance.
potp, pcov = curve_fit(
    f=exponential,
    xdata=days,
    ydata=mexico_filter['Confirmed'],
)
potp
array([ 1.07768657, 0.22640743, -3.90363561])
# Observed confirmed counts as black points ...
plt.scatter(days, mexico_filter['Confirmed'], color='black')
# ... with the fitted exponential curve drawn on top as a blue line.
plt.plot(
    days,
    exponential(days, *potp),
    color='blue',
    linewidth=2,
)
[<matplotlib.lines.Line2D at 0x12b888fd0>]