You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

791 lines
13 KiB

  1. ```python
  2. # Reading data
  3. import os
  4. import git
  5. import shutil
  6. import tempfile
  7. # Create temporary dir
  8. t = tempfile.mkdtemp()
  9. d = 'lwc/topics/covid19/covid-model'
  10. # Clone into temporary dir
  11. git.Repo.clone_from('http://gmarx.jumpingcrab.com:8088/COVID-19/covid19-data.git',
  12. t, branch='master', depth=1)
  13. # Delete files
  14. #os.remove('README.txt')
  15. shutil.rmtree('data')
  16. #shutil.rmtree('secondTest')
  17. # Copy desired file from temporary dir
  18. shutil.move(os.path.join(t, 'data'), '.')
  19. # Remove temporary dir
  20. shutil.rmtree(t)
  21. ```
  22. ```python
  23. import pandas as pd
  24. import numpy as np
  25. import os
  26. def loadData(path, file):
  27. csvPath=os.path.join(path, file)
  28. return pd.read_csv(csvPath)
  29. ```
  30. ```python
  31. # import jtplot submodule from jupyterthemes
  32. from jupyterthemes import jtplot
  33. PATH=os.path.join("data")
  34. covid_data=loadData(PATH,"time-series-19-covid-combined.csv")
  35. covid_data.head()
  36. ```
  37. <div>
  38. <style scoped>
  39. .dataframe tbody tr th:only-of-type {
  40. vertical-align: middle;
  41. }
  42. .dataframe tbody tr th {
  43. vertical-align: top;
  44. }
  45. .dataframe thead th {
  46. text-align: right;
  47. }
  48. </style>
  49. <table border="1" class="dataframe">
  50. <thead>
  51. <tr style="text-align: right;">
  52. <th></th>
  53. <th>Date</th>
  54. <th>Country/Region</th>
  55. <th>Province/State</th>
  56. <th>Lat</th>
  57. <th>Long</th>
  58. <th>Confirmed</th>
  59. <th>Recovered</th>
  60. <th>Deaths</th>
  61. </tr>
  62. </thead>
  63. <tbody>
  64. <tr>
  65. <th>0</th>
  66. <td>2020-01-22</td>
  67. <td>Afghanistan</td>
  68. <td>NaN</td>
  69. <td>33.0</td>
  70. <td>65.0</td>
  71. <td>0</td>
  72. <td>0.0</td>
  73. <td>0</td>
  74. </tr>
  75. <tr>
  76. <th>1</th>
  77. <td>2020-01-23</td>
  78. <td>Afghanistan</td>
  79. <td>NaN</td>
  80. <td>33.0</td>
  81. <td>65.0</td>
  82. <td>0</td>
  83. <td>0.0</td>
  84. <td>0</td>
  85. </tr>
  86. <tr>
  87. <th>2</th>
  88. <td>2020-01-24</td>
  89. <td>Afghanistan</td>
  90. <td>NaN</td>
  91. <td>33.0</td>
  92. <td>65.0</td>
  93. <td>0</td>
  94. <td>0.0</td>
  95. <td>0</td>
  96. </tr>
  97. <tr>
  98. <th>3</th>
  99. <td>2020-01-25</td>
  100. <td>Afghanistan</td>
  101. <td>NaN</td>
  102. <td>33.0</td>
  103. <td>65.0</td>
  104. <td>0</td>
  105. <td>0.0</td>
  106. <td>0</td>
  107. </tr>
  108. <tr>
  109. <th>4</th>
  110. <td>2020-01-26</td>
  111. <td>Afghanistan</td>
  112. <td>NaN</td>
  113. <td>33.0</td>
  114. <td>65.0</td>
  115. <td>0</td>
  116. <td>0.0</td>
  117. <td>0</td>
  118. </tr>
  119. </tbody>
  120. </table>
  121. </div>
  122. ```python
  123. from sklearn.model_selection import train_test_split
  124. train_set, test_set=train_test_split(covid_data,test_size=0.2,random_state=42)
  125. train_cp=train_set.copy()
  126. ```
  127. ```python
  128. %matplotlib inline
  129. covid_data.hist()
  130. ```
  131. array([[<matplotlib.axes._subplots.AxesSubplot object at 0x11e46ca50>,
  132. <matplotlib.axes._subplots.AxesSubplot object at 0x116caea90>],
  133. [<matplotlib.axes._subplots.AxesSubplot object at 0x11e65dd10>,
  134. <matplotlib.axes._subplots.AxesSubplot object at 0x11e6a16d0>],
  135. [<matplotlib.axes._subplots.AxesSubplot object at 0x11e6d3ed0>,
  136. <matplotlib.axes._subplots.AxesSubplot object at 0x11e716710>]],
  137. dtype=object)
  138. ![png](output_4_1.png)
  139. ```python
  140. %matplotlib inline
  141. import matplotlib.pyplot as plt
  142. covid_mexico = covid_data[covid_data['Country/Region']=='Mexico']
  143. covid_mexico.shape
  144. ```
  145. (63, 8)
  146. ```python
  147. ```
  148. ```python
  149. covid_data.plot(kind="scatter", x="Long", y="Lat")
  150. ```
  151. <matplotlib.axes._subplots.AxesSubplot at 0x12b7c9910>
  152. ![png](output_7_1.png)
  153. ```python
  154. from datetime import datetime
  155. #covid_mexico['Date'] =pd.to_datetime(covid_mexico.Date, format="%Y-%m-%d")
  156. mexico_sort=covid_mexico.sort_values(by='Date', ascending=True)
  157. mexico_sort
  158. ```
  159. <div>
  160. <style scoped>
  161. .dataframe tbody tr th:only-of-type {
  162. vertical-align: middle;
  163. }
  164. .dataframe tbody tr th {
  165. vertical-align: top;
  166. }
  167. .dataframe thead th {
  168. text-align: right;
  169. }
  170. </style>
  171. <table border="1" class="dataframe">
  172. <thead>
  173. <tr style="text-align: right;">
  174. <th></th>
  175. <th>Date</th>
  176. <th>Country/Region</th>
  177. <th>Province/State</th>
  178. <th>Lat</th>
  179. <th>Long</th>
  180. <th>Confirmed</th>
  181. <th>Recovered</th>
  182. <th>Deaths</th>
  183. </tr>
  184. </thead>
  185. <tbody>
  186. <tr>
  187. <th>9954</th>
  188. <td>2020-01-22</td>
  189. <td>Mexico</td>
  190. <td>NaN</td>
  191. <td>23.6345</td>
  192. <td>-102.5528</td>
  193. <td>0</td>
  194. <td>0.0</td>
  195. <td>0</td>
  196. </tr>
  197. <tr>
  198. <th>9955</th>
  199. <td>2020-01-23</td>
  200. <td>Mexico</td>
  201. <td>NaN</td>
  202. <td>23.6345</td>
  203. <td>-102.5528</td>
  204. <td>0</td>
  205. <td>0.0</td>
  206. <td>0</td>
  207. </tr>
  208. <tr>
  209. <th>9956</th>
  210. <td>2020-01-24</td>
  211. <td>Mexico</td>
  212. <td>NaN</td>
  213. <td>23.6345</td>
  214. <td>-102.5528</td>
  215. <td>0</td>
  216. <td>0.0</td>
  217. <td>0</td>
  218. </tr>
  219. <tr>
  220. <th>9957</th>
  221. <td>2020-01-25</td>
  222. <td>Mexico</td>
  223. <td>NaN</td>
  224. <td>23.6345</td>
  225. <td>-102.5528</td>
  226. <td>0</td>
  227. <td>0.0</td>
  228. <td>0</td>
  229. </tr>
  230. <tr>
  231. <th>9958</th>
  232. <td>2020-01-26</td>
  233. <td>Mexico</td>
  234. <td>NaN</td>
  235. <td>23.6345</td>
  236. <td>-102.5528</td>
  237. <td>0</td>
  238. <td>0.0</td>
  239. <td>0</td>
  240. </tr>
  241. <tr>
  242. <th>...</th>
  243. <td>...</td>
  244. <td>...</td>
  245. <td>...</td>
  246. <td>...</td>
  247. <td>...</td>
  248. <td>...</td>
  249. <td>...</td>
  250. <td>...</td>
  251. </tr>
  252. <tr>
  253. <th>10012</th>
  254. <td>2020-03-20</td>
  255. <td>Mexico</td>
  256. <td>NaN</td>
  257. <td>23.6345</td>
  258. <td>-102.5528</td>
  259. <td>164</td>
  260. <td>4.0</td>
  261. <td>1</td>
  262. </tr>
  263. <tr>
  264. <th>10013</th>
  265. <td>2020-03-21</td>
  266. <td>Mexico</td>
  267. <td>NaN</td>
  268. <td>23.6345</td>
  269. <td>-102.5528</td>
  270. <td>203</td>
  271. <td>4.0</td>
  272. <td>2</td>
  273. </tr>
  274. <tr>
  275. <th>10014</th>
  276. <td>2020-03-22</td>
  277. <td>Mexico</td>
  278. <td>NaN</td>
  279. <td>23.6345</td>
  280. <td>-102.5528</td>
  281. <td>251</td>
  282. <td>4.0</td>
  283. <td>2</td>
  284. </tr>
  285. <tr>
  286. <th>10015</th>
  287. <td>2020-03-23</td>
  288. <td>Mexico</td>
  289. <td>NaN</td>
  290. <td>23.6345</td>
  291. <td>-102.5528</td>
  292. <td>316</td>
  293. <td>4.0</td>
  294. <td>3</td>
  295. </tr>
  296. <tr>
  297. <th>10016</th>
  298. <td>2020-03-24</td>
  299. <td>Mexico</td>
  300. <td>NaN</td>
  301. <td>23.6345</td>
  302. <td>-102.5528</td>
  303. <td>367</td>
  304. <td>NaN</td>
  305. <td>4</td>
  306. </tr>
  307. </tbody>
  308. </table>
  309. <p>63 rows × 8 columns</p>
  310. </div>
  311. ```python
  312. mexico_filter = mexico_sort[mexico_sort['Confirmed']!=0]
  313. mexico_filter
  314. ```
  315. <div>
  316. <style scoped>
  317. .dataframe tbody tr th:only-of-type {
  318. vertical-align: middle;
  319. }
  320. .dataframe tbody tr th {
  321. vertical-align: top;
  322. }
  323. .dataframe thead th {
  324. text-align: right;
  325. }
  326. </style>
  327. <table border="1" class="dataframe">
  328. <thead>
  329. <tr style="text-align: right;">
  330. <th></th>
  331. <th>Date</th>
  332. <th>Country/Region</th>
  333. <th>Province/State</th>
  334. <th>Lat</th>
  335. <th>Long</th>
  336. <th>Confirmed</th>
  337. <th>Recovered</th>
  338. <th>Deaths</th>
  339. </tr>
  340. </thead>
  341. <tbody>
  342. <tr>
  343. <th>9991</th>
  344. <td>2020-02-28</td>
  345. <td>Mexico</td>
  346. <td>NaN</td>
  347. <td>23.6345</td>
  348. <td>-102.5528</td>
  349. <td>1</td>
  350. <td>0.0</td>
  351. <td>0</td>
  352. </tr>
  353. <tr>
  354. <th>9992</th>
  355. <td>2020-02-29</td>
  356. <td>Mexico</td>
  357. <td>NaN</td>
  358. <td>23.6345</td>
  359. <td>-102.5528</td>
  360. <td>4</td>
  361. <td>0.0</td>
  362. <td>0</td>
  363. </tr>
  364. <tr>
  365. <th>9993</th>
  366. <td>2020-03-01</td>
  367. <td>Mexico</td>
  368. <td>NaN</td>
  369. <td>23.6345</td>
  370. <td>-102.5528</td>
  371. <td>5</td>
  372. <td>0.0</td>
  373. <td>0</td>
  374. </tr>
  375. <tr>
  376. <th>9994</th>
  377. <td>2020-03-02</td>
  378. <td>Mexico</td>
  379. <td>NaN</td>
  380. <td>23.6345</td>
  381. <td>-102.5528</td>
  382. <td>5</td>
  383. <td>0.0</td>
  384. <td>0</td>
  385. </tr>
  386. <tr>
  387. <th>9995</th>
  388. <td>2020-03-03</td>
  389. <td>Mexico</td>
  390. <td>NaN</td>
  391. <td>23.6345</td>
  392. <td>-102.5528</td>
  393. <td>5</td>
  394. <td>1.0</td>
  395. <td>0</td>
  396. </tr>
  397. <tr>
  398. <th>9996</th>
  399. <td>2020-03-04</td>
  400. <td>Mexico</td>
  401. <td>NaN</td>
  402. <td>23.6345</td>
  403. <td>-102.5528</td>
  404. <td>5</td>
  405. <td>1.0</td>
  406. <td>0</td>
  407. </tr>
  408. <tr>
  409. <th>9997</th>
  410. <td>2020-03-05</td>
  411. <td>Mexico</td>
  412. <td>NaN</td>
  413. <td>23.6345</td>
  414. <td>-102.5528</td>
  415. <td>5</td>
  416. <td>1.0</td>
  417. <td>0</td>
  418. </tr>
  419. <tr>
  420. <th>9998</th>
  421. <td>2020-03-06</td>
  422. <td>Mexico</td>
  423. <td>NaN</td>
  424. <td>23.6345</td>
  425. <td>-102.5528</td>
  426. <td>6</td>
  427. <td>1.0</td>
  428. <td>0</td>
  429. </tr>
  430. <tr>
  431. <th>9999</th>
  432. <td>2020-03-07</td>
  433. <td>Mexico</td>
  434. <td>NaN</td>
  435. <td>23.6345</td>
  436. <td>-102.5528</td>
  437. <td>6</td>
  438. <td>1.0</td>
  439. <td>0</td>
  440. </tr>
  441. <tr>
  442. <th>10000</th>
  443. <td>2020-03-08</td>
  444. <td>Mexico</td>
  445. <td>NaN</td>
  446. <td>23.6345</td>
  447. <td>-102.5528</td>
  448. <td>7</td>
  449. <td>1.0</td>
  450. <td>0</td>
  451. </tr>
  452. <tr>
  453. <th>10001</th>
  454. <td>2020-03-09</td>
  455. <td>Mexico</td>
  456. <td>NaN</td>
  457. <td>23.6345</td>
  458. <td>-102.5528</td>
  459. <td>7</td>
  460. <td>1.0</td>
  461. <td>0</td>
  462. </tr>
  463. <tr>
  464. <th>10002</th>
  465. <td>2020-03-10</td>
  466. <td>Mexico</td>
  467. <td>NaN</td>
  468. <td>23.6345</td>
  469. <td>-102.5528</td>
  470. <td>7</td>
  471. <td>4.0</td>
  472. <td>0</td>
  473. </tr>
  474. <tr>
  475. <th>10003</th>
  476. <td>2020-03-11</td>
  477. <td>Mexico</td>
  478. <td>NaN</td>
  479. <td>23.6345</td>
  480. <td>-102.5528</td>
  481. <td>8</td>
  482. <td>4.0</td>
  483. <td>0</td>
  484. </tr>
  485. <tr>
  486. <th>10004</th>
  487. <td>2020-03-12</td>
  488. <td>Mexico</td>
  489. <td>NaN</td>
  490. <td>23.6345</td>
  491. <td>-102.5528</td>
  492. <td>12</td>
  493. <td>4.0</td>
  494. <td>0</td>
  495. </tr>
  496. <tr>
  497. <th>10005</th>
  498. <td>2020-03-13</td>
  499. <td>Mexico</td>
  500. <td>NaN</td>
  501. <td>23.6345</td>
  502. <td>-102.5528</td>
  503. <td>12</td>
  504. <td>4.0</td>
  505. <td>0</td>
  506. </tr>
  507. <tr>
  508. <th>10006</th>
  509. <td>2020-03-14</td>
  510. <td>Mexico</td>
  511. <td>NaN</td>
  512. <td>23.6345</td>
  513. <td>-102.5528</td>
  514. <td>26</td>
  515. <td>4.0</td>
  516. <td>0</td>
  517. </tr>
  518. <tr>
  519. <th>10007</th>
  520. <td>2020-03-15</td>
  521. <td>Mexico</td>
  522. <td>NaN</td>
  523. <td>23.6345</td>
  524. <td>-102.5528</td>
  525. <td>41</td>
  526. <td>4.0</td>
  527. <td>0</td>
  528. </tr>
  529. <tr>
  530. <th>10008</th>
  531. <td>2020-03-16</td>
  532. <td>Mexico</td>
  533. <td>NaN</td>
  534. <td>23.6345</td>
  535. <td>-102.5528</td>
  536. <td>53</td>
  537. <td>4.0</td>
  538. <td>0</td>
  539. </tr>
  540. <tr>
  541. <th>10009</th>
  542. <td>2020-03-17</td>
  543. <td>Mexico</td>
  544. <td>NaN</td>
  545. <td>23.6345</td>
  546. <td>-102.5528</td>
  547. <td>82</td>
  548. <td>4.0</td>
  549. <td>0</td>
  550. </tr>
  551. <tr>
  552. <th>10010</th>
  553. <td>2020-03-18</td>
  554. <td>Mexico</td>
  555. <td>NaN</td>
  556. <td>23.6345</td>
  557. <td>-102.5528</td>
  558. <td>93</td>
  559. <td>4.0</td>
  560. <td>0</td>
  561. </tr>
  562. <tr>
  563. <th>10011</th>
  564. <td>2020-03-19</td>
  565. <td>Mexico</td>
  566. <td>NaN</td>
  567. <td>23.6345</td>
  568. <td>-102.5528</td>
  569. <td>118</td>
  570. <td>4.0</td>
  571. <td>1</td>
  572. </tr>
  573. <tr>
  574. <th>10012</th>
  575. <td>2020-03-20</td>
  576. <td>Mexico</td>
  577. <td>NaN</td>
  578. <td>23.6345</td>
  579. <td>-102.5528</td>
  580. <td>164</td>
  581. <td>4.0</td>
  582. <td>1</td>
  583. </tr>
  584. <tr>
  585. <th>10013</th>
  586. <td>2020-03-21</td>
  587. <td>Mexico</td>
  588. <td>NaN</td>
  589. <td>23.6345</td>
  590. <td>-102.5528</td>
  591. <td>203</td>
  592. <td>4.0</td>
  593. <td>2</td>
  594. </tr>
  595. <tr>
  596. <th>10014</th>
  597. <td>2020-03-22</td>
  598. <td>Mexico</td>
  599. <td>NaN</td>
  600. <td>23.6345</td>
  601. <td>-102.5528</td>
  602. <td>251</td>
  603. <td>4.0</td>
  604. <td>2</td>
  605. </tr>
  606. <tr>
  607. <th>10015</th>
  608. <td>2020-03-23</td>
  609. <td>Mexico</td>
  610. <td>NaN</td>
  611. <td>23.6345</td>
  612. <td>-102.5528</td>
  613. <td>316</td>
  614. <td>4.0</td>
  615. <td>3</td>
  616. </tr>
  617. <tr>
  618. <th>10016</th>
  619. <td>2020-03-24</td>
  620. <td>Mexico</td>
  621. <td>NaN</td>
  622. <td>23.6345</td>
  623. <td>-102.5528</td>
  624. <td>367</td>
  625. <td>NaN</td>
  626. <td>4</td>
  627. </tr>
  628. </tbody>
  629. </table>
  630. </div>
  631. ```python
  632. n=mexico_filter.shape[0]
  633. days=np.arange(1,n+1,1)
  634. days
  635. ```
  636. array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
  637. 18, 19, 20, 21, 22, 23, 24, 25, 26])
  638. ```python
  639. #mexico_filter = mexico_sort[mexico_sort['Confirmed']!=0]
  640. plt.scatter(x=days, y=mexico_filter['Confirmed'])
  641. ```
  642. <matplotlib.collections.PathCollection at 0x12acc8290>
  643. ![png](output_11_1.png)
  644. ```python
  645. ```
  646. ```python
  647. from scipy.optimize import curve_fit
  648. def exponential(x, a,k, b):
  649. return a*np.exp(x*k) + b
  650. potp, pcov = curve_fit(exponential, days, mexico_filter['Confirmed'])
  651. potp
  652. ```
  653. array([ 1.07768657, 0.22640743, -3.90363561])
  654. ```python
  655. ```
  656. ```python
  657. ```
  658. ```python
  659. ```
  660. ```python
  661. ```
  662. ```python
  663. # Plot outputs
  664. plt.scatter(days, mexico_filter['Confirmed'], color='black')
  665. plt.plot(days,exponential(days,*potp), color='blue', linewidth=2)
  666. ```
  667. [<matplotlib.lines.Line2D at 0x12b888fd0>]
  668. ![png](output_18_1.png)