Browse Source

[data][xl]: updated the data sources for confirmed and death cases.

This is needed since the previous sources are now outdated.
Refs #31
master
anuveyatsu 4 years ago
parent
commit
0875bc923e
5 changed files with 14842 additions and 32078 deletions
  1. +537
    -1467
      data/countries-aggregated.csv
  2. +14245
    -30551
      data/time-series-19-covid-combined.csv
  3. +46
    -46
      data/worldwide-aggregated.csv
  4. +8
    -8
      datapackage.json
  5. +6
    -6
      process.py

+ 537
- 1467
data/countries-aggregated.csv
File diff suppressed because it is too large
View File


+ 14245
- 30551
data/time-series-19-covid-combined.csv
File diff suppressed because it is too large
View File


+ 46
- 46
data/worldwide-aggregated.csv View File

@ -1,6 +1,6 @@
Date,Confirmed,Recovered,Deaths
2020-01-22,555,28,17
2020-01-23,653,30,18
2020-01-23,654,30,18
2020-01-24,941,36,26
2020-01-25,1434,39,42
2020-01-26,2118,52,56
@ -14,50 +14,50 @@ Date,Confirmed,Recovered,Deaths
2020-02-03,19881,623,426
2020-02-04,23892,852,492
2020-02-05,27635,1124,564
2020-02-06,30817,1487,634
2020-02-06,30794,1487,634
2020-02-07,34391,2011,719
2020-02-08,37120,2616,806
2020-02-09,40150,3244,906
2020-02-10,42762,3946,1013
2020-02-11,44802,4683,1113
2020-02-12,45221,5150,1118
2020-02-13,60368,6295,1371
2020-02-14,66885,8058,1523
2020-02-15,69030,9395,1666
2020-02-16,71224,10865,1770
2020-02-17,73258,12583,1868
2020-02-18,75136,14352,2007
2020-02-19,75639,16121,2122
2020-02-20,76197,18177,2247
2020-02-21,76823,18890,2251
2020-02-22,78579,22886,2458
2020-02-23,78965,23394,2469
2020-02-24,79568,25227,2629
2020-02-25,80413,27905,2708
2020-02-26,81395,30384,2770
2020-02-27,82754,33277,2814
2020-02-28,84120,36711,2872
2020-02-29,86011,39782,2941
2020-03-01,88369,42716,2996
2020-03-02,90306,45602,3085
2020-03-03,92840,48228,3160
2020-03-04,95120,51170,3254
2020-03-05,97882,53796,3348
2020-03-06,101784,55865,3460
2020-03-07,105821,58358,3558
2020-03-08,109795,60694,3802
2020-03-09,113561,62494,3988
2020-03-10,118592,64404,4262
2020-03-11,125865,67003,4615
2020-03-12,128343,68324,4720
2020-03-13,145193,70251,5404
2020-03-14,156094,72624,5819
2020-03-15,167446,76034,6440
2020-03-16,181527,78088,7126
2020-03-17,197142,80840,7905
2020-03-18,214910,83207,8733
2020-03-19,242708,84854,9867
2020-03-20,272166,87256,11299
2020-03-21,304524,91499,12973
2020-03-22,335955,97704,14632
2020-03-23,336004,98334,14643
2020-02-09,40150,3241,906
2020-02-10,42762,3943,1013
2020-02-11,44802,4680,1113
2020-02-12,45221,5144,1118
2020-02-13,60368,6289,1371
2020-02-14,66885,8052,1523
2020-02-15,69030,9387,1666
2020-02-16,71224,10850,1770
2020-02-17,73258,12568,1868
2020-02-18,75136,14337,2007
2020-02-19,75639,16106,2122
2020-02-20,76197,18162,2247
2020-02-21,76819,18873,2251
2020-02-22,78572,22869,2458
2020-02-23,78958,23377,2469
2020-02-24,79561,25210,2629
2020-02-25,80406,27880,2708
2020-02-26,81388,30359,2770
2020-02-27,82746,33252,2814
2020-02-28,84112,36685,2872
2020-02-29,86011,39755,2941
2020-03-01,88369,42689,2996
2020-03-02,90306,45575,3085
2020-03-03,92840,48201,3160
2020-03-04,95120,51143,3254
2020-03-05,97882,53769,3348
2020-03-06,101794,55837,3460
2020-03-07,105831,58320,3558
2020-03-08,109805,60656,3802
2020-03-09,113571,62456,3988
2020-03-10,118602,64365,4262
2020-03-11,125875,66964,4615
2020-03-12,128353,68281,4720
2020-03-13,145209,70208,5404
2020-03-14,156104,72579,5819
2020-03-15,167454,75989,6440
2020-03-16,181573,78036,7126
2020-03-17,197150,80756,7905
2020-03-18,214909,83127,8733
2020-03-19,242706,84774,9867
2020-03-20,272164,87176,11299
2020-03-21,304519,91419,12973
2020-03-22,337089,95436,14651
2020-03-23,378547,95882,16505

+ 8
- 8
datapackage.json View File

@ -1,12 +1,12 @@
{
"bytes": 1905585,
"count_of_rows": 42470,
"hash": "81aae76b0bd89ffd2f3fe135d619d96a",
"bytes": 976643,
"count_of_rows": 25234,
"hash": "9bc713d601a6c93496187964b2922be5",
"name": "covid-19",
"profile": "data-package",
"resources": [
{
"bytes": 1586504,
"bytes": 685516,
"dialect": {
"caseSensitiveHeader": false,
"delimiter": ",",
@ -18,7 +18,7 @@
},
"encoding": "utf-8",
"format": "csv",
"hash": "c1e32d419f94054e3ff37191e64d26b4",
"hash": "2faf346c668744c480a1500f606072c0",
"name": "time-series-19-covid-combined",
"path": "data/time-series-19-covid-combined.csv",
"profile": "tabular-data-resource",
@ -82,7 +82,7 @@
}
},
{
"bytes": 317311,
"bytes": 289357,
"dialect": {
"delimiter": ",",
"doubleQuote": true,
@ -92,7 +92,7 @@
},
"encoding": "utf-8",
"format": "csv",
"hash": "a59719082a4e1e20769a515e330338a5",
"hash": "9db2ed9b2d1b7e753d85aee3b48d5b7f",
"name": "countries-aggregated",
"path": "data/countries-aggregated.csv",
"profile": "data-resource",
@ -147,7 +147,7 @@
},
"encoding": "utf-8",
"format": "csv",
"hash": "0e1f20b63ee4be40bff10a3051864fc8",
"hash": "42e338fecaaa08c5f70a2701584bc44e",
"name": "worldwide-aggregated",
"path": "data/worldwide-aggregated.csv",
"profile": "data-resource",


+ 6
- 6
process.py View File

@ -1,8 +1,8 @@
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path, update_package, update_resource, update_schema, join, join_with_self, add_computed_field, delete_fields, checkpoint, duplicate, filter_rows
BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
CONFIRMED = 'time_series_19-covid-Confirmed.csv'
DEATH = 'time_series_19-covid-Deaths.csv'
CONFIRMED = 'time_series_covid19_confirmed_global.csv'
DEATH = 'time_series_covid19_deaths_global.csv'
RECOVERED = 'time_series_19-covid-Recovered.csv'
def to_normal_date(row):
@ -34,10 +34,10 @@ Flow(
set_type('Date', type='date', format='%d-%m-%y', resources=None),
set_type('Case', type='number', resources=None),
join(
source_name='time_series_19-covid-Confirmed',
source_name='time_series_covid19_confirmed_global',
source_key=['Province/State', 'Country/Region', 'Date'],
source_delete=True,
target_name='time_series_19-covid-Deaths',
target_name='time_series_covid19_deaths_global',
target_key=['Province/State', 'Country/Region', 'Date'],
fields=dict(Confirmed={
'name': 'Case',
@ -48,7 +48,7 @@ Flow(
source_name='time_series_19-covid-Recovered',
source_key=['Province/State', 'Country/Region', 'Date'],
source_delete=True,
target_name='time_series_19-covid-Deaths',
target_name='time_series_covid19_deaths_global',
target_key=['Province/State', 'Country/Region', 'Date'],
fields=dict(Recovered={
'name': 'Case',
@ -61,7 +61,7 @@ Flow(
with_='{Case}'
),
delete_fields(['Case']),
update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'),
update_resource('time_series_covid19_deaths_global', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'),
update_schema('time-series-19-covid-combined', missingValues=['None', ''], fields=[
{
"format": "%Y-%m-%d",


Loading…
Cancel
Save