From 6b68769bf81825d057973318622a68dfb41a5476 Mon Sep 17 00:00:00 2001 From: anuveyatsu Date: Mon, 23 Mar 2020 00:21:17 +0600 Subject: [PATCH] [data][xl]: add worldwide aggregated data by date. --- data/worldwide-aggregated.csv | 61 +++++++++++++++++++++++++++++++++++ datapackage.json | 49 ++++++++++++++++++++++++++-- process.py | 29 +++++++++++++++++ 3 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 data/worldwide-aggregated.csv diff --git a/data/worldwide-aggregated.csv b/data/worldwide-aggregated.csv new file mode 100644 index 0000000..50c80af --- /dev/null +++ b/data/worldwide-aggregated.csv @@ -0,0 +1,61 @@ +Confirmed,Date,Deaths,Recovered +555,2020-01-22,17,28 +653,2020-01-23,18,30 +941,2020-01-24,26,36 +1434,2020-01-25,42,39 +2118,2020-01-26,56,52 +2927,2020-01-27,82,61 +5578,2020-01-28,131,107 +6166,2020-01-29,133,126 +8234,2020-01-30,171,143 +9927,2020-01-31,213,222 +12038,2020-02-01,259,284 +16787,2020-02-02,362,472 +19881,2020-02-03,426,623 +23892,2020-02-04,492,852 +27635,2020-02-05,564,1124 +30817,2020-02-06,634,1487 +34391,2020-02-07,719,2011 +37120,2020-02-08,806,2616 +40150,2020-02-09,906,3244 +42762,2020-02-10,1013,3946 +44802,2020-02-11,1113,4683 +45221,2020-02-12,1118,5150 +60368,2020-02-13,1371,6295 +66885,2020-02-14,1523,8058 +69030,2020-02-15,1666,9395 +71224,2020-02-16,1770,10865 +73258,2020-02-17,1868,12583 +75136,2020-02-18,2007,14352 +75639,2020-02-19,2122,16121 +76197,2020-02-20,2247,18177 +76823,2020-02-21,2251,18890 +78579,2020-02-22,2458,22886 +78965,2020-02-23,2469,23394 +79568,2020-02-24,2629,25227 +80413,2020-02-25,2708,27905 +81395,2020-02-26,2770,30384 +82754,2020-02-27,2814,33277 +84120,2020-02-28,2872,36711 +86011,2020-02-29,2941,39782 +88369,2020-03-01,2996,42716 +90306,2020-03-02,3085,45602 +92840,2020-03-03,3160,48228 +95120,2020-03-04,3254,51170 +97882,2020-03-05,3348,53796 +101784,2020-03-06,3460,55865 +105821,2020-03-07,3558,58358 +109795,2020-03-08,3802,60694 +113561,2020-03-09,3988,62494 
+118592,2020-03-10,4262,64404 +125865,2020-03-11,4615,67003 +128343,2020-03-12,4720,68324 +145193,2020-03-13,5404,70251 +156094,2020-03-14,5819,72624 +167446,2020-03-15,6440,76034 +181527,2020-03-16,7126,78088 +197142,2020-03-17,7905,80840 +214910,2020-03-18,8733,83207 +242708,2020-03-19,9867,84854 +272166,2020-03-20,11299,87256 +304524,2020-03-21,12973,91499 diff --git a/datapackage.json b/datapackage.json index 3abd3cb..0b434f7 100644 --- a/datapackage.json +++ b/datapackage.json @@ -1,7 +1,7 @@ { - "bytes": 1465368, - "count_of_rows": 28920, - "hash": "ac80ca047703880c26c97d8a6dc73df8", + "bytes": 1467076, + "count_of_rows": 28980, + "hash": "6e25a726932e6dabaaa8b8e5ed578dd7", "name": "covid-19", "profile": "data-package", "resources": [ @@ -79,6 +79,49 @@ "" ] } + }, + { + "bytes": 1708, + "dialect": { + "delimiter": ",", + "doubleQuote": true, + "lineTerminator": "\r\n", + "quoteChar": "\"", + "skipInitialSpace": false + }, + "encoding": "utf-8", + "format": "csv", + "hash": "f261b5b527542ceace4f2c7941d69e40", + "name": "worldwide-aggregated", + "path": "data/worldwide-aggregated.csv", + "profile": "data-resource", + "schema": { + "fields": [ + { + "decimalChar": ".", + "groupChar": "", + "name": "Confirmed", + "type": "number" + }, + { + "format": "%Y-%m-%d", + "name": "Date", + "type": "date" + }, + { + "decimalChar": ".", + "groupChar": "", + "name": "Deaths", + "type": "number" + }, + { + "decimalChar": ".", + "groupChar": "", + "name": "Recovered", + "type": "number" + } + ] + } } ], "title": "Novel Coronavirus 2019" diff --git a/process.py b/process.py index e8bcd9c..e1d4b1b 100644 --- a/process.py +++ b/process.py @@ -59,5 +59,34 @@ Flow( delete_fields(['Case']), update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'), update_package(name='covid-19', title='Novel Coronavirus 2019'), + dump_to_path(), + checkpoint('processed_data'), + # Duplicate the stream to create aggregated 
data + duplicate( + source='time-series-19-covid-combined', + target_name='worldwide-aggregated', + target_path='worldwide-aggregated.csv' + ), + join_with_self( + resource_name='worldwide-aggregated', + join_key=['Date'], + fields=dict( + Date={ + 'name': 'Date' + }, + Confirmed={ + 'name': 'Confirmed', + 'aggregate': 'sum' + }, + Recovered={ + 'name': 'Recovered', + 'aggregate': 'sum' + }, + Deaths={ + 'name': 'Deaths', + 'aggregate': 'sum' + } + ) + ), dump_to_path() ).results()[0]