|
@ -1,6 +1,4 @@ |
|
|
import csv |
|
|
|
|
|
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path |
|
|
|
|
|
import datapackage |
|
|
|
|
|
|
|
|
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path, update_resource, join, add_computed_field, delete_fields |
|
|
|
|
|
|
|
|
BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/' |
|
|
BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/' |
|
|
CONFIRMED = 'time_series_19-covid-Confirmed.csv' |
|
|
CONFIRMED = 'time_series_19-covid-Confirmed.csv' |
|
@ -8,26 +6,56 @@ DEATH = 'time_series_19-covid-Deaths.csv' |
|
|
RECOVERED = 'time_series_19-covid-Recovered.csv' |
|
|
RECOVERED = 'time_series_19-covid-Recovered.csv' |
|
|
|
|
|
|
|
|
def to_normal_date(row): |
|
|
def to_normal_date(row): |
|
|
old_date = row['date'] |
|
|
|
|
|
month, day, year = row['date'].split('-') |
|
|
|
|
|
|
|
|
old_date = row['Date'] |
|
|
|
|
|
month, day, year = row['Date'].split('-') |
|
|
day = f'0{day}' if len(day) == 1 else day |
|
|
day = f'0{day}' if len(day) == 1 else day |
|
|
month = f'0{month}' if len(month) == 1 else month |
|
|
month = f'0{month}' if len(month) == 1 else month |
|
|
row['date'] = '-'.join([day, month, year]) |
|
|
|
|
|
|
|
|
row['Date'] = '-'.join([day, month, year]) |
|
|
|
|
|
|
|
|
unpivoting_fields = [ |
|
|
unpivoting_fields = [ |
|
|
{ 'name': '([0-9]+\/[0-9]+\/[0-9]+)', 'keys': {'date': r'\1'} } |
|
|
|
|
|
|
|
|
{ 'name': '([0-9]+\/[0-9]+\/[0-9]+)', 'keys': {'Date': r'\1'} } |
|
|
] |
|
|
] |
|
|
|
|
|
|
|
|
extra_keys = [{'name': 'date', 'type': 'string'} ] |
|
|
|
|
|
extra_value = {'name': 'case', 'type': 'string'} |
|
|
|
|
|
|
|
|
extra_keys = [{'name': 'Date', 'type': 'string'} ] |
|
|
|
|
|
extra_value = {'name': 'Case', 'type': 'number'} |
|
|
|
|
|
|
|
|
for case in [CONFIRMED, DEATH, RECOVERED]: |
|
|
|
|
|
Flow( |
|
|
|
|
|
load(f'{BASE_URL}{case}'), |
|
|
|
|
|
unpivot(unpivoting_fields, extra_keys, extra_value), |
|
|
|
|
|
find_replace([{'name': 'date', 'patterns': [{'find': '/', 'replace': '-'}]}]), |
|
|
|
|
|
to_normal_date, |
|
|
|
|
|
set_type('date', type='date', format='%d-%m-%y'), |
|
|
|
|
|
set_type('case', type='number'), |
|
|
|
|
|
dump_to_path() |
|
|
|
|
|
).results()[0] |
|
|
|
|
|
|
|
|
Flow( |
|
|
|
|
|
load(f'{BASE_URL}{CONFIRMED}'), |
|
|
|
|
|
load(f'{BASE_URL}{RECOVERED}'), |
|
|
|
|
|
load(f'{BASE_URL}{DEATH}'), |
|
|
|
|
|
unpivot(unpivoting_fields, extra_keys, extra_value), |
|
|
|
|
|
find_replace([{'name': 'Date', 'patterns': [{'find': '/', 'replace': '-'}]}]), |
|
|
|
|
|
to_normal_date, |
|
|
|
|
|
set_type('Date', type='date', format='%d-%m-%y', resources=None), |
|
|
|
|
|
set_type('Case', type='number', resources=None), |
|
|
|
|
|
join( |
|
|
|
|
|
source_name='time_series_19-covid-Confirmed', |
|
|
|
|
|
source_key=['Province/State', 'Date'], |
|
|
|
|
|
source_delete=True, |
|
|
|
|
|
target_name='time_series_19-covid-Deaths', |
|
|
|
|
|
target_key=['Province/State', 'Date'], |
|
|
|
|
|
fields=dict(Confirmed={ |
|
|
|
|
|
'name': 'Case', |
|
|
|
|
|
'aggregate': 'first' |
|
|
|
|
|
}) |
|
|
|
|
|
), |
|
|
|
|
|
join( |
|
|
|
|
|
source_name='time_series_19-covid-Recovered', |
|
|
|
|
|
source_key=['Province/State', 'Date'], |
|
|
|
|
|
source_delete=True, |
|
|
|
|
|
target_name='time_series_19-covid-Deaths', |
|
|
|
|
|
target_key=['Province/State', 'Date'], |
|
|
|
|
|
fields=dict(Recovered={ |
|
|
|
|
|
'name': 'Case', |
|
|
|
|
|
'aggregate': 'first' |
|
|
|
|
|
}) |
|
|
|
|
|
), |
|
|
|
|
|
add_computed_field( |
|
|
|
|
|
target={'name': 'Deaths', 'type': 'number'}, |
|
|
|
|
|
operation='format', |
|
|
|
|
|
with_='{Case}' |
|
|
|
|
|
), |
|
|
|
|
|
delete_fields(['Case']), |
|
|
|
|
|
update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='time-series-19-covid-combined.csv'), |
|
|
|
|
|
dump_to_path() |
|
|
|
|
|
).results()[0] |