"""Fetch COVID-19 time-series CSVs and dump them in long (unpivoted) form.

For each of the confirmed / deaths / recovered files under ``BASE_URL``, the
wide table (one column per date) is unpivoted into one row per date, the date
string is normalised and typed as a date, the count is typed as a number, and
the result is written out with ``dump_to_path``.

The flows run at import time; this module is meant to be executed as a script.
"""
import csv
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path
import datapackage

BASE_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
)
CONFIRMED = 'time_series_19-covid-Confirmed.csv'
DEATH = 'time_series_19-covid-Deaths.csv'
RECOVERED = 'time_series_19-covid-Recovered.csv'


def to_normal_date(row):
    """Rewrite ``row['date']`` from ``month-day-year`` to ``day-month-year``.

    Single-character day and month parts are left-padded with ``0`` so the
    value matches the ``%d-%m-%y`` format applied by the following
    ``set_type`` step. The year part is passed through untouched. The row is
    modified in place and nothing is returned.

    NOTE: this function is passed directly to ``Flow``; dataflows selects how
    to call a bare function from its parameter name, so the parameter must
    stay named ``row``.

    Raises:
        KeyError: if the row has no ``'date'`` entry.
        ValueError: if the date does not split into exactly three
            ``-``-separated parts.
    """
    month, day, year = row['date'].split('-')
    day = f'0{day}' if len(day) == 1 else day
    month = f'0{month}' if len(month) == 1 else month
    row['date'] = '-'.join([day, month, year])


# Every column whose header looks like ``<digits>/<digits>/<digits>`` is folded
# into rows; the matched header text becomes the value of the ``date`` key.
# Raw strings keep the regex backslashes from being read as (invalid) Python
# string escapes.
unpivoting_fields = [
    {
        'name': r'([0-9]+\/[0-9]+\/[0-9]+)',
        'keys': {'date': r'\1'},
    }
]

# Both new fields start out as strings; they are given their real types by the
# ``set_type`` steps once the date text has been normalised.
extra_keys = [{'name': 'date', 'type': 'string'}]
extra_value = {'name': 'case', 'type': 'string'}

# NOTE(review): ``dump_to_path()`` is called without arguments for all three
# files, so each flow writes to the library's default location -- confirm that
# a later run does not overwrite output of an earlier one.
for case in [CONFIRMED, DEATH, RECOVERED]:
    Flow(
        load(f'{BASE_URL}{case}'),
        unpivot(unpivoting_fields, extra_keys, extra_value),
        # Turn ``m/d/y`` into ``m-d-y`` so ``to_normal_date`` can split on '-'.
        find_replace([{'name': 'date', 'patterns': [{'find': '/', 'replace': '-'}]}]),
        to_normal_date,
        set_type('date', type='date', format='%d-%m-%y'),
        set_type('case', type='number'),
        dump_to_path(),
    ).process()  # run for the dump side effect only; no need to collect rows