You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

33 lines
1.2 KiB

import csv

import datapackage
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path
  4. BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
  5. CONFIRMED = 'time_series_19-covid-Confirmed.csv'
  6. DEATH = 'time_series_19-covid-Deaths.csv'
  7. RECOVERED = 'time_series_19-covid-Recovered.csv'
  8. def to_normal_date(row):
  9. old_date = row['date']
  10. month, day, year = row['date'].split('-')
  11. day = f'0{day}' if len(day) == 1 else day
  12. month = f'0{month}' if len(month) == 1 else month
  13. row['date'] = '-'.join([day, month, year])
  14. unpivoting_fields = [
  15. { 'name': '([0-9]+\/[0-9]+\/[0-9]+)', 'keys': {'date': r'\1'} }
  16. ]
  17. extra_keys = [{'name': 'date', 'type': 'string'} ]
  18. extra_value = {'name': 'case', 'type': 'string'}
  19. for case in [CONFIRMED, DEATH, RECOVERED]:
  20. Flow(
  21. load(f'{BASE_URL}{case}'),
  22. unpivot(unpivoting_fields, extra_keys, extra_value),
  23. find_replace([{'name': 'date', 'patterns': [{'find': '/', 'replace': '-'}]}]),
  24. to_normal_date,
  25. set_type('date', type='date', format='%d-%m-%y'),
  26. set_type('case', type='number'),
  27. dump_to_path()
  28. ).results()[0]