You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

36 lines
1.2 KiB

4 years ago
  1. import csv
  2. import pandas as pd
  3. import datapackage
  4. URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
  5. CONFIRMED = 'time_series_19-covid-Confirmed.csv'
  6. DEATH = 'time_series_19-covid-Deaths.csv'
  7. RECOVERED = 'time_series_19-covid-Recovered.csv'
  8. confirmed_cases = pd.read_csv(URL+CONFIRMED)
  9. total_deaths = pd.read_csv(URL+DEATH)
  10. total_recovered = pd.read_csv(URL+RECOVERED)
  11. def generate_region_file():
  12. subset = confirmed_cases.loc[:,'Province/State':'Long']
  13. subset.insert(0, 'ID', range(1000, 1000 + len(subset)))
  14. subset.to_csv('data/regions.csv', index = False)
  15. def process_data(input_df, output_filename):
  16. output = 'data/' + output_filename
  17. subset = input_df.loc[:,'1/22/20':]
  18. subset.insert(0, 'ID', range(1000, 1000 + len(subset)))
  19. subset = subset.set_index('ID').T
  20. subset = subset.rename(columns = {'ID':'Date'})
  21. subset.to_csv(output, index = False)
  22. def generate_data_package():
  23. package = datapackage.Package()
  24. package.infer('data/*.csv')
  25. package.save('datapackage.json')
  26. generate_region_file()
  27. process_data(confirmed_cases, 'confirmed.csv')
  28. process_data(total_deaths, 'deaths.csv')
  29. process_data(total_recovered, 'recovered.csv')
  30. generate_data_package()