"""Download the JHU CSSE COVID-19 time series and re-export them as CSV files.

Running this module fetches the confirmed, deaths and recovered series,
writes ``data/regions.csv`` plus one transposed CSV per series, and finally
infers a ``datapackage.json`` descriptor from the generated files.
"""
import csv
import os

import datapackage
import pandas as pd

# NOTE(review): upstream may have renamed or retired these file names since
# this script was written -- confirm that the URLs below still resolve.
URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
CONFIRMED = 'time_series_19-covid-Confirmed.csv'
DEATH = 'time_series_19-covid-Deaths.csv'
RECOVERED = 'time_series_19-covid-Recovered.csv'

# Directory that receives every generated CSV file.
_DATA_DIR = 'data'
# First value of the synthetic row identifier.
_FIRST_ID = 1000

# The three source tables are downloaded once, at module load.
confirmed_cases = pd.read_csv(URL + CONFIRMED)
total_deaths = pd.read_csv(URL + DEATH)
total_recovered = pd.read_csv(URL + RECOVERED)


def _with_id_column(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with a leading ``ID`` column counting from 1000.

    IDs are assigned purely by row position. Working on an explicit copy
    keeps the insert from touching the frame the slice was taken from.
    """
    numbered = frame.copy()
    numbered.insert(0, 'ID', range(_FIRST_ID, _FIRST_ID + len(numbered)))
    return numbered


def generate_region_file() -> None:
    """Write ``data/regions.csv`` from the confirmed-cases table.

    The file holds the columns ``'Province/State'`` through ``'Long'``
    (inclusive) of ``confirmed_cases``, preceded by the synthetic ``ID``
    column.
    """
    regions = _with_id_column(confirmed_cases.loc[:, 'Province/State':'Long'])
    # Create the output directory on demand so a fresh checkout does not fail.
    os.makedirs(_DATA_DIR, exist_ok=True)
    regions.to_csv(os.path.join(_DATA_DIR, 'regions.csv'), index=False)


def process_data(input_df: pd.DataFrame, output_filename: str) -> None:
    """Write the time-series part of *input_df* to ``data/<output_filename>``.

    Every column from ``'1/22/20'`` to the last one is kept, rows are given
    the same positional IDs used by ``generate_region_file``, and the table
    is transposed so each output row is one source column (presumably one
    date) and each output column is one region ID.

    Args:
        input_df: Table with a ``'1/22/20'`` column; everything from that
            column onward is exported.
        output_filename: File name (not a path) created inside ``data/``.
    """
    by_date = _with_id_column(input_df.loc[:, '1/22/20':]).set_index('ID').T
    os.makedirs(_DATA_DIR, exist_ok=True)
    # After the transpose the former column labels are the index. The index
    # must be written (labelled "Date") or the output loses them entirely;
    # renaming a *column* called 'ID' cannot work because 'ID' is only the
    # name of the columns axis at this point.
    by_date.to_csv(os.path.join(_DATA_DIR, output_filename), index_label='Date')


def generate_data_package() -> None:
    """Infer a data package from ``data/*.csv`` and save ``datapackage.json``."""
    package = datapackage.Package()
    package.infer('data/*.csv')
    package.save('datapackage.json')


generate_region_file()
process_data(confirmed_cases, 'confirmed.csv')
process_data(total_deaths, 'deaths.csv')
process_data(total_recovered, 'recovered.csv')
generate_data_package()