|
import csv
|
|
import pandas as pd
|
|
import datapackage
|
|
|
|
# Base URL of the time-series folder in the CSSEGISandData/COVID-19
# GitHub repository (raw file access, master branch).
URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'

# File names, relative to URL, of the three time-series tables.
# NOTE(review): confirm these file names still exist upstream.
CONFIRMED = 'time_series_19-covid-Confirmed.csv'
DEATH = 'time_series_19-covid-Deaths.csv'
RECOVERED = 'time_series_19-covid-Recovered.csv'

# Download each table into its own DataFrame. The reads happen at module
# load time, in the order confirmed -> deaths -> recovered.
confirmed_cases, total_deaths, total_recovered = (
    pd.read_csv(URL + csv_name)
    for csv_name in (CONFIRMED, DEATH, RECOVERED)
)
|
|
|
|
def generate_region_file():
    """Write the region lookup table to ``data/regions.csv``.

    Takes the columns from 'Province/State' through 'Long' (inclusive) of
    the module-level ``confirmed_cases`` frame, prepends an 'ID' column
    numbered consecutively from 1000 (one ID per row, in row order) and
    saves the result without the DataFrame index.
    """
    first_id = 1000
    regions = confirmed_cases.loc[:, 'Province/State':'Long']
    region_ids = range(first_id, first_id + len(regions))
    # 'ID' goes in position 0 so it is the first column of the CSV.
    regions.insert(loc=0, column='ID', value=region_ids)
    regions.to_csv('data/regions.csv', index=False)
|
|
|
|
def process_data(input_df, output_filename):
    """Write the per-date values of ``input_df`` to ``data/<output_filename>``.

    The columns of ``input_df`` from '1/22/20' onwards are transposed so
    that each output row is one date and each output column is one input
    row. Output columns are labelled with consecutive IDs starting at 1000,
    in input row order. The date labels are written as a leading 'Date'
    column.

    Args:
        input_df: DataFrame containing a '1/22/20' column; that column and
            every column after it are exported. The frame is not modified.
        output_filename: File name of the CSV to create inside ``data/``.

    Raises:
        KeyError: If ``input_df`` has no '1/22/20' column.
    """
    output = 'data/' + output_filename

    # Transposing yields a new frame, so relabelling its columns below
    # never touches the caller's DataFrame.
    by_date = input_df.loc[:, '1/22/20':].T
    by_date.columns = range(1000, 1000 + by_date.shape[1])

    # After the transpose the dates are the index. The index must be
    # written (under the 'Date' header), otherwise the dates are lost.
    by_date.to_csv(output, index_label='Date')
|
|
|
|
def generate_data_package():
    """Create ``datapackage.json`` from the CSV files under ``data/``.

    Builds an empty ``datapackage.Package``, asks it to infer its contents
    from every file matching ``data/*.csv`` and saves the result to
    ``datapackage.json`` in the current working directory.
    """
    csv_pattern = 'data/*.csv'
    descriptor_path = 'datapackage.json'

    pkg = datapackage.Package()
    pkg.infer(csv_pattern)
    pkg.save(descriptor_path)
|
|
|
|
# Step 1: export the region lookup table.
generate_region_file()

# Step 2: export one per-date CSV for each downloaded time series.
process_data(input_df=confirmed_cases, output_filename='confirmed.csv')
process_data(input_df=total_deaths, output_filename='deaths.csv')
process_data(input_df=total_recovered, output_filename='recovered.csv')

# Step 3: describe the generated CSV files in datapackage.json. This runs
# last so that the files written above already exist.
generate_data_package()
|