From 326936d75f88cc9bedb0b626f46bed1d900b1174 Mon Sep 17 00:00:00 2001
From: anuveyatsu
Date: Mon, 23 Mar 2020 11:39:58 +0600
Subject: [PATCH] [schema][m]: update resources schema to include titles and change order of the columns.

Two update_schema steps are added to the flow. The first one follows the
update_resource call that names the resource
'time-series-19-covid-combined', so it addresses the resource by that
name. The second one, placed later in the flow after the duplicate step,
addresses 'worldwide-aggregated'. Both give the Confirmed, Recovered and
Deaths fields a human-readable title.
---
NOTE(review): leading whitespace in this patch was reconstructed; confirm
it against process.py before applying.
NOTE(review): the 'worldwide-aggregated' schema below still lists
Province/State, Country/Region, Lat and Long; confirm that the aggregated
resource really carries those columns at that point in the flow.

 process.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

diff --git a/process.py b/process.py
index 872b493..cf406cd 100644
--- a/process.py
+++ b/process.py
@@ -1,4 +1,4 @@
-from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path, update_package, update_resource, join, join_with_self, add_computed_field, delete_fields, checkpoint, duplicate
+from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path, update_package, update_resource, update_schema, join, join_with_self, add_computed_field, delete_fields, checkpoint, duplicate
 
 BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
 CONFIRMED = 'time_series_19-covid-Confirmed.csv'
@@ -58,6 +58,58 @@ Flow(
       ),
       delete_fields(['Case']),
       update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'),
+      update_schema('time-series-19-covid-combined', fields=[
+        {
+          "format": "%Y-%m-%d",
+          "name": "Date",
+          "type": "date"
+        },
+        {
+          "format": "default",
+          "name": "Country/Region",
+          "type": "string"
+        },
+        {
+          "format": "default",
+          "name": "Province/State",
+          "type": "string"
+        },
+        {
+          "decimalChar": ".",
+          "format": "default",
+          "groupChar": "",
+          "name": "Lat",
+          "type": "number"
+        },
+        {
+          "decimalChar": ".",
+          "format": "default",
+          "groupChar": "",
+          "name": "Long",
+          "type": "number"
+        },
+        {
+          "format": "default",
+          "groupChar": "",
+          "name": "Confirmed",
+          "title": "Cumulative total confirmed cases to date",
+          "type": "integer"
+        },
+        {
+          "format": "default",
+          "groupChar": "",
+          "name": "Recovered",
+          "title": "Cumulative total recovered cases to date",
+          "type": "integer"
+        },
+        {
+          "format": "default",
+          "groupChar": "",
+          "name": "Deaths",
+          "title": "Cumulative total deaths to date",
+          "type": "integer"
+        }
+      ]),
       checkpoint('processed_data'),
       # Duplicate the stream to create aggregated data
       duplicate(
@@ -86,6 +138,58 @@ Flow(
           }
         )
       ),
+      update_schema('worldwide-aggregated', fields=[
+        {
+          "format": "default",
+          "name": "Province/State",
+          "type": "string"
+        },
+        {
+          "format": "default",
+          "name": "Country/Region",
+          "type": "string"
+        },
+        {
+          "decimalChar": ".",
+          "format": "default",
+          "groupChar": "",
+          "name": "Lat",
+          "type": "number"
+        },
+        {
+          "decimalChar": ".",
+          "format": "default",
+          "groupChar": "",
+          "name": "Long",
+          "type": "number"
+        },
+        {
+          "format": "%Y-%m-%d",
+          "name": "Date",
+          "type": "date"
+        },
+        {
+          "format": "default",
+          "groupChar": "",
+          "name": "Confirmed",
+          "title": "Cumulative total confirmed cases to date",
+          "type": "integer"
+        },
+        {
+          "format": "default",
+          "groupChar": "",
+          "name": "Recovered",
+          "title": "Cumulative total recovered cases to date",
+          "type": "integer"
+        },
+        {
+          "format": "default",
+          "groupChar": "",
+          "name": "Deaths",
+          "title": "Cumulative total deaths to date",
+          "type": "integer"
+        }
+      ]),
       update_package(
         name='covid-19',
         title='Novel Coronavirus 2019',