Browse Source

[schema][m]: update resources schema to include titles and change order of the columns.

master
anuveyatsu 4 years ago
parent
commit
326936d75f
1 changed file with 105 additions and 1 deletion
  1. +105
    -1
      process.py

+ 105
- 1
process.py View File

@ -1,4 +1,4 @@
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path, update_package, update_resource, join, join_with_self, add_computed_field, delete_fields, checkpoint, duplicate
from dataflows import Flow, load, unpivot, find_replace, set_type, dump_to_path, update_package, update_resource, update_schema, join, join_with_self, add_computed_field, delete_fields, checkpoint, duplicate
BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/' BASE_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
CONFIRMED = 'time_series_19-covid-Confirmed.csv' CONFIRMED = 'time_series_19-covid-Confirmed.csv'
@ -58,6 +58,58 @@ Flow(
), ),
delete_fields(['Case']), delete_fields(['Case']),
update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'), update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'),
# Declare the schema of the combined per-location time series: column order
# plus human-readable titles for the cumulative count columns.
# The target is the resource name assigned by the preceding update_resource
# step. The previous target, 'worldwide-aggregated', appears to be introduced
# only by the later "duplicate the stream" step, so it would not have
# addressed the resource that actually carries these per-location columns.
update_schema('time-series-19-covid-combined', fields=[
    {
        "format": "%Y-%m-%d",
        "name": "Date",
        "type": "date"
    },
    {
        "format": "default",
        "name": "Country/Region",
        "type": "string"
    },
    {
        "format": "default",
        "name": "Province/State",
        "type": "string"
    },
    {
        "decimalChar": ".",
        "format": "default",
        "groupChar": "",
        "name": "Lat",
        "type": "number"
    },
    {
        "decimalChar": ".",
        "format": "default",
        "groupChar": "",
        "name": "Long",
        "type": "number"
    },
    {
        "format": "default",
        "groupChar": "",
        "name": "Confirmed",
        "title": "Cumulative total confirmed cases to date",
        "type": "integer"
    },
    {
        "format": "default",
        "groupChar": "",
        "name": "Recovered",
        "title": "Cumulative total recovered cases to date",
        "type": "integer"
    },
    {
        "format": "default",
        "groupChar": "",
        "name": "Deaths",
        "title": "Cumulative total deaths to date",
        "type": "integer"
    }
]),
checkpoint('processed_data'), checkpoint('processed_data'),
# Duplicate the stream to create aggregated data # Duplicate the stream to create aggregated data
duplicate( duplicate(
@ -86,6 +138,58 @@ Flow(
} }
) )
), ),
# Declare the field list (order, types and titles) for the
# 'worldwide-aggregated' resource.
# NOTE(review): the list below contains per-location columns
# (Province/State, Country/Region, Lat, Long). Confirm that the aggregated
# resource really carries these columns at this point in the pipeline.
update_schema(
    'worldwide-aggregated',
    fields=[
        {"format": "default", "name": "Province/State", "type": "string"},
        {"format": "default", "name": "Country/Region", "type": "string"},
        {"decimalChar": ".", "format": "default", "groupChar": "", "name": "Lat", "type": "number"},
        {"decimalChar": ".", "format": "default", "groupChar": "", "name": "Long", "type": "number"},
        {"format": "%Y-%m-%d", "name": "Date", "type": "date"},
        # Cumulative counters: each gets a descriptive title.
        {
            "format": "default",
            "groupChar": "",
            "name": "Confirmed",
            "title": "Cumulative total confirmed cases to date",
            "type": "integer",
        },
        {
            "format": "default",
            "groupChar": "",
            "name": "Recovered",
            "title": "Cumulative total recovered cases to date",
            "type": "integer",
        },
        {
            "format": "default",
            "groupChar": "",
            "name": "Deaths",
            "title": "Cumulative total deaths to date",
            "type": "integer",
        },
    ],
),
update_package( update_package(
name='covid-19', name='covid-19',
title='Novel Coronavirus 2019', title='Novel Coronavirus 2019',


Loading…
Cancel
Save