Browse Source

[data]: join on country field as well to avoid mismatch

master
zelima 4 years ago
parent
commit
489121a85a
3 changed files with 8362 additions and 4549 deletions
  1. +5
    -5
      datapackage.json
  2. +4
    -4
      process.py
  3. +8353
    -4540
      time-series-19-covid-combined.csv

+ 5
- 5
datapackage.json View File

@ -1,11 +1,11 @@
{ {
"bytes": 463524,
"count_of_rows": 8955,
"hash": "2e74b71787289aac912b647688893625",
"bytes": 672473,
"count_of_rows": 12768,
"hash": "084ed2ea37dd6bce00842d9176f7f959",
"profile": "data-package", "profile": "data-package",
"resources": [ "resources": [
{ {
"bytes": 463524,
"bytes": 672473,
"dialect": { "dialect": {
"caseSensitiveHeader": false, "caseSensitiveHeader": false,
"delimiter": ",", "delimiter": ",",
@ -17,7 +17,7 @@
}, },
"encoding": "utf-8", "encoding": "utf-8",
"format": "csv", "format": "csv",
"hash": "88df9ccb75f4858ce93bc6607a9fefec",
"hash": "edbef7a3c0373565e8154cf7800a7907",
"name": "time-series-19-covid-combined", "name": "time-series-19-covid-combined",
"path": "time-series-19-covid-combined.csv", "path": "time-series-19-covid-combined.csv",
"profile": "tabular-data-resource", "profile": "tabular-data-resource",


+ 4
- 4
process.py View File

@ -30,10 +30,10 @@ Flow(
set_type('Case', type='number', resources=None), set_type('Case', type='number', resources=None),
join( join(
source_name='time_series_19-covid-Confirmed', source_name='time_series_19-covid-Confirmed',
source_key=['Province/State', 'Date'],
source_key=['Province/State', 'Country/Region', 'Date'],
source_delete=True, source_delete=True,
target_name='time_series_19-covid-Deaths', target_name='time_series_19-covid-Deaths',
target_key=['Province/State', 'Date'],
target_key=['Province/State', 'Country/Region', 'Date'],
fields=dict(Confirmed={ fields=dict(Confirmed={
'name': 'Case', 'name': 'Case',
'aggregate': 'first' 'aggregate': 'first'
@ -41,10 +41,10 @@ Flow(
), ),
join( join(
source_name='time_series_19-covid-Recovered', source_name='time_series_19-covid-Recovered',
source_key=['Province/State', 'Date'],
source_key=['Province/State', 'Country/Region', 'Date'],
source_delete=True, source_delete=True,
target_name='time_series_19-covid-Deaths', target_name='time_series_19-covid-Deaths',
target_key=['Province/State', 'Date'],
target_key=['Province/State', 'Country/Region', 'Date'],
fields=dict(Recovered={ fields=dict(Recovered={
'name': 'Case', 'name': 'Case',
'aggregate': 'first' 'aggregate': 'first'


+ 8353
- 4540
time-series-19-covid-combined.csv
File diff suppressed because it is too large
View File


Loading…
Cancel
Save