Browse Source

[schema change][xs]: specify the missing values property in the schema so that we don't get a cast error on integer fields for the value 'None'.

For details see https://github.com/datasets/covid-19/runs/529587569?check_suite_focus=true
master
anuveyatsu 4 years ago
parent
commit
c150f41eea
5 changed files with 5101 additions and 2818 deletions
  1. +915
    -0
      data/countries-aggregated.csv
  2. +4161
    -2806
      data/time-series-19-covid-combined.csv
  3. +1
    -0
      data/worldwide-aggregated.csv
  4. +21
    -9
      datapackage.json
  5. +3
    -3
      process.py

+ 915
- 0
data/countries-aggregated.csv
File diff suppressed because it is too large
View File


+ 4161
- 2806
data/time-series-19-covid-combined.csv
File diff suppressed because it is too large
View File


+ 1
- 0
data/worldwide-aggregated.csv View File

@ -60,3 +60,4 @@ Date,Confirmed,Recovered,Deaths
2020-03-20,272166,87256,11299 2020-03-20,272166,87256,11299
2020-03-21,304524,91499,12973 2020-03-21,304524,91499,12973
2020-03-22,335955,97704,14632 2020-03-22,335955,97704,14632
2020-03-23,336004,98334,14643

+ 21
- 9
datapackage.json View File

@ -1,12 +1,12 @@
{ {
"bytes": 1796366,
"count_of_rows": 40199,
"hash": "a0c947ee02ae0feabc93c3a7dc4d2a52",
"bytes": 1905585,
"count_of_rows": 42470,
"hash": "81aae76b0bd89ffd2f3fe135d619d96a",
"name": "covid-19", "name": "covid-19",
"profile": "data-package", "profile": "data-package",
"resources": [ "resources": [
{ {
"bytes": 1503743,
"bytes": 1586504,
"dialect": { "dialect": {
"caseSensitiveHeader": false, "caseSensitiveHeader": false,
"delimiter": ",", "delimiter": ",",
@ -18,7 +18,7 @@
}, },
"encoding": "utf-8", "encoding": "utf-8",
"format": "csv", "format": "csv",
"hash": "d9550a8b6acfae7b139b7c61af1993f1",
"hash": "c1e32d419f94054e3ff37191e64d26b4",
"name": "time-series-19-covid-combined", "name": "time-series-19-covid-combined",
"path": "data/time-series-19-covid-combined.csv", "path": "data/time-series-19-covid-combined.csv",
"profile": "tabular-data-resource", "profile": "tabular-data-resource",
@ -76,12 +76,14 @@
} }
], ],
"missingValues": [ "missingValues": [
"None",
null,
"" ""
] ]
} }
}, },
{ {
"bytes": 290884,
"bytes": 317311,
"dialect": { "dialect": {
"delimiter": ",", "delimiter": ",",
"doubleQuote": true, "doubleQuote": true,
@ -91,7 +93,7 @@
}, },
"encoding": "utf-8", "encoding": "utf-8",
"format": "csv", "format": "csv",
"hash": "c13f9c5f7cd0254ef149c378c3a10902",
"hash": "a59719082a4e1e20769a515e330338a5",
"name": "countries-aggregated", "name": "countries-aggregated",
"path": "data/countries-aggregated.csv", "path": "data/countries-aggregated.csv",
"profile": "data-resource", "profile": "data-resource",
@ -128,11 +130,16 @@
"title": "Cumulative total deaths to date", "title": "Cumulative total deaths to date",
"type": "integer" "type": "integer"
} }
],
"missingValues": [
"None",
null,
""
] ]
} }
}, },
{ {
"bytes": 1739,
"bytes": 1770,
"dialect": { "dialect": {
"delimiter": ",", "delimiter": ",",
"doubleQuote": true, "doubleQuote": true,
@ -142,7 +149,7 @@
}, },
"encoding": "utf-8", "encoding": "utf-8",
"format": "csv", "format": "csv",
"hash": "ef0e2669f3d7949ff7f1e6a8404f1e68",
"hash": "0e1f20b63ee4be40bff10a3051864fc8",
"name": "worldwide-aggregated", "name": "worldwide-aggregated",
"path": "data/worldwide-aggregated.csv", "path": "data/worldwide-aggregated.csv",
"profile": "data-resource", "profile": "data-resource",
@ -174,6 +181,11 @@
"title": "Cumulative total deaths to date", "title": "Cumulative total deaths to date",
"type": "integer" "type": "integer"
} }
],
"missingValues": [
"None",
null,
""
] ]
} }
} }


+ 3
- 3
process.py View File

@ -62,7 +62,7 @@ Flow(
), ),
delete_fields(['Case']), delete_fields(['Case']),
update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'), update_resource('time_series_19-covid-Deaths', name='time-series-19-covid-combined', path='data/time-series-19-covid-combined.csv'),
update_schema('time-series-19-covid-combined', fields=[
update_schema('time-series-19-covid-combined', missingValues=['None', None, ''], fields=[
{ {
"format": "%Y-%m-%d", "format": "%Y-%m-%d",
"name": "Date", "name": "Date",
@ -142,7 +142,7 @@ Flow(
} }
) )
), ),
update_schema('worldwide-aggregated', fields=[
update_schema('worldwide-aggregated', missingValues=['None', None, ''], fields=[
{ {
"format": "%Y-%m-%d", "format": "%Y-%m-%d",
"name": "Date", "name": "Date",
@ -201,7 +201,7 @@ Flow(
} }
) )
), ),
update_schema('countries-aggregated', fields=[
update_schema('countries-aggregated', missingValues=['None', None, ''], fields=[
{ {
"format": "%Y-%m-%d", "format": "%Y-%m-%d",
"name": "Date", "name": "Date",


Loading…
Cancel
Save