From a127ac2e7211ac752fc4f8c66db4bca7ecc379aa Mon Sep 17 00:00:00 2001 From: Jorge Escobar Date: Mon, 16 May 2022 14:47:31 -0400 Subject: [PATCH] PR Review --- data/data-pipeline/data_pipeline/etl/base.py | 2 +- .../data_pipeline/etl/score/config/datasets.yml | 2 +- .../data_pipeline/etl/score/schemas/datasets.py | 10 +++++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py index f0e87ee1..7da25cdf 100644 --- a/data/data-pipeline/data_pipeline/etl/base.py +++ b/data/data-pipeline/data_pipeline/etl/base.py @@ -124,7 +124,7 @@ class ExtractTransformLoad: cls.INPUT_GEOID_TRACT_FIELD_NAME = dataset_config[ "input_geoid_tract_field_name" ] - cls.NULL_REPRESENTATION = dataset_config["null_representation"] + cls.NULL_REPRESENTATION = dataset_config.get("null_representation", []) # get the columns to write on the CSV cls.COLUMNS_TO_KEEP = [ diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml index 9279d06f..a83f0447 100644 --- a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml +++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml @@ -8,7 +8,7 @@ datasets: extracted_file_name: "NRI_Table_CensusTracts.csv" description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards." 
input_geoid_tract_field_name: "TRACTFIPS" - null_representation: "None" + null_representation: ["None"] load_fields: - short_name: "ex_loss" df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME" diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py index cbead41c..37691283 100644 --- a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py +++ b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum -from typing import List +from typing import List, Optional class FieldType(Enum): @@ -21,7 +21,11 @@ class DatasetsConfig: short_name: str df_field_name: str long_name: str - field_type: FieldType = field(metadata={"by_value": True}) + field_type: FieldType = field( + metadata={"by_value": True} + ) # this will load the field type's Enum value + # instead of the member name, i.e. "string" and not + # STRING tile_include: bool csv_download: bool excel_download: bool @@ -34,7 +38,7 @@ class DatasetsConfig: extracted_file_name: str description: str input_geoid_tract_field_name: str - null_representation: str + null_representation: Optional[list] load_fields: List[LoadField] datasets: List[Dataset]