PR Review

2025-02-23 10:04:18 -08:00 · 2022-05-16 14:47:31 -04:00 · 2022-05-16 14:47:31 -04:00 · a127ac2e72
commit a127ac2e72
parent 20f4337ba8
3 changed files with 9 additions and 5 deletions
--- a/data/data-pipeline/data_pipeline/etl/base.py
+++ b/data/data-pipeline/data_pipeline/etl/base.py
@@ -124,7 +124,7 @@ class ExtractTransformLoad:
        cls.INPUT_GEOID_TRACT_FIELD_NAME = dataset_config[
            "input_geoid_tract_field_name"
        ]
-        cls.NULL_REPRESENTATION = dataset_config["null_representation"]
+        cls.NULL_REPRESENTATION = dataset_config.get("null_representation", [])

        # get the columns to write on the CSV
        cls.COLUMNS_TO_KEEP = [
--- a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
+++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
@@ -8,7 +8,7 @@ datasets:
    extracted_file_name: "NRI_Table_CensusTracts.csv"
    description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
    input_geoid_tract_field_name: "TRACTFIPS"
-    null_representation: "None"
+    null_representation: ["None"]
    load_fields:
      - short_name: "ex_loss"
        df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
--- a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
+++ b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import List
+from typing import List, Optional


 class FieldType(Enum):
@@ -21,7 +21,11 @@ class DatasetsConfig:
            short_name: str
            df_field_name: str
            long_name: str
-            field_type: FieldType = field(metadata={"by_value": True})
+            field_type: FieldType = field(
+                metadata={"by_value": True}
+            )  # this will load the field type's Enum value
+            # instead of the member name, i.e. "string" and not
+            # STRING
            tile_include: bool
            csv_download: bool
            excel_download: bool
@@ -34,7 +38,7 @@ class DatasetsConfig:
        extracted_file_name: str
        description: str
        input_geoid_tract_field_name: str
-        null_representation: str
+        null_representation: Optional[list]
        load_fields: List[LoadField]

    datasets: List[Dataset]