From d3a54e4820fcfe6be57b920dd9d7256895f573eb Mon Sep 17 00:00:00 2001
From: Jorge Escobar
Date: Tue, 5 Apr 2022 16:27:00 -0400
Subject: [PATCH] NRI dataset and initial score YAML configuration

---
Review note (ignored by git am): the `has_ag_val` load field previously
reused the `ex_bldg_loss` long_name verbatim; it now has its own label.
Line counts per file are unchanged, so the hunk headers below still apply.

 .../etl/score/config/__init__.py  |  0
 .../etl/score/config/datasets.yml | 47 +++++++++++++++++++
 .../etl/score/schemas/__init__.py |  0
 .../etl/score/schemas/datasets.py | 40 ++++++++++++++++
 4 files changed, 87 insertions(+)
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/config/__init__.py
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py

diff --git a/data/data-pipeline/data_pipeline/etl/score/config/__init__.py b/data/data-pipeline/data_pipeline/etl/score/config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
new file mode 100644
index 00000000..1396c3fc
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
@@ -0,0 +1,47 @@
+---
+datasets:
+  - long_name: "FEMA National Risk Index"
+    short_name: "nri"
+    module_name: national_risk_index
+    last_updated_year: 2020
+    source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip
+    extracted_file_name: "NRI_Table_CensusTracts.csv"
+    description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
+    input_geoid_tract_field_name: "TRACTFIPS"
+    null_representation: "None"
+    load_fields:
+      - short_name: "ex_loss"
+        df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
+        long_name: "FEMA Risk Index Expected Annual Loss Score"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_pop_loss"
+        df_field_name: "EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected population loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_ag_loss"
+        df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_bldg_loss"
+        df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected building loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "has_ag_val"
+        df_field_name: "CONTAINS_AGRIVALUE"
+        long_name: "Contains agricultural value"
+        field_type: bool
+        tile_include: true
+        csv_download: true
+        excel_download: true
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py b/data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
new file mode 100644
index 00000000..cbead41c
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
@@ -0,0 +1,40 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import List
+
+
+class FieldType(Enum):
+    STRING = "string"
+    INT64 = "int64"
+    BOOL = "bool"
+    FLOAT = "float"
+    PERCENTAGE = "percentage"
+    LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
+
+
+@dataclass
+class DatasetsConfig:
+    @dataclass
+    class Dataset:
+        @dataclass
+        class LoadField:
+            short_name: str
+            df_field_name: str
+            long_name: str
+            field_type: FieldType = field(metadata={"by_value": True})
+            tile_include: bool
+            csv_download: bool
+            excel_download: bool
+
+        long_name: str
+        short_name: str
+        module_name: str
+        last_updated_year: int
+        source_url: str
+        extracted_file_name: str
+        description: str
+        input_geoid_tract_field_name: str
+        null_representation: str
+        load_fields: List[LoadField]
+
+    datasets: List[Dataset]