diff --git a/data/data-pipeline/data_pipeline/etl/score/config/__init__.py b/data/data-pipeline/data_pipeline/etl/score/config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
new file mode 100644
index 00000000..1396c3fc
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
@@ -0,0 +1,47 @@
+---
+datasets:
+  - long_name: "FEMA National Risk Index"
+    short_name: "nri"
+    module_name: national_risk_index
+    last_updated_year: 2020
+    source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip
+    extracted_file_name: "NRI_Table_CensusTracts.csv"
+    description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
+    input_geoid_tract_field_name: "TRACTFIPS"
+    null_representation: "None"
+    load_fields:
+      - short_name: "ex_loss"
+        df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
+        long_name: "FEMA Risk Index Expected Annual Loss Score"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_pop_loss"
+        df_field_name: "EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected population loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_ag_loss"
+        df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_bldg_loss"
+        df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected building loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "has_ag_val"
+        df_field_name: "CONTAINS_AGRIVALUE"
+        long_name: "Contains agricultural value"
+        field_type: bool
+        tile_include: true
+        csv_download: true
+        excel_download: true
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py b/data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
new file mode 100644
index 00000000..cbead41c
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
@@ -0,0 +1,50 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import List
+
+
+class FieldType(Enum):
+    """Value types a load field may declare through ``field_type``."""
+
+    STRING = "string"
+    INT64 = "int64"
+    BOOL = "bool"
+    FLOAT = "float"
+    PERCENTAGE = "percentage"
+    LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
+
+
+@dataclass
+class DatasetsConfig:
+    """Schema for the dataset configuration; keys match config/datasets.yml."""
+
+    @dataclass
+    class Dataset:
+        """One entry of the ``datasets`` list."""
+
+        @dataclass
+        class LoadField:
+            """One entry of a dataset's ``load_fields`` list."""
+
+            short_name: str
+            df_field_name: str
+            long_name: str
+            # NOTE(review): "by_value" is presumably read by the schema loader
+            # to match enum members by value (e.g. "float") -- confirm.
+            field_type: FieldType = field(metadata={"by_value": True})
+            tile_include: bool
+            csv_download: bool
+            excel_download: bool
+
+        long_name: str
+        short_name: str
+        module_name: str
+        last_updated_year: int
+        source_url: str
+        extracted_file_name: str
+        description: str
+        input_geoid_tract_field_name: str
+        null_representation: str
+        load_fields: List[LoadField]
+
+    datasets: List[Dataset]