From d3a54e4820fcfe6be57b920dd9d7256895f573eb Mon Sep 17 00:00:00 2001
From: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Date: Tue, 5 Apr 2022 16:27:00 -0400
Subject: [PATCH] NRI dataset and initial score YAML configuration

---
 .../etl/score/config/__init__.py              |  0
 .../etl/score/config/datasets.yml             | 47 +++++++++++++++++++
 .../etl/score/schemas/__init__.py             |  0
 .../etl/score/schemas/datasets.py             | 40 ++++++++++++++++
 4 files changed, 87 insertions(+)
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/config/__init__.py
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py
 create mode 100644 data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py

diff --git a/data/data-pipeline/data_pipeline/etl/score/config/__init__.py b/data/data-pipeline/data_pipeline/etl/score/config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
new file mode 100644
index 00000000..1396c3fc
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
@@ -0,0 +1,47 @@
+---
+datasets:
+  - long_name: "FEMA National Risk Index"
+    short_name: "nri"
+    module_name: national_risk_index
+    last_updated_year: 2020
+    source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip
+    extracted_file_name: "NRI_Table_CensusTracts.csv"
+    description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
+    input_geoid_tract_field_name: "TRACTFIPS"
+    null_representation: "None"
+    load_fields:
+      - short_name: "ex_loss"
+        df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
+        long_name: "FEMA Risk Index Expected Annual Loss Score"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_pop_loss"
+        df_field_name: "EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected population loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_ag_loss"
+        df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "ex_bldg_loss"
+        df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
+        long_name: "Expected building loss rate (Natural Hazards Risk Index)"
+        field_type: float
+        tile_include: true
+        csv_download: true
+        excel_download: true
+      - short_name: "has_ag_val"
+        df_field_name: "CONTAINS_AGRIVALUE"
+        long_name: "Contains agricultural value"
+        field_type: bool
+        tile_include: true
+        csv_download: true
+        excel_download: true
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py b/data/data-pipeline/data_pipeline/etl/score/schemas/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
new file mode 100644
index 00000000..cbead41c
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
@@ -0,0 +1,40 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import List
+
+
+class FieldType(Enum):  # Allowed values for LoadField.field_type.
+    STRING = "string"
+    INT64 = "int64"
+    BOOL = "bool"  # used by datasets.yml in this patch (has_ag_val)
+    FLOAT = "float"  # used by datasets.yml in this patch (loss fields)
+    PERCENTAGE = "percentage"
+    LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
+
+
+@dataclass
+class DatasetsConfig:  # Mirrors the key structure of config/datasets.yml.
+    @dataclass
+    class Dataset:  # One entry of the top-level `datasets` list.
+        @dataclass
+        class LoadField:  # One entry of a dataset's `load_fields` list.
+            short_name: str
+            df_field_name: str
+            long_name: str
+            field_type: FieldType = field(metadata={"by_value": True})  # still required (no default); by_value presumably read by the YAML/schema loader - TODO confirm
+            tile_include: bool
+            csv_download: bool
+            excel_download: bool
+
+        long_name: str
+        short_name: str
+        module_name: str
+        last_updated_year: int
+        source_url: str
+        extracted_file_name: str
+        description: str
+        input_geoid_tract_field_name: str
+        null_representation: str  # a string in the YAML (e.g. "None"), not Python None
+        load_fields: List[LoadField]
+
+    datasets: List[Dataset]  # matches the top-level `datasets` key