some notes for implementation

This commit is contained in:
Jorge Escobar 2022-04-01 15:04:47 -04:00
parent 1c13259edd
commit 6424187d0f
3 changed files with 13 additions and 10 deletions

View file

@@ -82,7 +82,7 @@ class ExtractTransformLoad:
# This is a classmethod so it can be used by `get_data_frame` without
# needing to create an instance of the class. This is a use case in `etl_score`.
@classmethod
def _get_output_file_path(cls) -> pathlib.Path:
def _get_output_file_path(dataset_yaml_config) -> pathlib.Path:
"""Generate the output file path."""
if cls.NAME is None:
raise NotImplementedError(
@@ -234,7 +234,9 @@ class ExtractTransformLoad:
"""
logger.info(f"Saving `{self.NAME}` CSV")
# Create directory if necessary.
## Read YAML Config for this module
# Create directory from YAML if necessary.
output_file_path = self._get_output_file_path()
output_file_path.parent.mkdir(parents=True, exist_ok=True)

View file

@@ -2,7 +2,8 @@
datasets:
- long_name: "FEMA National Risk Index"
short_name: "nri"
module_name: national_risk_index_2020
module_name: national_risk_index
last_updated_year: 2020
source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip
extracted_file_name: "NRI_Table_CensusTracts.csv"
description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."

View file

@@ -29,25 +29,24 @@ class NationalRiskIndexETL(ExtractTransformLoad):
def __init__(self):
self.INPUT_CSV = self.get_tmp_path() / "NRI_Table_CensusTracts.csv"
## COLUMNS ON CSV ##
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = (
"EAL_SCORE"
)
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
"FEMA Risk Index Expected Annual Loss Score"
)
self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB"
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = (
"EAL_VALA"
)
self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP"
self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE"
self.POPULATION_INPUT_FIELD_NAME = "POPULATION"
self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE"
## /COLUMNS ON CSV ##
## COLUMNS TO WRITE ##
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
"FEMA Risk Index Expected Annual Loss Score"
)
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
"Expected building loss rate (Natural Hazards Risk Index)"
)
@@ -58,6 +57,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
"Expected population loss rate (Natural Hazards Risk Index)"
)
self.CONTAINS_AGRIVALUE = "Contains agricultural value"
## /COLUMNS TO WRITE ##
self.COLUMNS_TO_KEEP = [
self.GEOID_TRACT_FIELD_NAME,