some notes for implementation

This commit is contained in:
Jorge Escobar 2022-04-01 15:04:47 -04:00
parent 1c13259edd
commit 6424187d0f
3 changed files with 13 additions and 10 deletions

View file

@@ -82,7 +82,7 @@ class ExtractTransformLoad:
# This is a classmethod so it can be used by `get_data_frame` without # This is a classmethod so it can be used by `get_data_frame` without
# needing to create an instance of the class. This is a use case in `etl_score`. # needing to create an instance of the class. This is a use case in `etl_score`.
@classmethod @classmethod
def _get_output_file_path(cls) -> pathlib.Path: def _get_output_file_path(dataset_yaml_config) -> pathlib.Path:
"""Generate the output file path.""" """Generate the output file path."""
if cls.NAME is None: if cls.NAME is None:
raise NotImplementedError( raise NotImplementedError(
@@ -234,7 +234,9 @@ class ExtractTransformLoad:
""" """
logger.info(f"Saving `{self.NAME}` CSV") logger.info(f"Saving `{self.NAME}` CSV")
# Create directory if necessary. ## Read YAML Config for this module
# Create directory from YAML if necessary.
output_file_path = self._get_output_file_path() output_file_path = self._get_output_file_path()
output_file_path.parent.mkdir(parents=True, exist_ok=True) output_file_path.parent.mkdir(parents=True, exist_ok=True)

View file

@@ -2,7 +2,8 @@
datasets: datasets:
- long_name: "FEMA National Risk Index" - long_name: "FEMA National Risk Index"
short_name: "nri" short_name: "nri"
module_name: national_risk_index_2020 module_name: national_risk_index
last_updated_year: 2020
source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip
extracted_file_name: "NRI_Table_CensusTracts.csv" extracted_file_name: "NRI_Table_CensusTracts.csv"
description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards." description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."

View file

@@ -29,25 +29,24 @@ class NationalRiskIndexETL(ExtractTransformLoad):
def __init__(self): def __init__(self):
self.INPUT_CSV = self.get_tmp_path() / "NRI_Table_CensusTracts.csv" self.INPUT_CSV = self.get_tmp_path() / "NRI_Table_CensusTracts.csv"
## COLUMNS ON CSV ##
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = ( self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = (
"EAL_SCORE" "EAL_SCORE"
) )
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
"FEMA Risk Index Expected Annual Loss Score"
)
self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB" self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB"
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = ( self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = (
"EAL_VALA" "EAL_VALA"
) )
self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP" self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP"
self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE" self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE"
self.POPULATION_INPUT_FIELD_NAME = "POPULATION" self.POPULATION_INPUT_FIELD_NAME = "POPULATION"
self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE" self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE"
## /COLUMNS ON CSV ##
## COLUMNS TO WRITE ##
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
"FEMA Risk Index Expected Annual Loss Score"
)
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = ( self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
"Expected building loss rate (Natural Hazards Risk Index)" "Expected building loss rate (Natural Hazards Risk Index)"
) )
@@ -58,6 +57,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
"Expected population loss rate (Natural Hazards Risk Index)" "Expected population loss rate (Natural Hazards Risk Index)"
) )
self.CONTAINS_AGRIVALUE = "Contains agricultural value" self.CONTAINS_AGRIVALUE = "Contains agricultural value"
## /COLUMNS TO WRITE ##
self.COLUMNS_TO_KEEP = [ self.COLUMNS_TO_KEEP = [
self.GEOID_TRACT_FIELD_NAME, self.GEOID_TRACT_FIELD_NAME,