diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py index 6a89861a..82ee28b8 100644 --- a/data/data-pipeline/data_pipeline/etl/base.py +++ b/data/data-pipeline/data_pipeline/etl/base.py @@ -82,7 +82,7 @@ class ExtractTransformLoad: # This is a classmethod so it can be used by `get_data_frame` without # needing to create an instance of the class. This is a use case in `etl_score`. @classmethod - def _get_output_file_path(cls) -> pathlib.Path: + def _get_output_file_path(cls, dataset_yaml_config=None) -> pathlib.Path: """Generate the output file path.""" if cls.NAME is None: raise NotImplementedError( @@ -234,7 +234,9 @@ class ExtractTransformLoad: """ logger.info(f"Saving `{self.NAME}` CSV") - # Create directory if necessary. + # TODO: read the YAML config for this module. + + # Create directory if necessary (not yet driven by the YAML config). output_file_path = self._get_output_file_path() output_file_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml index 8514d179..6ef7395c 100644 --- a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml +++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml @@ -2,7 +2,8 @@ datasets: - long_name: "FEMA National Risk Index" short_name: "nri" - module_name: national_risk_index_2020 + module_name: national_risk_index + last_updated_year: 2020 source_url: https://hazards.fema.gov/nri/Content/StaticDocuments/DataDownload//NRI_Table_CensusTracts/NRI_Table_CensusTracts.zip extracted_file_name: "NRI_Table_CensusTracts.csv" description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
diff --git a/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py b/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py index cdf00af3..58e9f2a8 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py @@ -29,25 +29,24 @@ class NationalRiskIndexETL(ExtractTransformLoad): def __init__(self): self.INPUT_CSV = self.get_tmp_path() / "NRI_Table_CensusTracts.csv" + ## COLUMNS ON CSV ## self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = ( "EAL_SCORE" ) - - self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = ( - "FEMA Risk Index Expected Annual Loss Score" - ) - self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB" - self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = ( "EAL_VALA" ) self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP" - self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE" self.POPULATION_INPUT_FIELD_NAME = "POPULATION" self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE" + ## /COLUMNS ON CSV ## + ## COLUMNS TO WRITE ## + self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = ( + "FEMA Risk Index Expected Annual Loss Score" + ) self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = ( "Expected building loss rate (Natural Hazards Risk Index)" ) @@ -58,6 +57,7 @@ class NationalRiskIndexETL(ExtractTransformLoad): "Expected population loss rate (Natural Hazards Risk Index)" ) self.CONTAINS_AGRIVALUE = "Contains agricultural value" + ## /COLUMNS TO WRITE ## self.COLUMNS_TO_KEEP = [ self.GEOID_TRACT_FIELD_NAME,