diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 8c610fac..d26c8995 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -313,6 +313,9 @@ class ScoreETL(ExtractTransformLoad): field_names.UNEMPLOYMENT_FIELD, field_names.HT_INDEX_FIELD, field_names.MEDIAN_HOUSE_VALUE_FIELD, + field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME, + field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME, + field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME, ] non_numeric_columns = [ diff --git a/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py b/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py index bbfdd31f..24d2303c 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py @@ -1,3 +1,8 @@ +# Note: I'm not sure why pylint is so upset with the particular dataframe `df_nri`, +# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498 +# pylint: disable=unsubscriptable-object +# pylint: disable=unsupported-assignment-operation + import pandas as pd from data_pipeline.etl.base import ExtractTransformLoad @@ -21,19 +26,37 @@ class NationalRiskIndexETL(ExtractTransformLoad): self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = ( "EAL_SCORE" ) + self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = ( "FEMA Risk Index Expected Annual Loss Score" ) - self.EXPECTED_ANNUAL_LOSS_RATE = ( - "FEMA Risk Index Expected Annual Loss Rate" + self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB" + self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = ( + "EAL_VALA" + ) + self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP" + self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE" + self.POPULATION_INPUT_FIELD_NAME = "POPULATION" + self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE" + + self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = ( + "Expected building loss rate (Natural Hazards Risk Index)" + ) + self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME = ( + "Expected agricultural loss rate (Natural Hazards Risk Index)" + ) + self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME = ( + "Expected population loss rate (Natural Hazards Risk Index)" ) # Note: also need to edit transform step to add fields to output. self.COLUMNS_TO_KEEP = [ self.GEOID_FIELD_NAME, self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME, - self.EXPECTED_ANNUAL_LOSS_RATE, + self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME, + self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME, + self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME, ] self.df: pd.DataFrame @@ -77,56 +100,35 @@ class NationalRiskIndexETL(ExtractTransformLoad): inplace=True, ) - # Calculate a risk score that does not include FEMA's measure of community vulnerability. - disaster_categories = [ - "AVLN", # Avalanche - "CFLD", # Coastal Flooding - "CWAV", # Cold Wave - "DRGT", # Drought - "ERQK", # Earthquake - "HAIL", # Hail - "HWAV", # Heat Wave - "HRCN", # Hurricane - "ISTM", # Ice Storm - "LNDS", # Landslide - "LTNG", # Lightning - "RFLD", # Riverine Flooding - "SWND", # Strong Wind - "TRND", # Tornado - "TSUN", # Tsunami - "VLCN", # Volcanic Activity - "WFIR", # Wildfire - "WNTW", # Winter Weather - ] + # Population EAL Rate = Eal Valp / Population + df_nri[self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME] = ( + df_nri[self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME] + / df_nri[self.POPULATION_INPUT_FIELD_NAME] + ) - # Note: I'm not sure why pylint is so upset with this particular dataframe, - # but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498 - for category in disaster_categories: - df_nri[ # pylint: disable=unsupported-assignment-operation - f"{category}" - ] = ( - df_nri[ # pylint: disable=unsubscriptable-object - f"{category}_EALT" - ] # Expected Annual Loss - Total - / df_nri[ # pylint: disable=unsubscriptable-object - f"{category}_EXPT" - ] - ) - df_nri[ # pylint: disable=unsupported-assignment-operation - self.EXPECTED_ANNUAL_LOSS_RATE - ] = df_nri[ # pylint: disable=unsubscriptable-object - disaster_categories - ].sum( - axis=1 + # Agriculture EAL Rate = Eal Vala / Agrivalue + df_nri[self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME] = ( + df_nri[ + self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME + ] + / df_nri[self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME] + ) + + # divide EAL_VALB (Expected Annual Loss - Building Value) by BUILDVALUE (Building Value ($)). + df_nri[self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME] = ( + df_nri[self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME] + / df_nri[self.BUILDING_VALUE_INPUT_FIELD_NAME] ) # Reduce columns. # Note: normally we wait until writing to CSV for this step, but since the file is so huge, # move this up here for performance reasons. - df_nri = df_nri[ # pylint: disable=unsubscriptable-object + df_nri = df_nri[ [ self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME, - self.EXPECTED_ANNUAL_LOSS_RATE, + self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME, + self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME, + self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME, TRACT_COL, ] ] diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index f1f30a76..a3a043ae 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -60,6 +60,15 @@ FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = ( "FEMA Risk Index Expected Annual Loss Rate" ) +EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = ( + "Expected building loss rate (Natural Hazards Risk Index)" +) +EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME = ( + "Expected agricultural loss rate (Natural Hazards Risk Index)" +) +EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME = ( + "Expected population loss rate (Natural Hazards Risk Index)" +) # Environment DIESEL_FIELD = "Diesel particulate matter" diff --git a/data/data-pipeline/data_pipeline/score/score_l.py b/data/data-pipeline/data_pipeline/score/score_l.py index 6eba9cb1..5b089efa 100644 --- a/data/data-pipeline/data_pipeline/score/score_l.py +++ b/data/data-pipeline/data_pipeline/score/score_l.py @@ -64,19 +64,37 @@ class ScoreL(Score): # Low income: In 60th percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] + climate_criteria = ( + ( + self.df[ + field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + | ( + self.df[ + field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + | ( + self.df[ + field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + ) + return ( self.df[ field_names.POVERTY_LESS_THAN_200_FPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX ] > self.LOW_INCOME_THRESHOLD - ) & ( - self.df[ - field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX - ] - > self.ENVIRONMENTAL_BURDEN_THRESHOLD - ) + ) & climate_criteria def _energy_factor(self) -> bool: # In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score) diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv index 8d778f09..e04ae7c1 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv @@ -1,6 +1,6 @@ -TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT -40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5 -20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5 -40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5 -21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5 -21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5 +TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT,EAL_VALA,EAL_VALP,EAL_VALB,AGRIVALUE,POPULATION,BUILDVALUE +40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,234.7446176,204.8883901,126.4079101 +20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,238.9921867,179.4960371,96.24552261 +40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,187.5793934,183.4527834,106.4706219 +21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,142.7041082,236.9465219,175.3803106 +21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,54.5,257.1497377,66.41934096,177.9963115 diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv index 480cd330..9f8cd7f4 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv @@ -1,11 +1,11 @@ -GEOID10,FEMA Risk Index Expected Annual Loss Score,FEMA Risk Index Expected Annual Loss Rate -050070403001,11.5,9.540442348853764 -050070403002,11.5,9.540442348853764 -050010201001,12.5,9.759472262661436 -050010201002,12.5,9.759472262661436 -150070405001,13.5,9.967264470453644 -150070405002,13.5,9.967264470453644 -150010210101,14.5,10.16467498073544 -150010210102,14.5,10.16467498073544 -150010211011,15.5,10.352473850464468 -150010211012,15.5,10.352473850464468 +GEOID10,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index) +050070403001,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629 +050070403002,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629 +050010201001,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664 +050010201002,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664 +150070405001,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781 +150070405002,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781 +150010210101,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191 +150010210102,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191 +150010211011,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156 +150010211012,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156 diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv index d5d2b130..8b33b2fc 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv @@ -1,11 +1,11 @@ -GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,FEMA Risk Index Expected Annual Loss Rate -050070403001,05007040300,11.5,9.540442348853764 -050070403002,05007040300,11.5,9.540442348853764 -050010201001,05001020100,12.5,9.759472262661436 -050010201002,05001020100,12.5,9.759472262661436 -150070405001,15007040500,13.5,9.967264470453644 -150070405002,15007040500,13.5,9.967264470453644 -150010210101,15001021010,14.5,10.164674980735441 -150010210102,15001021010,14.5,10.164674980735441 -150010211011,15001021101,15.5,10.352473850464467 -150010211012,15001021101,15.5,10.352473850464467 +GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index) +050070403001,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297 +050070403002,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297 +050010201001,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664 +050010201002,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664 +150070405001,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781 +150070405002,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781 +150010210101,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913 +150010210102,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913 +150010211011,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606 +150010211012,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606 diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py index 04c904a0..7cef406f 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py @@ -63,7 +63,7 @@ class TestNationalRiskIndexETL: etl.transform() # validation - assert etl.df.shape == (10, 4) + assert etl.df.shape == (10, 6) pd.testing.assert_frame_equal(etl.df, expected) def test_load(self, mock_etl): @@ -93,5 +93,5 @@ class TestNationalRiskIndexETL: # validation assert output_path.exists() - assert output.shape == (10, 3) + assert output.shape == (10, 5) pd.testing.assert_frame_equal(output, expected)