mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 09:41:26 -08:00
Issue 883: Update FEMA risk index measure (#884)
* ETL updated * Adding three fields to score
This commit is contained in:
parent
05ebf9b48c
commit
21834b4a91
8 changed files with 114 additions and 82 deletions
|
@ -313,6 +313,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.HT_INDEX_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
]
|
||||
|
||||
non_numeric_columns = [
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
# Note: I'm not sure why pylint is so upset with the particular dataframe `df_nri`,
|
||||
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
|
||||
# pylint: disable=unsubscriptable-object
|
||||
# pylint: disable=unsupported-assignment-operation
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
|
@ -21,19 +26,37 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = (
|
||||
"EAL_SCORE"
|
||||
)
|
||||
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
|
||||
"FEMA Risk Index Expected Annual Loss Score"
|
||||
)
|
||||
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE = (
|
||||
"FEMA Risk Index Expected Annual Loss Rate"
|
||||
self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB"
|
||||
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = (
|
||||
"EAL_VALA"
|
||||
)
|
||||
self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP"
|
||||
self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE"
|
||||
self.POPULATION_INPUT_FIELD_NAME = "POPULATION"
|
||||
self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE"
|
||||
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected building loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected agricultural loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected population loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
|
||||
# Note: also need to edit transform step to add fields to output.
|
||||
self.COLUMNS_TO_KEEP = [
|
||||
self.GEOID_FIELD_NAME,
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE,
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
]
|
||||
|
||||
self.df: pd.DataFrame
|
||||
|
@ -77,56 +100,35 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
inplace=True,
|
||||
)
|
||||
|
||||
# Calculate a risk score that does not include FEMA's measure of community vulnerability.
|
||||
disaster_categories = [
|
||||
"AVLN", # Avalanche
|
||||
"CFLD", # Coastal Flooding
|
||||
"CWAV", # Cold Wave
|
||||
"DRGT", # Drought
|
||||
"ERQK", # Earthquake
|
||||
"HAIL", # Hail
|
||||
"HWAV", # Heat Wave
|
||||
"HRCN", # Hurricane
|
||||
"ISTM", # Ice Storm
|
||||
"LNDS", # Landslide
|
||||
"LTNG", # Lightning
|
||||
"RFLD", # Riverine Flooding
|
||||
"SWND", # Strong Wind
|
||||
"TRND", # Tornado
|
||||
"TSUN", # Tsunami
|
||||
"VLCN", # Volcanic Activity
|
||||
"WFIR", # Wildfire
|
||||
"WNTW", # Winter Weather
|
||||
]
|
||||
# Population EAL rate = EAL_VALP (Expected Annual Loss - Population) / POPULATION
|
||||
df_nri[self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME] = (
|
||||
df_nri[self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME]
|
||||
/ df_nri[self.POPULATION_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# Note: I'm not sure why pylint is so upset with this particular dataframe,
|
||||
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
|
||||
for category in disaster_categories:
|
||||
df_nri[ # pylint: disable=unsupported-assignment-operation
|
||||
f"{category}"
|
||||
] = (
|
||||
df_nri[ # pylint: disable=unsubscriptable-object
|
||||
f"{category}_EALT"
|
||||
] # Expected Annual Loss - Total
|
||||
/ df_nri[ # pylint: disable=unsubscriptable-object
|
||||
f"{category}_EXPT"
|
||||
]
|
||||
)
|
||||
df_nri[ # pylint: disable=unsupported-assignment-operation
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE
|
||||
] = df_nri[ # pylint: disable=unsubscriptable-object
|
||||
disaster_categories
|
||||
].sum(
|
||||
axis=1
|
||||
# Agriculture EAL rate = EAL_VALA (Expected Annual Loss - Agriculture Value) / AGRIVALUE
|
||||
df_nri[self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME] = (
|
||||
df_nri[
|
||||
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME
|
||||
]
|
||||
/ df_nri[self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# Building EAL rate = EAL_VALB (Expected Annual Loss - Building Value) / BUILDVALUE (Building Value ($))
|
||||
df_nri[self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME] = (
|
||||
df_nri[self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME]
|
||||
/ df_nri[self.BUILDING_VALUE_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# Reduce columns.
|
||||
# Note: normally we wait until writing to CSV for this step, but since the file is so huge,
|
||||
# move this up here for performance reasons.
|
||||
df_nri = df_nri[ # pylint: disable=unsubscriptable-object
|
||||
df_nri = df_nri[
|
||||
[
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE,
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
TRACT_COL,
|
||||
]
|
||||
]
|
||||
|
|
|
@ -60,6 +60,15 @@ FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
|||
FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (
|
||||
"FEMA Risk Index Expected Annual Loss Rate"
|
||||
)
|
||||
EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected building loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected agricultural loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected population loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
|
||||
# Environment
|
||||
DIESEL_FIELD = "Diesel particulate matter"
|
||||
|
|
|
@ -64,19 +64,37 @@ class ScoreL(Score):
|
|||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey
|
||||
climate_criteria = (
|
||||
(
|
||||
self.df[
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
| (
|
||||
self.df[
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
| (
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
)
|
||||
|
||||
return (
|
||||
self.df[
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
> self.LOW_INCOME_THRESHOLD
|
||||
) & (
|
||||
self.df[
|
||||
field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
) & climate_criteria
|
||||
|
||||
def _energy_factor(self) -> bool:
|
||||
# In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT
|
||||
40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5
|
||||
20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5
|
||||
40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5
|
||||
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5
|
||||
21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5
|
||||
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT,EAL_VALA,EAL_VALP,EAL_VALB,AGRIVALUE,POPULATION,BUILDVALUE
|
||||
40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,234.7446176,204.8883901,126.4079101
|
||||
20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,238.9921867,179.4960371,96.24552261
|
||||
40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,187.5793934,183.4527834,106.4706219
|
||||
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,142.7041082,236.9465219,175.3803106
|
||||
21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,54.5,257.1497377,66.41934096,177.9963115
|
||||
|
|
|
|
@ -1,11 +1,11 @@
|
|||
GEOID10,FEMA Risk Index Expected Annual Loss Score,FEMA Risk Index Expected Annual Loss Rate
|
||||
050070403001,11.5,9.540442348853764
|
||||
050070403002,11.5,9.540442348853764
|
||||
050010201001,12.5,9.759472262661436
|
||||
050010201002,12.5,9.759472262661436
|
||||
150070405001,13.5,9.967264470453644
|
||||
150070405002,13.5,9.967264470453644
|
||||
150010210101,14.5,10.16467498073544
|
||||
150010210102,14.5,10.16467498073544
|
||||
150010211011,15.5,10.352473850464468
|
||||
150010211012,15.5,10.352473850464468
|
||||
GEOID10,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
|
||||
050070403001,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629
|
||||
050070403002,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629
|
||||
050010201001,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664
|
||||
050010201002,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664
|
||||
150070405001,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781
|
||||
150070405002,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781
|
||||
150010210101,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191
|
||||
150010210102,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191
|
||||
150010211011,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156
|
||||
150010211012,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156
|
||||
|
|
|
|
@ -1,11 +1,11 @@
|
|||
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,FEMA Risk Index Expected Annual Loss Rate
|
||||
050070403001,05007040300,11.5,9.540442348853764
|
||||
050070403002,05007040300,11.5,9.540442348853764
|
||||
050010201001,05001020100,12.5,9.759472262661436
|
||||
050010201002,05001020100,12.5,9.759472262661436
|
||||
150070405001,15007040500,13.5,9.967264470453644
|
||||
150070405002,15007040500,13.5,9.967264470453644
|
||||
150010210101,15001021010,14.5,10.164674980735441
|
||||
150010210102,15001021010,14.5,10.164674980735441
|
||||
150010211011,15001021101,15.5,10.352473850464467
|
||||
150010211012,15001021101,15.5,10.352473850464467
|
||||
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
|
||||
050070403001,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297
|
||||
050070403002,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297
|
||||
050010201001,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664
|
||||
050010201002,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664
|
||||
150070405001,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781
|
||||
150070405002,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781
|
||||
150010210101,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913
|
||||
150010210102,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913
|
||||
150010211011,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606
|
||||
150010211012,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606
|
||||
|
|
|
|
@ -63,7 +63,7 @@ class TestNationalRiskIndexETL:
|
|||
etl.transform()
|
||||
|
||||
# validation
|
||||
assert etl.df.shape == (10, 4)
|
||||
assert etl.df.shape == (10, 6)
|
||||
pd.testing.assert_frame_equal(etl.df, expected)
|
||||
|
||||
def test_load(self, mock_etl):
|
||||
|
@ -93,5 +93,5 @@ class TestNationalRiskIndexETL:
|
|||
|
||||
# validation
|
||||
assert output_path.exists()
|
||||
assert output.shape == (10, 3)
|
||||
assert output.shape == (10, 5)
|
||||
pd.testing.assert_frame_equal(output, expected)
|
||||
|
|
Loading…
Add table
Reference in a new issue