Issue 883: Update FEMA risk index measure (#884)

* ETL updated

* Adding three fields to score
This commit is contained in:
Lucas Merrill Brown 2021-11-13 11:32:15 -05:00 committed by GitHub
commit 21834b4a91
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 114 additions and 82 deletions

View file

@ -313,6 +313,9 @@ class ScoreETL(ExtractTransformLoad):
field_names.UNEMPLOYMENT_FIELD,
field_names.HT_INDEX_FIELD,
field_names.MEDIAN_HOUSE_VALUE_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
]
non_numeric_columns = [

View file

@ -1,3 +1,8 @@
# Note: I'm not sure why pylint is so upset with the particular dataframe `df_nri`,
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
@ -21,19 +26,37 @@ class NationalRiskIndexETL(ExtractTransformLoad):
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = (
"EAL_SCORE"
)
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
"FEMA Risk Index Expected Annual Loss Score"
)
self.EXPECTED_ANNUAL_LOSS_RATE = (
"FEMA Risk Index Expected Annual Loss Rate"
self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB"
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = (
"EAL_VALA"
)
self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP"
self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE"
self.POPULATION_INPUT_FIELD_NAME = "POPULATION"
self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE"
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
"Expected building loss rate (Natural Hazards Risk Index)"
)
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME = (
"Expected agricultural loss rate (Natural Hazards Risk Index)"
)
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME = (
"Expected population loss rate (Natural Hazards Risk Index)"
)
# Note: also need to edit transform step to add fields to output.
self.COLUMNS_TO_KEEP = [
self.GEOID_FIELD_NAME,
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
self.EXPECTED_ANNUAL_LOSS_RATE,
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
]
self.df: pd.DataFrame
@ -77,56 +100,35 @@ class NationalRiskIndexETL(ExtractTransformLoad):
inplace=True,
)
# Calculate a risk score that does not include FEMA's measure of community vulnerability.
disaster_categories = [
"AVLN", # Avalanche
"CFLD", # Coastal Flooding
"CWAV", # Cold Wave
"DRGT", # Drought
"ERQK", # Earthquake
"HAIL", # Hail
"HWAV", # Heat Wave
"HRCN", # Hurricane
"ISTM", # Ice Storm
"LNDS", # Landslide
"LTNG", # Lightning
"RFLD", # Riverine Flooding
"SWND", # Strong Wind
"TRND", # Tornado
"TSUN", # Tsunami
"VLCN", # Volcanic Activity
"WFIR", # Wildfire
"WNTW", # Winter Weather
]
# Population EAL Rate = Eal Valp / Population
df_nri[self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME] = (
df_nri[self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME]
/ df_nri[self.POPULATION_INPUT_FIELD_NAME]
)
# Note: I'm not sure why pylint is so upset with this particular dataframe,
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
for category in disaster_categories:
df_nri[ # pylint: disable=unsupported-assignment-operation
f"{category}"
] = (
df_nri[ # pylint: disable=unsubscriptable-object
f"{category}_EALT"
] # Expected Annual Loss - Total
/ df_nri[ # pylint: disable=unsubscriptable-object
f"{category}_EXPT"
]
)
df_nri[ # pylint: disable=unsupported-assignment-operation
self.EXPECTED_ANNUAL_LOSS_RATE
] = df_nri[ # pylint: disable=unsubscriptable-object
disaster_categories
].sum(
axis=1
# Agriculture EAL Rate = Eal Vala / Agrivalue
df_nri[self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME] = (
df_nri[
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME
]
/ df_nri[self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME]
)
# divide EAL_VALB (Expected Annual Loss - Building Value) by BUILDVALUE (Building Value ($)).
df_nri[self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME] = (
df_nri[self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME]
/ df_nri[self.BUILDING_VALUE_INPUT_FIELD_NAME]
)
# Reduce columns.
# Note: normally we wait until writing to CSV for this step, but since the file is so huge,
# move this up here for performance reasons.
df_nri = df_nri[ # pylint: disable=unsubscriptable-object
df_nri = df_nri[
[
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
self.EXPECTED_ANNUAL_LOSS_RATE,
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
TRACT_COL,
]
]