mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-26 02:01:17 -07:00
Issue 883: Update FEMA risk index measure (#884)
* ETL updated * Adding three fields to score
This commit is contained in:
parent
05ebf9b48c
commit
21834b4a91
8 changed files with 114 additions and 82 deletions
|
@ -313,6 +313,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.HT_INDEX_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
]
|
||||
|
||||
non_numeric_columns = [
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
# Note: I'm not sure why pylint is so upset with the particular dataframe `df_nri`,
|
||||
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
|
||||
# pylint: disable=unsubscriptable-object
|
||||
# pylint: disable=unsupported-assignment-operation
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
|
@ -21,19 +26,37 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = (
|
||||
"EAL_SCORE"
|
||||
)
|
||||
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
|
||||
"FEMA Risk Index Expected Annual Loss Score"
|
||||
)
|
||||
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE = (
|
||||
"FEMA Risk Index Expected Annual Loss Rate"
|
||||
self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME = "EAL_VALB"
|
||||
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME = (
|
||||
"EAL_VALA"
|
||||
)
|
||||
self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME = "EAL_VALP"
|
||||
self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME = "AGRIVALUE"
|
||||
self.POPULATION_INPUT_FIELD_NAME = "POPULATION"
|
||||
self.BUILDING_VALUE_INPUT_FIELD_NAME = "BUILDVALUE"
|
||||
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected building loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected agricultural loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME = (
|
||||
"Expected population loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
|
||||
# Note: also need to edit transform step to add fields to output.
|
||||
self.COLUMNS_TO_KEEP = [
|
||||
self.GEOID_FIELD_NAME,
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE,
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
]
|
||||
|
||||
self.df: pd.DataFrame
|
||||
|
@ -77,56 +100,35 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
inplace=True,
|
||||
)
|
||||
|
||||
# Calculate a risk score that does not include FEMA's measure of community vulnerability.
|
||||
disaster_categories = [
|
||||
"AVLN", # Avalanche
|
||||
"CFLD", # Coastal Flooding
|
||||
"CWAV", # Cold Wave
|
||||
"DRGT", # Drought
|
||||
"ERQK", # Earthquake
|
||||
"HAIL", # Hail
|
||||
"HWAV", # Heat Wave
|
||||
"HRCN", # Hurricane
|
||||
"ISTM", # Ice Storm
|
||||
"LNDS", # Landslide
|
||||
"LTNG", # Lightning
|
||||
"RFLD", # Riverine Flooding
|
||||
"SWND", # Strong Wind
|
||||
"TRND", # Tornado
|
||||
"TSUN", # Tsunami
|
||||
"VLCN", # Volcanic Activity
|
||||
"WFIR", # Wildfire
|
||||
"WNTW", # Winter Weather
|
||||
]
|
||||
# Population EAL Rate = Eal Valp / Population
|
||||
df_nri[self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME] = (
|
||||
df_nri[self.EXPECTED_ANNUAL_LOSS_POPULATION_VALUE_INPUT_FIELD_NAME]
|
||||
/ df_nri[self.POPULATION_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# Note: I'm not sure why pylint is so upset with this particular dataframe,
|
||||
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
|
||||
for category in disaster_categories:
|
||||
df_nri[ # pylint: disable=unsupported-assignment-operation
|
||||
f"{category}"
|
||||
] = (
|
||||
df_nri[ # pylint: disable=unsubscriptable-object
|
||||
f"{category}_EALT"
|
||||
] # Expected Annual Loss - Total
|
||||
/ df_nri[ # pylint: disable=unsubscriptable-object
|
||||
f"{category}_EXPT"
|
||||
]
|
||||
)
|
||||
df_nri[ # pylint: disable=unsupported-assignment-operation
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE
|
||||
] = df_nri[ # pylint: disable=unsubscriptable-object
|
||||
disaster_categories
|
||||
].sum(
|
||||
axis=1
|
||||
# Agriculture EAL Rate = Eal Vala / Agrivalue
|
||||
df_nri[self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME] = (
|
||||
df_nri[
|
||||
self.EXPECTED_ANNUAL_LOSS_AGRICULTURAL_VALUE_INPUT_FIELD_NAME
|
||||
]
|
||||
/ df_nri[self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# divide EAL_VALB (Expected Annual Loss - Building Value) by BUILDVALUE (Building Value ($)).
|
||||
df_nri[self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME] = (
|
||||
df_nri[self.EXPECTED_ANNUAL_LOSS_BUILDING_VALUE_INPUT_FIELD_NAME]
|
||||
/ df_nri[self.BUILDING_VALUE_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# Reduce columns.
|
||||
# Note: normally we wait until writing to CSV for this step, but since the file is so huge,
|
||||
# move this up here for performance reasons.
|
||||
df_nri = df_nri[ # pylint: disable=unsubscriptable-object
|
||||
df_nri = df_nri[
|
||||
[
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
self.EXPECTED_ANNUAL_LOSS_RATE,
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
TRACT_COL,
|
||||
]
|
||||
]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue