Add FEMA risk index to score file (#687)

* Add to score file
This commit is contained in:
Lucas Merrill Brown 2021-09-15 13:31:32 -05:00 committed by GitHub
commit 1c0d87d84b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 12 deletions

View file

@ -18,6 +18,19 @@ class NationalRiskIndexETL(ExtractTransformLoad):
self.BLOCK_GROUP_CSV = (
self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
)
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME = (
"EAL_SCORE"
)
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME = (
"FEMA Risk Index Expected Annual Loss Score"
)
# Note: every field listed here must also be kept by the column-reduction
# step in transform; otherwise it will be missing when load selects these columns.
self.COLUMNS_TO_KEEP = [
self.GEOID_FIELD_NAME,
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
]
self.df: pd.DataFrame
def extract(self) -> None:
@ -42,23 +55,37 @@ class NationalRiskIndexETL(ExtractTransformLoad):
NRI_TRACT_COL = "TRACTFIPS" # Census Tract Column in NRI data
TRACT_COL = self.GEOID_TRACT_FIELD_NAME # Census Tract column name
BLOCK_COL = self.GEOID_FIELD_NAME # Census Block Group column name
# read in the unzipped csv from NRI data source, then rename the
# Census Tract column (for merging) and the expected annual loss score column
df_nri = pd.read_csv(
df_nri: pd.DataFrame = pd.read_csv(
self.INPUT_CSV,
dtype={NRI_TRACT_COL: "string"},
na_values=["None"],
low_memory=False,
)
df_nri.rename(columns={NRI_TRACT_COL: TRACT_COL}, inplace=True)
df_nri.rename(
columns={
NRI_TRACT_COL: TRACT_COL,
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME: self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
},
inplace=True,
)
# Reduce columns.
# Note: columns are normally reduced just before writing to CSV, but because this file is so large,
# the reduction is done here, before the merge, for performance reasons.
df_nri = df_nri[ # pylint: disable=unsubscriptable-object
[self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME, TRACT_COL]
]
# get the full list of Census Block Groups from the ACS data
# and extract the Census Tract ID from each Block Group ID
df_acs = pd.read_csv(self.BLOCK_GROUP_CSV, dtype={BLOCK_COL: "string"})
df_acs[TRACT_COL] = df_acs[BLOCK_COL].str[0:11]
df_block_group = df_acs[[BLOCK_COL, TRACT_COL]]
df_acs = pd.read_csv(
self.BLOCK_GROUP_CSV, dtype={self.GEOID_FIELD_NAME: "string"}
)
df_acs[TRACT_COL] = df_acs[self.GEOID_FIELD_NAME].str[0:11]
df_block_group = df_acs[[self.GEOID_FIELD_NAME, TRACT_COL]]
# merge NRI data on the Census Tract ID so that each
# Block Group inherits the NRI score of its Census Tract
@ -67,6 +94,9 @@ class NationalRiskIndexETL(ExtractTransformLoad):
def load(self) -> None:
"""Writes the NRI data as a csv to the directory at self.OUTPUT_DIR"""
logger.info("Saving National Risk Index CSV")
# write nationwide csv
self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
self.df.to_csv(self.OUTPUT_DIR / "usa.csv", index=False)
self.df[self.COLUMNS_TO_KEEP].to_csv(
self.OUTPUT_DIR / "usa.csv", index=False
)