mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 09:41:26 -08:00
Update FEMA data to be tracts, not block groups (#906)
This commit is contained in:
parent
893758f1d4
commit
537844236a
5 changed files with 20 additions and 52 deletions
|
@ -226,7 +226,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
census_block_group_dfs = [
|
||||
self.ejscreen_df,
|
||||
self.census_acs_median_incomes_df,
|
||||
self.national_risk_index_df,
|
||||
]
|
||||
|
||||
census_block_group_df = self._join_cbg_dfs(census_block_group_dfs)
|
||||
|
@ -241,6 +240,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.geocorr_urban_rural_df,
|
||||
self.persistent_poverty_df,
|
||||
self.housing_and_transportation_df,
|
||||
self.national_risk_index_df,
|
||||
]
|
||||
census_tract_df = self._join_tract_dfs(census_tract_dfs)
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
|
||||
# Note: also need to edit transform step to add fields to output.
|
||||
self.COLUMNS_TO_KEEP = [
|
||||
self.GEOID_FIELD_NAME,
|
||||
self.GEOID_TRACT_FIELD_NAME,
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
|
@ -82,7 +82,6 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
logger.info("Transforming National Risk Index Data")
|
||||
|
||||
NRI_TRACT_COL = "TRACTFIPS" # Census Tract Column in NRI data
|
||||
TRACT_COL = self.GEOID_TRACT_FIELD_NAME # Census Tract column name
|
||||
|
||||
# read in the unzipped csv from NRI data source then rename the
|
||||
# Census Tract column for merging
|
||||
|
@ -94,7 +93,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
)
|
||||
df_nri.rename(
|
||||
columns={
|
||||
NRI_TRACT_COL: TRACT_COL,
|
||||
NRI_TRACT_COL: self.GEOID_TRACT_FIELD_NAME,
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME: self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
},
|
||||
inplace=True,
|
||||
|
@ -120,30 +119,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
|
|||
/ df_nri[self.BUILDING_VALUE_INPUT_FIELD_NAME]
|
||||
)
|
||||
|
||||
# Reduce columns.
|
||||
# Note: normally we wait until writing to CSV for this step, but since the file is so huge,
|
||||
# move this up here for performance reasons.
|
||||
df_nri = df_nri[
|
||||
[
|
||||
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
|
||||
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
|
||||
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
|
||||
TRACT_COL,
|
||||
]
|
||||
]
|
||||
|
||||
# get the full list of Census Block Groups from the ACS data
|
||||
# and extract the Census Tract ID from each Block Group ID
|
||||
df_acs = pd.read_csv(
|
||||
self.BLOCK_GROUP_CSV, dtype={self.GEOID_FIELD_NAME: "string"}
|
||||
)
|
||||
df_acs[TRACT_COL] = df_acs[self.GEOID_FIELD_NAME].str[0:11]
|
||||
df_block_group = df_acs[[self.GEOID_FIELD_NAME, TRACT_COL]]
|
||||
|
||||
# merge NRI data on the Census Tract ID so that each
|
||||
# Block Group inherits the NRI score of its Census Tract
|
||||
self.df = df_block_group.merge(df_nri, how="left", on=TRACT_COL)
|
||||
self.df = df_nri
|
||||
|
||||
def load(self) -> None:
|
||||
"""Writes the NRI data as a csv to the directory at self.OUTPUT_DIR"""
|
||||
|
|
|
@ -1,11 +1,6 @@
|
|||
GEOID10,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
|
||||
050070403001,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629
|
||||
050070403002,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629
|
||||
050010201001,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664
|
||||
050010201002,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664
|
||||
150070405001,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781
|
||||
150070405002,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781
|
||||
150010210101,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191
|
||||
150010210102,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191
|
||||
150010211011,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156
|
||||
150010211012,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156
|
||||
GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
|
||||
05007040300,11.5,0.241594948,0.206607506,0.399500316
|
||||
05001020100,12.5,0.281343259,0.207119742,0.535089827
|
||||
15007040500,13.5,0.280726185,0.269219337,0.493093767
|
||||
15001021010,14.5,0.221568983,0.360886597,0.305051347
|
||||
15001021101,15.5,0.805488269,0.204161204,0.30618612
|
||||
|
|
|
|
@ -1,11 +1,6 @@
|
|||
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
|
||||
050070403001,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297
|
||||
050070403002,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297
|
||||
050010201001,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664
|
||||
050010201002,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664
|
||||
150070405001,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781
|
||||
150070405002,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781
|
||||
150010210101,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913
|
||||
150010210102,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913
|
||||
150010211011,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606
|
||||
150010211012,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606
|
||||
TRACT,GEOID10_TRACT,RISK_SCORE,RISK_RATNG,RISK_NPCTL,FEMA Risk Index Expected Annual Loss Score,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT,EAL_VALA,EAL_VALP,EAL_VALB,AGRIVALUE,POPULATION,BUILDVALUE,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
|
||||
40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,234.7446176,204.8883901,126.4079101,0.24159494823420938,0.2066075060457531,0.39950031576386297
|
||||
20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,238.9921867,179.4960371,96.24552261,0.2813432586919213,0.20711974179363413,0.5350898265541664
|
||||
40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,187.5793934,183.4527834,106.4706219,0.28072618493724094,0.26921933739444537,0.4930937667416781
|
||||
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,142.7041082,236.9465219,175.3803106,0.22156898349475204,0.3608865970965789,0.30505134708091913
|
||||
21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,54.5,257.1497377,66.41934096,177.9963115,0.8054882693313613,0.20416120377788743,0.30618612004215606
|
||||
|
|
|
|
@ -52,6 +52,7 @@ class TestNationalRiskIndexETL:
|
|||
acs_dst = etl.BLOCK_GROUP_CSV
|
||||
for src, dst in [(input_src, input_dst), (acs_src, acs_dst)]:
|
||||
copy_data_files(src, dst)
|
||||
|
||||
# setup - read in sample output as dataframe
|
||||
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
|
||||
BLOCK_COL = etl.GEOID_FIELD_NAME
|
||||
|
@ -59,11 +60,12 @@ class TestNationalRiskIndexETL:
|
|||
DATA_DIR / "transform.csv",
|
||||
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
|
||||
)
|
||||
|
||||
# execution
|
||||
etl.transform()
|
||||
|
||||
# validation
|
||||
assert etl.df.shape == (10, 6)
|
||||
assert etl.df.shape == (5, 51)
|
||||
pd.testing.assert_frame_equal(etl.df, expected)
|
||||
|
||||
def test_load(self, mock_etl):
|
||||
|
@ -93,5 +95,5 @@ class TestNationalRiskIndexETL:
|
|||
|
||||
# validation
|
||||
assert output_path.exists()
|
||||
assert output.shape == (10, 5)
|
||||
assert output.shape == (5, 5)
|
||||
pd.testing.assert_frame_equal(output, expected)
|
||||
|
|
Loading…
Add table
Reference in a new issue