Update FEMA data to be tracts, not block groups (#906)

This commit is contained in:
Lucas Merrill Brown 2021-11-18 10:55:05 -05:00 committed by lucasmbrown-usds
parent 893758f1d4
commit 537844236a
5 changed files with 20 additions and 52 deletions

View file

@ -226,7 +226,6 @@ class ScoreETL(ExtractTransformLoad):
census_block_group_dfs = [
self.ejscreen_df,
self.census_acs_median_incomes_df,
self.national_risk_index_df,
]
census_block_group_df = self._join_cbg_dfs(census_block_group_dfs)
@ -241,6 +240,7 @@ class ScoreETL(ExtractTransformLoad):
self.geocorr_urban_rural_df,
self.persistent_poverty_df,
self.housing_and_transportation_df,
self.national_risk_index_df,
]
census_tract_df = self._join_tract_dfs(census_tract_dfs)

View file

@ -52,7 +52,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
# Note: when adding a field here, the transform step also needs to be edited so the field is added to the output.
self.COLUMNS_TO_KEEP = [
self.GEOID_FIELD_NAME,
self.GEOID_TRACT_FIELD_NAME,
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
@ -82,7 +82,6 @@ class NationalRiskIndexETL(ExtractTransformLoad):
logger.info("Transforming National Risk Index Data")
NRI_TRACT_COL = "TRACTFIPS" # Census Tract Column in NRI data
TRACT_COL = self.GEOID_TRACT_FIELD_NAME # Census Tract column name
# Read in the unzipped CSV from the NRI data source, then rename the
# Census Tract column for merging.
@ -94,7 +93,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
)
df_nri.rename(
columns={
NRI_TRACT_COL: TRACT_COL,
NRI_TRACT_COL: self.GEOID_TRACT_FIELD_NAME,
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_INPUT_FIELD_NAME: self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
},
inplace=True,
@ -120,30 +119,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
/ df_nri[self.BUILDING_VALUE_INPUT_FIELD_NAME]
)
# Reduce columns.
# Note: normally we wait until writing to CSV for this step, but since the file is so huge,
# move this up here for performance reasons.
df_nri = df_nri[
[
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
self.EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME,
self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME,
self.EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME,
TRACT_COL,
]
]
# get the full list of Census Block Groups from the ACS data
# and extract the Census Tract ID from each Block Group ID
df_acs = pd.read_csv(
self.BLOCK_GROUP_CSV, dtype={self.GEOID_FIELD_NAME: "string"}
)
df_acs[TRACT_COL] = df_acs[self.GEOID_FIELD_NAME].str[0:11]
df_block_group = df_acs[[self.GEOID_FIELD_NAME, TRACT_COL]]
# merge NRI data on the Census Tract ID so that each
# Block Group inherits the NRI score of its Census Tract
self.df = df_block_group.merge(df_nri, how="left", on=TRACT_COL)
self.df = df_nri
def load(self) -> None:
"""Writes the NRI data as a csv to the directory at self.OUTPUT_DIR"""

View file

@ -1,11 +1,6 @@
GEOID10,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
050070403001,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629
050070403002,11.5,0.2415949482342093,0.2066075060457531,0.3995003157638629
050010201001,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664
050010201002,12.5,0.2813432586919213,0.2071197417936341,0.5350898265541664
150070405001,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781
150070405002,13.5,0.2807261849372409,0.2692193373944453,0.4930937667416781
150010210101,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191
150010210102,14.5,0.221568983494752,0.3608865970965789,0.3050513470809191
150010211011,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156
150010211012,15.5,0.8054882693313613,0.2041612037778874,0.306186120042156
GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
05007040300,11.5,0.241594948,0.206607506,0.399500316
05001020100,12.5,0.281343259,0.207119742,0.535089827
15007040500,13.5,0.280726185,0.269219337,0.493093767
15001021010,14.5,0.221568983,0.360886597,0.305051347
15001021101,15.5,0.805488269,0.204161204,0.30618612

1 GEOID10 GEOID10_TRACT FEMA Risk Index Expected Annual Loss Score Expected population loss rate (Natural Hazards Risk Index) Expected agricultural loss rate (Natural Hazards Risk Index) Expected building loss rate (Natural Hazards Risk Index)
2 050070403001 05007040300 11.5 0.2415949482342093 0.241594948 0.2066075060457531 0.206607506 0.3995003157638629 0.399500316
3 050070403002 05001020100 11.5 12.5 0.2415949482342093 0.281343259 0.2066075060457531 0.207119742 0.3995003157638629 0.535089827
4 050010201001 15007040500 12.5 13.5 0.2813432586919213 0.280726185 0.2071197417936341 0.269219337 0.5350898265541664 0.493093767
5 050010201002 15001021010 12.5 14.5 0.2813432586919213 0.221568983 0.2071197417936341 0.360886597 0.5350898265541664 0.305051347
6 150070405001 15001021101 13.5 15.5 0.2807261849372409 0.805488269 0.2692193373944453 0.204161204 0.4930937667416781 0.30618612
150070405002 13.5 0.2807261849372409 0.2692193373944453 0.4930937667416781
150010210101 14.5 0.221568983494752 0.3608865970965789 0.3050513470809191
150010210102 14.5 0.221568983494752 0.3608865970965789 0.3050513470809191
150010211011 15.5 0.8054882693313613 0.2041612037778874 0.306186120042156
150010211012 15.5 0.8054882693313613 0.2041612037778874 0.306186120042156

View file

@ -1,11 +1,6 @@
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
050070403001,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297
050070403002,05007040300,11.5,0.24159494823420938,0.2066075060457531,0.39950031576386297
050010201001,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664
050010201002,05001020100,12.5,0.2813432586919213,0.20711974179363413,0.5350898265541664
150070405001,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781
150070405002,15007040500,13.5,0.28072618493724094,0.26921933739444537,0.4930937667416781
150010210101,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913
150010210102,15001021010,14.5,0.22156898349475204,0.3608865970965789,0.30505134708091913
150010211011,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606
150010211012,15001021101,15.5,0.8054882693313613,0.20416120377788743,0.30618612004215606
TRACT,GEOID10_TRACT,RISK_SCORE,RISK_RATNG,RISK_NPCTL,FEMA Risk Index Expected Annual Loss Score,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT,EAL_VALA,EAL_VALP,EAL_VALB,AGRIVALUE,POPULATION,BUILDVALUE,Expected population loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected building loss rate (Natural Hazards Risk Index)
40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,234.7446176,204.8883901,126.4079101,0.24159494823420938,0.2066075060457531,0.39950031576386297
20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,238.9921867,179.4960371,96.24552261,0.2813432586919213,0.20711974179363413,0.5350898265541664
40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,187.5793934,183.4527834,106.4706219,0.28072618493724094,0.26921933739444537,0.4930937667416781
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,142.7041082,236.9465219,175.3803106,0.22156898349475204,0.3608865970965789,0.30505134708091913
21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5,52.5,53.5,54.5,257.1497377,66.41934096,177.9963115,0.8054882693313613,0.20416120377788743,0.30618612004215606

1 GEOID10 TRACT GEOID10_TRACT RISK_SCORE RISK_RATNG RISK_NPCTL FEMA Risk Index Expected Annual Loss Score AVLN_EALT CFLD_EALT CWAV_EALT DRGT_EALT ERQK_EALT HAIL_EALT HWAV_EALT HRCN_EALT ISTM_EALT LNDS_EALT LTNG_EALT RFLD_EALT SWND_EALT TRND_EALT TSUN_EALT VLCN_EALT WFIR_EALT WNTW_EALT AVLN_EXPT CFLD_EXPT CWAV_EXPT DRGT_EXPT ERQK_EXPT HAIL_EXPT HWAV_EXPT HRCN_EXPT ISTM_EXPT LNDS_EXPT LTNG_EXPT RFLD_EXPT SWND_EXPT TRND_EXPT TSUN_EXPT VLCN_EXPT WFIR_EXPT WNTW_EXPT EAL_VALA EAL_VALP EAL_VALB AGRIVALUE POPULATION BUILDVALUE Expected population loss rate (Natural Hazards Risk Index) Expected agricultural loss rate (Natural Hazards Risk Index) Expected building loss rate (Natural Hazards Risk Index)
2 050070403001 40300 05007040300 10.492015 Very Low 15.3494 11.5 12.5 13.5 14.5 15.5 16.5 17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5 25.5 26.5 27.5 28.5 29.5 30.5 31.5 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5 47.5 48.5 49.5 50.5 234.7446176 204.8883901 126.4079101 0.24159494823420938 0.2066075060457531 0.39950031576386297
3 050070403002 20100 05007040300 05001020100 14.705854 Relatively Low 36.725828 11.5 12.5 13.5 14.5 15.5 16.5 17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5 25.5 26.5 27.5 28.5 29.5 30.5 31.5 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5 47.5 48.5 49.5 50.5 51.5 238.9921867 179.4960371 96.24552261 0.24159494823420938 0.2813432586919213 0.2066075060457531 0.20711974179363413 0.39950031576386297 0.5350898265541664
4 050010201001 40500 05001020100 15007040500 10.234981 Very Low 13.997993 12.5 13.5 14.5 15.5 16.5 17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5 25.5 26.5 27.5 28.5 29.5 30.5 31.5 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5 47.5 48.5 49.5 50.5 51.5 52.5 187.5793934 183.4527834 106.4706219 0.2813432586919213 0.28072618493724094 0.20711974179363413 0.26921933739444537 0.5350898265541664 0.4930937667416781
5 050010201002 21010 05001020100 15001021010 21.537231 Relatively Moderate 59.488033 12.5 14.5 15.5 16.5 17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5 25.5 26.5 27.5 28.5 29.5 30.5 31.5 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5 47.5 48.5 49.5 50.5 51.5 52.5 53.5 142.7041082 236.9465219 175.3803106 0.2813432586919213 0.22156898349475204 0.20711974179363413 0.3608865970965789 0.5350898265541664 0.30505134708091913
6 150070405001 21101 15007040500 15001021101 19.434585 Relatively Low 53.392265 13.5 15.5 16.5 17.5 18.5 19.5 20.5 21.5 22.5 23.5 24.5 25.5 26.5 27.5 28.5 29.5 30.5 31.5 32.5 33.5 34.5 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5 47.5 48.5 49.5 50.5 51.5 52.5 53.5 54.5 257.1497377 66.41934096 177.9963115 0.28072618493724094 0.8054882693313613 0.26921933739444537 0.20416120377788743 0.4930937667416781 0.30618612004215606
150070405002 15007040500 13.5 0.28072618493724094 0.26921933739444537 0.4930937667416781
150010210101 15001021010 14.5 0.22156898349475204 0.3608865970965789 0.30505134708091913
150010210102 15001021010 14.5 0.22156898349475204 0.3608865970965789 0.30505134708091913
150010211011 15001021101 15.5 0.8054882693313613 0.20416120377788743 0.30618612004215606
150010211012 15001021101 15.5 0.8054882693313613 0.20416120377788743 0.30618612004215606

View file

@ -52,6 +52,7 @@ class TestNationalRiskIndexETL:
acs_dst = etl.BLOCK_GROUP_CSV
for src, dst in [(input_src, input_dst), (acs_src, acs_dst)]:
copy_data_files(src, dst)
# setup - read in sample output as dataframe
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
BLOCK_COL = etl.GEOID_FIELD_NAME
@ -59,11 +60,12 @@ class TestNationalRiskIndexETL:
DATA_DIR / "transform.csv",
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
)
# execution
etl.transform()
# validation
assert etl.df.shape == (10, 6)
assert etl.df.shape == (5, 51)
pd.testing.assert_frame_equal(etl.df, expected)
def test_load(self, mock_etl):
@ -93,5 +95,5 @@ class TestNationalRiskIndexETL:
# validation
assert output_path.exists()
assert output.shape == (10, 5)
assert output.shape == (5, 5)
pd.testing.assert_frame_equal(output, expected)