diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml
index dccc8108..6a738e6e 100644
--- a/data/data-pipeline/data_pipeline/content/config/csv.yml
+++ b/data/data-pipeline/data_pipeline/content/config/csv.yml
@@ -334,4 +334,10 @@ fields:
     format: bool
   - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
     label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
-    format: bool
\ No newline at end of file
+    format: bool
+  - score_name: Tract-level redlining score meets or exceeds 3.25 and is low income
+    label: Tract experienced historic underinvestment and remains low income
+    format: bool
+  - score_name: Tract-level redlining score meets or exceeds 3.25
+    label: Tract experienced historic underinvestment
+    format: bool
diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml
index cdc1362b..c31e4763 100644
--- a/data/data-pipeline/data_pipeline/content/config/excel.yml
+++ b/data/data-pipeline/data_pipeline/content/config/excel.yml
@@ -338,4 +338,10 @@ sheets:
         format: bool
       - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
         label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
-        format: bool
\ No newline at end of file
+        format: bool
+      - score_name: Tract-level redlining score meets or exceeds 3.25 and is low income
+        label: Tract experienced historic underinvestment and remains low income
+        format: bool
+      - score_name: Tract-level redlining score meets or exceeds 3.25
+        label: Tract experienced historic underinvestment
+        format: bool
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
index 081347cc..646cb7a0 100644
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -303,9 +303,9 @@ TILES_SCORE_COLUMNS = {
     field_names.FUTURE_FLOOD_RISK_FIELD
     + field_names.PERCENTILE_FIELD_SUFFIX: "FLD_PFS",
     field_names.FUTURE_WILDFIRE_RISK_FIELD
-    + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
+    + field_names.PERCENTILE_FIELD_SUFFIX: "WFR_PFS",
     field_names.HIGH_FUTURE_FLOOD_RISK_FIELD: "FLD_ET",
-    field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WF_ET",
+    field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WFR_ET",
     field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD: "ADJ_ET",
     field_names.SCORE_N_COMMUNITIES
     + field_names.ADJACENCY_INDEX_SUFFIX: "ADJ_PFS",
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
index 3631bea3..bec606f5 100644
Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
index 559fdcf2..1fcb590f 100644
Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ
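TILES_SCORE_COLUMNS maps long, human-readable score field names to the short column codes that are baked into the map tiles; the constants.py hunk above renames the wildfire codes from WF_PFS/WF_ET to WFR_PFS/WFR_ET, which is why the tile and downloadable snapshot fixtures are regenerated. A minimal sketch of how such a mapping is typically applied with pandas — the field-name strings below are illustrative placeholders, not the pipeline's actual field_names constants:

```python
import pandas as pd

# Illustrative stand-in for TILES_SCORE_COLUMNS: long score field names
# mapped to the short codes embedded in the tiles. Only the two renamed
# wildfire entries are shown; the real mapping lives in
# data_pipeline/etl/score/constants.py.
TILES_SCORE_COLUMNS = {
    "Expected wildfire risk (percentile)": "WFR_PFS",
    "High future wildfire risk": "WFR_ET",
}

score_df = pd.DataFrame(
    {
        "Expected wildfire risk (percentile)": [0.87, 0.12],
        "High future wildfire risk": [True, False],
    }
)

# Keep only the tiled columns, renamed to their short codes.
tiles_df = score_df[list(TILES_SCORE_COLUMNS)].rename(columns=TILES_SCORE_COLUMNS)
assert list(tiles_df.columns) == ["WFR_PFS", "WFR_ET"]
```

Because downstream consumers read the short codes out of the published tiles, a rename like WF_PFS to WFR_PFS is a breaking change for them, and the updated .pkl snapshots pin the new contract in the tests.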
diff --git a/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py b/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
index 9776e801..4937a2b2 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
@@ -27,7 +27,7 @@ class FloodRiskETL(ExtractTransformLoad):
     def __init__(self):
         # define the full path for the input CSV file
         self.INPUT_CSV = (
-            self.get_tmp_path() / "fsf_flood" / "flood_tract_2010.csv"
+            self.get_tmp_path() / "fsf_flood" / "flood-tract2010.csv"
         )
 
         # this is the main dataframe
@@ -50,24 +50,16 @@ class FloodRiskETL(ExtractTransformLoad):
 
         # read in the unzipped csv data source then rename the
         # Census Tract column for merging
-        df_fsf_flood_disagg: pd.DataFrame = pd.read_csv(
+        df_fsf_flood: pd.DataFrame = pd.read_csv(
             self.INPUT_CSV,
             dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
             low_memory=False,
         )
 
-        df_fsf_flood_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood_disagg[
+        df_fsf_flood[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood[
             self.INPUT_GEOID_TRACT_FIELD_NAME
         ].str.zfill(11)
 
-        # Because we have some tracts that are listed twice, we aggregate based on
-        # GEOID10_TRACT. Note that I haven't confirmed this with the FSF boys -- to do!
-        df_fsf_flood = (
-            df_fsf_flood_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
-            .sum()
-            .reset_index()
-        )
-
         df_fsf_flood[self.COUNT_PROPERTIES] = df_fsf_flood[
             self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
         ].clip(lower=self.CLIP_PROPERTIES_COUNT)
diff --git a/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py b/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py
index a41ce1e3..2a26370e 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py
@@ -26,9 +26,7 @@ class WildfireRiskETL(ExtractTransformLoad):
     def __init__(self):
         # define the full path for the input CSV file
-        self.INPUT_CSV = (
-            self.get_tmp_path() / "fsf_fire" / "fire_tract_2010.csv"
-        )
+        self.INPUT_CSV = self.get_tmp_path() / "fsf_fire" / "fire-tract2010.csv"
 
         # this is the main dataframe
         self.df: pd.DataFrame
 
@@ -49,24 +49,16 @@
         logger.info("Transforming National Risk Index Data")
         # read in the unzipped csv data source then rename the
         # Census Tract column for merging
-        df_fsf_fire_disagg: pd.DataFrame = pd.read_csv(
+        df_fsf_fire: pd.DataFrame = pd.read_csv(
             self.INPUT_CSV,
             dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
             low_memory=False,
         )
 
-        df_fsf_fire_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_fire_disagg[
+        df_fsf_fire[self.GEOID_TRACT_FIELD_NAME] = df_fsf_fire[
             self.INPUT_GEOID_TRACT_FIELD_NAME
         ].str.zfill(11)
 
-        # Because we have some tracts that are listed twice, we aggregate based on
-        # GEOID10_TRACT. Note that I haven't confirmed this with the FSF boys -- to do!
-        df_fsf_fire = (
-            df_fsf_fire_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
-            .sum()
-            .reset_index()
-        )
-
         df_fsf_fire[self.COUNT_PROPERTIES] = df_fsf_fire[
             self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
         ].clip(lower=self.CLIP_PROPERTIES_COUNT)
diff --git a/data/data-pipeline/data_pipeline/utils.py b/data/data-pipeline/data_pipeline/utils.py
index 0ec62616..865e888b 100644
--- a/data/data-pipeline/data_pipeline/utils.py
+++ b/data/data-pipeline/data_pipeline/utils.py
@@ -1409,6 +1409,8 @@ def get_excel_column_name(index: int) -> str:
         "ALI",
         "ALJ",
         "ALK",
+        "ALL",
+        "ALM",
     ]
 
     return excel_column_names[index]
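get_excel_column_name indexes into a hardcoded list of Excel-style column names, so every column added to the downloadable Excel file needs a corresponding entry; the utils.py hunk above appends "ALL" and "ALM" after "ALK" to cover the two new redlining fields. As a cross-check, Excel column names follow bijective base-26 numbering, under which the names after "ALK" are indeed "ALL" and "ALM". A self-contained sketch (not the repository's implementation) that computes the same sequence:

```python
def excel_column_name(index: int) -> str:
    """Convert a zero-based column index to an Excel-style name.

    Excel columns use bijective base-26 (A..Z, AA..AZ, ...): there is no
    zero digit, so each step subtracts 1 before dividing by 26.
    """
    name = ""
    index += 1  # shift to one-based bijective numbering
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        name = chr(ord("A") + remainder) + name
    return name


# "ALK" is the 999th column (zero-based index 998); the two appended
# entries follow immediately after it.
assert [excel_column_name(i) for i in (998, 999, 1000)] == ["ALK", "ALL", "ALM"]
```

Keeping the hardcoded list is consistent with the existing code; the closed-form version above only documents where the two new entries come from and why they sort directly after "ALK".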