From daf188c1f34f4906eacc7d401a53e21f915abbd3 Mon Sep 17 00:00:00 2001 From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com> Date: Wed, 3 Aug 2022 11:10:13 -0400 Subject: [PATCH] adds UST indicator (#1786) adds leaky underground storage tanks --- .../data_pipeline/content/config/csv.yml | 9 +++++ .../data_pipeline/content/config/excel.yml | 9 +++++ .../data_pipeline/etl/score/constants.py | 4 +++ .../data_pipeline/etl/score/etl_score.py | 1 + .../data_pipeline/score/field_names.py | 6 +++- .../data_pipeline/score/score_narwhal.py | 33 +++++++++++++++---- 6 files changed, 54 insertions(+), 8 deletions(-) diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 604a81e6..ef2a6e6b 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -257,3 +257,12 @@ fields: - score_name: Percent of population not currently enrolled in college or graduate school label: Percent of residents who are not currently enrolled in higher ed format: percentage + - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? 
+ format: bool + - score_name: Leaky underground storage tanks (percentile) + label: Leaky underground storage tanks (percentile) + format: percentage + - score_name: Leaky underground storage tanks + label: Leaky underground storage tanks + format: float diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index b615c8ee..4820187d 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -153,12 +153,21 @@ sheets: - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students? label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and high percent of residents that are not higher ed students? format: bool + - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + format: bool - score_name: Wastewater discharge (percentile) label: Wastewater discharge (percentile) format: percentage + - score_name: Leaky underground storage tanks (percentile) + label: Leaky underground storage tanks (percentile) + format: percentage - score_name: Wastewater discharge label: Wastewater discharge format: float + - score_name: Leaky underground storage tanks + label: Leaky underground storage tanks + format: float - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students? label: Greater than or equal to the 90th percentile for asthma, is low income, and high percent of residents that are not higher ed students? 
format: bool diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index d00e7b24..8e7d17b1 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -195,6 +195,8 @@ TILES_SCORE_COLUMNS = { + field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS", field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS", + field_names.UST_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX: "UST_PFS", field_names.M_WATER: "M_WTR", field_names.M_WORKFORCE: "M_WKFC", field_names.M_CLIMATE: "M_CLT", @@ -220,6 +222,7 @@ TILES_SCORE_COLUMNS = { field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD: "SFLI", field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HWLI", field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "WDLI", + field_names.UST_LOW_INCOME_FIELD: "USTLI", field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DLI", field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD: "ALI", field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HDLI", @@ -242,6 +245,7 @@ TILES_SCORE_COLUMNS = { field_names.NPL_PCTILE_THRESHOLD: "NPL_ET", field_names.TSDF_PCTILE_THRESHOLD: "TSDF_ET", field_names.WASTEWATER_PCTILE_THRESHOLD: "WD_ET", + field_names.UST_PCTILE_THRESHOLD: "UST_ET", field_names.DIABETES_PCTILE_THRESHOLD: "DB_ET", field_names.ASTHMA_PCTILE_THRESHOLD: "A_ET", field_names.HEART_DISEASE_PCTILE_THRESHOLD: "HD_ET", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 5d528dd6..b073ce5a 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -413,6 +413,7 @@ class ScoreETL(ExtractTransformLoad): field_names.NPL_FIELD, field_names.WASTEWATER_FIELD, field_names.LEAD_PAINT_FIELD, + field_names.UST_FIELD, field_names.UNDER_5_FIELD, field_names.OVER_64_FIELD, 
field_names.LINGUISTIC_ISO_FIELD, diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 04bc62e4..5b7a88af 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -170,7 +170,7 @@ TSDF_FIELD = "Proximity to hazardous waste sites" NPL_FIELD = "Proximity to NPL sites" AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk" RESPIRATORY_HAZARD_FIELD = "Respiratory hazard index" -UST_FIELD = "Underground storage tanks" +UST_FIELD = "Leaky underground storage tanks" LOW_INCOME_THRESHOLD = "Exceeds FPL200 threshold" @@ -430,6 +430,8 @@ HAZARDOUS_WASTE_LOW_INCOME_FIELD = ( # Critical Clean Water and Waste Infrastructure WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?" +UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?" + # Health Burdens DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?" 
@@ -629,6 +631,8 @@ RMP_PCTILE_THRESHOLD = ( NPL_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for NPL (superfund sites) proximity" TSDF_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to hazardous waste sites" WASTEWATER_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge" +UST_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks" + DIABETES_PCTILE_THRESHOLD = ( f"Greater than or equal to the {PERCENTILE}th percentile for diabetes" ) diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index d075a1c2..1299acaa 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -442,23 +442,42 @@ class ScoreNarwhal(Score): ] >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) - - # Straight copy here in case we add additional water fields. 
- self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[ - field_names.WASTEWATER_PCTILE_THRESHOLD - ].copy() + self.df[field_names.UST_PCTILE_THRESHOLD] = ( + self.df[field_names.UST_FIELD + field_names.PERCENTILE_FIELD_SUFFIX] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = ( self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] ) + self.df[field_names.UST_LOW_INCOME_FIELD] = ( + self.df[field_names.UST_PCTILE_THRESHOLD] + & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] + ) + + self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[ + [ + field_names.WASTEWATER_PCTILE_THRESHOLD, + field_names.UST_PCTILE_THRESHOLD, + ] + ].max(axis=1) + self._increment_total_eligibility_exceeded( - [field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD], + [ + field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD, + field_names.UST_LOW_INCOME_FIELD, + ], skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS, ) - return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] + return self.df[ + [ + field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD, + field_names.UST_LOW_INCOME_FIELD, + ] + ].any(axis=1) def _health_factor(self) -> bool: # In Xth percentile or above for diabetes (Source: CDC Places)