From daf188c1f34f4906eacc7d401a53e21f915abbd3 Mon Sep 17 00:00:00 2001
From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
Date: Wed, 3 Aug 2022 11:10:13 -0400
Subject: [PATCH] adds UST indicator (#1786)

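Adds the leaky underground storage tanks (UST) indicator: its percentile, 90th-percentile threshold, and low-income fields, and includes it in the water threshold calculation and the CSV/Excel/tile outputs.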
---
 .../data_pipeline/content/config/csv.yml      |  9 +++++
 .../data_pipeline/content/config/excel.yml    |  9 +++++
 .../data_pipeline/etl/score/constants.py      |  4 +++
 .../data_pipeline/etl/score/etl_score.py      |  1 +
 .../data_pipeline/score/field_names.py        |  6 +++-
 .../data_pipeline/score/score_narwhal.py      | 33 +++++++++++++++----
 6 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml
index 604a81e6..ef2a6e6b 100644
--- a/data/data-pipeline/data_pipeline/content/config/csv.yml
+++ b/data/data-pipeline/data_pipeline/content/config/csv.yml
@@ -257,3 +257,12 @@ fields:
   - score_name: Percent of population not currently enrolled in college or graduate school
     label: Percent of residents who are not currently enrolled in higher ed
     format: percentage
+  - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
+    label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
+    format: bool
+  - score_name: Leaky underground storage tanks (percentile)
+    label: Leaky underground storage tanks (percentile)
+    format: percentage
+  - score_name: Leaky underground storage tanks
+    label: Leaky underground storage tanks
+    format: float
diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml
index b615c8ee..4820187d 100644
--- a/data/data-pipeline/data_pipeline/content/config/excel.yml
+++ b/data/data-pipeline/data_pipeline/content/config/excel.yml
@@ -153,12 +153,21 @@ sheets:
       - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
         label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and high percent of residents that are not higher ed students?
         format: bool
+      - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
+        label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
+        format: bool
       - score_name: Wastewater discharge (percentile)
         label: Wastewater discharge (percentile)
         format: percentage
+      - score_name: Leaky underground storage tanks (percentile)
+        label: Leaky underground storage tanks (percentile)
+        format: percentage
       - score_name: Wastewater discharge
         label: Wastewater discharge
         format: float
+      - score_name: Leaky underground storage tanks
+        label: Leaky underground storage tanks
+        format: float
       - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
         label: Greater than or equal to the 90th percentile for asthma, is low income, and high percent of residents that are not higher ed students?
         format: bool
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
index d00e7b24..8e7d17b1 100644
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -195,6 +195,8 @@ TILES_SCORE_COLUMNS = {
     + field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS",
     field_names.WASTEWATER_FIELD
     + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
+    field_names.UST_FIELD
+    + field_names.PERCENTILE_FIELD_SUFFIX: "UST_PFS",
     field_names.M_WATER: "M_WTR",
     field_names.M_WORKFORCE: "M_WKFC",
     field_names.M_CLIMATE: "M_CLT",
@@ -220,6 +222,7 @@ TILES_SCORE_COLUMNS = {
     field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD: "SFLI",
     field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HWLI",
     field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "WDLI",
+    field_names.UST_LOW_INCOME_FIELD: "USTLI",
     field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DLI",
     field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD: "ALI",
     field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HDLI",
@@ -242,6 +245,7 @@ TILES_SCORE_COLUMNS = {
     field_names.NPL_PCTILE_THRESHOLD: "NPL_ET",
     field_names.TSDF_PCTILE_THRESHOLD: "TSDF_ET",
     field_names.WASTEWATER_PCTILE_THRESHOLD: "WD_ET",
+    field_names.UST_PCTILE_THRESHOLD: "UST_ET",
     field_names.DIABETES_PCTILE_THRESHOLD: "DB_ET",
     field_names.ASTHMA_PCTILE_THRESHOLD: "A_ET",
     field_names.HEART_DISEASE_PCTILE_THRESHOLD: "HD_ET",
diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index 5d528dd6..b073ce5a 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -413,6 +413,7 @@ class ScoreETL(ExtractTransformLoad):
             field_names.NPL_FIELD,
             field_names.WASTEWATER_FIELD,
             field_names.LEAD_PAINT_FIELD,
+            field_names.UST_FIELD,
             field_names.UNDER_5_FIELD,
             field_names.OVER_64_FIELD,
             field_names.LINGUISTIC_ISO_FIELD,
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py
index 04bc62e4..5b7a88af 100644
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -170,7 +170,7 @@ TSDF_FIELD = "Proximity to hazardous waste sites"
 NPL_FIELD = "Proximity to NPL sites"
 AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk"
 RESPIRATORY_HAZARD_FIELD = "Respiratory hazard index"
-UST_FIELD = "Underground storage tanks"
+UST_FIELD = "Leaky underground storage tanks"
 
 LOW_INCOME_THRESHOLD = "Exceeds FPL200 threshold"
 
@@ -430,6 +430,8 @@ HAZARDOUS_WASTE_LOW_INCOME_FIELD = (
 
 # Critical Clean Water and Waste Infrastructure
 WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
+UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?"
+
 
 # Health Burdens
 DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?"
@@ -629,6 +631,8 @@ RMP_PCTILE_THRESHOLD = (
 NPL_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for NPL (superfund sites) proximity"
 TSDF_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to hazardous waste sites"
 WASTEWATER_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge"
+UST_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underwater storage tanks"
+
 DIABETES_PCTILE_THRESHOLD = (
     f"Greater than or equal to the {PERCENTILE}th percentile for diabetes"
 )
diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py
index d075a1c2..1299acaa 100644
--- a/data/data-pipeline/data_pipeline/score/score_narwhal.py
+++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -442,23 +442,42 @@ class ScoreNarwhal(Score):
             ]
             >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
         )
-
-        # Straight copy here in case we add additional water fields.
-        self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
-            field_names.WASTEWATER_PCTILE_THRESHOLD
-        ].copy()
+        self.df[field_names.UST_PCTILE_THRESHOLD] = (
+            self.df[field_names.UST_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
+            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
+        )
 
         self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = (
             self.df[field_names.WASTEWATER_PCTILE_THRESHOLD]
             & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
         )
 
+        self.df[field_names.UST_LOW_INCOME_FIELD] = (
+            self.df[field_names.UST_PCTILE_THRESHOLD]
+            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
+        )
+
+        self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
+            [
+                field_names.WASTEWATER_PCTILE_THRESHOLD,
+                field_names.UST_PCTILE_THRESHOLD,
+            ]
+        ].max(axis=1)
+
         self._increment_total_eligibility_exceeded(
-            [field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD],
+            [
+                field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
+                field_names.UST_LOW_INCOME_FIELD,
+            ],
             skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
         )
 
-        return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]
+        return self.df[
+            [
+                field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
+                field_names.UST_LOW_INCOME_FIELD,
+            ]
+        ].any(axis=1)
 
     def _health_factor(self) -> bool:
         # In Xth percentile or above for diabetes (Source: CDC Places)