Changing LHE in tiles to a boolean (#1767)

Also includes merging and cleanup for the release.
2025-09-21 14:11:14 -07:00 · 2022-08-03 13:55:58 -04:00 · 2022-08-03 13:55:58 -04:00 · 0d90ae563a
commit 0d90ae563a
parent b0a728437c
5 changed files with 18 additions and 89 deletions
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -432,7 +432,6 @@ HAZARDOUS_WASTE_LOW_INCOME_FIELD = (
 WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
 UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?"

-
 # Health Burdens
 DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?"
 ASTHMA_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for asthma and is low income?"
--- a/data/data-pipeline/data_pipeline/score/score_narwhal.py
+++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -435,6 +435,11 @@ class ScoreNarwhal(Score):
        # poverty level and has a low percent of higher ed students
        # Source: Census's American Community Survey

+        eligibility_columns = [
+            field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
+            field_names.UST_LOW_INCOME_FIELD,
+        ]
+
        self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] = (
            self.df[
                field_names.WASTEWATER_FIELD
@@ -457,28 +462,17 @@ class ScoreNarwhal(Score):
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

-        self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
-            [
-                field_names.WASTEWATER_PCTILE_THRESHOLD,
-                field_names.UST_PCTILE_THRESHOLD,
-            ]
-        ].max(axis=1)
-
        self._increment_total_eligibility_exceeded(
-            [
-                field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
-                field_names.UST_LOW_INCOME_FIELD,
-            ],
+            eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

-        return self.df[
-            [
-                field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
-                field_names.UST_LOW_INCOME_FIELD,
-            ]
+        self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
+            eligibility_columns
        ].any(axis=1)

+        return self.df[field_names.WATER_THRESHOLD_EXCEEDED]
+
    def _health_factor(self) -> bool:
        # In Xth percentile or above for diabetes (Source: CDC Places)
        # or
--- a/data/data-pipeline/data_pipeline/score/score_runner.py
+++ b/data/data-pipeline/data_pipeline/score/score_runner.py
@@ -28,7 +28,6 @@ class ScoreRunner:
        self.df = ScoreA(df=self.df).add_columns()
        self.df = ScoreB(df=self.df).add_columns()
        self.df = ScoreC(df=self.df).add_columns()
-        self.df = ScoreD(df=self.df).add_columns()
        self.df = ScoreF(df=self.df).add_columns()
        self.df = ScoreG(df=self.df).add_columns()
        self.df = ScoreH(df=self.df).add_columns()
@@ -38,33 +37,4 @@ class ScoreRunner:
        self.df = ScoreM(df=self.df).add_columns()
        self.df = ScoreNarwhal(df=self.df).add_columns()

-        # TODO do this with each score instead of in a bundle
-        # Create percentiles for these index scores
-        self.df = self._add_score_percentiles()
-
-        return self.df
-
-    def _add_score_percentiles(self) -> pd.DataFrame:
-        logger.info("Adding Score Percentiles")
-        for score_field in [
-            field_names.SCORE_A,
-            field_names.SCORE_B,
-            field_names.SCORE_C,
-            field_names.SCORE_D,
-            field_names.SCORE_E,
-        ]:
-            self.df[
-                f"{score_field}{field_names.PERCENTILE_FIELD_SUFFIX}"
-            ] = self.df[score_field].rank(pct=True)
-
-            for threshold in [0.25, 0.3, 0.35, 0.4]:
-                fraction_converted_to_percent = int(100 * threshold)
-                self.df[
-                    f"{score_field} (top {fraction_converted_to_percent}th percentile)"
-                ] = (
-                    self.df[
-                        f"{score_field}{field_names.PERCENTILE_FIELD_SUFFIX}"
-                    ]
-                    >= 1 - threshold
-                )
        return self.df