2119 there are a few tracts places where the values over the 90th percentile are not showing as blue (#2160)

* ipython notebook to debug greenspace indicator * changing greenspace and income to just greenspace * fixing greenspace indicator to not include low income * Update greenspace_indicator.ipynb * running tox checks * update score narwhal to pass smoke test (fix donut threshold) --------- Co-authored-by: Travis Newby <travis.b.newby@omb.eop.gov>
2025-02-22 01:31:25 -08:00 · 2023-03-02 15:15:36 -05:00 · 2023-03-02 15:15:36 -05:00 · 4d9c1dd11e
commit 4d9c1dd11e
parent 7384cc5fec
3 changed files with 2698 additions and 6 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -353,7 +353,7 @@ TILES_SCORE_COLUMNS = {
    field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD: "ADJ_ET",
    field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
    + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
-    field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
+    field_names.NON_NATURAL_PCTILE_THRESHOLD: "IS_ET",  # NON_NATURAL_LOW_INCOME_FIELD_NAME
    field_names.AML_BOOLEAN_FILLED_IN: "AML_ET",
    field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_RAW",
    field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME: "FUDS_ET",
--- a/data/data-pipeline/data_pipeline/ipython/greenspace_indicator.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/greenspace_indicator.ipynb
--- a/data/data-pipeline/data_pipeline/score/score_narwhal.py
+++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -4,7 +4,9 @@ import data_pipeline.etl.score.constants as constants
 import data_pipeline.score.field_names as field_names
 import numpy as np
 import pandas as pd
-from data_pipeline.score.score import Score
+from data_pipeline.score.score import (
+    Score,
+)  # this just adds the framework of the score class
 from data_pipeline.score.utils import calculate_tract_adjacency_scores
 from data_pipeline.utils import get_module_logger

@@ -14,7 +16,7 @@ logger = get_module_logger(__name__)
 class ScoreNarwhal(Score):
    """Score N, aka Narwhal."""

-    LOW_INCOME_THRESHOLD: float = 0.65
+    LOW_INCOME_THRESHOLD: float = 0.65  # this is the low income threshold that gets compared against the other indicators. It is a percentile rank
    MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20
    ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
    MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
@@ -441,9 +443,13 @@ class ScoreNarwhal(Score):
        )

        # any of the burdens
-        self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = self.df[
+        self.df[
+            field_names.HOUSING_THREHSOLD_EXCEEDED
+        ] = self.df[  # we need this to include all of the ones that are intersected with low income in order to properly calculate the total score.
            housing_eligibility_columns
-        ].any(axis="columns")
+        ].any(
+            axis="columns"
+        )

        self._increment_total_eligibility_exceeded(
            housing_eligibility_columns,
@@ -461,7 +467,7 @@ class ScoreNarwhal(Score):
        # Source: Census's American Community Survey

        pollution_eligibility_columns = [
-            field_names.RMP_LOW_INCOME_FIELD,
+            field_names.RMP_LOW_INCOME_FIELD,  # include low income in these fields because they help calculate the overall score
            field_names.SUPERFUND_LOW_INCOME_FIELD,
            field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
            field_names.AML_LOW_INCOME_FIELD,