Make tribal overlap set score N (#2004)

* Add "Is a Tribal DAC" field (#1998) * Add tribal DACs to score N final (#1998) * Add new fields to downloads (#1998) * Make an int a float (#1998) * Update field names, apply feedback (#1998)
2025-09-29 21:23:18 -07:00 · 2022-10-12 14:59:23 -04:00 · 2022-10-12 14:59:23 -04:00 · 8b611edae6
commit 8b611edae6
parent d89c516131
12 changed files with 63 additions and 8 deletions
--- a/data/data-pipeline/data_pipeline/content/config/csv.yml
+++ b/data/data-pipeline/data_pipeline/content/config/csv.yml
@ -59,9 +59,15 @@ fields:
 - score_name: Definition N (communities) (based on adjacency index and low income alone)
  label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
  format: bool
 - score_name: Identified as disadvantaged due to tribal overlap
  label: Identified as disadvantaged due to tribal overlap
  format: bool
 - score_name: Definition N community, including adjacency index tracts
  label: Identified as disadvantaged
  format: bool
 - score_name: Percentage of tract that is disadvantaged
  label:  Percentage of tract that is disadvantaged by area
  format: percentage
 - score_name: Definition N (communities) (average of neighbors)
  label: Share of neighbors that are identified as disadvantaged
  format: percentage
--- a/data/data-pipeline/data_pipeline/content/config/excel.yml
+++ b/data/data-pipeline/data_pipeline/content/config/excel.yml
@ -63,9 +63,15 @@ sheets:
      - score_name: Definition N (communities) (based on adjacency index and low income alone)
        label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
        format: bool
      - score_name: Identified as disadvantaged due to tribal overlap
        label: Identified as disadvantaged due to tribal overlap
        format: bool
      - score_name: Definition N community, including adjacency index tracts
        label: Identified as disadvantaged
        format: bool
      - score_name: Percentage of tract that is disadvantaged
        label:  Percentage of tract that is disadvantaged by area
        format: percentage
      - score_name: Definition N (communities) (average of neighbors)
        label: Share of neighbors that are identified as disadvantaged
        format: percentage
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@ -279,6 +279,8 @@ TILES_SCORE_COLUMNS = {
    field_names.SCORE_N_COMMUNITIES
    + field_names.ADJACENT_MEAN_SUFFIX: "SN_DON",
    field_names.SCORE_N_COMMUNITIES: "SN_NO_DON",
    field_names.IS_TRIBAL_DAC: "SN_T",
    field_names.PERCENT_OF_TRACT_IS_DAC: "SN_PERC",
    field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
    field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
    field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
@ -472,4 +474,5 @@ TILES_SCORE_FLOAT_COLUMNS = [
    field_names.AML_BOOLEAN,
    field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
    field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
    field_names.PERCENT_OF_TRACT_IS_DAC,
 ]
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -505,6 +505,7 @@ class ScoreETL(ExtractTransformLoad):
            field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
            field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
            field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
            field_names.IS_TRIBAL_DAC,
        ]
        # For some columns, high values are "good", so we want to reverse the percentile
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
@ -48,6 +48,7 @@ class TribalOverlapETL(ExtractTransformLoad):
    ANNETTE_ISLAND_TRIBAL_NAME = "Annette Island LAR"
    CRS_INTEGER = 3857
    TRIBAL_OVERLAP_CUTOFF = 0.995  # Percentage of overlap that rounds to 100%
    # Define these for easy code completion
    def __init__(self):
@ -58,6 +59,7 @@ class TribalOverlapETL(ExtractTransformLoad):
            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
            field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
            field_names.IS_TRIBAL_DAC,
        ]
        self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
@ -72,8 +74,9 @@ class TribalOverlapETL(ExtractTransformLoad):
        str_list = sorted(str_list)
        return ", ".join(str_list)
-    @staticmethod
+    @classmethod
    def _adjust_percentage_for_frontend(
        cls,
        percentage_float: float,
    ) -> Optional[float]:
        """Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
@ -81,7 +84,7 @@ class TribalOverlapETL(ExtractTransformLoad):
            return None
        if percentage_float < 0.01:
            return 0.0
-        if percentage_float > 0.9995:
+        if percentage_float > cls.TRIBAL_OVERLAP_CUTOFF:
            return 1.0
        return percentage_float
@ -246,6 +249,11 @@ class TribalOverlapETL(ExtractTransformLoad):
            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
        ] = None
        merged_output_df[field_names.IS_TRIBAL_DAC] = (
            merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT]
            > self.TRIBAL_OVERLAP_CUTOFF
        )
        # The very final thing we want to do is produce a string for the front end to show
        # We do this here so that all of the logic is included
        merged_output_df[
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@ -11,9 +11,6 @@ STATE_FIELD = "State/Territory"
 COUNTY_FIELD = "County Name"
 # Definition Narwhal fields
 FINAL_SCORE_N_BOOLEAN = (
    "Definition M community, including adjacency index tracts"
 )
 SCORE_N_COMMUNITIES = "Definition N (communities)"
 N_CLIMATE = "Climate Factor (Definition N)"
 N_ENERGY = "Energy Factor (Definition N)"
@ -368,6 +365,8 @@ PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
 PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
    "Percent of the Census tract that is within Tribal areas, for display"
 )
 IS_TRIBAL_DAC = "Identified as disadvantaged due to tribal overlap"
 PERCENT_OF_TRACT_IS_DAC = "Percentage of tract that is disadvantaged"
 #####
 # Names for individual factors being exceeded
--- a/data/data-pipeline/data_pipeline/score/score_narwhal.py
+++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@ -997,6 +997,33 @@ class ScoreNarwhal(Score):
            ]
        )
    def _mark_tribal_dacs(self) -> None:
        """Per the October 7th compromise (#1988),
        tracts that are approx 100% tribal are Score N communities.

        Overwrites ``field_names.SCORE_N_COMMUNITIES`` in ``self.df``: rows
        whose ``field_names.IS_TRIBAL_DAC`` flag is set become ``True``; all
        other rows keep their existing value.
        """
        # NaN is truthy when used as an `np.where` condition, so a tract with a
        # missing tribal flag would otherwise be promoted to a DAC. Treat
        # missing as "not a tribal DAC" and force a strict boolean mask.
        is_tribal_dac = (
            self.df[field_names.IS_TRIBAL_DAC].fillna(False).astype(bool)
        )
        self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
            is_tribal_dac,
            True,
            self.df[field_names.SCORE_N_COMMUNITIES],
        )
    def _get_percent_of_tract_that_is_dac(self) -> np.ndarray:
        """Per the October 7th compromise (#1988),
        tracts can be partially DACs if some portion of the tract is tribal land.

        Rules are as follows:
        If a tract is flagged by FINAL_SCORE_N_BOOLEAN, it is 100% a DAC
        If a tract is not, but contains tribal land, the percent that is tribal land is a DAC.

        Returns:
            One value per row of ``self.df`` (the result of ``np.where``):
            ``1.0`` where ``field_names.FINAL_SCORE_N_BOOLEAN`` is truthy,
            otherwise that row's ``field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT``
            with missing values replaced by ``0.0``.
        """
        # Tracts with no recorded tribal overlap contribute 0% rather than NaN.
        # NOTE(review): values are presumably fractions in [0, 1] — confirm.
        tribal_percent = self.df[
            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT
        ].fillna(0.0)
        return np.where(
            self.df[field_names.FINAL_SCORE_N_BOOLEAN],
            1.0,
            tribal_percent,
        )
    def add_columns(self) -> pd.DataFrame:
        logger.info("Adding Score Narhwal")
        self.df[field_names.THRESHOLD_COUNT] = 0
@ -1031,10 +1058,15 @@ class ScoreNarwhal(Score):
        ]
        self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
        self.df[field_names.SCORE_N_COMMUNITIES] = self.df[factors].any(axis=1)
        self._mark_tribal_dacs()
        self.df[
            field_names.SCORE_N_COMMUNITIES
            + field_names.PERCENTILE_FIELD_SUFFIX
        ] = self.df[field_names.SCORE_N_COMMUNITIES].astype(int)
        self._mark_donut_hole_tracts()
        self.df[
            field_names.PERCENT_OF_TRACT_IS_DAC
        ] = self._get_percent_of_tract_that_is_dac()
        return self.df