Fixup TA_COUNT and TA_PERC (#1991)

* Change TA_PERC, change TA_COUNT (#1988, #1989) - Make TA_PERC_STR back into a nullable float following the rules requested in #1989 - Move TA_COUNT to be TA_COUNT_AK, also add a null TA_COUNT_C for CONUS that we can fill in later. * Fix typo comment (#1988)
2025-09-29 21:23:18 -07:00 · 2022-10-06 16:24:05 -04:00 · 2022-10-06 16:24:05 -04:00 · 72de938c32
commit 72de938c32
parent 6505d49439
8 changed files with 53 additions and 45 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@ -394,9 +394,10 @@ TILES_SCORE_COLUMNS = {
    field_names.PERCENT_AGE_UNDER_10: "AGE_10",
    field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE",
    field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
-    field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
+    field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK: "TA_COUNT_AK",
+    field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS: "TA_COUNT_C",
    field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
-    field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING: "TA_PERC_STR",
+    field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE",
 }

 # columns to round floats to 2 decimals
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -488,7 +488,9 @@ class ScoreETL(ExtractTransformLoad):
            field_names.PERCENT_AGE_10_TO_64,
            field_names.PERCENT_AGE_OVER_64,
            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
-            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
        ] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS

        non_numeric_columns = [
@ -496,7 +498,6 @@ class ScoreETL(ExtractTransformLoad):
            field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
            field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
            field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
-            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
        ]

        boolean_columns = [
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
@ -1,3 +1,5 @@
+from typing import Optional
+
 import geopandas as gpd
 import numpy as np
 import pandas as pd
@ -51,12 +53,14 @@ class TribalOverlapETL(ExtractTransformLoad):
    def __init__(self):
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
-            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
            field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
-            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
        ]

+        self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
        self.output_df: pd.DataFrame
        self.census_tract_gdf: gpd.GeoDataFrame
        self.tribal_gdf: gpd.GeoDataFrame
@ -69,40 +73,18 @@ class TribalOverlapETL(ExtractTransformLoad):
        return ", ".join(str_list)

    @staticmethod
-    def _adjust_percentage_to_string(percentage_float: float) -> str:
-        """Helper method that converts numeric floats to strings based on what-to-show rules.
-
-        What are these rules?
-        0. If None, return none
-        1. If the percentage is below 1%, produce 'less than 1%'
-        2. If the percentage is above 99.95%, produce '100%'
-        3. If the percentage is X.00 when rounded to two sig digits, display the integer of the percent
-        4. If the percentage has unique significant digits, report two digits
-        """
-        # Rule 0
-        if not percentage_float:
-            # I believe we need to do this because JS will do weird things with a mix-type column?
-            return "No tribal areas"
-        # Rule 1
+    def _adjust_percentage_for_frontend(
+        percentage_float: float,
+    ) -> Optional[float]:
+        """Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
+        if percentage_float is None:
+            return None
        if percentage_float < 0.01:
-            return "less than 1%"
-        # Rule 2
+            return 0.0
        if percentage_float > 0.9995:
-            return "100%"
+            return 1.0

-        rounded_percentage_str = str(round(percentage_float, 4) * 100)
-        first_digits, last_digits = rounded_percentage_str.split(".")
-
-        # Rule 3 (this is a shorthand because round(4) will truncate repeated 0s)
-        if last_digits[-1] == "0":
-            return first_digits + "%"
-
-        # Rule 4
-        if last_digits != "00":
-            return rounded_percentage_str + "%"
-
-        # There is something missing!
-        raise Exception("Yikes! The string conversion here failed!")
+        return percentage_float

    def extract(self) -> None:
        self.census_tract_gdf = get_tract_geojson()
@ -130,7 +112,7 @@ class TribalOverlapETL(ExtractTransformLoad):

        tribal_overlap_with_tracts = tribal_overlap_with_tracts.rename(
            columns={
-                field_names.TRIBAL_ID: field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
+                field_names.TRIBAL_ID: self.OVERALL_TRIBAL_COUNT,
                field_names.TRIBAL_LAND_AREA_NAME: field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
            }
        )
@ -245,12 +227,31 @@ class TribalOverlapETL(ExtractTransformLoad):
            merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT],
        )

+        # Counting tribes in the lower 48 is different from counting in AK,
+        # so per request by the design and frontend team, we remove all the
+        # counts outside AK
+        merged_output_df[
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK
+        ] = np.where(
+            # In Alaska
+            (merged_output_df_state_fips_code == "02"),
+            # Keep the counts
+            merged_output_df[self.OVERALL_TRIBAL_COUNT],
+            # Otherwise, null them
+            None,
+        )
+
+        # TODO: Count tribal areas in the lower 48 correctly
+        merged_output_df[
+            field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
+        ] = None
+
        # The very final thing we want to do is produce a string for the front end to show
        # We do this here so that all of the logic is included
        merged_output_df[
-            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY
        ] = merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT].apply(
-            self._adjust_percentage_to_string
+            self._adjust_percentage_for_frontend
        )

        self.output_df = merged_output_df
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@ -355,12 +355,17 @@ TRIBAL_ID = "tribalId"
 TRIBAL_LAND_AREA_NAME = "landAreaName"

 # Tribal overlap variables
-COUNT_OF_TRIBAL_AREAS_IN_TRACT = "Number of Tribal areas within Census tract"
+COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS = (
+    "Number of Tribal areas within Census tract"
+)
+COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK = (
+    "Number of Tribal areas within Census tract for Alaska"
+)
 NAMES_OF_TRIBAL_AREAS_IN_TRACT = "Names of Tribal areas within Census tract"
 PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
    "Percent of the Census tract that is within Tribal areas"
 )
-PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING = (
+PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
    "Percent of the Census tract that is within Tribal areas, for display"
 )