Donut Hole DAC Qualification - Solution for Water Boundaries

2025-07-28 14:51:16 -07:00 · 2024-12-03 10:46:45 -05:00 · 2024-12-03 10:46:45 -05:00 · 84c1a3aaf4
commit 84c1a3aaf4
parent 0ad64588ea
3 changed files with 5816 additions and 0 deletions
--- a/data/data-pipeline/data_pipeline/ipython/donut_hole_testing_2024.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/donut_hole_testing_2024.ipynb
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@ -10,6 +10,9 @@ GEOID_TRACT_FIELD = "GEOID10_TRACT"
 STATE_FIELD = "State/Territory"
 COUNTY_FIELD = "County Name"

+# Census per-tract land area column; rows where it is not > 0 are dropped as water areas in score/utils.py
+LAND_AREA_FIELD = "ALAND10"
+
 # Definition Narwhal fields
 SCORE_N_COMMUNITIES = "Definition N (communities)"
 N_CLIMATE = "Climate Factor (Definition N)"
--- a/data/data-pipeline/data_pipeline/score/utils.py
+++ b/data/data-pipeline/data_pipeline/score/utils.py
@ -11,6 +11,42 @@ from data_pipeline.utils import get_module_logger
 logger = get_module_logger(__name__)


+def in_water_range(x: int):
+    """Report whether a census tract code falls in the water-area block.
+
+    ``x`` is the 6-character census tract code expressed as an integer
+    (i.e. the last 6 characters of the full geo ID).
+
+    The result is True for codes from 990000 through 990099 inclusive,
+    and False for every other code.
+
+    NB: Currently unused; left over from an attempt to filter water
+    areas by Census tract ID ranges.
+    """
+    lowest_water_code = 990000
+    highest_water_code = 990099
+    return lowest_water_code <= x <= highest_water_code
+
+
+def full_geo_id_to_water_range_bool(x: str):
+    """Report whether a full geo ID belongs to the water-area ID range.
+
+    The trailing six characters of the ID (the census tract code) are
+    converted to an int, and that int is checked against the water-area
+    range by ``in_water_range``.
+
+    ``x`` should be the 11 character Census tract identifier.
+
+    The result is True when the ID is in the water area range,
+    and False when it is not.
+
+    NB: Currently unused; left over from an attempt to filter water
+    areas by Census tract ID ranges.
+    """
+    tract_code = x[-6:]
+    return in_water_range(int(tract_code))
+
+
 def calculate_tract_adjacency_scores(
    df: pd.DataFrame, score_column: str
 ) -> pd.DataFrame:
@ -33,11 +69,15 @@ def calculate_tract_adjacency_scores(
    ORIGINAL_TRACT = "ORIGINAL_TRACT"
    logger.debug("Calculating tract adjacency scores")
    tract_data = get_tract_geojson()
+
    df: gpd.GeoDataFrame = tract_data.merge(
        df, on=field_names.GEOID_TRACT_FIELD
    )
    df = df.rename(columns={field_names.GEOID_TRACT_FIELD: ORIGINAL_TRACT})

+    # remove water areas from input frame
+    df = df[df[field_names.LAND_AREA_FIELD] > 0]
+
    logger.debug("Perfoming spatial join to find all adjacent tracts")
    adjacent_tracts: gpd.GeoDataFrame = df.sjoin(
        tract_data, predicate="touches"