Donut Hole DAC Qualification - Solution for Water Boundaries

This commit is contained in:
alene 2024-12-03 10:46:45 -05:00 committed by Carlos Felix
commit 84c1a3aaf4
3 changed files with 5816 additions and 0 deletions

File diff suppressed because it is too large Load diff

View file

@ -10,6 +10,9 @@ GEOID_TRACT_FIELD = "GEOID10_TRACT"
# Field names for a tract's state/territory and county name
# (presumably DataFrame column labels -- confirm against callers)
STATE_FIELD = "State/Territory"
COUNTY_FIELD = "County Name"
# Census per-tract land area field name. The tract adjacency calculation
# keeps only rows where this field is > 0 in order to drop water areas.
LAND_AREA_FIELD = "ALAND10"
# Definition Narwhal fields
SCORE_N_COMMUNITIES = "Definition N (communities)"
N_CLIMATE = "Climate Factor (Definition N)"

View file

@ -11,6 +11,42 @@ from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)
def in_water_range(x: int) -> bool:
    """Check whether a tract code is within the water area ID range.

    Args:
        x: Integer representation of the 6-character census tract code
            (equivalent to the last 6 characters of the full geo ID).

    Returns:
        True if ``x`` lies in the inclusive range 990000-990099,
        False otherwise.

    NB: Not currently in use; artifact of an attempt to filter
    water areas using Census tract ID ranges.
    """
    # Both bounds are inclusive. bool() keeps the result a plain Python
    # bool even if a caller passes a numpy integer.
    return bool(990000 <= x <= 990099)
def full_geo_id_to_water_range_bool(x: str):
    """Report whether a full geo ID falls in the water area ID range.

    The input should be the 11-character Census tract identifier, whose
    final 6 characters are the census tract code. Those trailing
    characters are parsed as an integer and handed to
    ``in_water_range`` for the actual range check.

    Returns whatever ``in_water_range`` returns for that integer:
    True when the tract code is in the water area range, False when
    it is not.

    NB: Not currently in use; artifact of an attempt to filter
    water areas using Census tract ID ranges.
    """
    # Slice off the tract code, convert it, and delegate in one step.
    return in_water_range(int(x[-6:]))
def calculate_tract_adjacency_scores(
df: pd.DataFrame, score_column: str
) -> pd.DataFrame:
@ -33,11 +69,15 @@ def calculate_tract_adjacency_scores(
ORIGINAL_TRACT = "ORIGINAL_TRACT"
logger.debug("Calculating tract adjacency scores")
tract_data = get_tract_geojson()
df: gpd.GeoDataFrame = tract_data.merge(
df, on=field_names.GEOID_TRACT_FIELD
)
df = df.rename(columns={field_names.GEOID_TRACT_FIELD: ORIGINAL_TRACT})
# remove water areas from input frame
df = df[df[field_names.LAND_AREA_FIELD] > 0]
logger.debug("Perfoming spatial join to find all adjacent tracts")
adjacent_tracts: gpd.GeoDataFrame = df.sjoin(
tract_data, predicate="touches"