mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-05 14:14:19 -07:00
Fixup TA_COUNT and TA_PERC (#1991)
* Change TA_PERC, change TA_COUNT (#1988, #1989) - Make TA_PERC_STR back into a nullable float following the rules requested in #1989 - Move TA_COUNT to be TA_COUNT_AK, also add a null TA_COUNT_C for CONUS that we can fill in later. * Fix typo comment (#1988)
This commit is contained in:
parent
6505d49439
commit
72de938c32
8 changed files with 53 additions and 45 deletions
|
@ -394,9 +394,10 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.PERCENT_AGE_UNDER_10: "AGE_10",
|
||||
field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE",
|
||||
field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK: "TA_COUNT_AK",
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS: "TA_COUNT_C",
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING: "TA_PERC_STR",
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE",
|
||||
}
|
||||
|
||||
# columns to round floats to 2 decimals
|
||||
|
|
|
@ -488,7 +488,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.PERCENT_AGE_10_TO_64,
|
||||
field_names.PERCENT_AGE_OVER_64,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
|
||||
] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS
|
||||
|
||||
non_numeric_columns = [
|
||||
|
@ -496,7 +498,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
|
||||
]
|
||||
|
||||
boolean_columns = [
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,3 +1,5 @@
|
|||
from typing import Optional
|
||||
|
||||
import geopandas as gpd
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
@ -51,12 +53,14 @@ class TribalOverlapETL(ExtractTransformLoad):
|
|||
def __init__(self):
|
||||
self.COLUMNS_TO_KEEP = [
|
||||
self.GEOID_TRACT_FIELD_NAME,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
|
||||
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
|
||||
]
|
||||
|
||||
self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
|
||||
self.output_df: pd.DataFrame
|
||||
self.census_tract_gdf: gpd.GeoDataFrame
|
||||
self.tribal_gdf: gpd.GeoDataFrame
|
||||
|
@ -69,40 +73,18 @@ class TribalOverlapETL(ExtractTransformLoad):
|
|||
return ", ".join(str_list)
|
||||
|
||||
@staticmethod
|
||||
def _adjust_percentage_to_string(percentage_float: float) -> str:
|
||||
"""Helper method that converts numeric floats to strings based on what-to-show rules.
|
||||
|
||||
What are these rules?
|
||||
0. If None, return none
|
||||
1. If the percentage is below 1%, produce 'less than 1%'
|
||||
2. If the percentage is above 99.95%, produce '100%'
|
||||
3. If the percentage is X.00 when rounded to two sig digits, display the integer of the percent
|
||||
4. If the percentage has unique significant digits, report two digits
|
||||
"""
|
||||
# Rule 0
|
||||
if not percentage_float:
|
||||
# I believe we need to do this because JS will do weird things with a mix-type column?
|
||||
return "No tribal areas"
|
||||
# Rule 1
|
||||
def _adjust_percentage_for_frontend(
|
||||
percentage_float: float,
|
||||
) -> Optional[float]:
|
||||
"""Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
|
||||
if percentage_float is None:
|
||||
return None
|
||||
if percentage_float < 0.01:
|
||||
return "less than 1%"
|
||||
# Rule 2
|
||||
return 0.0
|
||||
if percentage_float > 0.9995:
|
||||
return "100%"
|
||||
return 1.0
|
||||
|
||||
rounded_percentage_str = str(round(percentage_float, 4) * 100)
|
||||
first_digits, last_digits = rounded_percentage_str.split(".")
|
||||
|
||||
# Rule 3 (this is a shorthand because round(4) will truncate repeated 0s)
|
||||
if last_digits[-1] == "0":
|
||||
return first_digits + "%"
|
||||
|
||||
# Rule 4
|
||||
if last_digits != "00":
|
||||
return rounded_percentage_str + "%"
|
||||
|
||||
# There is something missing!
|
||||
raise Exception("Yikes! The string conversion here failed!")
|
||||
return percentage_float
|
||||
|
||||
def extract(self) -> None:
|
||||
self.census_tract_gdf = get_tract_geojson()
|
||||
|
@ -130,7 +112,7 @@ class TribalOverlapETL(ExtractTransformLoad):
|
|||
|
||||
tribal_overlap_with_tracts = tribal_overlap_with_tracts.rename(
|
||||
columns={
|
||||
field_names.TRIBAL_ID: field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
field_names.TRIBAL_ID: self.OVERALL_TRIBAL_COUNT,
|
||||
field_names.TRIBAL_LAND_AREA_NAME: field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
}
|
||||
)
|
||||
|
@ -245,12 +227,31 @@ class TribalOverlapETL(ExtractTransformLoad):
|
|||
merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT],
|
||||
)
|
||||
|
||||
# Counting tribes in the lower 48 is different from counting in AK,
|
||||
# so per request by the design and frontend team, we remove all the
|
||||
# counts outside AK
|
||||
merged_output_df[
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK
|
||||
] = np.where(
|
||||
# In Alaska
|
||||
(merged_output_df_state_fips_code == "02"),
|
||||
# Keep the counts
|
||||
merged_output_df[self.OVERALL_TRIBAL_COUNT],
|
||||
# Otherwise, null them
|
||||
None,
|
||||
)
|
||||
|
||||
# TODO: Count tribal areas in the lower 48 correctly
|
||||
merged_output_df[
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
|
||||
] = None
|
||||
|
||||
# The very final thing we want to do is produce a string for the front end to show
|
||||
# We do this here so that all of the logic is included
|
||||
merged_output_df[
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY
|
||||
] = merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT].apply(
|
||||
self._adjust_percentage_to_string
|
||||
self._adjust_percentage_for_frontend
|
||||
)
|
||||
|
||||
self.output_df = merged_output_df
|
||||
|
|
|
@ -355,12 +355,17 @@ TRIBAL_ID = "tribalId"
|
|||
TRIBAL_LAND_AREA_NAME = "landAreaName"
|
||||
|
||||
# Tribal overlap variables
|
||||
COUNT_OF_TRIBAL_AREAS_IN_TRACT = "Number of Tribal areas within Census tract"
|
||||
COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS = (
|
||||
"Number of Tribal areas within Census tract"
|
||||
)
|
||||
COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK = (
|
||||
"Number of Tribal areas within Census tract for Alaska"
|
||||
)
|
||||
NAMES_OF_TRIBAL_AREAS_IN_TRACT = "Names of Tribal areas within Census tract"
|
||||
PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
|
||||
"Percent of the Census tract that is within Tribal areas"
|
||||
)
|
||||
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING = (
|
||||
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
|
||||
"Percent of the Census tract that is within Tribal areas, for display"
|
||||
)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue