mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00
Adding island area indicators to the tiles (#1213)
This updates the backend to produce tile data with island indicators and island fields. It contains: (1) new tile codes for island data; (2) a threshold column that specifies the number of thresholds to show; and (3) a UI experience column that specifies which UI experience to show. TODO: Drop the logger info message from main :)
This commit is contained in:
parent
b86450c72b
commit
389eb59ac4
9 changed files with 116 additions and 36 deletions
|
@ -77,6 +77,29 @@ PERCENT_PREFIXES_SUFFIXES = [
|
|||
]
|
||||
TILES_ROUND_NUM_DECIMALS = 2
|
||||
|
||||
# The following constants and fields get used by the front end to change the side panel.
|
||||
# The islands, Puerto Rico and the nation all have different
|
||||
# data available, and as a consequence, show a different number of fields.
|
||||
|
||||
# Controlling Tile user experience columns
|
||||
THRESHOLD_COUNT_TO_SHOW_FIELD_NAME = "Thresholds"
|
||||
TILES_ISLAND_AREAS_THRESHOLD_COUNT = 4
|
||||
TILES_PUERTO_RICO_THRESHOLD_COUNT = 5
|
||||
TILES_NATION_THRESHOLD_COUNT = 21
|
||||
|
||||
# Note that the FIPS code is a string
|
||||
# The FIPS codes listed are:
|
||||
# 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
|
||||
TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"]
|
||||
TILES_PUERTO_RICO_FIPS_CODE = ["72"]
|
||||
|
||||
# Constant to reflect UI Experience version
|
||||
# "Nation", meaning the 50 states and DC, is the term used by the Census
|
||||
USER_INTERFACE_EXPERIENCE_FIELD_NAME = "UI Experience"
|
||||
NATION_USER_EXPERIENCE = "Nation"
|
||||
PUERTO_RICO_USER_EXPERIENCE = "Puerto Rico"
|
||||
ISLAND_AREAS_USER_EXPERIENCE = "Island Areas"
|
||||
|
||||
# FEMA rounding columns
|
||||
FEMA_ROUND_NUM_COLUMNS = [
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
||||
|
@ -166,9 +189,20 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S",
|
||||
field_names.THRESHOLD_COUNT: "TC",
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "IAPLHSE",
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
|
||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
|
||||
# Percentiles for Island areas' workforce columns
|
||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "IALMILHSE_PFS",
|
||||
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAPLHSE_PFS",
|
||||
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS",
|
||||
# Percentage of HS Degree completion for Islands
|
||||
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
|
||||
}
|
||||
|
||||
# columns to round floats to 2 decimals
|
||||
|
@ -202,6 +236,18 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
# Percentiles for Island areas' workforce columns
|
||||
# To be clear: the island areas pull from 2009 census. PR does not.
|
||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
# Island areas HS degree attainment rate
|
||||
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
|
||||
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
@ -297,4 +343,12 @@ DOWNLOADABLE_SCORE_COLUMNS = [
|
|||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
|
|
|
@ -63,7 +63,7 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
score_path, dtype={self.GEOID_TRACT_FIELD_NAME: "string"}
|
||||
)
|
||||
|
||||
# Convert total population to an int:
|
||||
# Convert total population to an int
|
||||
df["Total population"] = df["Total population"].astype(
|
||||
int, errors="ignore"
|
||||
)
|
||||
|
@ -234,6 +234,36 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
axis=0,
|
||||
)
|
||||
|
||||
logger.info("Adding fields for island areas and Puerto Rico")
|
||||
# The below operation constructs variables for the front end.
|
||||
# Since the Island Areas, Puerto Rico, and the nation all have a different
|
||||
# set of available data, each has its own user experience.
|
||||
|
||||
# First, we identify which user experience -- Puerto Rico, islands, or nation --
|
||||
# a row pertains to using the FIPS codes
|
||||
fips_code_series = score_tiles[field_names.GEOID_TRACT_FIELD].str[:2]
|
||||
score_tiles[constants.USER_INTERFACE_EXPERIENCE_FIELD_NAME] = np.where(
|
||||
fips_code_series.isin(constants.TILES_PUERTO_RICO_FIPS_CODE),
|
||||
constants.PUERTO_RICO_USER_EXPERIENCE,
|
||||
np.where(
|
||||
fips_code_series.isin(constants.TILES_ISLAND_AREA_FIPS_CODES),
|
||||
constants.ISLAND_AREAS_USER_EXPERIENCE,
|
||||
constants.NATION_USER_EXPERIENCE,
|
||||
),
|
||||
)
|
||||
|
||||
# Next, we determine how many thresholds the front end should show, entirely
|
||||
# based on the variable for user interface experience.
|
||||
score_tiles[constants.THRESHOLD_COUNT_TO_SHOW_FIELD_NAME] = score_tiles[
|
||||
constants.USER_INTERFACE_EXPERIENCE_FIELD_NAME
|
||||
].map(
|
||||
{
|
||||
constants.PUERTO_RICO_USER_EXPERIENCE: constants.TILES_PUERTO_RICO_THRESHOLD_COUNT,
|
||||
constants.ISLAND_AREAS_USER_EXPERIENCE: constants.TILES_ISLAND_AREAS_THRESHOLD_COUNT,
|
||||
constants.NATION_USER_EXPERIENCE: constants.TILES_NATION_THRESHOLD_COUNT,
|
||||
}
|
||||
)
|
||||
|
||||
# create indexes
|
||||
score_tiles = score_tiles.rename(
|
||||
columns=constants.TILES_SCORE_COLUMNS,
|
||||
|
@ -306,6 +336,7 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
transformed_states,
|
||||
transformed_score,
|
||||
)
|
||||
|
||||
self.output_score_tiles_df = self._create_tile_data(
|
||||
output_score_county_state_merged_df
|
||||
)
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -3,6 +3,7 @@ PERCENTILE_FIELD_SUFFIX = " (percentile)"
|
|||
PERCENTILE_URBAN_RURAL_FIELD_SUFFIX = " (percentile urban/rural)"
|
||||
MIN_MAX_FIELD_SUFFIX = " (min-max normalized)"
|
||||
TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
|
||||
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
|
||||
|
||||
# Geographic field names
|
||||
GEOID_TRACT_FIELD = "GEOID10_TRACT"
|
||||
|
|
|
@ -34,9 +34,9 @@ class ScoreM(Score):
|
|||
This function is fairly logically complicated. It takes the following steps:
|
||||
|
||||
1. Combine the two different fields into a single field.
|
||||
2. Calculate the 90th percentile cutoff raw value for the combined field.
|
||||
2. Calculate the 90th percentile for the combined field.
|
||||
3. Create a boolean series that is true for any census tract in the island
|
||||
areas (and only the island areas) that exceeds this cutoff.
|
||||
areas (and only the island areas) that exceeds this percentile.
|
||||
|
||||
For step one, it combines data that is either the island area's Decennial Census
|
||||
value in 2009 or the state's value in 5-year ACS ending in 2010.
|
||||
|
@ -57,22 +57,20 @@ class ScoreM(Score):
|
|||
[column_from_island_areas, column_from_decennial_census]
|
||||
].mean(axis=1, skipna=True)
|
||||
|
||||
logger.info(
|
||||
f"Combined field `{combined_column_name}` has "
|
||||
f"{df[combined_column_name].isnull().sum()} "
|
||||
f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) "
|
||||
f"missing values for census tracts. "
|
||||
# Create a percentile field for use in the Islands / PR visualization
|
||||
# TODO: move this code
|
||||
# In the code below, percentiles are constructed based on the combined column
|
||||
# of census and island data, but only reported for the island areas (where there
|
||||
# is no other comprehensive percentile information)
|
||||
return_series_name = (
|
||||
column_from_island_areas
|
||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
)
|
||||
|
||||
# Calculate the percentile threshold raw value.
|
||||
raw_threshold = np.nanquantile(
|
||||
a=df[combined_column_name], q=threshold_cutoff_for_island_areas
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"For combined field `{combined_column_name}`, "
|
||||
f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
|
||||
f"raw value of {raw_threshold:.3f}."
|
||||
df[return_series_name] = np.where(
|
||||
df[column_from_decennial_census].isna(),
|
||||
df[combined_column_name].rank(pct=True),
|
||||
np.nan,
|
||||
)
|
||||
|
||||
threshold_column_name = (
|
||||
|
@ -81,20 +79,7 @@ class ScoreM(Score):
|
|||
)
|
||||
|
||||
df[threshold_column_name] = (
|
||||
df[column_from_island_areas] >= raw_threshold
|
||||
)
|
||||
|
||||
percent_of_tracts_highlighted = (
|
||||
100
|
||||
* df[threshold_column_name].sum()
|
||||
/ df[column_from_island_areas].notnull().sum()
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"For `{threshold_column_name}`, "
|
||||
f"{df[threshold_column_name].sum()} ("
|
||||
f"{percent_of_tracts_highlighted:.2f}% of tracts that have non-null data "
|
||||
f"in the column) have a value of TRUE."
|
||||
df[return_series_name] >= threshold_cutoff_for_island_areas
|
||||
)
|
||||
|
||||
return df, threshold_column_name
|
||||
|
@ -615,6 +600,8 @@ class ScoreM(Score):
|
|||
]
|
||||
|
||||
# First, combine unemployment.
|
||||
# This will include an adjusted percentile column for the island areas
|
||||
# to be used by the front end.
|
||||
(
|
||||
self.df,
|
||||
island_areas_unemployment_criteria_field_name,
|
||||
|
@ -627,6 +614,8 @@ class ScoreM(Score):
|
|||
)
|
||||
|
||||
# Next, combine poverty.
|
||||
# This will include an adjusted percentile column for the island areas
|
||||
# to be used by the front end.
|
||||
(
|
||||
self.df,
|
||||
island_areas_poverty_criteria_field_name,
|
||||
|
@ -640,6 +629,11 @@ class ScoreM(Score):
|
|||
|
||||
# Also check whether low area median income is 90th percentile or higher
|
||||
# within the islands.
|
||||
|
||||
# Note that because the field for low median does not have to be combined,
|
||||
# unlike the other fields, we do not need to create a new percentile
|
||||
# column. This code should probably be refactored when (TODO) we do the big
|
||||
# refactor.
|
||||
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
|
||||
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
||||
f"{field_names.PERCENTILE}th percentile"
|
||||
|
|
Loading…
Add table
Reference in a new issue