mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
Adding island area indicators to the tiles (#1213)
This updates the backend to produce tile data with island indicators / island fields. Contains: (1) new tile codes for island data; (2) a threshold column that specifies the number of thresholds to show; (3) a UI experience column that specifies which UI experience to show. TODO: Drop the logger info message from main :)
This commit is contained in:
parent
b86450c72b
commit
389eb59ac4
9 changed files with 116 additions and 36 deletions
|
@ -77,6 +77,29 @@ PERCENT_PREFIXES_SUFFIXES = [
|
||||||
]
|
]
|
||||||
TILES_ROUND_NUM_DECIMALS = 2
|
TILES_ROUND_NUM_DECIMALS = 2
|
||||||
|
|
||||||
|
# The following constants and fields get used by the front end to change the side panel.
|
||||||
|
# The islands, Puerto Rico and the nation all have different
|
||||||
|
# data available, and as a consequence, show a different number of fields.
|
||||||
|
|
||||||
|
# Controlling Tile user experience columns
|
||||||
|
THRESHOLD_COUNT_TO_SHOW_FIELD_NAME = "Thresholds"
|
||||||
|
TILES_ISLAND_AREAS_THRESHOLD_COUNT = 4
|
||||||
|
TILES_PUERTO_RICO_THRESHOLD_COUNT = 5
|
||||||
|
TILES_NATION_THRESHOLD_COUNT = 21
|
||||||
|
|
||||||
|
# Note that the FIPS code is a string
|
||||||
|
# The FIPS codes listed are:
|
||||||
|
# 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
|
||||||
|
TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"]
|
||||||
|
TILES_PUERTO_RICO_FIPS_CODE = ["72"]
|
||||||
|
|
||||||
|
# Constant to reflect UI Experience version
|
||||||
|
# "Nation" referring to 50 states and DC is from Census
|
||||||
|
USER_INTERFACE_EXPERIENCE_FIELD_NAME = "UI Experience"
|
||||||
|
NATION_USER_EXPERIENCE = "Nation"
|
||||||
|
PUERTO_RICO_USER_EXPERIENCE = "Puerto Rico"
|
||||||
|
ISLAND_AREAS_USER_EXPERIENCE = "Island Areas"
|
||||||
|
|
||||||
# FEMA rounding columns
|
# FEMA rounding columns
|
||||||
FEMA_ROUND_NUM_COLUMNS = [
|
FEMA_ROUND_NUM_COLUMNS = [
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
||||||
|
@ -166,9 +189,20 @@ TILES_SCORE_COLUMNS = {
|
||||||
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S",
|
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S",
|
||||||
field_names.THRESHOLD_COUNT: "TC",
|
field_names.THRESHOLD_COUNT: "TC",
|
||||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
|
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "IAPLHSE",
|
||||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
|
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
|
||||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
|
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
|
||||||
|
# Percentiles for Island areas' workforce columns
|
||||||
|
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "IALMILHSE_PFS",
|
||||||
|
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAPLHSE_PFS",
|
||||||
|
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS",
|
||||||
|
# Percentage of HS Degree completion for Islands
|
||||||
|
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
|
||||||
}
|
}
|
||||||
|
|
||||||
# columns to round floats to 2 decimals
|
# columns to round floats to 2 decimals
|
||||||
|
@ -202,6 +236,18 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
||||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
# Percentiles for Island areas' workforce columns
|
||||||
|
# To be clear: the island areas pull from 2009 census. PR does not.
|
||||||
|
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
# Island areas HS degree attainment rate
|
||||||
|
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
|
||||||
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
|
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
|
||||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
|
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
@ -297,4 +343,12 @@ DOWNLOADABLE_SCORE_COLUMNS = [
|
||||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
]
|
]
|
||||||
|
|
|
@ -63,7 +63,7 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
score_path, dtype={self.GEOID_TRACT_FIELD_NAME: "string"}
|
score_path, dtype={self.GEOID_TRACT_FIELD_NAME: "string"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Convert total population to an int:
|
# Convert total population to an int
|
||||||
df["Total population"] = df["Total population"].astype(
|
df["Total population"] = df["Total population"].astype(
|
||||||
int, errors="ignore"
|
int, errors="ignore"
|
||||||
)
|
)
|
||||||
|
@ -234,6 +234,36 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
axis=0,
|
axis=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info("Adding fields for island areas and Puerto Rico")
|
||||||
|
# The below operation constructs variables for the front end.
|
||||||
|
# Since the Island Areas, Puerto Rico, and the nation all have a different
|
||||||
|
# set of available data, each has its own user experience.
|
||||||
|
|
||||||
|
# First, we identify which user experience -- Puerto Rico, islands, or nation --
|
||||||
|
# a row pertains to using the FIPS codes
|
||||||
|
fips_code_series = score_tiles[field_names.GEOID_TRACT_FIELD].str[:2]
|
||||||
|
score_tiles[constants.USER_INTERFACE_EXPERIENCE_FIELD_NAME] = np.where(
|
||||||
|
fips_code_series.isin(constants.TILES_PUERTO_RICO_FIPS_CODE),
|
||||||
|
constants.PUERTO_RICO_USER_EXPERIENCE,
|
||||||
|
np.where(
|
||||||
|
fips_code_series.isin(constants.TILES_ISLAND_AREA_FIPS_CODES),
|
||||||
|
constants.ISLAND_AREAS_USER_EXPERIENCE,
|
||||||
|
constants.NATION_USER_EXPERIENCE,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Next, we determine how many thresholds the front end should show, entirely
|
||||||
|
# based on the variable for user interface experience.
|
||||||
|
score_tiles[constants.THRESHOLD_COUNT_TO_SHOW_FIELD_NAME] = score_tiles[
|
||||||
|
constants.USER_INTERFACE_EXPERIENCE_FIELD_NAME
|
||||||
|
].map(
|
||||||
|
{
|
||||||
|
constants.PUERTO_RICO_USER_EXPERIENCE: constants.TILES_PUERTO_RICO_THRESHOLD_COUNT,
|
||||||
|
constants.ISLAND_AREAS_USER_EXPERIENCE: constants.TILES_ISLAND_AREAS_THRESHOLD_COUNT,
|
||||||
|
constants.NATION_USER_EXPERIENCE: constants.TILES_NATION_THRESHOLD_COUNT,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# create indexes
|
# create indexes
|
||||||
score_tiles = score_tiles.rename(
|
score_tiles = score_tiles.rename(
|
||||||
columns=constants.TILES_SCORE_COLUMNS,
|
columns=constants.TILES_SCORE_COLUMNS,
|
||||||
|
@ -306,6 +336,7 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
transformed_states,
|
transformed_states,
|
||||||
transformed_score,
|
transformed_score,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.output_score_tiles_df = self._create_tile_data(
|
self.output_score_tiles_df = self._create_tile_data(
|
||||||
output_score_county_state_merged_df
|
output_score_county_state_merged_df
|
||||||
)
|
)
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -3,6 +3,7 @@ PERCENTILE_FIELD_SUFFIX = " (percentile)"
|
||||||
PERCENTILE_URBAN_RURAL_FIELD_SUFFIX = " (percentile urban/rural)"
|
PERCENTILE_URBAN_RURAL_FIELD_SUFFIX = " (percentile urban/rural)"
|
||||||
MIN_MAX_FIELD_SUFFIX = " (min-max normalized)"
|
MIN_MAX_FIELD_SUFFIX = " (min-max normalized)"
|
||||||
TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
|
TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
|
||||||
|
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
|
||||||
|
|
||||||
# Geographic field names
|
# Geographic field names
|
||||||
GEOID_TRACT_FIELD = "GEOID10_TRACT"
|
GEOID_TRACT_FIELD = "GEOID10_TRACT"
|
||||||
|
|
|
@ -34,9 +34,9 @@ class ScoreM(Score):
|
||||||
This function is fairly logically complicated. It takes the following steps:
|
This function is fairly logically complicated. It takes the following steps:
|
||||||
|
|
||||||
1. Combine the two different fields into a single field.
|
1. Combine the two different fields into a single field.
|
||||||
2. Calculate the 90th percentile cutoff raw value for the combined field.
|
2. Calculate the percentile rank of each tract's value in the combined field.
|
||||||
3. Create a boolean series that is true for any census tract in the island
|
3. Create a boolean series that is true for any census tract in the island
|
||||||
areas (and only the island areas) that exceeds this cutoff.
|
areas (and only the island areas) whose percentile rank meets or exceeds the cutoff.
|
||||||
|
|
||||||
For step one, it combines data that is either the island area's Decennial Census
|
For step one, it combines data that is either the island area's Decennial Census
|
||||||
value in 2009 or the state's value in 5-year ACS ending in 2010.
|
value in 2009 or the state's value in 5-year ACS ending in 2010.
|
||||||
|
@ -57,22 +57,20 @@ class ScoreM(Score):
|
||||||
[column_from_island_areas, column_from_decennial_census]
|
[column_from_island_areas, column_from_decennial_census]
|
||||||
].mean(axis=1, skipna=True)
|
].mean(axis=1, skipna=True)
|
||||||
|
|
||||||
logger.info(
|
# Create a percentile field for use in the Islands / PR visualization
|
||||||
f"Combined field `{combined_column_name}` has "
|
# TODO: move this code
|
||||||
f"{df[combined_column_name].isnull().sum()} "
|
# In the code below, percentiles are constructed based on the combined column
|
||||||
f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) "
|
# of census and island data, but only reported for the island areas (where there
|
||||||
f"missing values for census tracts. "
|
# is no other comprehensive percentile information)
|
||||||
|
return_series_name = (
|
||||||
|
column_from_island_areas
|
||||||
|
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
)
|
)
|
||||||
|
df[return_series_name] = np.where(
|
||||||
# Calculate the percentile threshold raw value.
|
df[column_from_decennial_census].isna(),
|
||||||
raw_threshold = np.nanquantile(
|
df[combined_column_name].rank(pct=True),
|
||||||
a=df[combined_column_name], q=threshold_cutoff_for_island_areas
|
np.nan,
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"For combined field `{combined_column_name}`, "
|
|
||||||
f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
|
|
||||||
f"raw value of {raw_threshold:.3f}."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
threshold_column_name = (
|
threshold_column_name = (
|
||||||
|
@ -81,20 +79,7 @@ class ScoreM(Score):
|
||||||
)
|
)
|
||||||
|
|
||||||
df[threshold_column_name] = (
|
df[threshold_column_name] = (
|
||||||
df[column_from_island_areas] >= raw_threshold
|
df[return_series_name] >= threshold_cutoff_for_island_areas
|
||||||
)
|
|
||||||
|
|
||||||
percent_of_tracts_highlighted = (
|
|
||||||
100
|
|
||||||
* df[threshold_column_name].sum()
|
|
||||||
/ df[column_from_island_areas].notnull().sum()
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"For `{threshold_column_name}`, "
|
|
||||||
f"{df[threshold_column_name].sum()} ("
|
|
||||||
f"{percent_of_tracts_highlighted:.2f}% of tracts that have non-null data "
|
|
||||||
f"in the column) have a value of TRUE."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return df, threshold_column_name
|
return df, threshold_column_name
|
||||||
|
@ -615,6 +600,8 @@ class ScoreM(Score):
|
||||||
]
|
]
|
||||||
|
|
||||||
# First, combine unemployment.
|
# First, combine unemployment.
|
||||||
|
# This will include an adjusted percentile column for the island areas
|
||||||
|
# to be used by the front end.
|
||||||
(
|
(
|
||||||
self.df,
|
self.df,
|
||||||
island_areas_unemployment_criteria_field_name,
|
island_areas_unemployment_criteria_field_name,
|
||||||
|
@ -627,6 +614,8 @@ class ScoreM(Score):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Next, combine poverty.
|
# Next, combine poverty.
|
||||||
|
# This will include an adjusted percentile column for the island areas
|
||||||
|
# to be used by the front end.
|
||||||
(
|
(
|
||||||
self.df,
|
self.df,
|
||||||
island_areas_poverty_criteria_field_name,
|
island_areas_poverty_criteria_field_name,
|
||||||
|
@ -640,6 +629,11 @@ class ScoreM(Score):
|
||||||
|
|
||||||
# Also check whether low area median income is 90th percentile or higher
|
# Also check whether low area median income is 90th percentile or higher
|
||||||
# within the islands.
|
# within the islands.
|
||||||
|
|
||||||
|
# Note that because the field for low median income does not have to be combined,
|
||||||
|
# unlike the other fields, we do not need to create a new percentile
|
||||||
|
# column. TODO: refactor this code as part of the larger planned
|
||||||
|
# refactor.
|
||||||
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
|
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
|
||||||
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
||||||
f"{field_names.PERCENTILE}th percentile"
|
f"{field_names.PERCENTILE}th percentile"
|
||||||
|
|
Loading…
Add table
Reference in a new issue