Adding island area indicators to the tiles (#1213)

This updates the backend to produce tile data that includes island area indicators and island-specific fields.

Contains:
- new tile codes for island data
- a threshold column that specifies the number of thresholds to show
- a UI experience column that specifies which UI experience to show

TODO: Drop the logger info message from main :)
This commit is contained in:
Emma Nechamkin 2022-02-09 20:33:42 -05:00 committed by GitHub
parent b86450c72b
commit 389eb59ac4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 116 additions and 36 deletions

View file

@ -77,6 +77,29 @@ PERCENT_PREFIXES_SUFFIXES = [
] ]
TILES_ROUND_NUM_DECIMALS = 2 TILES_ROUND_NUM_DECIMALS = 2
# The following constants and fields get used by the front end to change the side panel.
# The islands, Puerto Rico and the nation all have different
# data available, and as a consequence, show a different number of fields.
# Controlling Tile user experience columns
THRESHOLD_COUNT_TO_SHOW_FIELD_NAME = "Thresholds"
TILES_ISLAND_AREAS_THRESHOLD_COUNT = 4
TILES_PUERTO_RICO_THRESHOLD_COUNT = 5
TILES_NATION_THRESHOLD_COUNT = 21
# Note that the FIPS code is a string
# The FIPS codes listed are:
# 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"]
TILES_PUERTO_RICO_FIPS_CODE = ["72"]
# Constant to reflect UI Experience version
# "Nation" referring to 50 states and DC is from Census
USER_INTERFACE_EXPERIENCE_FIELD_NAME = "UI Experience"
NATION_USER_EXPERIENCE = "Nation"
PUERTO_RICO_USER_EXPERIENCE = "Puerto Rico"
ISLAND_AREAS_USER_EXPERIENCE = "Island Areas"
# FEMA rounding columns # FEMA rounding columns
FEMA_ROUND_NUM_COLUMNS = [ FEMA_ROUND_NUM_COLUMNS = [
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD, field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
@ -166,9 +189,20 @@ TILES_SCORE_COLUMNS = {
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S", field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S",
field_names.THRESHOLD_COUNT: "TC", field_names.THRESHOLD_COUNT: "TC",
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE", field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE", field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "IAPLHSE",
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE", field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE", field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
# Percentiles for Island areas' workforce columns
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
+ field_names.PERCENTILE_FIELD_SUFFIX: "IALMILHSE_PFS",
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAPLHSE_PFS",
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS",
# Percentage of HS Degree completion for Islands
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
} }
# columns to round floats to 2 decimals # columns to round floats to 2 decimals
@ -202,6 +236,18 @@ TILES_SCORE_FLOAT_COLUMNS = [
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
# Percentiles for Island areas' workforce columns
# To be clear: the island areas pull from 2009 census. PR does not.
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
# Island areas HS degree attainment rate
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD, field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD, field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
@ -297,4 +343,12 @@ DOWNLOADABLE_SCORE_COLUMNS = [
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD, field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD, field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
] ]

View file

@ -63,7 +63,7 @@ class PostScoreETL(ExtractTransformLoad):
score_path, dtype={self.GEOID_TRACT_FIELD_NAME: "string"} score_path, dtype={self.GEOID_TRACT_FIELD_NAME: "string"}
) )
# Convert total population to an int: # Convert total population to an int
df["Total population"] = df["Total population"].astype( df["Total population"] = df["Total population"].astype(
int, errors="ignore" int, errors="ignore"
) )
@ -234,6 +234,36 @@ class PostScoreETL(ExtractTransformLoad):
axis=0, axis=0,
) )
logger.info("Adding fields for island areas and Puerto Rico")
# The below operation constructs variables for the front end.
# Since the Island Areas, Puerto Rico, and the nation all have a different
# set of available data, each has its own user experience.
# First, we identify which user experience -- Puerto Rico, islands, or nation --
# a row pertains to using the FIPS codes
fips_code_series = score_tiles[field_names.GEOID_TRACT_FIELD].str[:2]
score_tiles[constants.USER_INTERFACE_EXPERIENCE_FIELD_NAME] = np.where(
fips_code_series.isin(constants.TILES_PUERTO_RICO_FIPS_CODE),
constants.PUERTO_RICO_USER_EXPERIENCE,
np.where(
fips_code_series.isin(constants.TILES_ISLAND_AREA_FIPS_CODES),
constants.ISLAND_AREAS_USER_EXPERIENCE,
constants.NATION_USER_EXPERIENCE,
),
)
# Next, we determine how many thresholds the front end should show, entirely
# based on the variable for user interface experience.
score_tiles[constants.THRESHOLD_COUNT_TO_SHOW_FIELD_NAME] = score_tiles[
constants.USER_INTERFACE_EXPERIENCE_FIELD_NAME
].map(
{
constants.PUERTO_RICO_USER_EXPERIENCE: constants.TILES_PUERTO_RICO_THRESHOLD_COUNT,
constants.ISLAND_AREAS_USER_EXPERIENCE: constants.TILES_ISLAND_AREAS_THRESHOLD_COUNT,
constants.NATION_USER_EXPERIENCE: constants.TILES_NATION_THRESHOLD_COUNT,
}
)
# create indexes # create indexes
score_tiles = score_tiles.rename( score_tiles = score_tiles.rename(
columns=constants.TILES_SCORE_COLUMNS, columns=constants.TILES_SCORE_COLUMNS,
@ -306,6 +336,7 @@ class PostScoreETL(ExtractTransformLoad):
transformed_states, transformed_states,
transformed_score, transformed_score,
) )
self.output_score_tiles_df = self._create_tile_data( self.output_score_tiles_df = self._create_tile_data(
output_score_county_state_merged_df output_score_county_state_merged_df
) )

File diff suppressed because one or more lines are too long

View file

@ -3,6 +3,7 @@ PERCENTILE_FIELD_SUFFIX = " (percentile)"
PERCENTILE_URBAN_RURAL_FIELD_SUFFIX = " (percentile urban/rural)" PERCENTILE_URBAN_RURAL_FIELD_SUFFIX = " (percentile urban/rural)"
MIN_MAX_FIELD_SUFFIX = " (min-max normalized)" MIN_MAX_FIELD_SUFFIX = " (min-max normalized)"
TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)" TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
# Geographic field names # Geographic field names
GEOID_TRACT_FIELD = "GEOID10_TRACT" GEOID_TRACT_FIELD = "GEOID10_TRACT"

View file

@ -34,9 +34,9 @@ class ScoreM(Score):
This function is fairly logically complicated. It takes the following steps: This function is fairly logically complicated. It takes the following steps:
1. Combine the two different fields into a single field. 1. Combine the two different fields into a single field.
2. Calculate the 90th percentile cutoff raw value for the combined field. 2. Calculate the 90th percentile for the combined field.
3. Create a boolean series that is true for any census tract in the island 3. Create a boolean series that is true for any census tract in the island
areas (and only the island areas) that exceeds this cutoff. areas (and only the island areas) that exceeds this percentile.
For step one, it combines data that is either the island area's Decennial Census For step one, it combines data that is either the island area's Decennial Census
value in 2009 or the state's value in 5-year ACS ending in 2010. value in 2009 or the state's value in 5-year ACS ending in 2010.
@ -57,22 +57,20 @@ class ScoreM(Score):
[column_from_island_areas, column_from_decennial_census] [column_from_island_areas, column_from_decennial_census]
].mean(axis=1, skipna=True) ].mean(axis=1, skipna=True)
logger.info( # Create a percentile field for use in the Islands / PR visualization
f"Combined field `{combined_column_name}` has " # TODO: move this code
f"{df[combined_column_name].isnull().sum()} " # In the code below, percentiles are constructed based on the combined column
f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) " # of census and island data, but only reported for the island areas (where there
f"missing values for census tracts. " # is no other comprehensive percentile information)
return_series_name = (
column_from_island_areas
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
) )
df[return_series_name] = np.where(
# Calculate the percentile threshold raw value. df[column_from_decennial_census].isna(),
raw_threshold = np.nanquantile( df[combined_column_name].rank(pct=True),
a=df[combined_column_name], q=threshold_cutoff_for_island_areas np.nan,
)
logger.info(
f"For combined field `{combined_column_name}`, "
f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
f"raw value of {raw_threshold:.3f}."
) )
threshold_column_name = ( threshold_column_name = (
@ -81,20 +79,7 @@ class ScoreM(Score):
) )
df[threshold_column_name] = ( df[threshold_column_name] = (
df[column_from_island_areas] >= raw_threshold df[return_series_name] >= threshold_cutoff_for_island_areas
)
percent_of_tracts_highlighted = (
100
* df[threshold_column_name].sum()
/ df[column_from_island_areas].notnull().sum()
)
logger.info(
f"For `{threshold_column_name}`, "
f"{df[threshold_column_name].sum()} ("
f"{percent_of_tracts_highlighted:.2f}% of tracts that have non-null data "
f"in the column) have a value of TRUE."
) )
return df, threshold_column_name return df, threshold_column_name
@ -615,6 +600,8 @@ class ScoreM(Score):
] ]
# First, combine unemployment. # First, combine unemployment.
# This will include an adjusted percentile column for the island areas
# to be used by the front end.
( (
self.df, self.df,
island_areas_unemployment_criteria_field_name, island_areas_unemployment_criteria_field_name,
@ -627,6 +614,8 @@ class ScoreM(Score):
) )
# Next, combine poverty. # Next, combine poverty.
# This will include an adjusted percentile column for the island areas
# to be used by the front end.
( (
self.df, self.df,
island_areas_poverty_criteria_field_name, island_areas_poverty_criteria_field_name,
@ -640,6 +629,11 @@ class ScoreM(Score):
# Also check whether low area median income is 90th percentile or higher # Also check whether low area median income is 90th percentile or higher
# within the islands. # within the islands.
# Note that because the field for low median does not have to be combined,
# unlike the other fields, we do not need to create a new percentile
# column. This code should probably be refactored when (TODO) we do the big
# refactor.
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = ( island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds " f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
f"{field_names.PERCENTILE}th percentile" f"{field_names.PERCENTILE}th percentile"