Updates backend constants to N (#1854)

This commit is contained in:
Emma Nechamkin 2022-08-23 16:19:00 -04:00 committed by GitHub
commit 6418335219
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 1277 additions and 911 deletions

View file

@ -198,42 +198,42 @@ TILES_SCORE_COLUMNS = {
field_names.WASTEWATER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
field_names.UST_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "UST_PFS",
field_names.M_WATER: "M_WTR",
field_names.M_WORKFORCE: "M_WKFC",
field_names.M_CLIMATE: "M_CLT",
field_names.M_ENERGY: "M_ENY",
field_names.M_TRANSPORTATION: "M_TRN",
field_names.M_HOUSING: "M_HSG",
field_names.M_POLLUTION: "M_PLN",
field_names.M_HEALTH: "M_HLTH",
field_names.N_WATER: "N_WTR",
field_names.N_WORKFORCE: "N_WKFC",
field_names.N_CLIMATE: "N_CLT",
field_names.N_ENERGY: "N_ENY",
field_names.N_TRANSPORTATION: "N_TRN",
field_names.N_HOUSING: "N_HSG",
field_names.N_POLLUTION: "N_PLN",
field_names.N_HEALTH: "N_HLTH",
# Temporarily updated so that the Narwhal score is the one visualized on the map.
# The NEW final score value INCLUDES the adjacency index.
field_names.FINAL_SCORE_N_BOOLEAN: "SM_C",
field_names.FINAL_SCORE_N_BOOLEAN: "SN_C",
field_names.SCORE_N_COMMUNITIES
+ field_names.ADJACENT_MEAN_SUFFIX: "SM_DON",
field_names.SCORE_N_COMMUNITIES: "SM_NO_DON",
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "PM25LI",
field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLI",
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DPMLI",
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD: "TPLI",
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "LPMHVLI",
field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HBLI",
field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD: "RMPLI",
field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD: "SFLI",
field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HWLI",
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "WDLI",
+ field_names.ADJACENT_MEAN_SUFFIX: "SN_DON",
field_names.SCORE_N_COMMUNITIES: "SN_NO_DON",
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD: "PM25LI",
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD: "EBLI",
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD: "DPMLI",
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD: "TPLI",
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD: "LPMHVLI",
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD: "HBLI",
field_names.RMP_LOW_INCOME_FIELD: "RMPLI",
field_names.SUPERFUND_LOW_INCOME_FIELD: "SFLI",
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: "HWLI",
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD: "WDLI",
field_names.UST_LOW_INCOME_FIELD: "USTLI",
field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DLI",
field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD: "ALI",
field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HDLI",
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD: "LLELI",
field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD: "LILHSE",
field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD: "PLHSE",
field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD: "LMILHSE",
field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD: "ULHSE",
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
field_names.HEART_DISEASE_LOW_INCOME_FIELD: "HDLI",
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: "LLELI",
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD: "LILHSE",
field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
# new booleans only for the environmental factors
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD: "EPL_ET",
field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD: "EAL_ET",
@ -276,28 +276,24 @@ TILES_SCORE_COLUMNS = {
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS",
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD: "LHE",
field_names.LOW_HS_EDUCATION_FIELD: "LHE",
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
# Percentage of HS Degree completion for Islands
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
field_names.COLLEGE_ATTENDANCE_FIELD: "CA",
field_names.COLLEGE_NON_ATTENDANCE_FIELD: "NCA",
# This is logically equivalent to "non-college greater than 80%"
field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD: "CA_LT20",
# Booleans for the front end about the types of thresholds exceeded
field_names.CLIMATE_THRESHOLD_EXCEEDED: "M_CLT_EOMI",
field_names.ENERGY_THRESHOLD_EXCEEDED: "M_ENY_EOMI",
field_names.TRAFFIC_THRESHOLD_EXCEEDED: "M_TRN_EOMI",
field_names.HOUSING_THREHSOLD_EXCEEDED: "M_HSG_EOMI",
field_names.POLLUTION_THRESHOLD_EXCEEDED: "M_PLN_EOMI",
field_names.WATER_THRESHOLD_EXCEEDED: "M_WTR_EOMI",
field_names.HEALTH_THRESHOLD_EXCEEDED: "M_HLTH_EOMI",
field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI",
field_names.CLIMATE_THRESHOLD_EXCEEDED: "N_CLT_EOMI",
field_names.ENERGY_THRESHOLD_EXCEEDED: "N_ENY_EOMI",
field_names.TRAFFIC_THRESHOLD_EXCEEDED: "N_TRN_EOMI",
field_names.HOUSING_THREHSOLD_EXCEEDED: "N_HSG_EOMI",
field_names.POLLUTION_THRESHOLD_EXCEEDED: "N_PLN_EOMI",
field_names.WATER_THRESHOLD_EXCEEDED: "N_WTR_EOMI",
field_names.HEALTH_THRESHOLD_EXCEEDED: "N_HLTH_EOMI",
field_names.WORKFORCE_THRESHOLD_EXCEEDED: "N_WKFC_EOMI",
# These are the booleans for socioeconomic indicators
## This is the boolean that flags low income
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S",
## Low high school education, for training & workforce development (T&WD)
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "N_WKFC_EBSI",
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
field_names.DOT_TRAVEL_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS",
@ -377,8 +373,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
# Island areas HS degree attainment rate
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
field_names.COLLEGE_ATTENDANCE_FIELD,
field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.FUTURE_WILDFIRE_RISK_FIELD

View file

@ -403,6 +403,7 @@ class ScoreETL(ExtractTransformLoad):
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
)
# Donut columns get added later
numeric_columns = [
field_names.HOUSING_BURDEN_FIELD,
field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD,
@ -477,12 +478,15 @@ class ScoreETL(ExtractTransformLoad):
non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
field_names.PERSISTENT_POVERTY_FIELD,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
]
boolean_columns = [
field_names.AML_BOOLEAN,
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
]
# For some columns, high values are "good", so we want to reverse the percentile
@ -523,6 +527,7 @@ class ScoreETL(ExtractTransformLoad):
non_numeric_columns
+ numeric_columns
+ [rp.field_name for rp in reverse_percentiles]
+ boolean_columns
)
df_copy = df[columns_to_keep].copy()
@ -533,6 +538,10 @@ class ScoreETL(ExtractTransformLoad):
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
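# Coerce all boolean columns to bool dtype.
# NOTE(review): astype(bool) maps NaN to True; confirm these columns have no missing values.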
for col in boolean_columns:
df_copy[col] = df_copy[col].astype(bool)
# Convert all columns to numeric and do math
# Note that we have a few special conditions here and we handle them explicitly.
# For *Linguistic Isolation*, we do NOT want to include Puerto Rico in the percentile

View file

@ -53,7 +53,7 @@ class GeoScoreETL(ExtractTransformLoad):
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
field_names.SCORE_N
]
self.TARGET_SCORE_RENAME_TO = "M_SCORE"
self.TARGET_SCORE_RENAME_TO = "SCORE"
# Import the shortened name for tract ("GTF") that's used on the tiles.
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[

File diff suppressed because one or more lines are too long