mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 08:11:16 -07:00
Issue 970: reverse percentiles for AMI and life expectancy (#1018)
* switching to low * fixing score-etl-post * updating comments * fixing comparison * create separate field for clarity * comment fix * removing healthy food * fixing bug in score post * running black and adding comment * Update pickles and add a helpful notes to README Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
This commit is contained in:
parent
24bac56d9e
commit
7fcecaee42
11 changed files with 144 additions and 100 deletions
|
@ -60,11 +60,15 @@ MEDIAN_INCOME_FIELD = "Median household income in the past 12 months"
|
|||
MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD = (
|
||||
"Median household income (% of state median household income)"
|
||||
)
|
||||
MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)"
|
||||
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
||||
AMI_FIELD = "Area Median Income (State or metropolitan)"
|
||||
|
||||
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
|
||||
MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
|
||||
"Median household income as a percent of area median income"
|
||||
)
|
||||
LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
|
||||
"Low median household income as a percent of area median income"
|
||||
)
|
||||
|
||||
# Climate
|
||||
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
||||
|
@ -105,7 +109,6 @@ ENERGY_BURDEN_FIELD = "Energy burden"
|
|||
DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years"
|
||||
ASTHMA_FIELD = "Current asthma among adults aged >=18 years"
|
||||
HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years"
|
||||
LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
|
||||
CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years"
|
||||
HEALTH_INSURANCE_FIELD = (
|
||||
"Current lack of health insurance among adults aged 18-64 years"
|
||||
|
@ -113,6 +116,8 @@ HEALTH_INSURANCE_FIELD = (
|
|||
PHYS_HEALTH_NOT_GOOD_FIELD = (
|
||||
"Physical health not good for >=14 days among adults aged >=18 years"
|
||||
)
|
||||
LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
|
||||
LOW_LIFE_EXPECTANCY_FIELD = "Low life expectancy"
|
||||
|
||||
# Other Demographics
|
||||
TOTAL_POP_FIELD = "Total population"
|
||||
|
@ -130,9 +135,6 @@ OVER_64_FIELD = "Individuals over 64 years old"
|
|||
|
||||
# Fields from 2010 decennial census (generally only loaded for the territories)
|
||||
CENSUS_DECENNIAL_MEDIAN_INCOME_2009 = "Median household income in 2009 ($)"
|
||||
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
|
||||
"Median household income as a percent of territory median income in 2009"
|
||||
)
|
||||
CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = (
|
||||
"Percentage households below 100% of federal poverty line in 2009"
|
||||
)
|
||||
|
@ -141,7 +143,10 @@ CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = (
|
|||
"Unemployed civilians (percent) in 2009"
|
||||
)
|
||||
CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009"
|
||||
|
||||
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
|
||||
"Median household income as a percent of territory median income in 2009"
|
||||
)
|
||||
LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009"
|
||||
# Fields from 2010 ACS (loaded for comparison with the territories)
|
||||
CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployed civilians (percent) in 2010"
|
||||
CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
|
||||
|
@ -265,7 +270,10 @@ ASTHMA_LOW_INCOME_FIELD = (
|
|||
)
|
||||
HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for heart disease and is low income"
|
||||
|
||||
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
|
||||
LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile "
|
||||
f"for low life expectancy and is low income"
|
||||
)
|
||||
|
||||
# Workforce
|
||||
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
||||
|
@ -288,9 +296,9 @@ LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
|||
" and has low HS education"
|
||||
)
|
||||
|
||||
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or below the {PERCENTILE}th percentile for median income"
|
||||
" and has low HS education"
|
||||
LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or below the {PERCENTILE}th percentile for low median household income as a "
|
||||
f"percent of area median income and has low HS education"
|
||||
)
|
||||
|
||||
# Not currently used in a factor
|
||||
|
|
|
@ -44,6 +44,8 @@ class ScoreL(Score):
|
|||
robustness over 1-year ACS.
|
||||
"""
|
||||
# Create the combined field.
|
||||
# TODO: move this combined field percentile calculation to `etl_score`,
|
||||
# since most other percentile logic is there.
|
||||
# There should only be one entry in either 2009 or 2019 fields, not one in both.
|
||||
# But just to be safe, we take the mean and ignore null values so if there
|
||||
# *were* entries in both, this result would make sense.
|
||||
|
@ -169,7 +171,7 @@ class ScoreL(Score):
|
|||
def _climate_factor(self) -> bool:
|
||||
# In Xth percentile or above for FEMA’s Risk Index (Source: FEMA)
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -225,7 +227,7 @@ class ScoreL(Score):
|
|||
def _energy_factor(self) -> bool:
|
||||
# In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -268,7 +270,7 @@ class ScoreL(Score):
|
|||
# or
|
||||
# In Xth percentile or above for traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data)
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -315,7 +317,7 @@ class ScoreL(Score):
|
|||
# or
|
||||
# In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset)
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -363,7 +365,7 @@ class ScoreL(Score):
|
|||
def _pollution_factor(self) -> bool:
|
||||
# Proximity to Risk Management Plan sites is > X
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -410,7 +412,7 @@ class ScoreL(Score):
|
|||
def _water_factor(self) -> bool:
|
||||
# In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -441,7 +443,7 @@ class ScoreL(Score):
|
|||
# or
|
||||
# In Xth percentile or above for low life expectancy (Source: CDC Places)
|
||||
# AND
|
||||
# Low income: In 60th percentile or above for percent of block group population
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. (Source: Census's American Community Survey)
|
||||
|
||||
|
@ -449,8 +451,7 @@ class ScoreL(Score):
|
|||
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||
field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
|
||||
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
diabetes_threshold = (
|
||||
|
@ -475,24 +476,14 @@ class ScoreL(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
healthy_food_threshold = (
|
||||
low_life_expectancy_threshold = (
|
||||
self.df[
|
||||
field_names.HEALTHY_FOOD_FIELD
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
life_expectancy_threshold = (
|
||||
self.df[
|
||||
field_names.LIFE_EXPECTANCY_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
# Note: a high life expectancy is good, so take 1 minus the threshold to invert it,
|
||||
# and then look for life expectancies lower than that (not greater than).
|
||||
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
|
||||
diabetes_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
@ -502,11 +493,8 @@ class ScoreL(Score):
|
|||
self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
|
||||
heart_disease_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
||||
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
|
||||
healthy_food_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
||||
low_life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
||||
|
@ -514,23 +502,25 @@ class ScoreL(Score):
|
|||
return self.df[health_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _workforce_factor(self) -> bool:
|
||||
# Where unemployment is above X%
|
||||
# Where unemployment is above Xth percentile
|
||||
# or
|
||||
# Where median income is less than Y% of the area median income
|
||||
# Where low median income as a percent of area median income is above Xth percentile
|
||||
# or
|
||||
# Where the percent of households at or below 100% of the federal poverty level is greater than Z%
|
||||
# Where the percent of households at or below 100% of the federal poverty level
|
||||
# is above Xth percentile
|
||||
# or
|
||||
# Where linguistic isolation is greater than Y%
|
||||
# Where linguistic isolation is above Xth percentile
|
||||
# AND
|
||||
# Where the high school degree achievement rates for adults 25 years and older is less than 95%
|
||||
# (necessary to screen out university block groups)
|
||||
# Where the high school degree achievement rates for adults 25 years and older
|
||||
# is less than Y%
|
||||
# (necessary to screen out university tracts)
|
||||
|
||||
# Workforce criteria for states fields.
|
||||
workforce_eligibility_columns = [
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
|
||||
high_scool_achievement_rate_threshold = (
|
||||
|
@ -546,14 +536,12 @@ class ScoreL(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
median_income_threshold = (
|
||||
low_median_income_threshold = (
|
||||
self.df[
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it,
|
||||
# and then look for median income lower than that (not greater than).
|
||||
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
linguistic_isolation_threshold = (
|
||||
|
@ -581,8 +569,8 @@ class ScoreL(Score):
|
|||
poverty_threshold & high_scool_achievement_rate_threshold
|
||||
)
|
||||
|
||||
self.df[field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
|
||||
median_income_threshold & high_scool_achievement_rate_threshold
|
||||
self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
|
||||
low_median_income_threshold & high_scool_achievement_rate_threshold
|
||||
)
|
||||
|
||||
self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
|
||||
|
@ -624,23 +612,31 @@ class ScoreL(Score):
|
|||
threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
|
||||
)
|
||||
|
||||
# Also check whether low area median income is 90th percentile or higher
|
||||
# within the islands.
|
||||
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name = (
|
||||
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
||||
f"{field_names.PERCENTILE}th percentile"
|
||||
)
|
||||
self.df[
|
||||
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
|
||||
] = (
|
||||
self.df[
|
||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
workforce_combined_criteria_for_island_areas = (
|
||||
self.df[unemployment_island_areas_criteria_field_name]
|
||||
| self.df[poverty_island_areas_criteria_field_name]
|
||||
# Also check whether area median income is 10th percentile or lower
|
||||
# within the islands.
|
||||
| (
|
||||
self.df[
|
||||
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it,
|
||||
# and then look for median income lower than that (not greater than).
|
||||
< 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
| self.df[
|
||||
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
|
||||
]
|
||||
) & (
|
||||
self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
|
||||
> self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||
)
|
||||
|
||||
percent_of_island_tracts_highlighted = (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue