mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-26 01:41:16 -07:00
Issue 954: Add various data sources from Child Opportunity Index (#986)
* Adds four fields:
  * Summer days above 90F
  * Percent low access to healthy food
  * Percent impenetrable surface areas
  * Low third grade reading proficiency
* Each of these four gets added into Definition L in various factors.
* Additionally, I add college attendance fields to the ETL for Census ACS.
* This PR also introduces the notion of "reverse percentiles", relevant to ticket #970.
This commit is contained in:
parent
df564658a5
commit
5a6d6d8557
8 changed files with 357 additions and 40 deletions
|
@ -63,6 +63,8 @@ MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)"
|
|||
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
||||
AMI_FIELD = "Area Median Income (State or metropolitan)"
|
||||
|
||||
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
|
||||
|
||||
# Climate
|
||||
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
||||
EXPECTED_BUILDING_LOSS_RATE_FIELD = (
|
||||
|
@ -206,30 +208,63 @@ HOLC_GRADE_D_TRACT_50_PERCENT_FIELD: str = "Tract is >50% HOLC Grade D"
|
|||
HOLC_GRADE_D_TRACT_75_PERCENT_FIELD: str = "Tract is >75% HOLC Grade D"
|
||||
|
||||
|
||||
# Child Opportunity Index data
|
||||
# Summer days with maximum temperature above 90F.
|
||||
EXTREME_HEAT_FIELD = "Summer days above 90F"
|
||||
|
||||
# Percentage of households without a car located farther than a half-mile from the
|
||||
# nearest supermarket.
|
||||
HEALTHY_FOOD_FIELD = "Percent low access to healthy food"
|
||||
|
||||
# Percentage of impenetrable surface areas, such as rooftops, roads or parking lots.
|
||||
IMPENETRABLE_SURFACES_FIELD = "Percent impenetrable surface areas"
|
||||
|
||||
# Percentage of third graders scoring proficient on standardized reading tests,
|
||||
# converted to NAEP scale score points.
|
||||
READING_FIELD = "Third grade reading proficiency"
|
||||
LOW_READING_FIELD = "Low third grade reading proficiency"
|
||||
|
||||
# Names for individual factors being exceeded
|
||||
# Climate Change
|
||||
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected population loss rate and is low income"
|
||||
EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected agriculture loss rate and is low income"
|
||||
EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected building loss rate and is low income"
|
||||
EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for summer days above 90F and "
|
||||
f"the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
|
||||
f"percentile and is low income"
|
||||
)
|
||||
|
||||
# Clean energy and efficiency
|
||||
PM25_EXPOSURE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for PM2.5 exposure and is low income"
|
||||
ENERGY_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for energy burden and is low income"
|
||||
|
||||
# Clean transportation
|
||||
DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for diesel particulate matter and is low income"
|
||||
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for traffic proximity and is low income"
|
||||
|
||||
# Affordable and Sustainable Housing
|
||||
LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD = (
|
||||
LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for lead paint and"
|
||||
" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th percentile and is low income"
|
||||
f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
|
||||
f"percentile and is low income"
|
||||
)
|
||||
HOUSING_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for housing burden and is low income"
|
||||
|
||||
IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for impenetrable surfaces and is low "
|
||||
f"income"
|
||||
)
|
||||
|
||||
# Remediation and Reduction of Legacy Pollution
|
||||
RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income"
|
||||
SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income"
|
||||
HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income"
|
||||
|
||||
# Critical Clean Water and Waste Infrastructure
|
||||
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income"
|
||||
# Health Burden
|
||||
|
||||
# Health Burdens
|
||||
DIABETES_LOW_INCOME_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for diabetes and is low income"
|
||||
)
|
||||
|
@ -240,25 +275,35 @@ HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for
|
|||
|
||||
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
|
||||
|
||||
HEALTHY_FOOD_LOW_INCOME_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for low "
|
||||
f"access to healthy food and is low income"
|
||||
)
|
||||
|
||||
# Workforce
|
||||
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for unemployment"
|
||||
" and low HS education"
|
||||
" and has low HS education"
|
||||
)
|
||||
|
||||
LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for households in linguistic isolation"
|
||||
" and low HS education"
|
||||
" and has low HS education"
|
||||
)
|
||||
|
||||
POVERTY_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
|
||||
" and low HS education"
|
||||
" and has low HS education"
|
||||
)
|
||||
|
||||
LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or above the {PERCENTILE}th percentile for low 3rd grade reading proficiency"
|
||||
" and has low HS education"
|
||||
)
|
||||
|
||||
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||
f"At or below the {PERCENTILE}th percentile for median income"
|
||||
" and low HS education"
|
||||
" and has low HS education"
|
||||
)
|
||||
|
||||
THRESHOLD_COUNT = "Total threshold criteria exceeded"
|
||||
|
|
|
@ -177,6 +177,8 @@ class ScoreL(Score):
|
|||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||
field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
expected_population_loss_threshold = (
|
||||
|
@ -203,6 +205,28 @@ class ScoreL(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
extreme_heat_median_home_value_threshold = (
|
||||
self.df[
|
||||
field_names.EXTREME_HEAT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
) & (
|
||||
self.df[
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
|
||||
)
|
||||
|
||||
impenetrable_surfaces_threshold = (
|
||||
self.df[
|
||||
field_names.IMPENETRABLE_SURFACES_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
|
||||
expected_population_loss_threshold
|
||||
& self.df[field_names.FPL_200_SERIES]
|
||||
|
@ -218,6 +242,18 @@ class ScoreL(Score):
|
|||
& self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD
|
||||
] = (
|
||||
extreme_heat_median_home_value_threshold
|
||||
& self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD] = (
|
||||
impenetrable_surfaces_threshold
|
||||
& self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(climate_eligibility_columns)
|
||||
|
||||
return self.df[climate_eligibility_columns].any(axis="columns")
|
||||
|
@ -320,11 +356,11 @@ class ScoreL(Score):
|
|||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
housing_eligibility_columns = [
|
||||
field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD,
|
||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
lead_paint_median_house_hold_threshold = (
|
||||
lead_paint_median_home_value_threshold = (
|
||||
self.df[
|
||||
field_names.LEAD_PAINT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
|
@ -347,8 +383,8 @@ class ScoreL(Score):
|
|||
)
|
||||
|
||||
# series by series indicators
|
||||
self.df[field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD] = (
|
||||
lead_paint_median_house_hold_threshold
|
||||
self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
|
||||
lead_paint_median_home_value_threshold
|
||||
& self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
||||
|
@ -449,6 +485,7 @@ class ScoreL(Score):
|
|||
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||
field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
|
||||
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
|
@ -474,6 +511,14 @@ class ScoreL(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
healthy_food_threshold = (
|
||||
self.df[
|
||||
field_names.HEALTHY_FOOD_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
life_expectancy_threshold = (
|
||||
self.df[
|
||||
field_names.LIFE_EXPECTANCY_FIELD
|
||||
|
@ -496,6 +541,9 @@ class ScoreL(Score):
|
|||
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
||||
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
|
||||
healthy_food_threshold & self.df[field_names.FPL_200_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
||||
|
||||
|
@ -513,6 +561,15 @@ class ScoreL(Score):
|
|||
# Where the high school degree achievement rate for adults 25 years and older is less than 95%
|
||||
# (necessary to screen out university block groups)
|
||||
|
||||
# Workforce criteria for states fields.
|
||||
workforce_eligibility_columns = [
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LOW_READING_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
|
||||
high_scool_achievement_rate_threshold = (
|
||||
self.df[field_names.HIGH_SCHOOL_ED_FIELD]
|
||||
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||
|
@ -552,6 +609,14 @@ class ScoreL(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
low_reading_threshold = (
|
||||
self.df[
|
||||
field_names.LOW_READING_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
|
||||
linguistic_isolation_threshold
|
||||
& high_scool_achievement_rate_threshold
|
||||
|
@ -569,15 +634,9 @@ class ScoreL(Score):
|
|||
unemployment_threshold & high_scool_achievement_rate_threshold
|
||||
)
|
||||
|
||||
# Workforce criteria for states fields that create indicator columns
|
||||
# for each tract in order to indicate whether they met any of the four
|
||||
# criteria. We will use this to create individual indicator columns.
|
||||
workforce_eligibility_columns = [
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
self.df[field_names.LOW_READING_LOW_HS_EDUCATION_FIELD] = (
|
||||
low_reading_threshold & high_scool_achievement_rate_threshold
|
||||
)
|
||||
|
||||
workforce_combined_criteria_for_states = self.df[
|
||||
workforce_eligibility_columns
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue