New Create indicators for all thresholds exceeded by a community in Definition L (#980)

* added fieldnames

* todo pollution, water, health & workforce

* workforce

* work in progress

* add utility function to replace duplicate summation logic

* move fpl series into add columns - run black .

* added revisions - still a wip

* added fieldnames

* todo pollution, water, health & workforce

* workforce

* work in progress

* add utility function to replace duplicate summation logic

* move fpl series into add columns - run black .

* added revisions - still a wip

* revise workforce and water

* revise housing and add incremental counter for workforce

* last PR nit

* revise workforce

* more PR feedback in score l

* more PR feedback in score l

* more PR feedback in score l

* addd FPL_SERIES and update references in score 1

* fix bugs

* reparameterize function

* final revisions in fieldnames

* make computations all consistent so we assing with FPL_200_SERIES

* fieldnames refactor after clarification and PR review

* finalize

* finalize with no typos

* fix length

* added median income var

* swap thresholds

* remove iteration

* remove stray '

* address flake 8

* added f string formatting and fixed typos

* added f string formatting and fixed typos

* move up

* remove dupes

* reformat

* fix bugs

* fix bugs

* initialize

Co-authored-by: Saran Ahluwalia <sarahluw@cisco.com>
This commit is contained in:
Saran Ahluwalia 2021-12-05 19:51:19 -05:00 committed by GitHub
commit 07ee4165b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 390 additions and 173 deletions

View file

@ -31,6 +31,8 @@ L_WATER = "Water Factor (Definition L)"
L_HEALTH = "Health Factor (Definition L)"
L_WORKFORCE = "Workforce Factor (Definition L)"
L_NON_WORKFORCE = "Any Non-Workforce Factor (Definition L)"
PERCENTILE = 90
MEDIAN_HOUSE_VALUE_PERCENTILE = 90
# Poverty / Income
POVERTY_FIELD = "Poverty (Less than 200% of federal poverty line)"
@ -196,3 +198,63 @@ HOLC_GRADE_D_TRACT_PERCENT_FIELD: str = "Percent of tract that is HOLC Grade D"
HOLC_GRADE_D_TRACT_20_PERCENT_FIELD: str = "Tract is >20% HOLC Grade D"
HOLC_GRADE_D_TRACT_50_PERCENT_FIELD: str = "Tract is >50% HOLC Grade D"
HOLC_GRADE_D_TRACT_75_PERCENT_FIELD: str = "Tract is >75% HOLC Grade D"
# Climate Change
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected population loss rate and is low income"
EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected agriculture loss rate and is low income"
EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected building loss rate and is low income"
# Clean energy and efficiency
PM25_EXPOSURE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for PM2.5 exposure and is low income"
ENERGY_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for energy burden and is low income"
# Clean transportation
DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for diesel particulate matter and is low income"
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for traffic proximity and is low income"
# Affordable and Sustainable Housing
LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for lead paint and"
" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th percentile and is low income"
)
HOUSING_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for housing burden and is low income"
# Remediation and Reduction of Legacy Pollution
RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income"
SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income"
HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income"
# Critical Clean Water and Waste Infrastructure
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income"
# Health Burden
DIABETES_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for diabetes and is low income"
)
ASTHMA_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for asthma and is low income"
)
HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for heart disease and is low income"
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
# Workforce
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for unemployment"
" and low HS education"
)
LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for households in linguistic isolation"
" and low HS education"
)
POVERTY_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
" and low HS education"
)
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
f"At or below the {PERCENTILE}th percentile for median income"
" and low HS education"
)
THRESHOLD_COUNT = "Total threshold criteria exceeded"
FPL_200_SERIES = "Is low income"

View file

@ -93,9 +93,38 @@ class ScoreL(Score):
return df, threshold_column_name
def _create_low_income_threshold(self, df: pd.DataFrame) -> pd.Series:
"""
Returns a pandas series (really a numpy array)
of booleans based on the condition of the FPL at 200%
is at or more than some established threshold
"""
return (
df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
)
def _increment_total_eligibility_exceeded(
self, columns_for_subset: list
) -> None:
"""
Increments the total eligible factors for a given tract
"""
self.df[field_names.THRESHOLD_COUNT] += self.df[columns_for_subset].sum(
axis=1
)
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score L")
self.df[field_names.THRESHOLD_COUNT] = 0
self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(
self.df
)
self.df[field_names.L_CLIMATE] = self._climate_factor()
self.df[field_names.L_ENERGY] = self._energy_factor()
self.df[field_names.L_TRANSPORTATION] = self._transportation_factor()
@ -143,37 +172,55 @@ class ScoreL(Score):
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
climate_criteria = (
(
self.df[
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
)
return (
climate_eligibility_columns = [
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
]
expected_population_loss_threshold = (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
) & climate_criteria
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
expected_agriculture_loss_threshold = (
self.df[
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
expected_building_loss_threshold = (
self.df[
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
expected_population_loss_threshold
& self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD] = (
expected_agriculture_loss_threshold
& self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD] = (
expected_building_loss_threshold
& self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(climate_eligibility_columns)
return self.df[climate_eligibility_columns].any(axis="columns")
def _energy_factor(self) -> bool:
# In Xth percentile or above for DOEs energy cost burden score (Source: LEAD Score)
@ -181,26 +228,38 @@ class ScoreL(Score):
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
energy_criteria = (
energy_eligibility_columns = [
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
]
energy_burden_threshold = (
self.df[
field_names.ENERGY_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
) | (
)
pm25_threshold = (
self.df[
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
) & energy_criteria
self.df[field_names.PM25_EXPOSURE_LOW_INCOME_FIELD] = (
pm25_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.ENERGY_BURDEN_LOW_INCOME_FIELD] = (
energy_burden_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(energy_eligibility_columns)
return self.df[energy_eligibility_columns].any(axis="columns")
def _transportation_factor(self) -> bool:
# In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA)
@ -212,25 +271,39 @@ class ScoreL(Score):
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
transportation_criteria = (
transportion_eligibility_columns = [
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
]
diesel_threshold = (
self.df[
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
) | (
)
traffic_threshold = (
self.df[
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
) & transportation_criteria
self.df[field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD] = (
diesel_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD] = (
traffic_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(
transportion_eligibility_columns
)
return self.df[transportion_eligibility_columns].any(axis="columns")
def _housing_factor(self) -> bool:
# (
@ -245,35 +318,47 @@ class ScoreL(Score):
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
housing_criteria = (
(
self.df[
field_names.LEAD_PAINT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
& (
self.df[
field_names.MEDIAN_HOUSE_VALUE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
)
) | (
housing_eligibility_columns = [
field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD,
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
]
lead_paint_median_house_hold_threshold = (
self.df[
field_names.LEAD_PAINT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
) & (
self.df[
field_names.MEDIAN_HOUSE_VALUE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
)
housing_burden_threshold = (
self.df[
field_names.HOUSING_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
) & housing_criteria
# series by series indicators
self.df[field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD] = (
lead_paint_median_house_hold_threshold
& self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.HOUSING_BURDEN_LOW_INCOME_FIELD] = (
housing_burden_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(housing_eligibility_columns)
return self.df[housing_eligibility_columns].any(axis="columns")
def _pollution_factor(self) -> bool:
# Proximity to Risk Management Plan sites is > X
@ -282,48 +367,54 @@ class ScoreL(Score):
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
pollution_criteria = (
(
self.df[
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
pollution_eligibility_columns = [
field_names.RMP_LOW_INCOME_FIELD,
field_names.SUPERFUND_LOW_INCOME_FIELD,
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
]
rmp_sites_threshold = (
self.df[field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return pollution_criteria & (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
npl_sites_threshold = (
self.df[field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
tsdf_sites_threshold = (
self.df[
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
# individual series-by-series
self.df[field_names.RMP_LOW_INCOME_FIELD] = (
rmp_sites_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.SUPERFUND_LOW_INCOME_FIELD] = (
npl_sites_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD] = (
tsdf_sites_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(
pollution_eligibility_columns
)
return self.df[pollution_eligibility_columns].any(axis="columns")
def _water_factor(self) -> bool:
# In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
# AND
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
return (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
) & (
wastewater_threshold = (
self.df[
field_names.WASTEWATER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
@ -331,6 +422,16 @@ class ScoreL(Score):
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = (
wastewater_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(
[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]
)
return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]
def _health_factor(self) -> bool:
# In Xth percentile or above for diabetes (Source: CDC Places)
# or
@ -344,45 +445,61 @@ class ScoreL(Score):
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
health_criteria = (
(
self.df[
field_names.DIABETES_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.ASTHMA_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.HEART_DISEASE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.LIFE_EXPECTANCY_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
# Note: a high life expectancy is good, so take 1 minus the threshold to invert it,
# and then look for life expenctancies lower than that (not greater than).
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
)
return (
health_eligibility_columns = [
field_names.DIABETES_LOW_INCOME_FIELD,
field_names.ASTHMA_LOW_INCOME_FIELD,
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
]
diabetes_threshold = (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
asthma_threshold = (
self.df[
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
heart_disease_threshold = (
self.df[
field_names.HEART_DISEASE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.LOW_INCOME_THRESHOLD
) & health_criteria
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
life_expectancy_threshold = (
self.df[
field_names.LIFE_EXPECTANCY_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
# Note: a high life expectancy is good, so take 1 minus the threshold to invert it,
# and then look for life expenctancies lower than that (not greater than).
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
diabetes_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.ASTHMA_LOW_INCOME_FIELD] = (
asthma_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
heart_disease_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(health_eligibility_columns)
return self.df[health_eligibility_columns].any(axis="columns")
def _workforce_factor(self) -> bool:
# Where unemployment is above X%
@ -395,42 +512,80 @@ class ScoreL(Score):
# AND
# Where the high school degree achievement rates for adults 25 years and older is less than 95%
# (necessary to screen out university block groups)
workforce_criteria_for_states = (
(
self.df[
field_names.UNEMPLOYMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.MEDIAN_INCOME_PERCENT_AMI_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it.
# and then look for median income lower than that (not greater than).
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[
field_names.LINGUISTIC_ISO_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
)
workforce_combined_criteria_for_states = (
high_scool_achievement_rate_threshold = (
self.df[field_names.HIGH_SCHOOL_ED_FIELD]
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
) & workforce_criteria_for_states
)
unemployment_threshold = (
self.df[
field_names.UNEMPLOYMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
median_income_threshold = (
self.df[
field_names.MEDIAN_INCOME_PERCENT_AMI_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it.
# and then look for median income lower than that (not greater than).
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
linguistic_isolation_threshold = (
self.df[
field_names.LINGUISTIC_ISO_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
poverty_threshold = (
self.df[
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
linguistic_isolation_threshold
& high_scool_achievement_rate_threshold
)
self.df[field_names.POVERTY_LOW_HS_EDUCATION_FIELD] = (
poverty_threshold & high_scool_achievement_rate_threshold
)
self.df[field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
median_income_threshold & high_scool_achievement_rate_threshold
)
self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
unemployment_threshold & high_scool_achievement_rate_threshold
)
# Workforce criteria for states fields that create indicator columns
# for each tract in order to indicate whether they met any of the four
# criteria. We will used this create individual indicator columns.
workforce_eligibility_columns = [
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
]
workforce_combined_criteria_for_states = self.df[
workforce_eligibility_columns
].any(axis="columns")
self._increment_total_eligibility_exceeded(
workforce_eligibility_columns
)
# Now, calculate workforce criteria for island territories.