Various updates to Definition L (#850)

* removing percentiles as separate field names

* adding RMP
This commit is contained in:
Lucas Merrill Brown 2021-11-04 12:17:45 -04:00 committed by GitHub
parent 3fbc77076c
commit 8372b47d42
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 213 additions and 124 deletions

View file

@ -459,7 +459,7 @@
"execution_count": null,
"id": "bfae9cf5",
"metadata": {
"scrolled": true
"scrolled": false
},
"outputs": [],
"source": [

View file

@ -34,31 +34,16 @@ L_NON_WORKFORCE = "Any Non-Workforce Factor (Definition L)"
# Poverty / Income
POVERTY_FIELD = "Poverty (Less than 200% of federal poverty line)"
POVERTY_PERCENTILE_FIELD = (
"Poverty (Less than 200% of federal poverty line) (percentile)"
)
POVERTY_LESS_THAN_200_FPL_FIELD = (
"Percent of individuals < 200% Federal Poverty Line"
)
POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD = (
"Percent of individuals < 200% Federal Poverty Line (percentile)"
)
POVERTY_LESS_THAN_150_FPL_FIELD = (
"Percent of individuals < 150% Federal Poverty Line"
)
POVERTY_LESS_THAN_150_FPL_PERCENTILE_FIELD = (
"Percent of individuals < 150% Federal Poverty Line (percentile)"
)
POVERTY_LESS_THAN_100_FPL_FIELD = (
"Percent of individuals < 100% Federal Poverty Line"
)
POVERTY_LESS_THAN_100_FPL_PERCENTILE_FIELD = (
"Percent of individuals < 100% Federal Poverty Line (percentile)"
)
MEDIAN_INCOME_PERCENT_AMI_FIELD = "Median household income (% of AMI)"
MEDIAN_INCOME_PERCENT_AMI_PERCENTILE_FIELD = (
"Median household income (% of AMI) (percentile)"
)
STATE_MEDIAN_INCOME_FIELD = (
"Median household income (State; 2019 inflation-adjusted dollars)"
)
@ -72,63 +57,36 @@ AMI_FIELD = "Area Median Income (State or metropolitan)"
# Climate
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
FEMA_RISK_PERCENTILE_FIELD = (
"FEMA Risk Index Expected Annual Loss Score (percentile)"
)
# Environment
DIESEL_FIELD = "Diesel particulate matter"
DIESEL_PERCENTILE_FIELD = "Diesel particulate matter (percentile)"
PM25_FIELD = "Particulate matter (PM2.5)"
PM25_PERCENTILE_FIELD = "Particulate matter (PM2.5) (percentile)"
OZONE_FIELD = "Ozone"
TRAFFIC_FIELD = "Traffic proximity and volume"
TRAFFIC_PERCENTILE_FIELD = "Traffic proximity and volume (percentile)"
LEAD_PAINT_FIELD = "Percent pre-1960s housing (lead paint indicator)"
LEAD_PAINT_PERCENTILE_FIELD = (
"Percent pre-1960s housing (lead paint indicator) (percentile)"
)
WASTEWATER_FIELD = "Wastewater discharge"
WASTEWATER_PERCENTILE_FIELD = "Wastewater discharge (percentile)"
AGGREGATION_POLLUTION_FIELD = "Pollution Burden"
RMP_FIELD = "Proximity to RMP sites (percentile)"
RMP_PERCENTILE_FIELD = "Proximity to RMP sites (percentile)"
RMP_FIELD = "Proximity to Risk Management Plan (RMP) facilities"
TSDF_FIELD = "Proximity to TSDF sites"
NPL_FIELD = "Proximity to NPL sites"
AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk"
# Housing
HOUSING_BURDEN_FIELD = "Housing burden (percent)"
HOUSING_BURDEN_PERCENTILE_FIELD = "Housing burden (percent) (percentile)"
HT_INDEX_FIELD = (
"Housing + Transportation Costs % Income for the Regional Typical Household"
)
# Energy
ENERGY_BURDEN_FIELD = "Energy burden"
ENERGY_BURDEN_PERCENTILE_FIELD = "Energy burden (percentile)"
# Health
DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years"
DIABETES_PERCENTILE_FIELD = (
"Diagnosed diabetes among adults aged >=18 years (percentile)"
)
ASTHMA_FIELD = "Current asthma among adults aged >=18 years"
ASTHMA_PERCENTILE_FIELD = (
"Current asthma among adults aged >=18 years (percentile)"
)
HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years"
HEART_DISEASE_PERCENTILE_FIELD = (
"Coronary heart disease among adults aged >=18 years (percentile)"
)
LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
LIFE_EXPECTANCY_PERCENTILE_FIELD = "Life expectancy (years) (percentile)"
RESPITORY_HAZARD_FIELD = "Respiratory hazard index"
RESPITORY_HAZARD_PERCENTILE_FIELD = "Respiratory hazard index (percentile)"
CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years"
CANCER_PERCENTILE_FIELD = (
"Cancer (excluding skin cancer) among adults aged >=18 years (percentile)"
)
HEALTH_INSURANCE_FIELD = (
"Current lack of health insurance among adults aged 18-64 years"
)
@ -139,16 +97,13 @@ PHYS_HEALTH_NOT_GOOD_FIELD = (
# Other Demographics
TOTAL_POP_FIELD = "Total population"
UNEMPLOYMENT_FIELD = "Unemployed civilians (percent)"
UNEMPLOYMENT_PERCENTILE_FIELD = "Unemployed civilians (percent) (percentile)"
LINGUISTIC_ISO_FIELD = "Linguistic isolation (percent)"
LINGUISTIC_ISO_PERCENTILE_FIELD = "Linguistic isolation (percent) (percentile)"
HOUSEHOLDS_LINGUISTIC_ISO_FIELD = (
"Percent of households in linguistic isolation"
)
HIGH_SCHOOL_ED_FIELD = (
"Percent individuals age 25 or over with less than high school degree"
)
HIGH_SCHOOL_ED_PERCENTILE_FIELD = "Percent individuals age 25 or over with less than high school degree (percentile)"
AGGREGATION_POPULATION_FIELD = "Population Characteristics"
UNDER_5_FIELD = "Individuals under 5 years old"
OVER_64_FIELD = "Individuals over 64 years old"

View file

@ -12,8 +12,9 @@ class ScoreA(Score):
logger.info("Adding Score A")
self.df[field_names.SCORE_A] = self.df[
[
field_names.POVERTY_PERCENTILE_FIELD,
field_names.HIGH_SCHOOL_ED_PERCENTILE_FIELD,
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HIGH_SCHOOL_ED_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
]
].mean(axis=1)
return self.df

View file

@ -11,7 +11,12 @@ class ScoreB(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score B")
self.df[field_names.SCORE_B] = (
self.df[field_names.POVERTY_PERCENTILE_FIELD]
* self.df[field_names.HIGH_SCHOOL_ED_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
* self.df[
field_names.HIGH_SCHOOL_ED_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
)
return self.df

View file

@ -12,43 +12,54 @@ class ScoreC(Score):
def __init__(self, df: pd.DataFrame) -> None:
Bucket = namedtuple(typename="Bucket", field_names=["name", "fields"])
# Note: we use percentiles for every field below.
# To do so, we add the percentile suffix to all the field names.
self.BUCKET_SOCIOECONOMIC = Bucket(
field_names.C_SOCIOECONOMIC,
[
field_names.HOUSEHOLDS_LINGUISTIC_ISO_FIELD,
field_names.POVERTY_FIELD,
field_names.HIGH_SCHOOL_ED_FIELD,
field_names.UNEMPLOYMENT_FIELD,
field_names.HT_INDEX_FIELD,
field_names.HOUSEHOLDS_LINGUISTIC_ISO_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HIGH_SCHOOL_ED_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.UNEMPLOYMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HT_INDEX_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
],
)
self.BUCKET_SENSITIVE = Bucket(
field_names.C_SENSITIVE,
[
field_names.UNDER_5_FIELD,
field_names.OVER_64_FIELD,
field_names.LINGUISTIC_ISO_FIELD,
field_names.UNDER_5_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.OVER_64_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LINGUISTIC_ISO_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
],
)
self.BUCKET_ENVIRONMENTAL = Bucket(
field_names.C_ENVIRONMENTAL,
[
field_names.RMP_FIELD,
field_names.TSDF_FIELD,
field_names.NPL_FIELD,
field_names.WASTEWATER_FIELD,
field_names.LEAD_PAINT_FIELD,
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.WASTEWATER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LEAD_PAINT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
],
)
self.BUCKET_EXPOSURES = Bucket(
field_names.C_EXPOSURES,
[
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
field_names.RESPITORY_HAZARD_FIELD,
field_names.DIESEL_FIELD,
field_names.PM25_FIELD,
field_names.OZONE_FIELD,
field_names.TRAFFIC_FIELD,
field_names.AIR_TOXICS_CANCER_RISK_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.RESPITORY_HAZARD_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.OZONE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
],
)
self.BUCKETS = [
@ -63,16 +74,8 @@ class ScoreC(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score C")
# Average all the percentile values in each bucket into a single score for each of the four buckets.
# TODO just use the percentile fields in the list instead
for bucket in self.BUCKETS:
fields_to_average = []
for field in bucket.fields:
fields_to_average.append(
f"{field}{field_names.PERCENTILE_FIELD_SUFFIX}"
)
self.df[f"{bucket.name}"] = self.df[fields_to_average].mean(axis=1)
self.df[bucket.name] = self.df[bucket.fields].mean(axis=1)
# Combine the score from the two Exposures and Environmental Effects buckets
# into a single score called "Pollution Burden".

View file

@ -28,15 +28,67 @@ class ScoreF(Score):
)
self.df[meets_burden_field] = (
(self.df[field_names.PM25_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.RESPITORY_HAZARD_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.TRAFFIC_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.LEAD_PAINT_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.RMP_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.ASTHMA_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.HEART_DISEASE_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.CANCER_PERCENTILE_FIELD] > 0.9)
| (self.df[field_names.DIABETES_PERCENTILE_FIELD] > 0.9)
(
self.df[
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.RESPITORY_HAZARD_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.TRAFFIC_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.LEAD_PAINT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.ASTHMA_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.HEART_DISEASE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.CANCER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
| (
self.df[
field_names.DIABETES_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> 0.9
)
)
self.df[field_names.SCORE_F_COMMUNITIES] = (

View file

@ -60,10 +60,16 @@ class ScoreL(Score):
# of households where household income is less than or equal to twice the federal
# poverty level. (Source: Census's American Community Survey)
return (
self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
) & (
self.df[field_names.FEMA_RISK_PERCENTILE_FIELD]
self.df[
field_names.FEMA_RISK_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
@ -73,14 +79,27 @@ class ScoreL(Score):
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. (Source: Census's American Community Survey)
return (
self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD]
> self.LOW_INCOME_THRESHOLD
) & (
self.df[field_names.ENERGY_BURDEN_PERCENTILE_FIELD]
energy_criteria = (
self.df[
field_names.ENERGY_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
) | (
self.df[
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
) & energy_criteria
def _transportation_factor(self) -> bool:
# In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA))
# or
@ -92,22 +111,22 @@ class ScoreL(Score):
# of households where household income is less than or equal to twice the federal
# poverty level. (Source: Census's American Community Survey)
transportation_criteria = (
(
self.df[field_names.DIESEL_PERCENTILE_FIELD]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.PM25_PERCENTILE_FIELD]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.TRAFFIC_PERCENTILE_FIELD]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
) | (
self.df[
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return (
self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
) & transportation_criteria
@ -121,24 +140,45 @@ class ScoreL(Score):
# of households where household income is less than or equal to twice the federal
# poverty level. (Source: Census's American Community Survey)
housing_criteria = (
self.df[field_names.LEAD_PAINT_PERCENTILE_FIELD]
self.df[
field_names.LEAD_PAINT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
) | (
self.df[field_names.HOUSING_BURDEN_PERCENTILE_FIELD]
self.df[
field_names.HOUSING_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return (
self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
) & housing_criteria
def _pollution_factor(self) -> bool:
# TBD
# Proximity to Risk Management Plan sites is > X
# AND
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. (Source: Census's American Community Survey)
return False
return (
self.df[
field_names.RMP_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
) & (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
)
def _water_factor(self) -> bool:
# In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
@ -147,10 +187,16 @@ class ScoreL(Score):
# of households where household income is less than or equal to twice the federal
# poverty level. (Source: Census's American Community Survey)
return (
self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
) & (
self.df[field_names.WASTEWATER_PERCENTILE_FIELD]
self.df[
field_names.WASTEWATER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
@ -169,26 +215,41 @@ class ScoreL(Score):
health_criteria = (
(
self.df[field_names.DIABETES_PERCENTILE_FIELD]
self.df[
field_names.DIABETES_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.ASTHMA_PERCENTILE_FIELD]
self.df[
field_names.ASTHMA_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.HEART_DISEASE_PERCENTILE_FIELD]
self.df[
field_names.HEART_DISEASE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.LIFE_EXPECTANCY_PERCENTILE_FIELD]
self.df[
field_names.LIFE_EXPECTANCY_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
# Note: a high life expectancy is good, so take 1 minus the threshold to invert it,
# and then look for life expectancies lower than that (not greater than).
< 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
)
return (
self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.LOW_INCOME_THRESHOLD
) & health_criteria
@ -205,21 +266,33 @@ class ScoreL(Score):
# (necessary to screen out university block groups)
workforce_criteria = (
(
self.df[field_names.UNEMPLOYMENT_PERCENTILE_FIELD]
self.df[
field_names.UNEMPLOYMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.MEDIAN_INCOME_PERCENT_AMI_PERCENTILE_FIELD]
self.df[
field_names.MEDIAN_INCOME_PERCENT_AMI_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it,
# and then look for median income lower than that (not greater than).
< 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.POVERTY_LESS_THAN_100_FPL_PERCENTILE_FIELD]
self.df[
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
| (
self.df[field_names.LINGUISTIC_ISO_PERCENTILE_FIELD]
self.df[
field_names.LINGUISTIC_ISO_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
)