diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index 61ca4b20..5930c2e4 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -459,7 +459,7 @@ "execution_count": null, "id": "bfae9cf5", "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [], "source": [ diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index b2de2a1a..a4114989 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -34,31 +34,16 @@ L_NON_WORKFORCE = "Any Non-Workforce Factor (Definition L)" # Poverty / Income POVERTY_FIELD = "Poverty (Less than 200% of federal poverty line)" -POVERTY_PERCENTILE_FIELD = ( - "Poverty (Less than 200% of federal poverty line) (percentile)" -) POVERTY_LESS_THAN_200_FPL_FIELD = ( "Percent of individuals < 200% Federal Poverty Line" ) -POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD = ( - "Percent of individuals < 200% Federal Poverty Line (percentile)" -) POVERTY_LESS_THAN_150_FPL_FIELD = ( "Percent of individuals < 150% Federal Poverty Line" ) -POVERTY_LESS_THAN_150_FPL_PERCENTILE_FIELD = ( - "Percent of individuals < 150% Federal Poverty Line (percentile)" -) POVERTY_LESS_THAN_100_FPL_FIELD = ( "Percent of individuals < 100% Federal Poverty Line" ) -POVERTY_LESS_THAN_100_FPL_PERCENTILE_FIELD = ( - "Percent of individuals < 100% Federal Poverty Line (percentile)" -) MEDIAN_INCOME_PERCENT_AMI_FIELD = "Median household income (% of AMI)" -MEDIAN_INCOME_PERCENT_AMI_PERCENTILE_FIELD = ( - "Median household income (% of AMI) (percentile)" -) STATE_MEDIAN_INCOME_FIELD = ( "Median household income (State; 2019 inflation-adjusted dollars)" ) @@ -72,63 +57,36 @@ AMI_FIELD = "Area Median Income (State or metropolitan)" # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" -FEMA_RISK_PERCENTILE_FIELD = ( - "FEMA Risk Index Expected Annual Loss Score (percentile)" -) # Environment DIESEL_FIELD = "Diesel particulate matter" -DIESEL_PERCENTILE_FIELD = "Diesel particulate matter (percentile)" PM25_FIELD = "Particulate matter (PM2.5)" -PM25_PERCENTILE_FIELD = "Particulate matter (PM2.5) (percentile)" OZONE_FIELD = "Ozone" TRAFFIC_FIELD = "Traffic proximity and volume" -TRAFFIC_PERCENTILE_FIELD = "Traffic proximity and volume (percentile)" LEAD_PAINT_FIELD = "Percent pre-1960s housing (lead paint indicator)" -LEAD_PAINT_PERCENTILE_FIELD = ( - "Percent pre-1960s housing (lead paint indicator) (percentile)" -) WASTEWATER_FIELD = "Wastewater discharge" -WASTEWATER_PERCENTILE_FIELD = "Wastewater discharge (percentile)" AGGREGATION_POLLUTION_FIELD = "Pollution Burden" -RMP_FIELD = "Proximity to RMP sites (percentile)" -RMP_PERCENTILE_FIELD = "Proximity to RMP sites (percentile)" +RMP_FIELD = "Proximity to Risk Management Plan (RMP) facilities" TSDF_FIELD = "Proximity to TSDF sites" NPL_FIELD = "Proximity to NPL sites" AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk" # Housing HOUSING_BURDEN_FIELD = "Housing burden (percent)" -HOUSING_BURDEN_PERCENTILE_FIELD = "Housing burden (percent) (percentile)" HT_INDEX_FIELD = ( "Housing + Transportation Costs % Income for the Regional Typical Household" ) # Energy ENERGY_BURDEN_FIELD = "Energy burden" -ENERGY_BURDEN_PERCENTILE_FIELD = "Energy burden (percentile)" # Health DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years" -DIABETES_PERCENTILE_FIELD = ( - "Diagnosed diabetes among adults aged >=18 years (percentile)" -) ASTHMA_FIELD = "Current asthma among adults aged >=18 years" -ASTHMA_PERCENTILE_FIELD = ( - "Current asthma among adults aged >=18 years (percentile)" -) HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years" -HEART_DISEASE_PERCENTILE_FIELD = ( - "Coronary heart disease among adults aged >=18 years (percentile)" -) LIFE_EXPECTANCY_FIELD = "Life expectancy (years)" -LIFE_EXPECTANCY_PERCENTILE_FIELD = "Life expectancy (years) (percentile)" RESPITORY_HAZARD_FIELD = "Respiratory hazard index" -RESPITORY_HAZARD_PERCENTILE_FIELD = "Respiratory hazard index (percentile)" CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years" -CANCER_PERCENTILE_FIELD = ( - "Cancer (excluding skin cancer) among adults aged >=18 years (percentile)" -) HEALTH_INSURANCE_FIELD = ( "Current lack of health insurance among adults aged 18-64 years" ) @@ -139,16 +97,13 @@ PHYS_HEALTH_NOT_GOOD_FIELD = ( # Other Demographics TOTAL_POP_FIELD = "Total population" UNEMPLOYMENT_FIELD = "Unemployed civilians (percent)" -UNEMPLOYMENT_PERCENTILE_FIELD = "Unemployed civilians (percent) (percentile)" LINGUISTIC_ISO_FIELD = "Linguistic isolation (percent)" -LINGUISTIC_ISO_PERCENTILE_FIELD = "Linguistic isolation (percent) (percentile)" HOUSEHOLDS_LINGUISTIC_ISO_FIELD = ( "Percent of households in linguistic isolation" ) HIGH_SCHOOL_ED_FIELD = ( "Percent individuals age 25 or over with less than high school degree" ) -HIGH_SCHOOL_ED_PERCENTILE_FIELD = "Percent individuals age 25 or over with less than high school degree (percentile)" AGGREGATION_POPULATION_FIELD = "Population Characteristics" UNDER_5_FIELD = "Individuals under 5 years old" OVER_64_FIELD = "Individuals over 64 years old" diff --git a/data/data-pipeline/data_pipeline/score/score_a.py b/data/data-pipeline/data_pipeline/score/score_a.py index 3cb64bc1..8f0bdde4 100644 --- a/data/data-pipeline/data_pipeline/score/score_a.py +++ b/data/data-pipeline/data_pipeline/score/score_a.py @@ -12,8 +12,9 @@ class ScoreA(Score): logger.info("Adding Score A") self.df[field_names.SCORE_A] = self.df[ [ - field_names.POVERTY_PERCENTILE_FIELD, - field_names.HIGH_SCHOOL_ED_PERCENTILE_FIELD, + field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.HIGH_SCHOOL_ED_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, ] ].mean(axis=1) return self.df diff --git a/data/data-pipeline/data_pipeline/score/score_b.py b/data/data-pipeline/data_pipeline/score/score_b.py index 74f28311..449ccade 100644 --- a/data/data-pipeline/data_pipeline/score/score_b.py +++ b/data/data-pipeline/data_pipeline/score/score_b.py @@ -11,7 +11,12 @@ class ScoreB(Score): def add_columns(self) -> pd.DataFrame: logger.info("Adding Score B") self.df[field_names.SCORE_B] = ( - self.df[field_names.POVERTY_PERCENTILE_FIELD] - * self.df[field_names.HIGH_SCHOOL_ED_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ] + * self.df[ + field_names.HIGH_SCHOOL_ED_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] ) return self.df diff --git a/data/data-pipeline/data_pipeline/score/score_c.py b/data/data-pipeline/data_pipeline/score/score_c.py index 633739af..90f091aa 100644 --- a/data/data-pipeline/data_pipeline/score/score_c.py +++ b/data/data-pipeline/data_pipeline/score/score_c.py @@ -12,43 +12,54 @@ class ScoreC(Score): def __init__(self, df: pd.DataFrame) -> None: Bucket = namedtuple(typename="Bucket", field_names=["name", "fields"]) + # Note: we use percentiles for every field below. + # To do so, we add the percentile suffix to all the field names. self.BUCKET_SOCIOECONOMIC = Bucket( field_names.C_SOCIOECONOMIC, [ - field_names.HOUSEHOLDS_LINGUISTIC_ISO_FIELD, - field_names.POVERTY_FIELD, - field_names.HIGH_SCHOOL_ED_FIELD, - field_names.UNEMPLOYMENT_FIELD, - field_names.HT_INDEX_FIELD, + field_names.HOUSEHOLDS_LINGUISTIC_ISO_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.HIGH_SCHOOL_ED_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.UNEMPLOYMENT_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.HT_INDEX_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, ], ) self.BUCKET_SENSITIVE = Bucket( field_names.C_SENSITIVE, [ - field_names.UNDER_5_FIELD, - field_names.OVER_64_FIELD, - field_names.LINGUISTIC_ISO_FIELD, + field_names.UNDER_5_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.OVER_64_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.LINGUISTIC_ISO_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, ], ) self.BUCKET_ENVIRONMENTAL = Bucket( field_names.C_ENVIRONMENTAL, [ - field_names.RMP_FIELD, - field_names.TSDF_FIELD, - field_names.NPL_FIELD, - field_names.WASTEWATER_FIELD, - field_names.LEAD_PAINT_FIELD, + field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.WASTEWATER_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.LEAD_PAINT_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, ], ) self.BUCKET_EXPOSURES = Bucket( field_names.C_EXPOSURES, [ - field_names.AIR_TOXICS_CANCER_RISK_FIELD, - field_names.RESPITORY_HAZARD_FIELD, - field_names.DIESEL_FIELD, - field_names.PM25_FIELD, - field_names.OZONE_FIELD, - field_names.TRAFFIC_FIELD, + field_names.AIR_TOXICS_CANCER_RISK_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.RESPITORY_HAZARD_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.OZONE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, ], ) self.BUCKETS = [ @@ -63,16 +74,8 @@ class ScoreC(Score): def add_columns(self) -> pd.DataFrame: logger.info("Adding Score C") # Average all the percentile values in each bucket into a single score for each of the four buckets. - - # TODO just use the percentile fields in the list instead for bucket in self.BUCKETS: - fields_to_average = [] - for field in bucket.fields: - fields_to_average.append( - f"{field}{field_names.PERCENTILE_FIELD_SUFFIX}" - ) - - self.df[f"{bucket.name}"] = self.df[fields_to_average].mean(axis=1) + self.df[bucket.name] = self.df[bucket.fields].mean(axis=1) # Combine the score from the two Exposures and Environmental Effects buckets # into a single score called "Pollution Burden". diff --git a/data/data-pipeline/data_pipeline/score/score_f.py b/data/data-pipeline/data_pipeline/score/score_f.py index 44b42d68..cc07d0b2 100644 --- a/data/data-pipeline/data_pipeline/score/score_f.py +++ b/data/data-pipeline/data_pipeline/score/score_f.py @@ -28,15 +28,67 @@ class ScoreF(Score): ) self.df[meets_burden_field] = ( - (self.df[field_names.PM25_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.RESPITORY_HAZARD_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.TRAFFIC_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.LEAD_PAINT_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.RMP_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.ASTHMA_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.HEART_DISEASE_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.CANCER_PERCENTILE_FIELD] > 0.9) - | (self.df[field_names.DIABETES_PERCENTILE_FIELD] > 0.9) + ( + self.df[ + field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.RESPITORY_HAZARD_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.TRAFFIC_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.LEAD_PAINT_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.ASTHMA_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.HEART_DISEASE_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.CANCER_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) + | ( + self.df[ + field_names.DIABETES_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > 0.9 + ) ) self.df[field_names.SCORE_F_COMMUNITIES] = ( diff --git a/data/data-pipeline/data_pipeline/score/score_l.py b/data/data-pipeline/data_pipeline/score/score_l.py index e2c53eac..554fdbfa 100644 --- a/data/data-pipeline/data_pipeline/score/score_l.py +++ b/data/data-pipeline/data_pipeline/score/score_l.py @@ -60,10 +60,16 @@ class ScoreL(Score): # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] return ( - self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.LOW_INCOME_THRESHOLD ) & ( - self.df[field_names.FEMA_RISK_PERCENTILE_FIELD] + self.df[ + field_names.FEMA_RISK_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) @@ -73,14 +79,27 @@ class ScoreL(Score): # Low income: In 60th percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] - return ( - self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD] - > self.LOW_INCOME_THRESHOLD - ) & ( - self.df[field_names.ENERGY_BURDEN_PERCENTILE_FIELD] + energy_criteria = ( + self.df[ + field_names.ENERGY_BURDEN_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) | ( + self.df[ + field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) + return ( + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.LOW_INCOME_THRESHOLD + ) & energy_criteria + def _transportation_factor(self) -> bool: # In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA) # or @@ -92,22 +111,22 @@ class ScoreL(Score): # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] transportation_criteria = ( - ( - self.df[field_names.DIESEL_PERCENTILE_FIELD] - > self.ENVIRONMENTAL_BURDEN_THRESHOLD - ) - | ( - self.df[field_names.PM25_PERCENTILE_FIELD] - > self.ENVIRONMENTAL_BURDEN_THRESHOLD - ) - | ( - self.df[field_names.TRAFFIC_PERCENTILE_FIELD] - > self.ENVIRONMENTAL_BURDEN_THRESHOLD - ) + self.df[ + field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) | ( + self.df[ + field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) return ( - self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.LOW_INCOME_THRESHOLD ) & transportation_criteria @@ -121,24 +140,45 @@ class ScoreL(Score): # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] housing_criteria = ( - self.df[field_names.LEAD_PAINT_PERCENTILE_FIELD] + self.df[ + field_names.LEAD_PAINT_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.HOUSING_BURDEN_PERCENTILE_FIELD] + self.df[ + field_names.HOUSING_BURDEN_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) return ( - self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.LOW_INCOME_THRESHOLD ) & housing_criteria def _pollution_factor(self) -> bool: - # TBD + # Proximity to Risk Management Plan sites is > X # AND # Low income: In 60th percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] - return False + return ( + self.df[ + field_names.RMP_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) & ( + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + > self.LOW_INCOME_THRESHOLD + ) def _water_factor(self) -> bool: # In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model) @@ -147,10 +187,16 @@ class ScoreL(Score): # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] return ( - self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.LOW_INCOME_THRESHOLD ) & ( - self.df[field_names.WASTEWATER_PERCENTILE_FIELD] + self.df[ + field_names.WASTEWATER_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) @@ -169,26 +215,41 @@ class ScoreL(Score): health_criteria = ( ( - self.df[field_names.DIABETES_PERCENTILE_FIELD] + self.df[ + field_names.DIABETES_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.ASTHMA_PERCENTILE_FIELD] + self.df[ + field_names.ASTHMA_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.HEART_DISEASE_PERCENTILE_FIELD] + self.df[ + field_names.HEART_DISEASE_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.LIFE_EXPECTANCY_PERCENTILE_FIELD] + self.df[ + field_names.LIFE_EXPECTANCY_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] # Note: a high life expectancy is good, so take 1 minus the threshold to invert it, # and then look for life expenctancies lower than that (not greater than). < 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD ) ) return ( - self.df[field_names.POVERTY_LESS_THAN_200_FPL_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_LESS_THAN_200_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.LOW_INCOME_THRESHOLD ) & health_criteria @@ -205,21 +266,33 @@ class ScoreL(Score): # (necessary to screen out university block groups) workforce_criteria = ( ( - self.df[field_names.UNEMPLOYMENT_PERCENTILE_FIELD] + self.df[ + field_names.UNEMPLOYMENT_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.MEDIAN_INCOME_PERCENT_AMI_PERCENTILE_FIELD] + self.df[ + field_names.MEDIAN_INCOME_PERCENT_AMI_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] # Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it. # and then look for median income lower than that (not greater than). < 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.POVERTY_LESS_THAN_100_FPL_PERCENTILE_FIELD] + self.df[ + field_names.POVERTY_LESS_THAN_100_FPL_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) | ( - self.df[field_names.LINGUISTIC_ISO_PERCENTILE_FIELD] + self.df[ + field_names.LINGUISTIC_ISO_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] > self.ENVIRONMENTAL_BURDEN_THRESHOLD ) )