diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 0ce0e052..064d1670 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -322,7 +322,7 @@ class ScoreETL(ExtractTransformLoad): field_names.FEMA_RISK_FIELD, field_names.URBAN_HEURISTIC_FIELD, field_names.AIR_TOXICS_CANCER_RISK_FIELD, - field_names.RESPITORY_HAZARD_FIELD, + field_names.RESPIRATORY_HAZARD_FIELD, field_names.DIESEL_FIELD, field_names.PM25_FIELD, field_names.OZONE_FIELD, diff --git a/data/data-pipeline/data_pipeline/etl/sources/ejscreen/etl.py b/data/data-pipeline/data_pipeline/etl/sources/ejscreen/etl.py index c5724d7d..da88ea48 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/ejscreen/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/ejscreen/etl.py @@ -8,6 +8,12 @@ logger = get_module_logger(__name__) class EJSCREENETL(ExtractTransformLoad): + """Load EJSCREEN data. + + Data dictionary: + https://gaftp.epa.gov/EJSCREEN/2019/2019_EJSCREEN_columns_explained.csv + """ + def __init__(self): self.EJSCREEN_FTP_URL = "https://edap-arcgiscloud-data-commons.s3.amazonaws.com/EJSCREEN2020/EJSCREEN_Tract_2020_USPR.csv.zip" self.EJSCREEN_CSV = self.TMP_PATH / "EJSCREEN_Tract_2020_USPR.csv" @@ -19,7 +25,7 @@ class EJSCREENETL(ExtractTransformLoad): field_names.TOTAL_POP_FIELD, # pylint: disable=duplicate-code field_names.AIR_TOXICS_CANCER_RISK_FIELD, - field_names.RESPITORY_HAZARD_FIELD, + field_names.RESPIRATORY_HAZARD_FIELD, field_names.DIESEL_FIELD, field_names.PM25_FIELD, field_names.OZONE_FIELD, @@ -61,7 +67,7 @@ class EJSCREENETL(ExtractTransformLoad): # but I think that's the direction we'd like to move all ETL classes. - LMB "ACSTOTPOP": field_names.TOTAL_POP_FIELD, "CANCER": field_names.AIR_TOXICS_CANCER_RISK_FIELD, - "RESP": field_names.RESPITORY_HAZARD_FIELD, + "RESP": field_names.RESPIRATORY_HAZARD_FIELD, "DSLPM": field_names.DIESEL_FIELD, "PM25": field_names.PM25_FIELD, "OZONE": field_names.OZONE_FIELD, diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index 5903550e..e5e02974 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -479,7 +479,7 @@ "comparison_fields = [\n", " field_names.POVERTY_LESS_THAN_100_FPL_FIELD,\n", " field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n", - " field_names.MEDIAN_INCOME_PERCENT_AMI_FIELD,\n", + " field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n", " field_names.LINGUISTIC_ISO_FIELD,\n", " field_names.UNEMPLOYMENT_FIELD,\n", " field_names.HIGH_SCHOOL_ED_FIELD,\n", diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 4c47a555..c323e8f2 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -89,6 +89,7 @@ RMP_FIELD = "Proximity to Risk Management Plan (RMP) facilities" TSDF_FIELD = "Proximity to TSDF sites" NPL_FIELD = "Proximity to NPL sites" AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk" +RESPIRATORY_HAZARD_FIELD = "Respiratory hazard index" # Housing HOUSING_BURDEN_FIELD = "Housing burden (percent)" @@ -104,7 +105,6 @@ DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years" ASTHMA_FIELD = "Current asthma among adults aged >=18 years" HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years" LIFE_EXPECTANCY_FIELD = "Life expectancy (years)" -RESPITORY_HAZARD_FIELD = "Respiratory hazard index" CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years" HEALTH_INSURANCE_FIELD = ( "Current lack of health insurance among adults aged 18-64 years" @@ -260,6 +260,8 @@ IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = ( RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income" SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income" HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income" +AIR_TOXICS_CANCER_RISK_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for air toxics cancer risk and is low income" +RESPIRATORY_HAZARD_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for respiratory hazard index and is low income" # Critical Clean Water and Waste Infrastructure WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income" diff --git a/data/data-pipeline/data_pipeline/score/score_c.py b/data/data-pipeline/data_pipeline/score/score_c.py index 9fe68fff..57194b45 100644 --- a/data/data-pipeline/data_pipeline/score/score_c.py +++ b/data/data-pipeline/data_pipeline/score/score_c.py @@ -54,7 +54,7 @@ class ScoreC(Score): [ field_names.AIR_TOXICS_CANCER_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.RESPITORY_HAZARD_FIELD + field_names.RESPIRATORY_HAZARD_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, diff --git a/data/data-pipeline/data_pipeline/score/score_f.py b/data/data-pipeline/data_pipeline/score/score_f.py index cc07d0b2..fb254776 100644 --- a/data/data-pipeline/data_pipeline/score/score_f.py +++ b/data/data-pipeline/data_pipeline/score/score_f.py @@ -36,7 +36,7 @@ class ScoreF(Score): ) | ( self.df[ - field_names.RESPITORY_HAZARD_FIELD + field_names.RESPIRATORY_HAZARD_FIELD + field_names.PERCENTILE_FIELD_SUFFIX ] > 0.9 diff --git a/data/data-pipeline/data_pipeline/score/score_l.py b/data/data-pipeline/data_pipeline/score/score_l.py index 7653ce46..64976d80 100644 --- a/data/data-pipeline/data_pipeline/score/score_l.py +++ b/data/data-pipeline/data_pipeline/score/score_l.py @@ -407,6 +407,8 @@ class ScoreL(Score): field_names.RMP_LOW_INCOME_FIELD, field_names.SUPERFUND_LOW_INCOME_FIELD, field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD, + field_names.AIR_TOXICS_CANCER_RISK_LOW_INCOME_FIELD, + field_names.RESPIRATORY_HAZARD_LOW_INCOME_FIELD, ] rmp_sites_threshold = ( @@ -426,6 +428,22 @@ class ScoreL(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) + air_toxics_cancer_risk_threshold = ( + self.df[ + field_names.AIR_TOXICS_CANCER_RISK_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + + respiratory_hazard_risk_threshold = ( + self.df[ + field_names.RESPIRATORY_HAZARD_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + # individual series-by-series self.df[field_names.RMP_LOW_INCOME_FIELD] = ( rmp_sites_threshold & self.df[field_names.FPL_200_SERIES] @@ -436,6 +454,14 @@ class ScoreL(Score): self.df[field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD] = ( tsdf_sites_threshold & self.df[field_names.FPL_200_SERIES] ) + self.df[field_names.AIR_TOXICS_CANCER_RISK_LOW_INCOME_FIELD] = ( + air_toxics_cancer_risk_threshold + & self.df[field_names.FPL_200_SERIES] + ) + self.df[field_names.RESPIRATORY_HAZARD_LOW_INCOME_FIELD] = ( + respiratory_hazard_risk_threshold + & self.df[field_names.FPL_200_SERIES] + ) self._increment_total_eligibility_exceeded( pollution_eligibility_columns