From 7e6dd1d3db7ad6d116cdefc004b8ad8d68a33179 Mon Sep 17 00:00:00 2001 From: Saran Ahluwalia Date: Fri, 31 Dec 2021 15:46:48 -0500 Subject: [PATCH] added revised --- .../etl/sources/maryland_ejscreen/README.md | 1 - .../etl/sources/maryland_ejscreen/etl.py | 30 ++++++++++++++----- .../ipython/scoring_comparison.ipynb | 9 ++++-- .../data_pipeline/score/field_names.py | 8 ++++- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/README.md b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/README.md index fb2cd16c..2fc797a6 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/README.md +++ b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/README.md @@ -14,7 +14,6 @@ The two "Pollution Burden" average scores are then averaged together and the res For each indicator, the percentile is given. For example, the indicator value for "Asthma Emergency Discharges" with 0.9 is therefore in the 90th percentile, which means only 10% of tracts in Maryland have higher values. EJ Scores near 1 represent areas of the greatest environmental justice concern. - A study of Bladensburg, MD - located in Prince George’s County - demonstrated the application of the MD EJSCREEN (Driver et al., 2019). According to the study, The Bladensburg population is primarily Black (62.7%) and Latinx (33.0%), with 20.1% of the community members living below the federal poverty line. Through an analysis, leveraging the Maryland EJSCREEN, Bladensburg with MD EJSCREEN, the researchers found that Bladensburg has an EJ score higher than 99% of the census tracts in Prince George’s County, indicating a higher prevalence of environmental hazards in the region. Furthermore, it was determined that Bladensburg residents are at a higher risk of developing cancer due to air pollution than 90–100% of the census tracts in the state or county. 
diff --git a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py index 9525152b..2bd43a78 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py @@ -31,6 +31,8 @@ class MarylandEJScreenETL(ExtractTransformLoad): field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD, field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD, field_names.MARYLAND_PERCENTILE_FIELD_NAME, + field_names.MARYLAND_SCORE_FIELD_NAME, + field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD, ] self.df: pd.DataFrame @@ -81,27 +83,39 @@ class MarylandEJScreenETL(ExtractTransformLoad): # Set our class instance variable. self.df = combined_df.copy() - # Rename + # Rename columns self.df.rename( columns={ "Census_Tra": self.GEOID_TRACT_FIELD_NAME, - "EJScore": field_names.MARYLAND_PERCENTILE_FIELD_NAME, + "EJScore": field_names.MARYLAND_SCORE_FIELD_NAME, }, inplace=True, ) - # Baseline Comparisons with some quartiles and the 90th percentile. + self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] = self.df[ + field_names.MARYLAND_SCORE_FIELD_NAME + ].rank( + pct=True, + # Rank ascending so that higher scores receive higher percentiles. + ascending=True + ) + + # An arbitrarily chosen percentile (the 75th) is used in the comparison tool output + self.df[field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD] = ( + self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] > 0.75 + ) + + # Baseline comparisons with some quartiles and the 90th percentile of the EJ Score # Interpretation: The score is greater than or equal to N% of the tracts in the state. 
self.df[field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] >= 0.25 + self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.25 ) self.df[field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] >= 0.50 + self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.50 ) self.df[field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] >= 0.75 + self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.75 ) - # This percentile is used in the comparison tool. self.df[field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD] = ( self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] >= 0.90 ) @@ -110,6 +124,6 @@ class MarylandEJScreenETL(ExtractTransformLoad): logger.info("Saving Maryland EJSCREEN CSV") # write maryland tracts to csv self.OUTPUT_CSV_PATH.mkdir(parents=True, exist_ok=True) - self.df.to_csv( + self.df[self.COLUMNS_TO_KEEP].to_csv( self.OUTPUT_CSV_PATH / "maryland_ejscreen.csv", index=False ) diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index b16953da..bd34cd20 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -477,9 +477,14 @@ " ),\n", " Index(\n", " method_name=\"Maryland EJSCREEN\",\n", - " priority_communities_field=field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,\n", + " priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD,\n", " other_census_tract_fields_to_keep=[\n", - " field_names.MARYLAND_PERCENTILE_FIELD_NAME\n", + " field_names.MARYLAND_SCORE_FIELD_NAME,\n", + " field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD,\n", + " field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD,\n", + " field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD,\n", + " 
field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,\n", + " field_names.MARYLAND_PERCENTILE_FIELD_NAME \n", " ]\n", " ), \n", " Index(\n", diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index ade33097..9bc22192 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -234,7 +234,13 @@ MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD: str = ( ) MARYLAND_PERCENTILE_FIELD_NAME: str = ( - "Maryland Environmental Justice Percentile" + "Maryland Environmental Justice Percentile for EJ Score" +) + +MARYLAND_SCORE_FIELD_NAME: str = "Maryland Environmental Justice Score" + +MARYLAND_EJSCREEN_BURDENED_THRESHOLD: str = ( + "Tract is greater than 75th percentile for Maryland EJ Score" ) # Child Opportunity Index data