From c0b85807915e5886d6e8a7c1eeacae8323877e07 Mon Sep 17 00:00:00 2001 From: Saran Ahluwalia Date: Fri, 31 Dec 2021 18:48:19 -0500 Subject: [PATCH] parallelism --- .../etl/sources/maryland_ejscreen/etl.py | 27 +++++++++---------- .../ipython/scoring_comparison.ipynb | 6 ++--- .../data_pipeline/score/field_names.py | 6 ++--- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py index 2bd43a78..dc18ebaf 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py @@ -30,9 +30,9 @@ class MarylandEJScreenETL(ExtractTransformLoad): field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD, field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD, field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD, - field_names.MARYLAND_PERCENTILE_FIELD_NAME, - field_names.MARYLAND_SCORE_FIELD_NAME, - field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD, + field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD, + field_names.MARYLAND_EJSCREEN_SCORE_FIELD, + field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD, ] self.df: pd.DataFrame @@ -80,44 +80,43 @@ class MarylandEJScreenETL(ExtractTransformLoad): # https://github.com/usds/justice40-tool/issues/239#issuecomment-995821572 combined_df = combined_df[combined_df["Census_Tra"] != 0] - # Set our class instance variable. 
+ # Set our class instance variable once the conversions above are complete self.df = combined_df.copy() # Rename columns self.df.rename( columns={ "Census_Tra": self.GEOID_TRACT_FIELD_NAME, - "EJScore": field_names.MARYLAND_SCORE_FIELD_NAME, + "EJScore": field_names.MARYLAND_EJSCREEN_SCORE_FIELD, }, inplace=True, ) - self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] = self.df[ - field_names.MARYLAND_SCORE_FIELD_NAME + self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] = self.df[ + field_names.MARYLAND_EJSCREEN_SCORE_FIELD ].rank( pct=True, - # Set ascending to the parameter value. ascending=True ) # An arbitrarily chosen percentile is used in the comparison tool output - self.df[field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD] = ( - self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] > 0.75 + self.df[field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD] = ( + self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] > 0.75 ) # Baseline Comparisons with some quartiles and the 90th percent OF EJ Score # Interpretation: The score is greater than or equal to N% of the tracts in the state. 
self.df[field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.25 + self.df[field_names.MARYLAND_EJSCREEN_SCORE_FIELD] >= 0.25 ) self.df[field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.50 + self.df[field_names.MARYLAND_EJSCREEN_SCORE_FIELD] >= 0.50 ) self.df[field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.75 + self.df[field_names.MARYLAND_EJSCREEN_SCORE_FIELD] >= 0.75 ) self.df[field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD] = ( - self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] >= 0.90 + self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] >= 0.90 ) def load(self) -> None: diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index bd34cd20..0f71391c 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -477,14 +477,14 @@ " ),\n", " Index(\n", " method_name=\"Maryland EJSCREEN\",\n", - " priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD,\n", + " priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD,\n", " other_census_tract_fields_to_keep=[\n", - " field_names.MARYLAND_SCORE_FIELD_NAME,\n", + " field_names.MARYLAND_EJSCREEN_SCORE_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,\n", - " field_names.MARYLAND_PERCENTILE_FIELD_NAME \n", + " field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD \n", " ]\n", " ), \n", " Index(\n", diff --git a/data/data-pipeline/data_pipeline/score/field_names.py 
b/data/data-pipeline/data_pipeline/score/field_names.py index 9bc22192..50aa7be8 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -233,13 +233,13 @@ MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD: str = ( "Tract is >=90% all other Maryland Tracts" ) -MARYLAND_PERCENTILE_FIELD_NAME: str = ( +MARYLAND_EJSCREEN_PERCENTILE_FIELD: str = ( "Maryland Environmental Justice Percentile for EJ Score" ) -MARYLAND_SCORE_FIELD_NAME: str = "Maryland Environmental Justice Score" +MARYLAND_EJSCREEN_SCORE_FIELD: str = "Maryland Environmental Justice Score" -MARYLAND_EJSCREEN_BURDENED_THRESHOLD: str = ( +MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD: str = ( "Tract is greater than 75th percentile for Maryland EJ Score" )