parallelism

This commit is contained in:
Saran Ahluwalia 2021-12-31 18:48:19 -05:00
commit c0b8580791
3 changed files with 19 additions and 20 deletions

View file

@ -30,9 +30,9 @@ class MarylandEJScreenETL(ExtractTransformLoad):
field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD, field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD,
field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD, field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD,
field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD, field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,
field_names.MARYLAND_PERCENTILE_FIELD_NAME, field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD,
field_names.MARYLAND_SCORE_FIELD_NAME, field_names.MARYLAND_EJSCREEN_SCORE_FIELD,
field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD, field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD,
] ]
self.df: pd.DataFrame self.df: pd.DataFrame
@ -80,44 +80,43 @@ class MarylandEJScreenETL(ExtractTransformLoad):
# https://github.com/usds/justice40-tool/issues/239#issuecomment-995821572 # https://github.com/usds/justice40-tool/issues/239#issuecomment-995821572
combined_df = combined_df[combined_df["Census_Tra"] != 0] combined_df = combined_df[combined_df["Census_Tra"] != 0]
# Set our class instance variable. # Set our class instance variable after conversions in lines 50-81
self.df = combined_df.copy() self.df = combined_df.copy()
# Rename columns # Rename columns
self.df.rename( self.df.rename(
columns={ columns={
"Census_Tra": self.GEOID_TRACT_FIELD_NAME, "Census_Tra": self.GEOID_TRACT_FIELD_NAME,
"EJScore": field_names.MARYLAND_SCORE_FIELD_NAME, "EJScore": field_names.MARYLAND_EJSCREEN_SCORE_FIELD,
}, },
inplace=True, inplace=True,
) )
self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] = self.df[ self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] = self.df[
field_names.MARYLAND_SCORE_FIELD_NAME field_names.MARYLAND_EJSCREEN_SCORE_FIELD
].rank( ].rank(
pct=True, pct=True,
# Set ascending to the parameter value.
ascending=True ascending=True
) )
# An arbitrarily chosen percentile is used in the comparison tool output # An arbitrarily chosen percentile is used in the comparison tool output
self.df[field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD] = ( self.df[field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD] = (
self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] > 0.75 self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] > 0.75
) )
# Baseline comparisons with some quartiles and the 90th percentile of the EJ Score # Baseline comparisons with some quartiles and the 90th percentile of the EJ Score
# Interpretation: The score is greater than or equal to N% of the tracts in the state. # Interpretation: The score is greater than or equal to N% of the tracts in the state.
self.df[field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD] = ( self.df[field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD] = (
self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.25 self.df[field_names.MARYLAND_EJSCREEN_SCORE_FIELD] >= 0.25
) )
self.df[field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD] = ( self.df[field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD] = (
self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.50 self.df[field_names.MARYLAND_EJSCREEN_SCORE_FIELD] >= 0.50
) )
self.df[field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD] = ( self.df[field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD] = (
self.df[field_names.MARYLAND_SCORE_FIELD_NAME] >= 0.75 self.df[field_names.MARYLAND_EJSCREEN_SCORE_FIELD] >= 0.75
) )
self.df[field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD] = ( self.df[field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD] = (
self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME] >= 0.90 self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] >= 0.90
) )
def load(self) -> None: def load(self) -> None:

View file

@ -477,14 +477,14 @@
" ),\n", " ),\n",
" Index(\n", " Index(\n",
" method_name=\"Maryland EJSCREEN\",\n", " method_name=\"Maryland EJSCREEN\",\n",
" priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD,\n", " priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD,\n",
" other_census_tract_fields_to_keep=[\n", " other_census_tract_fields_to_keep=[\n",
" field_names.MARYLAND_SCORE_FIELD_NAME,\n", " field_names.MARYLAND_EJSCREEN_SCORE_FIELD,\n",
" field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD,\n",
" field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD,\n",
" field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD,\n",
" field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,\n", " field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,\n",
" field_names.MARYLAND_PERCENTILE_FIELD_NAME \n", " field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD \n",
" ]\n", " ]\n",
" ), \n", " ), \n",
" Index(\n", " Index(\n",

View file

@ -233,13 +233,13 @@ MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD: str = (
"Tract is >=90% all other Maryland Tracts" "Tract is >=90% all other Maryland Tracts"
) )
MARYLAND_PERCENTILE_FIELD_NAME: str = ( MARYLAND_EJSCREEN_PERCENTILE_FIELD: str = (
"Maryland Environmental Justice Percentile for EJ Score" "Maryland Environmental Justice Percentile for EJ Score"
) )
MARYLAND_SCORE_FIELD_NAME: str = "Maryland Environmental Justice Score" MARYLAND_EJSCREEN_SCORE_FIELD: str = "Maryland Environmental Justice Score"
MARYLAND_EJSCREEN_BURDENED_THRESHOLD: str = ( MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD: str = (
"Tract is greater than 75th percentile for Maryland EJ Score" "Tract is greater than 75th percentile for Maryland EJ Score"
) )