diff --git a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py index 5538ed97..eed10ad1 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/maryland_ejscreen/etl.py @@ -92,14 +92,13 @@ class MarylandEJScreenETL(ExtractTransformLoad): inplace=True, ) + # Interpretation: An EJ score (reported as a percentile) + # has a percentile rank of N for some N between 0 - 100 self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] = self.df[ field_names.MARYLAND_EJSCREEN_SCORE_FIELD - ].rank( - pct=True, - ascending=True - ) + ].rank(pct=True, ascending=True) - # An arbitrarily chosen percentile is used in the comparison tool output + # An arbitrarily chosen threshold is used in the comparison tool output self.df[field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD] = ( self.df[field_names.MARYLAND_EJSCREEN_PERCENTILE_FIELD] > 0.75 ) diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 50aa7be8..6d7c4cd4 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -239,8 +239,11 @@ MARYLAND_EJSCREEN_PERCENTILE_FIELD: str = ( MARYLAND_EJSCREEN_SCORE_FIELD: str = "Maryland Environmental Justice Score" +# this references ranked percentiles +# please see here for interpretation: https://github.com/usds/justice40-tool/issues/239#issuecomment-1003567593 +# and here: https://github.com/usds/justice40-tool/issues/239#issuecomment-1003448074 MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD: str = ( - "Tract is greater than 75th percentile for Maryland EJ Score" + "Tract has an EJ Score greater than 75" ) # Child Opportunity Index data