Score Indicators (#690)

* Score Indicators * rounding issue with housing burden column * switching out score g * final list of columns * removing duplicate housing burden percentile fields * removing duplicate Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
2025-09-11 03:28:17 -07:00 · 2021-09-16 10:53:05 -04:00 · 2021-09-16 10:53:05 -04:00 · 487f6a8e04
commit 487f6a8e04
parent 47df35b77e
3 changed files with 51 additions and 5 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@ -64,6 +64,20 @@ TILES_SCORE_COLUMNS = [
    "Linguistic isolation (percent) (percentile)",
    "Unemployed civilians (percent) (percentile)",
    "Housing burden (percent) (percentile)",
    "Diagnosed diabetes among adults aged >=18 years (percentile)",
    "Current asthma among adults aged >=18 years (percentile)",
    "Coronary heart disease among adults aged >=18 years (percentile)",
    "Life expectancy (years) (percentile)",
    "Traffic proximity and volume (percentile)",
    "FEMA Risk Index Expected Annual Loss Score (percentile)",
    "Energy burden (percentile)",
    "Wastewater discharge (percentile)",
    "Percent pre-1960s housing (lead paint indicator) (percentile)",
    "Diesel particulate matter (percentile)",
    "Particulate matter (PM2.5) (percentile)",
    "Median household income (% of AMI) (percentile)",
    "Percent of individuals < 200% Federal Poverty Line (percentile)",
    "Percent individuals age 25 or over with less than high school degree (percentile)",
 ]
 # columns to round floats to 2 decimals
@ -77,6 +91,24 @@ TILES_SCORE_FLOAT_COLUMNS = [
    "Linguistic isolation (percent)",
    "Unemployed civilians (percent)",
    "Housing burden (percent)",
    "Poverty (Less than 200% of federal poverty line) (percentile)",
    "Percent individuals age 25 or over with less than high school degree (percentile)",
    "Linguistic isolation (percent) (percentile)",
    "Unemployed civilians (percent) (percentile)",
    "Housing burden (percent) (percentile)",
    "Diagnosed diabetes among adults aged >=18 years (percentile)",
    "Current asthma among adults aged >=18 years (percentile)",
    "Coronary heart disease among adults aged >=18 years (percentile)",
    "Life expectancy (years) (percentile)",
    "Traffic proximity and volume (percentile)",
    "FEMA Risk Index Expected Annual Loss Score (percentile)",
    "Energy burden (percentile)",
    "Wastewater discharge (percentile)",
    "Percent pre-1960s housing (lead paint indicator) (percentile)",
    "Diesel particulate matter (percentile)",
    "Particulate matter (PM2.5) (percentile)",
    "Median household income (% of AMI) (percentile)",
    "Percent of individuals < 200% Federal Poverty Line (percentile)",
 ]
 TILES_ROUND_NUM_DECIMALS = 2
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -608,15 +608,26 @@ class ScoreETL(ExtractTransformLoad):
        high_school_cutoff_threshold = 0.05
        high_school_cutoff_threshold_2 = 0.06
        # Score G is now modified NMTC
        df["Score G (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
        ) | (
            (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20)
            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
        )
        df["Score G"] = df["Score G (communities)"].astype(int)
        df["Score G (percentile)"] = df["Score G"]
        df["Score I (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.7)
            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
        ) | (
            (df[self.POVERTY_LESS_THAN_200_FPL_FIELD_NAME] > 0.50)
            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
        )
-        df["Score G"] = df["Score G (communities)"].astype(int)
+        df["Score I"] = df["Score I (communities)"].astype(int)
-        df["Score G (percentile)"] = df["Score G"]
+        df["Score I (percentile)"] = df["Score I"]
        df["Score H (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
@ -631,12 +642,13 @@ class ScoreETL(ExtractTransformLoad):
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
        ) | (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20)
-        df["NMTC modified (communities)"] = (
+
        df["Score K (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
-            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
        ) | (
            (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20)
-            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
        )
        return df
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@ -183,6 +183,7 @@ class PostScoreETL(ExtractTransformLoad):
    def _create_tile_data(
        self, score_county_state_merged_df: pd.DataFrame
    ) -> pd.DataFrame:
        logger.info("Rounding Decimals")
        score_tiles = score_county_state_merged_df[
            constants.TILES_SCORE_COLUMNS
        ]
@ -191,6 +192,7 @@ class PostScoreETL(ExtractTransformLoad):
            * len(constants.TILES_SCORE_FLOAT_COLUMNS),
            index=constants.TILES_SCORE_FLOAT_COLUMNS,
        )
        return score_tiles.round(decimals)
    def _create_downloadable_data(