Score Indicators (#690)

* Score Indicators * rounding issue with housing burden column * switching out Score G * final list of columns * removing duplicate housing burden percentile fields * removing duplicate Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
2025-07-24 01:10:17 -07:00 · 2021-09-16 10:53:05 -04:00 · 2021-09-16 10:53:05 -04:00 · 487f6a8e04
commit 487f6a8e04
parent 47df35b77e
3 changed files with 51 additions and 5 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@ -64,6 +64,20 @@ TILES_SCORE_COLUMNS = [
    "Linguistic isolation (percent) (percentile)",
    "Unemployed civilians (percent) (percentile)",
    "Housing burden (percent) (percentile)",
+    "Diagnosed diabetes among adults aged >=18 years (percentile)",
+    "Current asthma among adults aged >=18 years (percentile)",
+    "Coronary heart disease among adults aged >=18 years (percentile)",
+    "Life expectancy (years) (percentile)",
+    "Traffic proximity and volume (percentile)",
+    "FEMA Risk Index Expected Annual Loss Score (percentile)",
+    "Energy burden (percentile)",
+    "Wastewater discharge (percentile)",
+    "Percent pre-1960s housing (lead paint indicator) (percentile)",
+    "Diesel particulate matter (percentile)",
+    "Particulate matter (PM2.5) (percentile)",
+    "Median household income (% of AMI) (percentile)",
+    "Percent of individuals < 200% Federal Poverty Line (percentile)",
+    "Percent individuals age 25 or over with less than high school degree (percentile)",
 ]

 # columns to round floats to 2 decimals
@ -77,6 +91,24 @@ TILES_SCORE_FLOAT_COLUMNS = [
    "Linguistic isolation (percent)",
    "Unemployed civilians (percent)",
    "Housing burden (percent)",
+    "Poverty (Less than 200% of federal poverty line) (percentile)",
+    "Percent individuals age 25 or over with less than high school degree (percentile)",
+    "Linguistic isolation (percent) (percentile)",
+    "Unemployed civilians (percent) (percentile)",
+    "Housing burden (percent) (percentile)",
+    "Diagnosed diabetes among adults aged >=18 years (percentile)",
+    "Current asthma among adults aged >=18 years (percentile)",
+    "Coronary heart disease among adults aged >=18 years (percentile)",
+    "Life expectancy (years) (percentile)",
+    "Traffic proximity and volume (percentile)",
+    "FEMA Risk Index Expected Annual Loss Score (percentile)",
+    "Energy burden (percentile)",
+    "Wastewater discharge (percentile)",
+    "Percent pre-1960s housing (lead paint indicator) (percentile)",
+    "Diesel particulate matter (percentile)",
+    "Particulate matter (PM2.5) (percentile)",
+    "Median household income (% of AMI) (percentile)",
+    "Percent of individuals < 200% Federal Poverty Line (percentile)",
 ]
 TILES_ROUND_NUM_DECIMALS = 2

--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -608,15 +608,26 @@ class ScoreETL(ExtractTransformLoad):
        high_school_cutoff_threshold = 0.05
        high_school_cutoff_threshold_2 = 0.06

+        # Score G is now modified NMTC
        df["Score G (communities)"] = (
+            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
+        ) | (
+            (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
+        )
+        df["Score G"] = df["Score G (communities)"].astype(int)
+        df["Score G (percentile)"] = df["Score G"]
+
+        df["Score I (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.7)
            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
        ) | (
            (df[self.POVERTY_LESS_THAN_200_FPL_FIELD_NAME] > 0.50)
            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
        )
-        df["Score G"] = df["Score G (communities)"].astype(int)
-        df["Score G (percentile)"] = df["Score G"]
+        df["Score I"] = df["Score I (communities)"].astype(int)
+        df["Score I (percentile)"] = df["Score I"]

        df["Score H (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
@ -631,12 +642,13 @@ class ScoreETL(ExtractTransformLoad):
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
        ) | (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20)

-        df["NMTC modified (communities)"] = (
+
+        df["Score K (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
-            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
        ) | (
            (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20)
-            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
        )

        return df
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@ -183,6 +183,7 @@ class PostScoreETL(ExtractTransformLoad):
    def _create_tile_data(
        self, score_county_state_merged_df: pd.DataFrame
    ) -> pd.DataFrame:
+        logger.info("Rounding Decimals")
        score_tiles = score_county_state_merged_df[
            constants.TILES_SCORE_COLUMNS
        ]
@ -191,6 +192,7 @@ class PostScoreETL(ExtractTransformLoad):
            * len(constants.TILES_SCORE_FLOAT_COLUMNS),
            index=constants.TILES_SCORE_FLOAT_COLUMNS,
        )
+
        return score_tiles.round(decimals)

    def _create_downloadable_data(