Prototype H (#682)

2025-09-30 02:33:17 -07:00 · 2021-09-14 16:16:41 -05:00 · 2021-09-14 16:16:41 -05:00 · 52e70653f0
commit 52e70653f0
parent 5bd63c083b
2 changed files with 213 additions and 30 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -161,6 +161,11 @@ class ScoreETL(ExtractTransformLoad):
                renamed_field=self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME,
                bucket=None,
            ),
+            DataSet(
+                input_field=self.MEDIAN_INCOME_FIELD_NAME,
+                renamed_field=self.MEDIAN_INCOME_FIELD_NAME,
+                bucket=None,
+            ),
            # The following data sets have buckets, because they're used in Score C
            DataSet(
                input_field="CANCER",
@@ -540,6 +545,7 @@ class ScoreETL(ExtractTransformLoad):
        logger.info("Adding Score G")

        high_school_cutoff_threshold = 0.05
+        high_school_cutoff_threshold_2 = 0.06

        df["Score G (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.7)
@@ -551,6 +557,25 @@ class ScoreETL(ExtractTransformLoad):
        df["Score G"] = df["Score G (communities)"].astype(int)
        df["Score G (percentile)"] = df["Score G"]

+        df["Score H (communities)"] = (
+            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
+        ) | (
+            (df[self.POVERTY_LESS_THAN_200_FPL_FIELD_NAME] > 0.40)
+            & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
+        )
+        df["Score H"] = df["Score H (communities)"].astype(int)
+
+        # df["80% AMI & 6% high school (communities)"] = (
+        #     (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
+        #     & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
+        #     )
+        #
+        # df["FPL200>40% & 6% high school (communities)"] = (
+        #     (df[self.POVERTY_LESS_THAN_200_FPL_FIELD_NAME] > 0.40)
+        #     & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2)
+        # )
+
        df["NMTC (communities)"] = (
            (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8)
        ) | (
@@ -637,7 +662,8 @@ class ScoreETL(ExtractTransformLoad):
            # Skip GEOID_FIELD_NAME, because it's a string.
            if data_set.renamed_field == self.GEOID_FIELD_NAME:
                continue
-            df[f"{data_set.renamed_field}"] = pd.to_numeric(
+
+            df[data_set.renamed_field] = pd.to_numeric(
                df[data_set.renamed_field]
            )