Issue 954: Add various data sources from Child Opportunity Index (#986)

* Adds four fields:
    * Summer days above 90F
    * Percent low access to healthy food
    * Percent impenetrable surface areas
    * Low third grade reading proficiency

* Each of these four is added to Definition L under various factors.

* Additionally, I add college attendance fields to the ETL for Census ACS.

* This PR also introduces the notion of "reverse percentiles", relevant to ticket #970. A short sketch of the college attendance and reverse percentile calculations follows this list.
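For context, a minimal illustrative sketch (not the pipeline's actual code) of the two new calculations. The ACS variable codes and output field names come from this PR; the numbers are made up, and the " (percentile)" output suffix is an assumption about the pipeline's naming convention.

import pandas as pd

# Hypothetical two-tract frame with illustrative values only.
df = pd.DataFrame({
    "B14004_001E": [1000, 800],  # Estimate!!Total (universe asked)
    "B14004_003E": [50, 10],     # Male enrolled in public college or grad school
    "B14004_008E": [20, 5],      # Male enrolled in private college or grad school
    "B14004_019E": [60, 15],     # Female enrolled in public college or grad school
    "B14004_024E": [30, 10],     # Female enrolled in private college or grad school
    "Third grade reading proficiency": [220.0, 180.0],
})

# College attendance share: the four enrollment estimates over the universe asked.
df["Percent enrollment in college or graduate school"] = (
    df["B14004_003E"]
    + df["B14004_008E"]
    + df["B14004_019E"]
    + df["B14004_024E"]
) / df["B14004_001E"]

# Reverse percentile: rank descending, so a tract with low reading proficiency
# lands near the top of the "Low third grade reading proficiency" percentile.
df["Low third grade reading proficiency (percentile)"] = df[
    "Third grade reading proficiency"
].rank(pct=True, ascending=False)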
Lucas Merrill Brown 2021-12-07 11:33:49 -05:00 committed by GitHub
parent df564658a5
commit 5a6d6d8557
8 changed files with 357 additions and 40 deletions

View file

@@ -49,6 +49,11 @@ DATASET_LIST = [
"module_dir": "geocorr",
"class_name": "GeoCorrETL",
},
{
"name": "child_opportunity_index",
"module_dir": "child_opportunity_index",
"class_name": "ChildOpportunityIndex",
},
{
"name": "mapping_inequality",
"module_dir": "mapping_inequality",

View file

@@ -1,4 +1,6 @@
import functools
from collections import namedtuple
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
@@ -29,6 +31,7 @@ class ScoreETL(ExtractTransformLoad):
self.persistent_poverty_df: pd.DataFrame
self.census_decennial_df: pd.DataFrame
self.census_2010_df: pd.DataFrame
self.child_opportunity_index_df: pd.DataFrame
def extract(self) -> None:
logger.info("Loading data sets from disk.")
@@ -162,6 +165,19 @@ class ScoreETL(ExtractTransformLoad):
low_memory=False,
)
# Load COI data
child_opportunity_index_csv = (
constants.DATA_PATH
/ "dataset"
/ "child_opportunity_index"
/ "usa.csv"
)
self.child_opportunity_index_df = pd.read_csv(
child_opportunity_index_csv,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
low_memory=False,
)
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
logger.info("Joining Census Tract dataframes")
@@ -255,6 +271,7 @@ class ScoreETL(ExtractTransformLoad):
self.census_acs_median_incomes_df,
self.census_decennial_df,
self.census_2010_df,
self.child_opportunity_index_df,
]
# Sanity check each data frame before merging.
@@ -323,6 +340,7 @@ class ScoreETL(ExtractTransformLoad):
field_names.HIGH_SCHOOL_ED_FIELD,
field_names.UNEMPLOYMENT_FIELD,
field_names.MEDIAN_HOUSE_VALUE_FIELD,
field_names.COLLEGE_ATTENDANCE_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
@@ -333,6 +351,9 @@ class ScoreETL(ExtractTransformLoad):
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
field_names.EXTREME_HEAT_FIELD,
field_names.HEALTHY_FOOD_FIELD,
field_names.IMPENETRABLE_SURFACES_FIELD,
]
non_numeric_columns = [
@@ -340,7 +361,32 @@ class ScoreETL(ExtractTransformLoad):
field_names.PERSISTENT_POVERTY_FIELD,
]
columns_to_keep = non_numeric_columns + numeric_columns
# For some columns, high values are "good", so we want to reverse the percentile
# so that high values are "bad" and any scoring logic can still check if it's
# >= some threshold.
# TODO: Add more fields here.
# https://github.com/usds/justice40-tool/issues/970
ReversePercentile = namedtuple(
typename="ReversePercentile",
field_names=["field_name", "low_field_name"],
)
reverse_percentiles = [
# Each entry maps a field name to the name for its low values,
# for instance, 3rd grade reading level : Low 3rd grade reading level.
# The low field will not exist yet; it is only created for the
# percentile.
ReversePercentile(
field_name=field_names.READING_FIELD,
low_field_name=field_names.LOW_READING_FIELD,
)
]
columns_to_keep = (
non_numeric_columns
+ numeric_columns
+ [rp.field_name for rp in reverse_percentiles]
)
df_copy = df[columns_to_keep].copy()
@@ -375,6 +421,19 @@ class ScoreETL(ExtractTransformLoad):
df_copy[col] - min_value
) / (max_value - min_value)
# Create reversed percentiles for these fields
for reverse_percentile in reverse_percentiles:
# Calculate reverse percentiles
# For instance, for 3rd grade reading level (score from 0-500),
# calculate reversed percentiles and give the result the name
# `Low 3rd grade reading level (percentile)`.
df_copy[
f"{reverse_percentile.low_field_name}"
f"{field_names.PERCENTILE_FIELD_SUFFIX}"
] = df_copy[reverse_percentile.field_name].rank(
pct=True, ascending=False
)
# Special logic: create a combined population field.
# We sometimes run analytics on "population", and this makes a single field
# that is either the island area's population in 2009 or the state's

View file

@@ -114,6 +114,27 @@ class CensusACSETL(ExtractTransformLoad):
)
self.HIGH_SCHOOL_ED_FIELD = "Percent individuals age 25 or over with less than high school degree"
# College attendance fields
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED = (
"B14004_001E" # Estimate!!Total
)
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = "B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school
self.COLLEGE_ATTENDANCE_FIELDS = [
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED,
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC,
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE,
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC,
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE,
]
self.COLLEGE_ATTENDANCE_FIELD = (
"Percent enrollment in college or graduate school"
)
self.RE_FIELDS = [
"B02001_001E",
"B02001_002E",
@@ -156,15 +177,30 @@ class CensusACSETL(ExtractTransformLoad):
self.STATE_GEOID_FIELD_NAME = "GEOID2"
self.COLUMNS_TO_KEEP = (
[
self.GEOID_TRACT_FIELD_NAME,
self.UNEMPLOYED_FIELD_NAME,
self.LINGUISTIC_ISOLATION_FIELD_NAME,
self.MEDIAN_INCOME_FIELD_NAME,
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
self.HIGH_SCHOOL_ED_FIELD,
self.COLLEGE_ATTENDANCE_FIELD,
]
+ self.RE_OUTPUT_FIELDS
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
)
self.df: pd.DataFrame
def extract(self) -> None:
# Define the variables to retrieve
variables = (
[
# Income field
self.MEDIAN_INCOME_FIELD,
# House value
self.MEDIAN_HOUSE_VALUE_FIELD,
]
+ self.EMPLOYMENT_FIELDS
@@ -172,6 +208,7 @@ class CensusACSETL(ExtractTransformLoad):
+ self.POVERTY_FIELDS
+ self.EDUCATIONAL_FIELDS
+ self.RE_FIELDS
+ self.COLLEGE_ATTENDANCE_FIELDS
)
self.df = retrieve_census_acs_data(
@@ -308,6 +345,14 @@ class CensusACSETL(ExtractTransformLoad):
df["B03003_003E"] / df["B03003_001E"]
)
# Calculate college attendance:
df[self.COLLEGE_ATTENDANCE_FIELD] = (
df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC]
+ df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE]
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC]
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
# Save results to self.
self.df = df
@@ -317,23 +362,7 @@ class CensusACSETL(ExtractTransformLoad):
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
columns_to_include = (
[
self.GEOID_TRACT_FIELD_NAME,
self.UNEMPLOYED_FIELD_NAME,
self.LINGUISTIC_ISOLATION_FIELD_NAME,
self.MEDIAN_INCOME_FIELD_NAME,
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
self.HIGH_SCHOOL_ED_FIELD,
]
+ self.RE_OUTPUT_FIELDS
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
)
self.df[columns_to_include].to_csv(
self.df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
)

View file

@@ -0,0 +1,120 @@
from pathlib import Path
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, unzip_file_from_url
logger = get_module_logger(__name__)
class ChildOpportunityIndex(ExtractTransformLoad):
"""ETL Child Opportunity Index data.
COI compiles a number of useful data sets. In the future, we could pull these
data sets in directly from their original creators.
Data dictionary available when you download zip from `self.COI_FILE_URL`.
Data source overview: https://data.diversitydatakids.org/dataset/coi20-child-opportunity-index-2-0-database.
Full technical documents: https://www.diversitydatakids.org/sites/default/files/2020-02/ddk_coi2.0_technical_documentation_20200212.pdf.
Github repo: https://github.com/diversitydatakids/COI/
"""
def __init__(self):
self.COI_FILE_URL = (
"https://data.diversitydatakids.org/datastore/zip/f16fff12-b1e5-4f60-85d3-"
"3a0ededa30a0?format=csv"
)
self.OUTPUT_PATH: Path = (
self.DATA_PATH / "dataset" / "child_opportunity_index"
)
self.TRACT_INPUT_COLUMN_NAME = "geoid"
self.EXTREME_HEAT_INPUT_FIELD = "HE_HEAT"
self.HEALTHY_FOOD_INPUT_FIELD = "HE_FOOD"
self.IMPENETRABLE_SURFACES_INPUT_FIELD = "HE_GREEN"
self.READING_INPUT_FIELD = "ED_READING"
# Constants for output
self.COLUMNS_TO_KEEP = [
self.GEOID_TRACT_FIELD_NAME,
field_names.EXTREME_HEAT_FIELD,
field_names.HEALTHY_FOOD_FIELD,
field_names.IMPENETRABLE_SURFACES_FIELD,
field_names.READING_FIELD,
]
self.raw_df: pd.DataFrame
self.output_df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting 51MB data download.")
unzip_file_from_url(
file_url=self.COI_FILE_URL,
download_path=self.TMP_PATH,
unzipped_file_path=self.TMP_PATH / "child_opportunity_index",
)
self.raw_df = pd.read_csv(
filepath_or_buffer=self.TMP_PATH
/ "child_opportunity_index"
/ "raw.csv",
# The following fields need to remain strings so they keep all of their
# digits and are not converted to numbers.
dtype={
self.TRACT_INPUT_COLUMN_NAME: "string",
},
low_memory=False,
)
def transform(self) -> None:
logger.info("Starting transforms.")
output_df = self.raw_df.rename(
columns={
self.TRACT_INPUT_COLUMN_NAME: self.GEOID_TRACT_FIELD_NAME,
self.EXTREME_HEAT_INPUT_FIELD: field_names.EXTREME_HEAT_FIELD,
self.HEALTHY_FOOD_INPUT_FIELD: field_names.HEALTHY_FOOD_FIELD,
self.IMPENETRABLE_SURFACES_INPUT_FIELD: field_names.IMPENETRABLE_SURFACES_FIELD,
self.READING_INPUT_FIELD: field_names.READING_FIELD,
}
)
# Sanity check the tract field.
if len(output_df[self.GEOID_TRACT_FIELD_NAME].str.len().unique()) != 1:
raise ValueError("Wrong tract length.")
# COI has two rows per tract: one for 2010 and one for 2015.
output_df = output_df[output_df["year"] == 2015]
# Convert percents from 0-100 to 0-1 to standardize with our other fields.
percent_fields_to_convert = [
field_names.HEALTHY_FOOD_FIELD,
field_names.IMPENETRABLE_SURFACES_FIELD,
]
for percent_field_to_convert in percent_fields_to_convert:
output_df[percent_field_to_convert] = (
output_df[percent_field_to_convert] / 100
)
self.output_df = output_df
def validate(self) -> None:
logger.info("Validating data.")
pass
def load(self) -> None:
logger.info("Saving CSV")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.output_df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
)
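
As a rough usage sketch, the new ETL can be exercised on its own. The import path below is an assumption based on the "module_dir": "child_opportunity_index" entry added to DATASET_LIST; it follows the pipeline's usual sources layout rather than anything confirmed in this diff.

# Hypothetical standalone invocation of the new ETL (import path assumed).
from data_pipeline.etl.sources.child_opportunity_index.etl import (
    ChildOpportunityIndex,
)

etl = ChildOpportunityIndex()
etl.extract()    # download and unzip the ~51MB COI archive into the temp dir
etl.transform()  # rename columns, keep the 2015 rows, rescale percent fields
etl.load()       # write data/dataset/child_opportunity_index/usa.csv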

View file

@@ -63,6 +63,8 @@ MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)"
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
AMI_FIELD = "Area Median Income (State or metropolitan)"
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
# Climate
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
EXPECTED_BUILDING_LOSS_RATE_FIELD = (
@@ -206,30 +208,63 @@ HOLC_GRADE_D_TRACT_50_PERCENT_FIELD: str = "Tract is >50% HOLC Grade D"
HOLC_GRADE_D_TRACT_75_PERCENT_FIELD: str = "Tract is >75% HOLC Grade D"
# Child Opportunity Index data
# Summer days with maximum temperature above 90F.
EXTREME_HEAT_FIELD = "Summer days above 90F"
# Percentage of households without a car located further than a half-mile from
# the nearest supermarket.
HEALTHY_FOOD_FIELD = "Percent low access to healthy food"
# Percentage of impenetrable surface areas such as rooftops, roads, or parking lots.
IMPENETRABLE_SURFACES_FIELD = "Percent impenetrable surface areas"
# Percentage of third graders scoring proficient on standardized reading tests,
# converted to NAEP scale score points.
READING_FIELD = "Third grade reading proficiency"
LOW_READING_FIELD = "Low third grade reading proficiency"
# Names for individual factors being exceeded
# Climate Change
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected population loss rate and is low income"
EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected agriculture loss rate and is low income"
EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected building loss rate and is low income"
EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for summer days above 90F and "
f"the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
f"percentile and is low income"
)
# Clean energy and efficiency
PM25_EXPOSURE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for PM2.5 exposure and is low income"
ENERGY_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for energy burden and is low income"
# Clean transportation
DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for diesel particulate matter and is low income"
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for traffic proximity and is low income"
# Affordable and Sustainable Housing
LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD = (
LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for lead paint and"
" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th percentile and is low income"
f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
f"percentile and is low income"
)
HOUSING_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for housing burden and is low income"
IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for impenetrable surfaces and is low "
f"income"
)
# Remediation and Reduction of Legacy Pollution
RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income"
SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income"
HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income"
# Critical Clean Water and Waste Infrastructure
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income"
# Health Burden
# Health Burdens
DIABETES_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for diabetes and is low income"
)
@@ -240,25 +275,35 @@ HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
HEALTHY_FOOD_LOW_INCOME_FIELD = (
f"At or above the {PERCENTILE}th percentile for low "
f"access to healthy food and is low income"
)
# Workforce
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for unemployment"
" and low HS education"
" and has low HS education"
)
LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for households in linguistic isolation"
" and low HS education"
" and has low HS education"
)
POVERTY_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
" and low HS education"
" and has low HS education"
)
LOW_READING_LOW_HS_EDUCATION_FIELD = (
f"At or above the {PERCENTILE}th percentile for low 3rd grade reading proficiency"
" and has low HS education"
)
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
f"At or below the {PERCENTILE}th percentile for median income"
" and low HS education"
" and has low HS education"
)
THRESHOLD_COUNT = "Total threshold criteria exceeded"

View file

@@ -177,6 +177,8 @@ class ScoreL(Score):
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD,
]
expected_population_loss_threshold = (
@@ -203,6 +205,28 @@ class ScoreL(Score):
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
extreme_heat_median_home_value_threshold = (
self.df[
field_names.EXTREME_HEAT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
) & (
self.df[
field_names.MEDIAN_HOUSE_VALUE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
)
impenetrable_surfaces_threshold = (
self.df[
field_names.IMPENETRABLE_SURFACES_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
expected_population_loss_threshold
& self.df[field_names.FPL_200_SERIES]
@@ -218,6 +242,18 @@ class ScoreL(Score):
& self.df[field_names.FPL_200_SERIES]
)
self.df[
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD
] = (
extreme_heat_median_home_value_threshold
& self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD] = (
impenetrable_surfaces_threshold
& self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(climate_eligibility_columns)
return self.df[climate_eligibility_columns].any(axis="columns")
@@ -320,11 +356,11 @@ class ScoreL(Score):
# poverty level. Source: Census's American Community Survey]
housing_eligibility_columns = [
field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD,
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
]
lead_paint_median_house_hold_threshold = (
lead_paint_median_home_value_threshold = (
self.df[
field_names.LEAD_PAINT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
@@ -347,8 +383,8 @@
)
# series by series indicators
self.df[field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD] = (
lead_paint_median_house_hold_threshold
self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
lead_paint_median_home_value_threshold
& self.df[field_names.FPL_200_SERIES]
)
@@ -449,6 +485,7 @@ class ScoreL(Score):
field_names.DIABETES_LOW_INCOME_FIELD,
field_names.ASTHMA_LOW_INCOME_FIELD,
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
]
@@ -474,6 +511,14 @@ class ScoreL(Score):
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
healthy_food_threshold = (
self.df[
field_names.HEALTHY_FOOD_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
life_expectancy_threshold = (
self.df[
field_names.LIFE_EXPECTANCY_FIELD
@@ -496,6 +541,9 @@ class ScoreL(Score):
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
)
self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
healthy_food_threshold & self.df[field_names.FPL_200_SERIES]
)
self._increment_total_eligibility_exceeded(health_eligibility_columns)
@@ -513,6 +561,15 @@ class ScoreL(Score):
# Where the high school degree achievement rates for adults 25 years and older is less than 95%
# (necessary to screen out university block groups)
# Workforce criteria for states fields.
workforce_eligibility_columns = [
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
field_names.LOW_READING_LOW_HS_EDUCATION_FIELD,
]
high_scool_achievement_rate_threshold = (
self.df[field_names.HIGH_SCHOOL_ED_FIELD]
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
@@ -552,6 +609,14 @@ class ScoreL(Score):
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
low_reading_threshold = (
self.df[
field_names.LOW_READING_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
linguistic_isolation_threshold
& high_scool_achievement_rate_threshold
@@ -569,15 +634,9 @@ class ScoreL(Score):
unemployment_threshold & high_scool_achievement_rate_threshold
)
# Workforce criteria for states fields that create indicator columns
# for each tract in order to indicate whether they met any of the four
# criteria. We will used this create individual indicator columns.
workforce_eligibility_columns = [
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
]
self.df[field_names.LOW_READING_LOW_HS_EDUCATION_FIELD] = (
low_reading_threshold & high_scool_achievement_rate_threshold
)
workforce_combined_criteria_for_states = self.df[
workforce_eligibility_columns