Issue 970: reverse percentiles for AMI and life expectancy (#1018)

* switching to low * fixing score-etl-post * updating comments * fixing comparison * create separate field for clarity * comment fix * removing healthy food * fixing bug in score post * running black and adding comment * Update pickles and add a helpful notes to README Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
2025-02-23 01:54:18 -08:00 · 2021-12-10 10:16:22 -05:00 · 2021-12-10 10:16:22 -05:00 · 7fcecaee42
commit 7fcecaee42
parent 24bac56d9e
11 changed files with 144 additions and 100 deletions
--- a/data/data-pipeline/README.md
+++ b/data/data-pipeline/README.md
@ -309,7 +309,7 @@ If you update the score in any way, it is necessary to create new pickles so tha
 It starts with the `data_pipeline/etl/score/tests/sample_data/score_data_initial.csv`, which is the first two rows of the `score/full/usa.csv`.
-To update this file, run a full score generation and then update the file as follows:
+To update this file, run a full score generation, then open a Python shell from the `data-pipeline` directory (e.g. `poetry run python3`), and then update the file with the following commands:
 ```
 import pickle
 from pathlib import Path
@ -322,6 +322,8 @@ score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10_TRACT": "string"}
 score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False)
 ```
 Now you can move on to updating individual pickles for the tests. Note that it is helpful to do them in this order:
 We have four pickle files that correspond to expected files:
 - `score_data_expected.pkl`: Initial score without counties
 - `score_transformed_expected.pkl`: Intermediate score with `etl._extract_score` and `etl._transform_score` applied. There's no file for this intermediate process, so we need to capture the pickle mid-process.
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@ -81,7 +81,7 @@ TILES_SCORE_COLUMNS = [
    field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
-    field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
+    field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
@ -89,7 +89,7 @@ TILES_SCORE_COLUMNS = [
    field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
-    field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
+    field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.POVERTY_LESS_THAN_200_FPL_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
@ -115,7 +115,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
    field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
-    field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
+    field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
@ -123,7 +123,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
    field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
-    field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
+    field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.POVERTY_LESS_THAN_200_FPL_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
@ -137,7 +137,6 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
    field_names.DIABETES_FIELD,
    field_names.ASTHMA_FIELD,
    field_names.HEART_DISEASE_FIELD,
    field_names.LIFE_EXPECTANCY_FIELD,
    field_names.TRAFFIC_FIELD,
    field_names.FEMA_RISK_FIELD,
    field_names.ENERGY_BURDEN_FIELD,
@ -149,11 +148,11 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
    field_names.TOTAL_POP_FIELD,
 ]
-# For every indicator above, we want to include percentile and min-max normalized variants also
+# For every indicator above, we want to include percentile also.
 DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
    pd.core.common.flatten(
        [
-            [p, f"{p} (percentile)"]
+            [p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"]
            for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
        ]
    )
@ -165,8 +164,15 @@ DOWNLOADABLE_SCORE_COLUMNS = [
    field_names.COUNTY_FIELD,
    field_names.STATE_FIELD,
    field_names.SCORE_G_COMMUNITIES,
    # Note: the reverse percentile fields get moved down here because
    # we put the raw value in the download along with the *reversed* percentile.
    # All other fields we put in f"{field_name}" and
    # f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the
    # reversed percentile fields.
    field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
-    field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD
+    field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.LIFE_EXPECTANCY_FIELD,
    field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    *DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
 ]
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -404,9 +404,7 @@ class ScoreETL(ExtractTransformLoad):
            field_names.POVERTY_LESS_THAN_150_FPL_FIELD,
            field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
            field_names.AMI_FIELD,
            field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
            field_names.MEDIAN_INCOME_FIELD,
            field_names.LIFE_EXPECTANCY_FIELD,
            field_names.ENERGY_BURDEN_FIELD,
            field_names.FEMA_RISK_FIELD,
            field_names.URBAN_HEURISTIC_FIELD,
@ -439,7 +437,6 @@ class ScoreETL(ExtractTransformLoad):
            field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
            field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
            field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
            field_names.EXTREME_HEAT_FIELD,
            field_names.HEALTHY_FOOD_FIELD,
            field_names.IMPENETRABLE_SURFACES_FIELD,
@ -468,7 +465,19 @@ class ScoreETL(ExtractTransformLoad):
            ReversePercentile(
                field_name=field_names.READING_FIELD,
                low_field_name=field_names.LOW_READING_FIELD,
-            )
+            ),
            ReversePercentile(
                field_name=field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
                low_field_name=field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
            ),
            ReversePercentile(
                field_name=field_names.LIFE_EXPECTANCY_FIELD,
                low_field_name=field_names.LOW_LIFE_EXPECTANCY_FIELD,
            ),
            ReversePercentile(
                field_name=field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
                low_field_name=field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
            ),
        ]
        columns_to_keep = (
@ -505,10 +514,6 @@ class ScoreETL(ExtractTransformLoad):
            max_value = df_copy[numeric_column].max(skipna=True)
            logger.info(
                f"For data set {numeric_column}, the min value is {min_value} and the max value is {max_value}."
            )
            df_copy[f"{numeric_column}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
                df_copy[numeric_column] - min_value
            ) / (max_value - min_value)
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb
@ -8,7 +8,10 @@
   "outputs": [],
   "source": [
    "import IPython\n",
    "import os\n",
    "import pandas as pd\n",
    "import pathlib\n",
    "import sys\n",
    "\n",
    "module_path = os.path.abspath(os.path.join(\"../..\"))\n",
    "if module_path not in sys.path:\n",
@ -28,12 +31,8 @@
   "outputs": [],
   "source": [
    "# Load\n",
-    "path_to_score_file_1 = (\n",
+    "path_to_score_file_1 = DATA_DIR / \"compare_two_score_csvs/usa (pre 970).csv\"\n",
-    "    DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa1.csv\"\n",
+    "path_to_score_file_2 = DATA_DIR / \"compare_two_score_csvs/usa (post 970).csv\"\n",
    ")\n",
    "path_to_score_file_2 = (\n",
    "        DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa2.csv\"\n",
    ")\n",
    "\n",
    "score_1_df = pd.read_csv(\n",
    "    path_to_score_file_1,\n",
@ -68,11 +67,16 @@
   "source": [
    "# List rows in one but not the other\n",
    "\n",
-    "if len(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) != len(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]):\n",
+    "if len(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) != len(\n",
    "    score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]\n",
    "):\n",
    "    print(\"Different lengths!\")\n",
    "\n",
    "print(\"Difference in tract IDs:\")\n",
-    "print(set(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) ^ set(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]))\n"
+    "print(\n",
    "    set(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME])\n",
    "    ^ set(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME])\n",
    ")"
   ]
  },
  {
@ -83,7 +87,12 @@
   "outputs": [],
   "source": [
    "# Join\n",
-    "merged_df = score_1_df.merge(score_2_df, how=\"outer\", on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME, suffixes=('_1', '_2'))\n",
+    "merged_df = score_1_df.merge(\n",
    "    score_2_df,\n",
    "    how=\"outer\",\n",
    "    on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME,\n",
    "    suffixes=(\"_1\", \"_2\"),\n",
    ")\n",
    "merged_df"
   ]
  },
@ -95,13 +104,31 @@
   "outputs": [],
   "source": [
    "# Check each duplicate column:\n",
    "# Remove the suffix \"_1\"\n",
    "duplicate_columns = [x[:-2] for x in merged_df.columns if \"_1\" in x]\n",
    "\n",
-    "for duplicate_column in duplicate_columns:\n",
+    "columns_to_exclude_from_duplicates_check = [\n",
-    "    print(f\"Checking duplicate column {duplicate_column}\")\n",
+    "    \"Total threshold criteria exceeded\"\n",
-    "    if not merged_df[f\"{duplicate_column}_1\"].equals(merged_df[f\"{duplicate_column}_2\"]):\n",
+    "]\n",
-    "        print(merged_df[f\"{duplicate_column}_1\"].compare(merged_df[f\"{duplicate_column}_2\"]))\n",
+    "\n",
-    "        raise ValueError(f\"Error! Different values in {duplicate_column}\")"
+    "columns_to_check = [column for column in duplicate_columns if column not in columns_to_exclude_from_duplicates_check]\n",
    "\n",
    "any_errors_found = False\n",
    "for column_to_check in columns_to_check:\n",
    "    print(f\"Checking duplicate column {column_to_check}\")\n",
    "    if not merged_df[f\"{column_to_check}_1\"].equals(\n",
    "        merged_df[f\"{column_to_check}_2\"]\n",
    "    ):\n",
    "        print(f\"Error! Different values in {column_to_check}\")\n",
    "        print(\n",
    "            merged_df[f\"{column_to_check}_1\"].compare(\n",
    "                merged_df[f\"{column_to_check}_2\"]\n",
    "            )\n",
    "        )\n",
    "        any_errors_found = True\n",
    "\n",
    "if any_errors_found:\n",
    "    raise ValueError(f\"Error! Different values in one or more columns.\")"
   ]
  }
 ],
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@ -60,11 +60,15 @@ MEDIAN_INCOME_FIELD = "Median household income in the past 12 months"
 MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD = (
    "Median household income (% of state median household income)"
 )
 MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)"
 PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
 AMI_FIELD = "Area Median Income (State or metropolitan)"
 COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
 MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
    "Median household income as a percent of area median income"
 )
 LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
    "Low median household income as a percent of area median income"
 )
 # Climate
 FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
@ -105,7 +109,6 @@ ENERGY_BURDEN_FIELD = "Energy burden"
 DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years"
 ASTHMA_FIELD = "Current asthma among adults aged >=18 years"
 HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years"
 LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
 CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years"
 HEALTH_INSURANCE_FIELD = (
    "Current lack of health insurance among adults aged 18-64 years"
@ -113,6 +116,8 @@ HEALTH_INSURANCE_FIELD = (
 PHYS_HEALTH_NOT_GOOD_FIELD = (
    "Physical health not good for >=14 days among adults aged >=18 years"
 )
 LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
 LOW_LIFE_EXPECTANCY_FIELD = "Low life expectancy"
 # Other Demographics
 TOTAL_POP_FIELD = "Total population"
@ -130,9 +135,6 @@ OVER_64_FIELD = "Individuals over 64 years old"
 # Fields from 2010 decennial census (generally only loaded for the territories)
 CENSUS_DECENNIAL_MEDIAN_INCOME_2009 = "Median household income in 2009 ($)"
 CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
    "Median household income as a percent of territory median income in 2009"
 )
 CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = (
    "Percentage households below 100% of federal poverty line in 2009"
 )
@ -141,7 +143,10 @@ CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = (
    "Unemployed civilians (percent) in 2009"
 )
 CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009"
-
+CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
    "Median household income as a percent of territory median income in 2009"
 )
 LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009"
 # Fields from 2010 ACS (loaded for comparison with the territories)
 CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployed civilians (percent) in 2010"
 CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
@ -265,7 +270,10 @@ ASTHMA_LOW_INCOME_FIELD = (
 )
 HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for heart disease and is low income"
-LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
+LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = (
    f"At or above the {PERCENTILE}th percentile "
    f"for low life expectancy and is low income"
 )
 # Workforce
 UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
@ -288,9 +296,9 @@ LOW_READING_LOW_HS_EDUCATION_FIELD = (
    " and has low HS education"
 )
-MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
+LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
-    f"At or below the {PERCENTILE}th percentile for median income"
+    f"At or below the {PERCENTILE}th percentile for low median household income as a "
-    "  and has low HS education"
+    f"percent of area median income and has low HS education"
 )
 # Not currently used in a factor
--- a/data/data-pipeline/data_pipeline/score/score_l.py
+++ b/data/data-pipeline/data_pipeline/score/score_l.py
@ -44,6 +44,8 @@ class ScoreL(Score):
        robustness over 1-year ACS.
        """
        # Create the combined field.
        # TODO: move this combined field percentile calculation to `etl_score`,
        #  since most other percentile logic is there.
        # There should only be one entry in either 2009 or 2019 fields, not one in both.
        # But just to be safe, we take the mean and ignore null values so if there
        # *were* entries in both, this result would make sense.
@ -169,7 +171,7 @@ class ScoreL(Score):
    def _climate_factor(self) -> bool:
        # In Xth percentile or above for FEMA’s Risk Index (Source: FEMA
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -225,7 +227,7 @@ class ScoreL(Score):
    def _energy_factor(self) -> bool:
        # In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -268,7 +270,7 @@ class ScoreL(Score):
        # or
        # In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -315,7 +317,7 @@ class ScoreL(Score):
        # or
        # In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -363,7 +365,7 @@ class ScoreL(Score):
    def _pollution_factor(self) -> bool:
        # Proximity to Risk Management Plan sites is > X
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -410,7 +412,7 @@ class ScoreL(Score):
    def _water_factor(self) -> bool:
        # In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -441,7 +443,7 @@ class ScoreL(Score):
        # or
        # In Xth percentile or above for low life expectancy (Source: CDC Places)
        # AND
-        # Low income: In 60th percentile or above for percent of block group population
+        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. Source: Census's American Community Survey]
@ -449,8 +451,7 @@ class ScoreL(Score):
            field_names.DIABETES_LOW_INCOME_FIELD,
            field_names.ASTHMA_LOW_INCOME_FIELD,
            field_names.HEART_DISEASE_LOW_INCOME_FIELD,
-            field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
+            field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
            field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
        ]
        diabetes_threshold = (
@ -475,24 +476,14 @@ class ScoreL(Score):
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )
-        healthy_food_threshold = (
+        low_life_expectancy_threshold = (
            self.df[
-                field_names.HEALTHY_FOOD_FIELD
+                field_names.LOW_LIFE_EXPECTANCY_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )
        life_expectancy_threshold = (
            self.df[
                field_names.LIFE_EXPECTANCY_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            # Note: a high life expectancy is good, so take 1 minus the threshold to invert it,
            # and then look for life expectancies lower than that (not greater than).
            <= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )
        self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
            diabetes_threshold & self.df[field_names.FPL_200_SERIES]
        )
@ -502,11 +493,8 @@ class ScoreL(Score):
        self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
            heart_disease_threshold & self.df[field_names.FPL_200_SERIES]
        )
-        self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
+        self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
-            life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
+            low_life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
            healthy_food_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self._increment_total_eligibility_exceeded(health_eligibility_columns)
@ -514,23 +502,25 @@ class ScoreL(Score):
        return self.df[health_eligibility_columns].any(axis="columns")
    def _workforce_factor(self) -> bool:
-        # Where unemployment is above X%
+        # Where unemployment is above Xth percentile
        # or
-        # Where median income is less than Y% of the area median income
+        # Where median income as a percent of area median income is above Xth percentile
        # or
-        # Where the percent of households at or below 100% of the federal poverty level is greater than Z%
+        # Where the percent of households at or below 100% of the federal poverty level
        # is above Xth percentile
        # or
-        # Where linguistic isolation is greater than Y%
+        # Where linguistic isolation is above Xth percentile
        # AND
-        # Where the high school degree achievement rates for adults 25 years and older is less than 95%
+        # Where the high school degree achievement rates for adults 25 years and older
-        # (necessary to screen out university block groups)
+        # is less than Y%
        # (necessary to screen out university tracts)
        # Workforce criteria for states fields.
        workforce_eligibility_columns = [
            field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
            field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
-            field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
+            field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
        ]
        high_scool_achievement_rate_threshold = (
@ -546,14 +536,12 @@ class ScoreL(Score):
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )
-        median_income_threshold = (
+        low_median_income_threshold = (
            self.df[
-                field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
+                field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
-            # Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it.
+            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
            # and then look for median income lower than that (not greater than).
            <= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )
        linguistic_isolation_threshold = (
@ -581,8 +569,8 @@ class ScoreL(Score):
            poverty_threshold & high_scool_achievement_rate_threshold
        )
-        self.df[field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
+        self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
-            median_income_threshold & high_scool_achievement_rate_threshold
+            low_median_income_threshold & high_scool_achievement_rate_threshold
        )
        self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
@ -624,23 +612,31 @@ class ScoreL(Score):
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )
        # Also check whether low area median income is 90th percentile or higher
        # within the islands.
        low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name = (
            f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
            f"{field_names.PERCENTILE}th percentile"
        )
        self.df[
            low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
        ] = (
            self.df[
                field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )
        workforce_combined_criteria_for_island_areas = (
            self.df[unemployment_island_areas_criteria_field_name]
            | self.df[poverty_island_areas_criteria_field_name]
-            # Also check whether area median income is 10th percentile or lower
+            | self.df[
-            # within the islands.
+                low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
            | (
                self.df[
                    field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
                    + field_names.PERCENTILE_FIELD_SUFFIX
            ]
                # Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it.
                # and then look for median income lower than that (not greater than).
                < 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
            )
        ) & (
            self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
-            > self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
+            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )
        percent_of_island_tracts_highlighted = (