Add Score L (#812)

* Create ScoreCalculator. This calculates all the factors for score L for now (with placeholder formulae because this is a WIP). I think ideally we'll want to refactor all the score code to be extracted into this or similar classes. * Add factor logic for score L. Updated factor logic to match score L factors methodology. Still need to get the Score L field itself working. Cleanup needed: pull field names into constants file, extract all score calculation into score calculator. Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov> Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
2025-09-13 08:18:17 -07:00 · 2021-10-28 16:07:41 -04:00 · 2021-10-28 16:07:41 -04:00 · 7b87e0ec99
commit 7b87e0ec99
parent b1adc1f69f
3 changed files with 385 additions and 73 deletions
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@ -119,13 +119,14 @@
   "source": [
    "# Analyze one field at a time (useful for setting thresholds)\n",
    "\n",
-    "quantile = 0.8\n",
+    "quantile = 0.9\n",
    "\n",
    "for field in [\n",
-    "    \"Percent of individuals < 200% Federal Poverty Line\",\n",
-    "    \"Life expectancy (years)\",\n",
-    "    \"Energy burden\",\n",
-    "    URBAN_HEURISTIC_FIELD,\n",
+    "    \"Linguistic isolation (percent)\",\n",
+    "    \"Diesel particulate matter (percentile)\",\n",
+    "    \"Particulate matter (PM2.5) (percentile)\",\n",
+    "    \"Traffic proximity and volume (percentile)\",\n",
+    "    \"Percent of individuals < 200% Federal Poverty Line (percentile)\",\n",
    "]:\n",
    "    print(f\"\\n~~~~Analysis for field `{field}`~~~~\")\n",
    "    print(cejst_df[field].describe())\n",
@ -234,7 +235,7 @@
   "execution_count": null,
   "id": "8da016db",
   "metadata": {
-    "scrolled": false
+    "scrolled": true
   },
   "outputs": [],
   "source": [
@ -278,64 +279,83 @@
    "\n",
    "# Define the indices used for CEJST scoring (`census_block_group_indices`) as well as comparison\n",
    "# (`census_tract_indices`).\n",
-    "census_block_group_indices = [\n",
-    "    Index(\n",
-    "        method_name=\"Score G\",\n",
-    "        priority_communities_field=\"Score G (communities)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score H\",\n",
-    "        priority_communities_field=\"Score H (communities)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score I\",\n",
-    "        priority_communities_field=\"Score I (communities)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"NMTC\",\n",
-    "        priority_communities_field=\"NMTC (communities)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score A\",\n",
-    "        priority_communities_field=\"Score A (top 25th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score B\",\n",
-    "        priority_communities_field=\"Score B (top 25th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score C\",\n",
-    "        priority_communities_field=\"Score C (top 25th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score D (25th percentile)\",\n",
-    "        priority_communities_field=\"Score D (top 25th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score F\",\n",
-    "        priority_communities_field=\"Score F (communities)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Poverty\",\n",
-    "        priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Persistent Poverty (CBG)\",\n",
-    "        priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
+    "\n",
+    "definition_l_factors = [\n",
+    "    \"Climate Factor (Definition L)\",\n",
+    "    \"Energy Factor (Definition L)\",\n",
+    "    \"Transportation Factor (Definition L)\",\n",
+    "    \"Housing Factor (Definition L)\",\n",
+    "    \"Pollution Factor (Definition L)\",\n",
+    "    \"Water Factor (Definition L)\",\n",
+    "    \"Health Factor (Definition L)\",\n",
+    "    \"Workforce Factor (Definition L)\",\n",
+    "    # Also include a combined factor for all the non-workforce elements.\n",
+    "    \"Any Non-Workforce Factor (Definition L)\",\n",
    "]\n",
    "\n",
+    "census_block_group_indices = (\n",
+    "    [\n",
+    "        Index(\n",
+    "            method_name=\"Definition L\",\n",
+    "            priority_communities_field=\"Definition L (communities)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "    ]\n",
+    "    # Insert indices for each of the factors from Definition L.\n",
+    "    # Note: since these involve no renaming, we write them using list comprehension.\n",
+    "    + [\n",
+    "        Index(\n",
+    "            method_name=factor,\n",
+    "            priority_communities_field=factor,\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        )\n",
+    "        for factor in definition_l_factors\n",
+    "    ]\n",
+    "    + [\n",
+    "        Index(\n",
+    "            # Note: we're renaming Score G as NMTC Modified for clarity, since that's what Score G is under the hood.\n",
+    "            method_name=\"NMTC Modified\",\n",
+    "            priority_communities_field=\"Score G (communities)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"NMTC\",\n",
+    "            priority_communities_field=\"NMTC (communities)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"Score C\",\n",
+    "            priority_communities_field=\"Score C (top 25th percentile)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"Score D (30th percentile)\",\n",
+    "            priority_communities_field=\"Score D (top 30th percentile)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"Score D (25th percentile)\",\n",
+    "            priority_communities_field=\"Score D (top 25th percentile)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"Score F\",\n",
+    "            priority_communities_field=\"Score F (communities)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"Poverty\",\n",
+    "            priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "        Index(\n",
+    "            method_name=\"Persistent Poverty (CBG)\",\n",
+    "            priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
+    "            other_census_tract_fields_to_keep=[],\n",
+    "        ),\n",
+    "    ]\n",
+    ")\n",
+    "\n",
    "census_tract_indices = [\n",
    "    Index(\n",
    "        method_name=\"Persistent Poverty\",\n",
@ -623,9 +643,7 @@
    "write_state_distribution_excel(\n",
    "    state_distribution_df=state_distribution_df,\n",
    "    file_path=COMPARISON_OUTPUTS_DIR / f\"{file_prefix}.xlsx\",\n",
-    ")\n",
-    "\n",
-    "state_distribution_df.head()"
+    ")"
   ]
  },
  {
@ -633,7 +651,7 @@
   "execution_count": null,
   "id": "8790cd64",
   "metadata": {
-    "scrolled": false
+    "scrolled": true
   },
   "outputs": [],
   "source": [
@ -1461,7 +1479,7 @@
   "execution_count": null,
   "id": "908e0ad4",
   "metadata": {
-    "scrolled": true
+    "scrolled": false
   },
   "outputs": [],
   "source": [