Prototype G (#672)

* wip
* cleanup
* cleanup 2
* fixing import ordering linter error
* updating backend to use score G
* adding percentile to score output
* update tippecanoe compression

Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
2025-07-28 02:21:17 -07:00 · 2021-09-14 09:48:11 -05:00 · 2021-09-14 09:48:11 -05:00 · 1083e953da
commit 1083e953da
parent 92d7f40004
6 changed files with 123 additions and 39 deletions
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@@ -85,7 +85,7 @@
   "execution_count": null,
   "id": "3b1b5ccf",
   "metadata": {
-    "scrolled": false
+    "scrolled": true
   },
   "outputs": [],
   "source": [
@@ -107,6 +107,21 @@
    "cejst_df.head()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9968187",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Analyze one field at a time (useful for setting thresholds)\n",
+    "field = \"Percent of individuals < 200% Federal Poverty Line\"\n",
+    "print(cejst_df[field].describe())\n",
+    "quantile = .8\n",
+    "print(f\"Quantile at {quantile} is {np.nanquantile(a=cejst_df[field], q=quantile)}\")\n",
+    "cejst_df[field].hist()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -201,8 +216,8 @@
    ")\n",
    "\n",
    "\n",
-    "if len(merged_df) > 220333:\n",
-    "    raise ValueError(\"Too many rows in the join.\")\n",
+    "if len(merged_df) > 220335:\n",
+    "    raise ValueError(f\"Too many rows in the join: {len(merged_df)}\")\n",
    "\n",
    "merged_df.head()\n",
    "\n",
@@ -232,22 +247,33 @@
    "\n",
    "# Define the indices used for CEJST scoring (`census_block_group_indices`) as well as comparison\n",
    "# (`census_tract_indices`).\n",
+    "\n",
    "census_block_group_indices = [\n",
    "    Index(\n",
+    "        method_name=\"Score G\",\n",
+    "        priority_communities_field=\"Score G (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"NMTC\",\n",
+    "        priority_communities_field=\"NMTC (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
    "        method_name=\"Score F\",\n",
    "        priority_communities_field=\"Score F (communities)\",\n",
    "        other_census_tract_fields_to_keep=[],\n",
    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score F (socioeconomic only)\",\n",
-    "        priority_communities_field=\"Meets socioeconomic criteria\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score F (burden only)\",\n",
-    "        priority_communities_field=\"Meets burden criteria\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
+    "#     Index(\n",
+    "#         method_name=\"Score F (socioeconomic only)\",\n",
+    "#         priority_communities_field=\"Meets socioeconomic criteria\",\n",
+    "#         other_census_tract_fields_to_keep=[],\n",
+    "#     ),\n",
+    "#     Index(\n",
+    "#         method_name=\"Score F (burden only)\",\n",
+    "#         priority_communities_field=\"Meets burden criteria\",\n",
+    "#         other_census_tract_fields_to_keep=[],\n",
+    "#     ),\n",
    "    Index(\n",
    "        method_name=\"Score A\",\n",
    "        priority_communities_field=\"Score A (top 25th percentile)\",\n",