Score F, testing methodology (#510)

* fixing dependency issue
* fixing more dependencies
* including fraction of state AMI
* wip
* nitpick whitespace
* etl working now
* wip on scoring
* fix rename error
* reducing metrics
* fixing score f
* fixing readme
* adding dependency
* passing tests
* linting/black
* removing unnecessary sample
* fixing error
* adding verify flag on etl/base

Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
2025-07-26 19:11:16 -07:00 · 2021-08-24 15:40:54 -05:00 · 2021-08-24 15:40:54 -05:00 · 65ceb7900f
commit 65ceb7900f
parent 043ed983ea
23 changed files with 557 additions and 153 deletions
--- a/data/data-pipeline/data_pipeline/ipython/census_explore.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/census_explore.ipynb
@@ -3,9 +3,6 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "0491828b",
-   "metadata": {},
-   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import censusdata\n",
@@ -32,30 +29,26 @@
    "# Some display settings to make pandas outputs more readable.\n",
    "pd.set_option(\"display.expand_frame_repr\", False)\n",
    "pd.set_option(\"display.precision\", 2)"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "654f25a1",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
   "source": [
    "# Following the tutorial at https://jtleider.github.io/censusdata/example1.html.\n",
    "# Full list of fields is at https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx\n",
    "censusdata.printtable(censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B19013\"))"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "scrolled": true
+   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "8999cea4",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
   "source": [
    "def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:\n",
    "    \"\"\"Create a FIPS code from the proprietary censusgeo index.\"\"\"\n",
@@ -85,31 +78,33 @@
    "df[GEOID_FIELD_NAME] = df.index.to_series().apply(func=fips_from_censusdata_censusgeo)\n",
    "\n",
    "df.head()"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "scrolled": true
+   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "2a269bb1",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
   "source": [
    "columns_to_include = [\"GEOID2\", \"Median household income (State)\"]\n",
    "\n",
    "df.rename(columns={\"GEOID10\": \"GEOID2\", \"B19013_001E\": \"Median household income (State)\"}, inplace=True)\n",
    "\n",
-    "df[columns_to_include].to_csv(path_or_buf= \"/Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv\", index=False)"
-   ]
+    "# df[columns_to_include].to_csv(path_or_buf= \"/Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv\", index=False)"
+   ],
+   "outputs": [],
+   "metadata": {
+    "scrolled": true
+   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "91932af5",
-   "metadata": {},
+   "source": [],
   "outputs": [],
-   "source": []
+   "metadata": {}
  }
 ],
 "metadata": {
@@ -133,4 +128,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@@ -28,7 +28,7 @@
    "from datetime import datetime\n",
    "from tqdm.notebook import tqdm_notebook\n",
    "\n",
-    "module_path = os.path.abspath(os.path.join(\"..\"))\n",
+    "module_path = os.path.abspath(os.path.join(\"../..\"))\n",
    "if module_path not in sys.path:\n",
    "    sys.path.append(module_path)\n",
    "\n",
@@ -215,7 +215,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "8b795fb4",
+   "id": "274f6bc6",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -234,6 +234,21 @@
    "# (`census_tract_indices`).\n",
    "census_block_group_indices = [\n",
    "    Index(\n",
+    "        method_name=\"Score F\",\n",
+    "        priority_communities_field=\"Score F (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"Score F (socioeconomic only)\",\n",
+    "        priority_communities_field=\"Meets socioeconomic criteria\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"Score F (burden only)\",\n",
+    "        priority_communities_field=\"Meets burden criteria\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
    "        method_name=\"Score A\",\n",
    "        priority_communities_field=\"Score A (top 25th percentile)\",\n",
    "        other_census_tract_fields_to_keep=[],\n",
@@ -253,21 +268,21 @@
    "        priority_communities_field=\"Score D (top 25th percentile)\",\n",
    "        other_census_tract_fields_to_keep=[],\n",
    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score D (30th percentile)\",\n",
-    "        priority_communities_field=\"Score D (top 30th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score D (35th percentile)\",\n",
-    "        priority_communities_field=\"Score D (top 35th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
-    "        method_name=\"Score D (40th percentile)\",\n",
-    "        priority_communities_field=\"Score D (top 40th percentile)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
+    "#     Index(\n",
+    "#         method_name=\"Score D (30th percentile)\",\n",
+    "#         priority_communities_field=\"Score D (top 30th percentile)\",\n",
+    "#         other_census_tract_fields_to_keep=[],\n",
+    "#     ),\n",
+    "#     Index(\n",
+    "#         method_name=\"Score D (35th percentile)\",\n",
+    "#         priority_communities_field=\"Score D (top 35th percentile)\",\n",
+    "#         other_census_tract_fields_to_keep=[],\n",
+    "#     ),\n",
+    "#     Index(\n",
+    "#         method_name=\"Score D (40th percentile)\",\n",
+    "#         priority_communities_field=\"Score D (top 40th percentile)\",\n",
+    "#         other_census_tract_fields_to_keep=[],\n",
+    "#     ),\n",
    "    Index(\n",
    "        method_name=\"Poverty\",\n",
    "        priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
@@ -534,7 +549,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "d7acf80d",
+   "id": "eeb9699d",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -682,7 +697,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "777a4623",
+   "id": "4f44426c",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1140,14 +1155,6 @@
    "\n",
    "print(file_paths)"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e679502a",
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ],
 "metadata": {