Add EJSCREEN Areas of Concern (#843)

* Adding ej screen areas of concern * Uses it where user has local files, but not otherwise Co-authored-by: VincentLaUSDS <vincent.la@omb.eop.gov>
2025-07-26 16:41:16 -07:00 · 2021-11-02 15:38:42 -04:00 · 2021-11-02 15:38:42 -04:00 · 1d541be447
commit 1d541be447
parent 1795be6cb4
10 changed files with 2546 additions and 18 deletions
--- a/data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@ -34,7 +34,9 @@
    "\n",
    "from data_pipeline.utils import remove_all_from_dir, get_excel_column_name\n",
    "from data_pipeline.etl.sources.census.etl_utils import get_state_information\n",
-    "\n",
+    "from data_pipeline.etl.sources.ejscreen_areas_of_concern.etl import (\n",
+    "    EJSCREENAreasOfConcernETL,\n",
+    ")\n",
    "\n",
    "# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
    "tqdm_notebook.pandas()"
@ -77,6 +79,14 @@
    "CEJST_PERCENTILE_FIELD = \"cejst_percentile\"\n",
    "CEJST_PRIORITY_COMMUNITY_FIELD = \"cejst_priority_community\"\n",
    "\n",
+    "LIFE_EXPECTANCY_FIELD = \"Life expectancy (years)\"\n",
+    "HEALTH_INSURANCE_FIELD = (\n",
+    "    \"Current lack of health insurance among adults aged 18-64 years\"\n",
+    ")\n",
+    "BAD_HEALTH_FIELD = (\n",
+    "    \"Physical health not good for >=14 days among adults aged >=18 years\"\n",
+    ")\n",
+    "\n",
    "# Define some suffixes\n",
    "POPULATION_SUFFIX = \" (priority population)\""
   ]
@ -108,6 +118,55 @@
    "cejst_df.head()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b1083e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load EJSCREEN Areas of Concern data.\n",
+    "\n",
+    "# Load EJ Screen Areas of Concern\n",
+    "# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
+    "ejscreen_areas_of_concern_df: pd.DataFrame = None\n",
+    "\n",
+    "if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
+    "    print(\"Loading EJSCREEN Areas of Concern data for score pipeline.\")\n",
+    "    ejscreen_areas_of_concern_csv = (\n",
+    "        DATA_DIR / \"dataset\" / \"ejscreen_areas_of_concern\" / \"usa.csv\"\n",
+    "    )\n",
+    "    ejscreen_areas_of_concern_df = pd.read_csv(\n",
+    "        ejscreen_areas_of_concern_csv,\n",
+    "        dtype={GEOID_FIELD_NAME: \"string\"},\n",
+    "        low_memory=False,\n",
+    "    )\n",
+    "else:\n",
+    "    print(\n",
+    "        \"EJSCREEN areas of concern data does not exist locally. Not attempting to load data into comparison tool.\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fec0ed63",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Merge EJSCREEN AoCs into CEJST data.\n",
+    "# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
+    "if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
+    "    # If available, merge EJSCREEN AoC data into the CEJST dataframe.\n",
+    "    cejst_df = cejst_df.merge(\n",
+    "        ejscreen_areas_of_concern_df, on=GEOID_FIELD_NAME, how=\"outer\"\n",
+    "    )\n",
+    "else:\n",
+    "    pass\n",
+    "\n",
+    "cejst_df.head()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -343,11 +402,6 @@
    "            other_census_tract_fields_to_keep=[],\n",
    "        ),\n",
    "        Index(\n",
-    "            method_name=\"Poverty\",\n",
-    "            priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
-    "            other_census_tract_fields_to_keep=[],\n",
-    "        ),\n",
-    "        Index(\n",
    "            method_name=\"Persistent Poverty (CBG)\",\n",
    "            priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
    "            other_census_tract_fields_to_keep=[],\n",
@ -355,6 +409,34 @@
    "    ]\n",
    ")\n",
    "\n",
+    "\n",
+    "ejscreen_areas_of_concern_census_block_group_indices = [\n",
+    "    Index(\n",
+    "        method_name=\"EJSCREEN Areas of Concern, National, 80th percentile\",\n",
+    "        priority_communities_field=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"EJSCREEN Areas of Concern, National, 90th percentile\",\n",
+    "        priority_communities_field=\"EJSCREEN Areas of Concern, National, 90th percentile (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"EJSCREEN Areas of Concern, National, 95th percentile\",\n",
+    "        priority_communities_field=\"EJSCREEN Areas of Concern, National, 95th percentile (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "# Before EJSCREEN AoC indicators are included, check whether or not the EJSCREEN AoC data is available locally.\n",
+    "if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
+    "    # Add EJSCREEN AoCs to all of the CBG indices.\n",
+    "    census_block_group_indices.extend(\n",
+    "        ejscreen_areas_of_concern_census_block_group_indices\n",
+    "    )\n",
+    "else:\n",
+    "    pass\n",
+    "\n",
    "census_tract_indices = [\n",
    "    Index(\n",
    "        method_name=\"Persistent Poverty\",\n",
@ -620,6 +702,17 @@
    "    for index in census_block_group_indices + census_tract_indices\n",
    "]\n",
    "\n",
+    "# Convert the EJSCREEN Areas of Concern fields to boolean (missing values become False).\n",
+    "for field_to_analyze in fields_to_analyze:\n",
+    "    if \"Areas of Concern\" in field_to_analyze:\n",
+    "        print(f\"Converting {field_to_analyze} to boolean.\")\n",
+    "\n",
+    "        merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(\n",
+    "            value=0\n",
+    "        )\n",
+    "        merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n",
+    "\n",
+    "\n",
    "state_fips_codes = get_state_information(DATA_DIR)\n",
    "\n",
    "merged_with_state_information_df = merged_df.merge(\n",
@ -835,6 +928,9 @@
    "    \"Unemployed civilians (percent)\",\n",
    "    \"Median household income in the past 12 months\",\n",
    "    URBAN_HEURISTIC_FIELD,\n",
+    "    LIFE_EXPECTANCY_FIELD,\n",
+    "    HEALTH_INSURANCE_FIELD,\n",
+    "    BAD_HEALTH_FIELD,\n",
    "]\n",
    "\n",
    "for (index_a, index_b) in itertools.combinations(census_block_group_indices, 2):\n",
@ -1495,7 +1591,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@ -1509,7 +1605,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.5"
+   "version": "3.9.6"
  }
 },
 "nbformat": 4,