From a2a321d93dacfe4ad5b5f00bf23a44f5afe6562f Mon Sep 17 00:00:00 2001
From: Lucas Merrill Brown <lucas.m.brown@omb.eop.gov>
Date: Tue, 22 Jun 2021 08:57:59 -0700
Subject: [PATCH] Score comparison tool, first draft  (#140)

---
 .gitattributes                         |   3 -
 score/.vscode/settings.json            |   3 +
 score/README.md                        |  11 +
 score/ipython/score_calc_0.1.ipynb     | 324 ++++++++-
 score/ipython/scoring_comparison.ipynb | 907 +++++++++++++++++++++++++
 score/ipython/test.ipynb               | 133 ----
 score/requirements.txt                 |   1 +
 7 files changed, 1226 insertions(+), 156 deletions(-)
 delete mode 100644 .gitattributes
 create mode 100644 score/.vscode/settings.json
 create mode 100644 score/ipython/scoring_comparison.ipynb
 delete mode 100644 score/ipython/test.ipynb
diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 9512cafa..00000000
--- a/.gitattributes
+++ /dev/null
@@ -1,3 +0,0 @@
-* text=auto
-*.sh text eol=lf
-*.conf text eol=lf
diff --git a/score/.vscode/settings.json b/score/.vscode/settings.json
new file mode 100644
index 00000000..84a192ce
--- /dev/null
+++ b/score/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "venv\\Scripts\\python.exe"
+}
\ No newline at end of file
diff --git a/score/README.md b/score/README.md
index 1a67e3e0..2c945617 100644
--- a/score/README.md
+++ b/score/README.md
@@ -18,6 +18,17 @@
 - Activate your virtualenv (see above)
 - Type `jupyter notebook`. Your browser should open with a Jupyter Notebook tab
 
+## Activating variable-enabled Markdown for Jupyter notebooks
+
+- Change to this directory (i.e. `cd score`)
+- Run `jupyter contrib nbextension install --user`
+- Run `jupyter nbextension enable python-markdown/main`
+- Make sure you've loaded the Jupyter notebook in a "Trusted" state. (See button near
+  top right of Notebook screen.)
+
+For more information, see the [nbextensions docs](https://jupyter-contrib-nbextensions.readthedocs.io/en/latest/install.html) and
+the [python-markdown docs](https://github.com/ipython-contrib/jupyter_contrib_nbextensions/tree/master/src/jupyter_contrib_nbextensions/nbextensions/python-markdown).
+
 ## Downloading Census Block Groups GeoJSON and Generating CBG CSVs
 
 - Make sure you have Docker running in your machine
diff --git a/score/ipython/score_calc_0.1.ipynb b/score/ipython/score_calc_0.1.ipynb
index 7e722062..01cd19b2 100644
--- a/score/ipython/score_calc_0.1.ipynb
+++ b/score/ipython/score_calc_0.1.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "a664f981",
    "metadata": {},
    "outputs": [],
@@ -18,10 +18,91 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "7df430cb",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ID</th>\n",
+       "      <th>ACSTOTPOP</th>\n",
+       "      <th>LESSHSPCT</th>\n",
+       "      <th>LOWINCPCT</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>010010201001</td>\n",
+       "      <td>636</td>\n",
+       "      <td>0.208134</td>\n",
+       "      <td>0.385220</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>010010201002</td>\n",
+       "      <td>1287</td>\n",
+       "      <td>0.040678</td>\n",
+       "      <td>0.163170</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>010010202001</td>\n",
+       "      <td>810</td>\n",
+       "      <td>0.135563</td>\n",
+       "      <td>0.501247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>010010202002</td>\n",
+       "      <td>1218</td>\n",
+       "      <td>0.192000</td>\n",
+       "      <td>0.393701</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>010010203001</td>\n",
+       "      <td>2641</td>\n",
+       "      <td>0.125473</td>\n",
+       "      <td>0.308217</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             ID  ACSTOTPOP  LESSHSPCT  LOWINCPCT\n",
+       "0  010010201001        636   0.208134   0.385220\n",
+       "1  010010201002       1287   0.040678   0.163170\n",
+       "2  010010202001        810   0.135563   0.501247\n",
+       "3  010010202002       1218   0.192000   0.393701\n",
+       "4  010010203001       2641   0.125473   0.308217"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# EJSCreen csv Load\n",
     "ejscreen_csv = data_path / \"dataset\" / \"ejscreen_2020\" / \"usa.csv\"\n",
@@ -31,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "27677132",
    "metadata": {
     "scrolled": true
@@ -45,37 +126,182 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "1f7b864f",
-   "metadata": {},
-   "outputs": [],
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ID</th>\n",
+       "      <th>ACSTOTPOP</th>\n",
+       "      <th>LESSHSPCT</th>\n",
+       "      <th>LOWINCPCT</th>\n",
+       "      <th>lesshs_percentile</th>\n",
+       "      <th>lowin_percentile</th>\n",
+       "      <th>score_a</th>\n",
+       "      <th>score_b</th>\n",
+       "      <th>score_a_percentile</th>\n",
+       "      <th>score_b_percentile</th>\n",
+       "      <th>score_a_top_percentile_25</th>\n",
+       "      <th>score_b_top_percentile_25</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>010010201001</td>\n",
+       "      <td>636</td>\n",
+       "      <td>0.208134</td>\n",
+       "      <td>0.385220</td>\n",
+       "      <td>0.793292</td>\n",
+       "      <td>0.625015</td>\n",
+       "      <td>0.709154</td>\n",
+       "      <td>0.495820</td>\n",
+       "      <td>0.739540</td>\n",
+       "      <td>0.743311</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>010010201002</td>\n",
+       "      <td>1287</td>\n",
+       "      <td>0.040678</td>\n",
+       "      <td>0.163170</td>\n",
+       "      <td>0.238550</td>\n",
+       "      <td>0.246722</td>\n",
+       "      <td>0.242636</td>\n",
+       "      <td>0.058856</td>\n",
+       "      <td>0.206805</td>\n",
+       "      <td>0.249590</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>010010202001</td>\n",
+       "      <td>810</td>\n",
+       "      <td>0.135563</td>\n",
+       "      <td>0.501247</td>\n",
+       "      <td>0.634390</td>\n",
+       "      <td>0.772002</td>\n",
+       "      <td>0.703196</td>\n",
+       "      <td>0.489750</td>\n",
+       "      <td>0.733009</td>\n",
+       "      <td>0.738859</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>010010202002</td>\n",
+       "      <td>1218</td>\n",
+       "      <td>0.192000</td>\n",
+       "      <td>0.393701</td>\n",
+       "      <td>0.765126</td>\n",
+       "      <td>0.637158</td>\n",
+       "      <td>0.701142</td>\n",
+       "      <td>0.487506</td>\n",
+       "      <td>0.730848</td>\n",
+       "      <td>0.737357</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>010010203001</td>\n",
+       "      <td>2641</td>\n",
+       "      <td>0.125473</td>\n",
+       "      <td>0.308217</td>\n",
+       "      <td>0.603841</td>\n",
+       "      <td>0.504977</td>\n",
+       "      <td>0.554409</td>\n",
+       "      <td>0.304925</td>\n",
+       "      <td>0.568571</td>\n",
+       "      <td>0.586058</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             ID  ACSTOTPOP  LESSHSPCT  LOWINCPCT  lesshs_percentile  \\\n",
+       "0  010010201001        636   0.208134   0.385220           0.793292   \n",
+       "1  010010201002       1287   0.040678   0.163170           0.238550   \n",
+       "2  010010202001        810   0.135563   0.501247           0.634390   \n",
+       "3  010010202002       1218   0.192000   0.393701           0.765126   \n",
+       "4  010010203001       2641   0.125473   0.308217           0.603841   \n",
+       "\n",
+       "   lowin_percentile   score_a   score_b  score_a_percentile  \\\n",
+       "0          0.625015  0.709154  0.495820            0.739540   \n",
+       "1          0.246722  0.242636  0.058856            0.206805   \n",
+       "2          0.772002  0.703196  0.489750            0.733009   \n",
+       "3          0.637158  0.701142  0.487506            0.730848   \n",
+       "4          0.504977  0.554409  0.304925            0.568571   \n",
+       "\n",
+       "   score_b_percentile  score_a_top_percentile_25  score_b_top_percentile_25  \n",
+       "0            0.743311                      False                      False  \n",
+       "1            0.249590                      False                      False  \n",
+       "2            0.738859                      False                      False  \n",
+       "3            0.737357                      False                      False  \n",
+       "4            0.586058                      False                      False  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# calculate scores\n",
-    "df['score_a'] = df[['lesshs_percentile', 'lowin_percentile']].mean(axis=1)\n",
-    "df['score_b'] = df.lesshs_percentile * df.lowin_percentile\n",
+    "df[\"score_a\"] = df[[\"lesshs_percentile\", \"lowin_percentile\"]].mean(axis=1)\n",
+    "df[\"score_b\"] = df.lesshs_percentile * df.lowin_percentile\n",
     "\n",
     "# Create percentiles for the scores \n",
-    "df['score_a_percentile'] = df.score_a.rank(pct = True)\n",
-    "df['score_b_percentile'] = df.score_b.rank(pct = True)\n",
-    "df['score_a_top_percentile_25'] = df['score_a_percentile'] >= 0.75\n",
-    "df['score_b_top_percentile_25'] = df['score_b_percentile'] >= 0.75\n",
+    "df[\"score_a_percentile\"] = df.score_a.rank(pct = True)\n",
+    "df[\"score_b_percentile\"] = df.score_b.rank(pct = True)\n",
+    "df[\"score_a_top_percentile_25\"] = df[\"score_a_percentile\"] >= 0.75\n",
+    "df[\"score_b_top_percentile_25\"] = df[\"score_b_percentile\"] >= 0.75\n",
     "df.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "91755bcf",
    "metadata": {},
    "outputs": [],
    "source": [
     "# strip calculations\n",
-    "df = df[[\"ID\", \"score_a_percentile\", \"score_b_percentile\",\"score_a_top_percentile_25\",\"score_b_top_percentile_25\"]]"
+    "df = df[[\"ID\", \"ACSTOTPOP\", \"score_a\",\"score_b\", \"score_a_percentile\", \"score_b_percentile\",\"score_a_top_percentile_25\",\"score_b_top_percentile_25\"]]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "b3a65af4",
    "metadata": {},
    "outputs": [],
@@ -86,10 +312,68 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "58ddd8b3",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generating data01 csv\n",
+      "Generating data02 csv\n",
+      "Generating data04 csv\n",
+      "Generating data05 csv\n",
+      "Generating data06 csv\n",
+      "Generating data08 csv\n",
+      "Generating data09 csv\n",
+      "Generating data10 csv\n",
+      "Generating data11 csv\n",
+      "Generating data12 csv\n",
+      "Generating data13 csv\n",
+      "Generating data15 csv\n",
+      "Generating data16 csv\n",
+      "Generating data17 csv\n",
+      "Generating data18 csv\n",
+      "Generating data19 csv\n",
+      "Generating data20 csv\n",
+      "Generating data21 csv\n",
+      "Generating data22 csv\n",
+      "Generating data23 csv\n",
+      "Generating data24 csv\n",
+      "Generating data25 csv\n",
+      "Generating data26 csv\n",
+      "Generating data27 csv\n",
+      "Generating data28 csv\n",
+      "Generating data29 csv\n",
+      "Generating data30 csv\n",
+      "Generating data31 csv\n",
+      "Generating data32 csv\n",
+      "Generating data33 csv\n",
+      "Generating data34 csv\n",
+      "Generating data35 csv\n",
+      "Generating data36 csv\n",
+      "Generating data37 csv\n",
+      "Generating data38 csv\n",
+      "Generating data39 csv\n",
+      "Generating data40 csv\n",
+      "Generating data41 csv\n",
+      "Generating data42 csv\n",
+      "Generating data44 csv\n",
+      "Generating data45 csv\n",
+      "Generating data46 csv\n",
+      "Generating data47 csv\n",
+      "Generating data48 csv\n",
+      "Generating data49 csv\n",
+      "Generating data50 csv\n",
+      "Generating data51 csv\n",
+      "Generating data53 csv\n",
+      "Generating data54 csv\n",
+      "Generating data55 csv\n",
+      "Generating data56 csv\n"
+     ]
+    }
+   ],
    "source": [
     "# write per state csvs\n",
     "with open(fips_csv_path) as csv_file:\n",
@@ -110,7 +394,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bce50823",
+   "id": "e545623b",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -132,7 +416,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.9.0"
   }
  },
  "nbformat": 4,
diff --git a/score/ipython/scoring_comparison.ipynb b/score/ipython/scoring_comparison.ipynb
new file mode 100644
index 00000000..1b6b401c
--- /dev/null
+++ b/score/ipython/scoring_comparison.ipynb
@@ -0,0 +1,907 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "54615cef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Before running this notebook as it currently stands, you'll need to run two other notebooks:\n",
+    "# 1. ejscreen_etl.ipynb\n",
+    "# 2. score_calc_0.1.ipynb\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from pathlib import Path\n",
+    "import requests\n",
+    "import zipfile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "49a63129",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Suppress scientific notation in pandas (this shows up for census tract IDs)\n",
+    "pd.options.display.float_format = \"{:.2f}\".format\n",
+    "\n",
+    "# Set some global parameters\n",
+    "DATA_DIR = Path.cwd().parent / \"data\"\n",
+    "TEMP_DATA_DIR = Path.cwd().parent / \"data\" / \"tmp\"\n",
+    "# None of these numbers are final; they are used here only for the purposes of comparison.\n",
+    "CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD = 75\n",
+    "CEJST_PRIORITY_COMMUNITY_THRESHOLD = 0.75\n",
+    "\n",
+    "# Name fields using variables. (This makes it easy to reference the same fields frequently without using strings\n",
+    "# and introducing the risk of misspelling the field name.)\n",
+    "CENSUS_BLOCK_GROUP_ID_FIELD = \"census_block_group_id\"\n",
+    "CENSUS_BLOCK_GROUP_POPULATION_FIELD = \"census_block_group_population\"\n",
+    "CENSUS_TRACT_ID_FIELD = \"census_tract_id\"\n",
+    "CALENVIROSCREEN_SCORE_FIELD = \"calenviroscreen_score\"\n",
+    "CALENVIROSCREEN_PERCENTILE_FIELD = \"calenviroscreen_percentile\"\n",
+    "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD = \"calenviroscreen_priority_community\"\n",
+    "\n",
+    "# Note: for now, as a placeholder, we treat score_a from score_calc_0.1.ipynb (the mean of lesshs_percentile and lowin_percentile) as the actual score.\n",
+    "CEJST_SCORE_FIELD = \"cejst_score\"\n",
+    "CEJST_PERCENTILE_FIELD = \"cejst_percentile\"\n",
+    "CEJST_PRIORITY_COMMUNITY_FIELD = \"cejst_priority_community\"\n",
+    "\n",
+    "# Comparison field names\n",
+    "tract_has_at_least_one_cbg = \"CES Tract has at least one CEJST CBG?\"\n",
+    "tract_has_100_percent_cbg = \"CES Tract has 100% CEJST CBGs?\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2b26dccf",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>census_block_group_id</th>\n",
+       "      <th>census_block_group_population</th>\n",
+       "      <th>cejst_score</th>\n",
+       "      <th>score_b</th>\n",
+       "      <th>cejst_percentile</th>\n",
+       "      <th>score_b_percentile</th>\n",
+       "      <th>score_a_top_percentile_25</th>\n",
+       "      <th>score_b_top_percentile_25</th>\n",
+       "      <th>cejst_priority_community</th>\n",
+       "      <th>census_tract_id</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>10297</th>\n",
+       "      <td>60014001001</td>\n",
+       "      <td>3115</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.10</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6001400100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10298</th>\n",
+       "      <td>60014002001</td>\n",
+       "      <td>1037</td>\n",
+       "      <td>0.09</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6001400200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10299</th>\n",
+       "      <td>60014002002</td>\n",
+       "      <td>988</td>\n",
+       "      <td>0.15</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.11</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6001400200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10300</th>\n",
+       "      <td>60014003001</td>\n",
+       "      <td>1137</td>\n",
+       "      <td>0.03</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6001400300</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10301</th>\n",
+       "      <td>60014003002</td>\n",
+       "      <td>1404</td>\n",
+       "      <td>0.34</td>\n",
+       "      <td>0.09</td>\n",
+       "      <td>0.31</td>\n",
+       "      <td>0.31</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>6001400300</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       census_block_group_id  census_block_group_population  cejst_score  \\\n",
+       "10297            60014001001                           3115         0.14   \n",
+       "10298            60014002001                           1037         0.09   \n",
+       "10299            60014002002                            988         0.15   \n",
+       "10300            60014003001                           1137         0.03   \n",
+       "10301            60014003002                           1404         0.34   \n",
+       "\n",
+       "       score_b  cejst_percentile  score_b_percentile  \\\n",
+       "10297     0.02              0.10                0.14   \n",
+       "10298     0.01              0.05                0.07   \n",
+       "10299     0.02              0.11                0.12   \n",
+       "10300     0.00              0.01                0.02   \n",
+       "10301     0.09              0.31                0.31   \n",
+       "\n",
+       "       score_a_top_percentile_25  score_b_top_percentile_25  \\\n",
+       "10297                      False                      False   \n",
+       "10298                      False                      False   \n",
+       "10299                      False                      False   \n",
+       "10300                      False                      False   \n",
+       "10301                      False                      False   \n",
+       "\n",
+       "       cejst_priority_community  census_tract_id  \n",
+       "10297                     False       6001400100  \n",
+       "10298                     False       6001400200  \n",
+       "10299                     False       6001400200  \n",
+       "10300                     False       6001400300  \n",
+       "10301                     False       6001400300  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load CEJST score data\n",
+    "cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"usa.csv\"\n",
+    "\n",
+    "cejst_df = pd.read_csv(cejst_data_path)\n",
+    "\n",
+    "cejst_df.head()\n",
+    "\n",
+    "# Rename the unclear column name \"ID\" to \"census_block_group_id\", and rename the other columns listed below.\n",
+    "cejst_df.rename(\n",
+    "    columns={\n",
+    "        \"ID\": CENSUS_BLOCK_GROUP_ID_FIELD,\n",
+    "        \"ACSTOTPOP\": CENSUS_BLOCK_GROUP_POPULATION_FIELD,\n",
+    "        \"score_a\": CEJST_SCORE_FIELD,\n",
+    "        \"score_a_percentile\": CEJST_PERCENTILE_FIELD,\n",
+    "    },\n",
+    "    inplace=True,\n",
+    "    errors=\"raise\",\n",
+    ")\n",
+    "\n",
+    "# Calculate the top K% of prioritized communities\n",
+    "cejst_df[CEJST_PRIORITY_COMMUNITY_FIELD] = (\n",
+    "    cejst_df[CEJST_PERCENTILE_FIELD] >= CEJST_PRIORITY_COMMUNITY_THRESHOLD\n",
+    ")\n",
+    "\n",
+    "# Derive each CBG's Census Tract ID by dropping the last digit from the CBG's FIPS code.\n",
+    "# (That last digit is the block group's number within its tract.)\n",
+    "# For more information, see https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html.\n",
+    "cejst_df.loc[:, CENSUS_TRACT_ID_FIELD] = (\n",
+    "    cejst_df.loc[:, CENSUS_BLOCK_GROUP_ID_FIELD].astype(str).str[:-1].astype(np.int64)\n",
+    ")\n",
+    "\n",
+    "# Remove all non-California data\n",
+    "cejst_df = cejst_df.loc[\n",
+    "    cejst_df[CENSUS_BLOCK_GROUP_ID_FIELD].astype(str).str[0] == \"6\", :\n",
+    "]\n",
+    "\n",
+    "cejst_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "ec6b27e3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'justice40-data.s3.amazonaws.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data (downloaded below from the justice40-data S3 bucket), specifically:\n",
+    "# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip\n",
+    "\n",
+    "download = requests.get(\"https://justice40-data.s3.amazonaws.com/CalEnviroScreen/CalEnviroScreen_4.0_2021.zip\", verify=False)\n",
+    "file_contents = download.content\n",
+    "zip_file_path = TEMP_DATA_DIR\n",
+    "zip_file = open(zip_file_path  / \"downloaded.zip\", \"wb\")\n",
+    "zip_file.write(file_contents)\n",
+    "zip_file.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "bdf08971",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "C:\\opt\\justice40-tool\\score\\data\\tmp\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Extract zip\n",
+    "print(zip_file_path)\n",
+    "with zipfile.ZipFile(zip_file_path / \"downloaded.zip\", \"r\") as zip_ref:\n",
+    "    zip_ref.extractall(zip_file_path)\n",
+    "calenviroscreen_4_csv_name = \"CalEnviroScreen_4.0_2021.csv\"\n",
+    "calenviroscreen_data_path = TEMP_DATA_DIR.joinpath(calenviroscreen_4_csv_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "29c14b29",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>census_tract_id</th>\n",
+       "      <th>Total Population</th>\n",
+       "      <th>California County</th>\n",
+       "      <th>ZIP</th>\n",
+       "      <th>Nearby City \\r\\n(to help approximate location only)</th>\n",
+       "      <th>Longitude</th>\n",
+       "      <th>Latitude</th>\n",
+       "      <th>calenviroscreen_score</th>\n",
+       "      <th>calenviroscreen_percentile</th>\n",
+       "      <th>DRAFT CES 4.0\\r\\nPercentile Range</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Poverty</th>\n",
+       "      <th>Poverty Pctl</th>\n",
+       "      <th>Unemployment</th>\n",
+       "      <th>Unemployment Pctl</th>\n",
+       "      <th>Housing Burden</th>\n",
+       "      <th>Housing Burden Pctl</th>\n",
+       "      <th>Pop. Char.</th>\n",
+       "      <th>Pop. Char. Score</th>\n",
+       "      <th>Pop. Char. Pctl</th>\n",
+       "      <th>calenviroscreen_priority_community</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>6019001100</td>\n",
+       "      <td>2760</td>\n",
+       "      <td>Fresno</td>\n",
+       "      <td>93706</td>\n",
+       "      <td>Fresno</td>\n",
+       "      <td>-119.78</td>\n",
+       "      <td>36.71</td>\n",
+       "      <td>94.61</td>\n",
+       "      <td>100.00</td>\n",
+       "      <td>95-100% (highest scores)</td>\n",
+       "      <td>...</td>\n",
+       "      <td>76.60</td>\n",
+       "      <td>98.43</td>\n",
+       "      <td>16.20</td>\n",
+       "      <td>97.15</td>\n",
+       "      <td>30.70</td>\n",
+       "      <td>90.61</td>\n",
+       "      <td>93.73</td>\n",
+       "      <td>9.72</td>\n",
+       "      <td>99.87</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>6077000700</td>\n",
+       "      <td>4177</td>\n",
+       "      <td>San Joaquin</td>\n",
+       "      <td>95206</td>\n",
+       "      <td>Stockton</td>\n",
+       "      <td>-121.29</td>\n",
+       "      <td>37.94</td>\n",
+       "      <td>90.83</td>\n",
+       "      <td>99.99</td>\n",
+       "      <td>95-100% (highest scores)</td>\n",
+       "      <td>...</td>\n",
+       "      <td>70.60</td>\n",
+       "      <td>96.43</td>\n",
+       "      <td>18.50</td>\n",
+       "      <td>98.45</td>\n",
+       "      <td>35.20</td>\n",
+       "      <td>95.61</td>\n",
+       "      <td>93.40</td>\n",
+       "      <td>9.68</td>\n",
+       "      <td>99.84</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>6077000100</td>\n",
+       "      <td>4055</td>\n",
+       "      <td>San Joaquin</td>\n",
+       "      <td>95202</td>\n",
+       "      <td>Stockton</td>\n",
+       "      <td>-121.29</td>\n",
+       "      <td>37.95</td>\n",
+       "      <td>85.75</td>\n",
+       "      <td>99.97</td>\n",
+       "      <td>95-100% (highest scores)</td>\n",
+       "      <td>...</td>\n",
+       "      <td>81.80</td>\n",
+       "      <td>99.50</td>\n",
+       "      <td>17.90</td>\n",
+       "      <td>98.17</td>\n",
+       "      <td>36.40</td>\n",
+       "      <td>96.51</td>\n",
+       "      <td>95.71</td>\n",
+       "      <td>9.92</td>\n",
+       "      <td>99.97</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>6071001600</td>\n",
+       "      <td>5527</td>\n",
+       "      <td>San Bernardino</td>\n",
+       "      <td>91761</td>\n",
+       "      <td>Ontario</td>\n",
+       "      <td>-117.62</td>\n",
+       "      <td>34.06</td>\n",
+       "      <td>83.56</td>\n",
+       "      <td>99.96</td>\n",
+       "      <td>95-100% (highest scores)</td>\n",
+       "      <td>...</td>\n",
+       "      <td>67.10</td>\n",
+       "      <td>94.82</td>\n",
+       "      <td>6.70</td>\n",
+       "      <td>57.20</td>\n",
+       "      <td>32.10</td>\n",
+       "      <td>92.65</td>\n",
+       "      <td>80.59</td>\n",
+       "      <td>8.36</td>\n",
+       "      <td>93.06</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>6037204920</td>\n",
+       "      <td>2639</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>90023</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "      <td>-118.20</td>\n",
+       "      <td>34.02</td>\n",
+       "      <td>82.90</td>\n",
+       "      <td>99.95</td>\n",
+       "      <td>95-100% (highest scores)</td>\n",
+       "      <td>...</td>\n",
+       "      <td>64.90</td>\n",
+       "      <td>93.51</td>\n",
+       "      <td>5.60</td>\n",
+       "      <td>43.81</td>\n",
+       "      <td>25.00</td>\n",
+       "      <td>77.95</td>\n",
+       "      <td>83.95</td>\n",
+       "      <td>8.70</td>\n",
+       "      <td>95.78</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 59 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   census_tract_id   Total Population California County    ZIP  \\\n",
+       "0       6019001100               2760           Fresno   93706   \n",
+       "1       6077000700               4177       San Joaquin  95206   \n",
+       "2       6077000100               4055       San Joaquin  95202   \n",
+       "3       6071001600               5527    San Bernardino  91761   \n",
+       "4       6037204920               2639       Los Angeles  90023   \n",
+       "\n",
+       "  Nearby City \\r\\n(to help approximate location only)  Longitude  Latitude  \\\n",
+       "0                                             Fresno     -119.78     36.71   \n",
+       "1                                           Stockton     -121.29     37.94   \n",
+       "2                                           Stockton     -121.29     37.95   \n",
+       "3                                            Ontario     -117.62     34.06   \n",
+       "4                                        Los Angeles     -118.20     34.02   \n",
+       "\n",
+       "   calenviroscreen_score  calenviroscreen_percentile  \\\n",
+       "0                  94.61                      100.00   \n",
+       "1                  90.83                       99.99   \n",
+       "2                  85.75                       99.97   \n",
+       "3                  83.56                       99.96   \n",
+       "4                  82.90                       99.95   \n",
+       "\n",
+       "  DRAFT CES 4.0\\r\\nPercentile Range  ...  Poverty  Poverty Pctl  Unemployment  \\\n",
+       "0          95-100% (highest scores)  ...    76.60         98.43         16.20   \n",
+       "1          95-100% (highest scores)  ...    70.60         96.43         18.50   \n",
+       "2          95-100% (highest scores)  ...    81.80         99.50         17.90   \n",
+       "3          95-100% (highest scores)  ...    67.10         94.82          6.70   \n",
+       "4          95-100% (highest scores)  ...    64.90         93.51          5.60   \n",
+       "\n",
+       "   Unemployment Pctl  Housing Burden  Housing Burden Pctl  Pop. Char.   \\\n",
+       "0              97.15           30.70                90.61        93.73   \n",
+       "1              98.45           35.20                95.61        93.40   \n",
+       "2              98.17           36.40                96.51        95.71   \n",
+       "3              57.20           32.10                92.65        80.59   \n",
+       "4              43.81           25.00                77.95        83.95   \n",
+       "\n",
+       "   Pop. Char. Score  Pop. Char. Pctl  calenviroscreen_priority_community  \n",
+       "0              9.72            99.87                                True  \n",
+       "1              9.68            99.84                                True  \n",
+       "2              9.92            99.97                                True  \n",
+       "3              8.36            93.06                                True  \n",
+       "4              8.70            95.78                                True  \n",
+       "\n",
+       "[5 rows x 59 columns]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load comparison index (CalEnviroScreen 4)\n",
+    "\n",
+    "calenviroscreen_df = pd.read_csv(calenviroscreen_data_path)\n",
+    "\n",
+    "calenviroscreen_df.rename(\n",
+    "    columns={\n",
+    "        \"Census Tract\": CENSUS_TRACT_ID_FIELD,\n",
+    "        \"DRAFT CES 4.0 Score\": CALENVIROSCREEN_SCORE_FIELD,\n",
+    "        \"DRAFT CES 4.0 Percentile\": CALENVIROSCREEN_PERCENTILE_FIELD,\n",
+    "    },\n",
+    "    inplace=True,\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# Calculate the top K% of prioritized communities\n",
+    "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD] = (\n",
+    "    calenviroscreen_df[CALENVIROSCREEN_PERCENTILE_FIELD]\n",
+    "    >= CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD\n",
+    ")\n",
+    "\n",
+    "calenviroscreen_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "813e5656",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>census_block_group_id</th>\n",
+       "      <th>census_tract_id</th>\n",
+       "      <th>census_block_group_population</th>\n",
+       "      <th>cejst_score</th>\n",
+       "      <th>cejst_percentile</th>\n",
+       "      <th>cejst_priority_community</th>\n",
+       "      <th>calenviroscreen_score</th>\n",
+       "      <th>calenviroscreen_percentile</th>\n",
+       "      <th>calenviroscreen_priority_community</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>60014001001</td>\n",
+       "      <td>6001400100</td>\n",
+       "      <td>3115</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>0.10</td>\n",
+       "      <td>False</td>\n",
+       "      <td>4.40</td>\n",
+       "      <td>2.38</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>60014002001</td>\n",
+       "      <td>6001400200</td>\n",
+       "      <td>1037</td>\n",
+       "      <td>0.09</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>False</td>\n",
+       "      <td>5.05</td>\n",
+       "      <td>3.48</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>60014002002</td>\n",
+       "      <td>6001400200</td>\n",
+       "      <td>988</td>\n",
+       "      <td>0.15</td>\n",
+       "      <td>0.11</td>\n",
+       "      <td>False</td>\n",
+       "      <td>5.05</td>\n",
+       "      <td>3.48</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>60014003001</td>\n",
+       "      <td>6001400300</td>\n",
+       "      <td>1137</td>\n",
+       "      <td>0.03</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>False</td>\n",
+       "      <td>9.92</td>\n",
+       "      <td>13.44</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>60014003002</td>\n",
+       "      <td>6001400300</td>\n",
+       "      <td>1404</td>\n",
+       "      <td>0.34</td>\n",
+       "      <td>0.31</td>\n",
+       "      <td>False</td>\n",
+       "      <td>9.92</td>\n",
+       "      <td>13.44</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   census_block_group_id  census_tract_id  census_block_group_population  \\\n",
+       "0            60014001001       6001400100                           3115   \n",
+       "1            60014002001       6001400200                           1037   \n",
+       "2            60014002002       6001400200                            988   \n",
+       "3            60014003001       6001400300                           1137   \n",
+       "4            60014003002       6001400300                           1404   \n",
+       "\n",
+       "   cejst_score  cejst_percentile  cejst_priority_community  \\\n",
+       "0         0.14              0.10                     False   \n",
+       "1         0.09              0.05                     False   \n",
+       "2         0.15              0.11                     False   \n",
+       "3         0.03              0.01                     False   \n",
+       "4         0.34              0.31                     False   \n",
+       "\n",
+       "   calenviroscreen_score  calenviroscreen_percentile  \\\n",
+       "0                   4.40                        2.38   \n",
+       "1                   5.05                        3.48   \n",
+       "2                   5.05                        3.48   \n",
+       "3                   9.92                       13.44   \n",
+       "4                   9.92                       13.44   \n",
+       "\n",
+       "  calenviroscreen_priority_community  \n",
+       "0                              False  \n",
+       "1                              False  \n",
+       "2                              False  \n",
+       "3                              False  \n",
+       "4                              False  "
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Join CalEnviroScreen and CEJST data.\n",
+    "# Note: we're joining on the census *tract*, so there will be multiple CBG entries joined to the same census tract row from CES,\n",
+    "# creating multiple rows of the same CES data.\n",
+    "\n",
+    "# For simplicity, we'll only keep certain columns from each data frame.\n",
+    "cejst_columns_to_keep = [\n",
+    "    CENSUS_BLOCK_GROUP_ID_FIELD,\n",
+    "    CENSUS_TRACT_ID_FIELD,\n",
+    "    CENSUS_BLOCK_GROUP_POPULATION_FIELD,\n",
+    "    CEJST_SCORE_FIELD,\n",
+    "    CEJST_PERCENTILE_FIELD,\n",
+    "    CEJST_PRIORITY_COMMUNITY_FIELD,\n",
+    "]\n",
+    "\n",
+    "calenviroscreen_columns_to_keep = [\n",
+    "    CENSUS_TRACT_ID_FIELD,\n",
+    "    CALENVIROSCREEN_SCORE_FIELD,\n",
+    "    CALENVIROSCREEN_PERCENTILE_FIELD,\n",
+    "    CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD,\n",
+    "]\n",
+    "\n",
+    "merged_df = cejst_df.loc[:, cejst_columns_to_keep].merge(\n",
+    "    calenviroscreen_df.loc[:, calenviroscreen_columns_to_keep],\n",
+    "    how=\"left\",\n",
+    "    on=CENSUS_TRACT_ID_FIELD,\n",
+    ")\n",
+    "\n",
+    "merged_df.head()\n",
+    "\n",
+    "# merged_df.to_csv(\n",
+    "#     path_or_buf=TEMP_DATA_DIR / \"merged.csv\",\n",
+    "#     na_rep=\"\",\n",
+    "#     index=False\n",
+    "# )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "939baea4",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                 census_tract_id  calenviroscreen_score  \\\n",
+      "census_tract_id                                           \n",
+      "6019001100            6019001100                  94.61   \n",
+      "6077000700            6077000700                  90.83   \n",
+      "6077000100            6077000100                  85.75   \n",
+      "6071001600            6071001600                  83.56   \n",
+      "6037204920            6037204920                  82.90   \n",
+      "\n",
+      "                 calenviroscreen_percentile  \\\n",
+      "census_tract_id                               \n",
+      "6019001100                           100.00   \n",
+      "6077000700                            99.99   \n",
+      "6077000100                            99.97   \n",
+      "6071001600                            99.96   \n",
+      "6037204920                            99.95   \n",
+      "\n",
+      "                calenviroscreen_priority_community  \\\n",
+      "census_tract_id                                      \n",
+      "6019001100                                    True   \n",
+      "6077000700                                    True   \n",
+      "6077000100                                    True   \n",
+      "6071001600                                    True   \n",
+      "6037204920                                    True   \n",
+      "\n",
+      "                CES Tract has at least one CEJST CBG?  \\\n",
+      "census_tract_id                                         \n",
+      "6019001100                                       True   \n",
+      "6077000700                                       True   \n",
+      "6077000100                                       True   \n",
+      "6071001600                                       True   \n",
+      "6037204920                                       True   \n",
+      "\n",
+      "                CES Tract has 100% CEJST CBGs?  \n",
+      "census_tract_id                                 \n",
+      "6019001100                                True  \n",
+      "6077000700                                True  \n",
+      "6077000100                                True  \n",
+      "6071001600                               False  \n",
+      "6037204920                                True  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create analysis\n",
+    "def calculate_comparison(frame):\n",
+    "    # Keep all the CES values at the Census Tract Level\n",
+    "    df = frame.loc[\n",
+    "        frame.index[0],\n",
+    "        [\n",
+    "            CENSUS_TRACT_ID_FIELD,\n",
+    "            CALENVIROSCREEN_SCORE_FIELD,\n",
+    "            CALENVIROSCREEN_PERCENTILE_FIELD,\n",
+    "            CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD,\n",
+    "        ],\n",
+    "    ]\n",
+    "\n",
+    "    # Convenience constant for whether the tract is or is not a CalEnviroScreen priority community.\n",
+    "    is_a_ces_priority_tract = frame.loc[\n",
+    "        frame.index[0], [CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD]\n",
+    "    ][0]\n",
+    "\n",
+    "    # NaN is truthy in Python, so a bare truthiness test would misclassify missing CES values; compare explicitly against True.\n",
+    "    is_a_ces_priority_tract = is_a_ces_priority_tract is True\n",
+    "\n",
+    "    # Calculate comparison\n",
+    "    df[tract_has_at_least_one_cbg] = (\n",
+    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].sum() > 0\n",
+    "        if is_a_ces_priority_tract\n",
+    "        else None\n",
+    "    )\n",
+    "    df[tract_has_100_percent_cbg] = (\n",
+    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].mean() == 1\n",
+    "        if is_a_ces_priority_tract\n",
+    "        else None\n",
+    "    )\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "# Group all data by the census tract.\n",
+    "grouped_df = merged_df.groupby(CENSUS_TRACT_ID_FIELD)\n",
+    "\n",
+    "# Run the comparison function on the groups.\n",
+    "comparison_df = grouped_df.apply(calculate_comparison)\n",
+    "\n",
+    "# Sort descending by CES percentile for convenience when viewing the output file\n",
+    "comparison_df.sort_values(\n",
+    "    by=[CALENVIROSCREEN_PERCENTILE_FIELD], ascending=False, inplace=True\n",
+    ")\n",
+    "\n",
+    "# Write comparison to CSV.\n",
+    "comparison_df.to_csv(\n",
+    "    path_or_buf=TEMP_DATA_DIR / \"Comparison Output.csv\", na_rep=\"\", index=False\n",
+    ")\n",
+    "\n",
+    "print(comparison_df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "85709225",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Prepare some constants for use in the following Markdown cell.\n",
+    "\n",
+    "cejst_cbgs_ca_only = cejst_df.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].sum()\n",
+    "ces_tracts_count = comparison_df.loc[:, CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD].sum()\n",
+    "at_least_one_sum = comparison_df.loc[:, tract_has_at_least_one_cbg].sum()\n",
+    "at_least_one_sum_percent = f\"{at_least_one_sum / ces_tracts_count:.0%}\"\n",
+    "\n",
+    "all_100_sum = comparison_df.loc[:, tract_has_100_percent_cbg].sum()\n",
+    "all_100_sum_percent = f\"{all_100_sum / ces_tracts_count:.0%}\"\n",
+    "\n",
+    "# Note, for the following Markdown cell to render the variables properly, follow the steps at\n",
+    "# \"Activating variable-enabled Markdown for Jupyter notebooks\" within `score/README.md`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0c534966",
+   "metadata": {
+    "variables": {
+     "all_100_sum": "1168",
+     "all_100_sum_percent": "59%",
+     "at_least_one_sum": "1817",
+     "at_least_one_sum_percent": "92%",
+     "cejst_cbgs_ca_only": "6987",
+     "ces_tracts_count": "1983"
+    }
+   },
+   "source": [
+    "# Summary of findings\n",
+    "\n",
+    "Recall that census tracts contain one or more census block groups, with up to nine census block groups per tract.\n",
+    "\n",
+    "There are {{ces_tracts_count}} census tracts designated as Disadvantaged Communities by CalEnviroScreen 4.0. \n",
+    "\n",
+    "Within California, there are {{cejst_cbgs_ca_only}} census block groups considered priority communities by the current version of the CEJST score used in this analysis.\n",
+    "\n",
+    "Of all CalEnviroScreen Disadvantaged Community census tracts, {{at_least_one_sum}} ({{at_least_one_sum_percent}}) have at least one census block group within them that is considered a priority community by the current version of the CEJST score.\n",
+    "\n",
+    "Of all CalEnviroScreen Disadvantaged Community census tracts, {{all_100_sum}} ({{all_100_sum_percent}}) have 100% of their census block groups considered priority communities by the current version of the CEJST score."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db3c7d38",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/score/ipython/test.ipynb b/score/ipython/test.ipynb
deleted file mode 100644
index da17a9c4..00000000
--- a/score/ipython/test.ipynb
+++ /dev/null
@@ -1,133 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "1a4c0c68",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "id": "70b3a793",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.read_csv('data/fips_states.csv')  "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "id": "c514aad8",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>fips</th>\n",
-       "      <th>state_name</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Alabama</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Alaska</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Arizona</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>5</td>\n",
-       "      <td>Arkansas</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>6</td>\n",
-       "      <td>California</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   fips  state_name\n",
-       "0     1    Alabama \n",
-       "1     2     Alaska \n",
-       "2     4    Arizona \n",
-       "3     5   Arkansas \n",
-       "4     6  California"
-      ]
-     },
-     "execution_count": 51,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b9ee44d9",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/score/requirements.txt b/score/requirements.txt
index f3f609d7..87d00c77 100644
--- a/score/requirements.txt
+++ b/score/requirements.txt
@@ -1,5 +1,6 @@
 ipython
 jupyter
+jupyter_contrib_nbextensions
 numpy
 pandas
 requests

	ID	ACSTOTPOP	LESSHSPCT	LOWINCPCT
0	010010201001	636	0.208134	0.385220
1	010010201002	1287	0.040678	0.163170
2	010010202001	810	0.135563	0.501247
3	010010202002	1218	0.192000	0.393701
4	010010203001	2641	0.125473	0.308217
	census_block_group_id	census_block_group_population	cejst_score	score_b	cejst_percentile	score_b_percentile	score_a_top_percentile_25	score_b_top_percentile_25	cejst_priority_community	census_tract_id
10297	60014001001	3115	0.14	0.02	0.10	0.14	False	False	False	6001400100
10298	60014002001	1037	0.09	0.01	0.05	0.07	False	False	False	6001400200
10299	60014002002	988	0.15	0.02	0.11	0.12	False	False	False	6001400200
10300	60014003001	1137	0.03	0.00	0.01	0.02	False	False	False	6001400300
10301	60014003002	1404	0.34	0.09	0.31	0.31	False	False	False	6001400300
	census_tract_id	Total Population	California County	ZIP	Nearby City \\r\\n(to help approximate location only)	Longitude	Latitude	calenviroscreen_score	calenviroscreen_percentile	DRAFT CES 4.0\\r\\nPercentile Range	...	Poverty	Poverty Pctl	Unemployment	Unemployment Pctl	Housing Burden	Housing Burden Pctl	Pop. Char.	Pop. Char. Score	Pop. Char. Pctl	calenviroscreen_priority_community
0	6019001100	2760	Fresno	93706	Fresno	-119.78	36.71	94.61	100.00	95-100% (highest scores)	...	76.60	98.43	16.20	97.15	30.70	90.61	93.73	9.72	99.87	True
1	6077000700	4177	San Joaquin	95206	Stockton	-121.29	37.94	90.83	99.99	95-100% (highest scores)	...	70.60	96.43	18.50	98.45	35.20	95.61	93.40	9.68	99.84	True
2	6077000100	4055	San Joaquin	95202	Stockton	-121.29	37.95	85.75	99.97	95-100% (highest scores)	...	81.80	99.50	17.90	98.17	36.40	96.51	95.71	9.92	99.97	True
3	6071001600	5527	San Bernardino	91761	Ontario	-117.62	34.06	83.56	99.96	95-100% (highest scores)	...	67.10	94.82	6.70	57.20	32.10	92.65	80.59	8.36	93.06	True
4	6037204920	2639	Los Angeles	90023	Los Angeles	-118.20	34.02	82.90	99.95	95-100% (highest scores)	...	64.90	93.51	5.60	43.81	25.00	77.95	83.95	8.70	95.78	True