diff --git a/score/ipython/calenviroscreen_etl.ipynb b/score/ipython/calenviroscreen_etl.ipynb
new file mode 100644
index 00000000..0333deef
--- /dev/null
+++ b/score/ipython/calenviroscreen_etl.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "20aa3891",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import csv\n",
+    "import sys\n",
+    "import os\n",
+    "\n",
+    "module_path = os.path.abspath(os.path.join(\"..\"))\n",
+    "if module_path not in sys.path:\n",
+    "    sys.path.append(module_path)\n",
+    "\n",
+    "from etl.sources.census.etl_utils import get_state_fips_codes\n",
+    "from utils import unzip_file_from_url, remove_all_from_dir\n",
+    "\n",
+    "DATA_PATH = Path.cwd().parent / \"data\"\n",
+    "TMP_PATH = DATA_PATH / \"tmp\"\n",
+    "CALENVIROSCREEN_FTP_URL = \"https://justice40-data.s3.amazonaws.com/CalEnviroScreen/CalEnviroScreen_4.0_2021.zip\"\n",
+    "CSV_PATH = DATA_PATH / \"dataset\" / \"calenviroscreen4\"\n",
+    "\n",
+    "# Defining some variable names\n",
+    "CALENVIROSCREEN_SCORE_FIELD_NAME = \"calenviroscreen_score\"\n",
+    "CALENVIROSCREEN_PERCENTILE_FIELD_NAME = \"calenviroscreen_percentile\"\n",
+    "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = \"calenviroscreen_priority_community\"\n",
+    "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n",
+    "\n",
+    "# Choosing constants.\n",
+    "# None of these numbers are final; they are used only for the purposes of comparison.\n",
+    "CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD = 75\n",
+    "\n",
+    "print(DATA_PATH)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc3fb9ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download and unzip the CalEnviroScreen 4.0 zip file from CALENVIROSCREEN_FTP_URL\n",
+    "unzip_file_from_url(CALENVIROSCREEN_FTP_URL, TMP_PATH, TMP_PATH)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15f66756",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:\n",
+    "# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip\n",
+    "calenviroscreen_4_csv_name = \"CalEnviroScreen_4.0_2021.csv\"\n",
+    "calenviroscreen_data_path = TMP_PATH.joinpath(calenviroscreen_4_csv_name)\n",
+    "\n",
+    "# Load comparison index (CalEnviroScreen 4)\n",
+    "calenviroscreen_df = pd.read_csv(\n",
+    "    calenviroscreen_data_path, dtype={\"Census Tract\": \"string\"}\n",
+    ")\n",
+    "\n",
+    "calenviroscreen_df.rename(\n",
+    "    columns={\n",
+    "        \"Census Tract\": GEOID_TRACT_FIELD_NAME,\n",
+    "        \"DRAFT CES 4.0 Score\": CALENVIROSCREEN_SCORE_FIELD_NAME,\n",
+    "        \"DRAFT CES 4.0 Percentile\": CALENVIROSCREEN_PERCENTILE_FIELD_NAME,\n",
+    "    },\n",
+    "    inplace=True,\n",
+    ")\n",
+    "\n",
+    "# Add a leading \"0\" to the Census Tract to match our format in other data frames.\n",
+    "\n",
+    "calenviroscreen_df[GEOID_TRACT_FIELD_NAME] = (\n",
+    "    \"0\" + calenviroscreen_df[GEOID_TRACT_FIELD_NAME]\n",
+    ")\n",
+    "\n",
+    "# Flag tracts whose percentile is at or above the priority community threshold\n",
+    "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME] = (\n",
+    "    calenviroscreen_df[CALENVIROSCREEN_PERCENTILE_FIELD_NAME]\n",
+    "    >= CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD\n",
+    ")\n",
+    "\n",
+    "calenviroscreen_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9fa2077a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# write csv\n",
+    "CSV_PATH.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# Matching other conventions in the ETL scripts, write only for the state (FIPS code 06).\n",
+    "calenviroscreen_df.to_csv(CSV_PATH / \"data06.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "81b977f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# cleanup\n",
+    "remove_all_from_dir(TMP_PATH)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/score/ipython/hud_recap_etl.ipynb b/score/ipython/hud_recap_etl.ipynb
new file mode 100644
index 00000000..7d4df434
--- /dev/null
+++ b/score/ipython/hud_recap_etl.ipynb
@@ -0,0 +1,115 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "20aa3891",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import csv\n",
+    "import sys\n",
+    "import os\n",
+    "\n",
+    "module_path = os.path.abspath(os.path.join(\"..\"))\n",
+    "if module_path not in sys.path:\n",
+    "    sys.path.append(module_path)\n",
+    "\n",
+    "from etl.sources.census.etl_utils import get_state_fips_codes\n",
+    "from utils import unzip_file_from_url, remove_all_from_dir\n",
+    "\n",
+    "DATA_PATH = Path.cwd().parent / \"data\"\n",
+    "TMP_PATH = DATA_PATH / \"tmp\"\n",
+    "HUD_RECAP_CSV_URL = \"https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326\"\n",
+    "CSV_PATH = DATA_PATH / \"dataset\" / \"hud_recap\"\n",
+    "\n",
+    "# Defining some variable names\n",
+    "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n",
+    "HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME = \"hud_recap_priority_community\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9455da5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data from https://hudgis-hud.opendata.arcgis.com/datasets/HUD::racially-or-ethnically-concentrated-areas-of-poverty-r-ecaps/about\n",
+    "df = pd.read_csv(HUD_RECAP_CSV_URL, dtype={\"GEOID\": \"string\"})\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca63e66c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rename some fields\n",
+    "df.rename(\n",
+    "    columns={\n",
+    "        \"GEOID\": GEOID_TRACT_FIELD_NAME,\n",
+    "        # Interestingly, there's no data dictionary for the RECAP data that I could find.\n",
+    "        # However, this site (http://www.schousing.com/library/Tax%20Credit/2020/QAP%20Instructions%20(2).pdf)\n",
+    "        # suggests:\n",
+    "        # \"If RCAP_Current for the tract in which the site is located is 1, the tract is an R/ECAP. If RCAP_Current is 0, it is not.\"\n",
+    "        \"RCAP_Current\": HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME,\n",
+    "    },\n",
+    "    inplace=True,\n",
+    ")\n",
+    "\n",
+    "# Convert to boolean\n",
+    "df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME] = df[\n",
+    "    HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME\n",
+    "].astype(\"bool\")\n",
+    "\n",
+    "df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME].value_counts()\n",
+    "\n",
+    "df.sort_values(by=GEOID_TRACT_FIELD_NAME, inplace=True)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9fa2077a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# write csv\n",
+    "CSV_PATH.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# Drop unnecessary columns.\n",
+    "df[[GEOID_TRACT_FIELD_NAME, HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME]].to_csv(\n",
+    "    CSV_PATH / \"usa.csv\", index=False\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/score/ipython/score_calc.ipynb b/score/ipython/score_calc.ipynb
index 39424812..e1eec406 100644
--- a/score/ipython/score_calc.ipynb
+++ b/score/ipython/score_calc.ipynb
@@ -16,6 +16,7 @@
     "import collections\n",
     "import functools\n",
     "from pathlib import Path\n",
+    "import matplotlib.pyplot as plt\n",
     "import pandas as pd\n",
     "import csv\n",
     "import os\n",
@@ -363,7 +364,7 @@
    },
    "outputs": [],
    "source": [
-    "# calculate percentiles\n",
+    "# Calculate percentiles for each data set.\n",
     "for data_set in data_sets:\n",
     "    df[f\"{data_set.renamed_field}{PERCENTILE_FIELD_SUFFIX}\"] = df[\n",
     "        data_set.renamed_field\n",
@@ -379,7 +380,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# calculate min max\n",
+    "# Calculate min-max for each data set.\n",
     "# Math:\n",
     "# (\n",
     "#     Observed value\n",
@@ -410,6 +411,28 @@
     "df.head()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f4eec326",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Graph distributions of the min-max normalized fields.\n",
+    "min_max_fields = [\n",
+    "    f\"{data_set.renamed_field}{MIN_MAX_FIELD_SUFFIX}\"\n",
+    "    for data_set in data_sets\n",
+    "    if data_set.renamed_field != GEOID_FIELD_NAME\n",
+    "]\n",
+    "df.hist(\n",
+    "    column=min_max_fields, layout=(len(min_max_fields), 1), figsize=(10, 30), bins=30\n",
+    ")\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -476,7 +499,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fields_to_use_in_score = [\n",
+    "# Calculate scores D and E.\n",
+    "fields_to_use_in_score_d_and_e = [\n",
     "    UNEMPLOYED_FIELD_NAME,\n",
     "    LINGUISTIC_ISOLATION_FIELD_NAME,\n",
     "    HOUSING_BURDEN_FIELD_NAME,\n",
@@ -484,9 +508,11 @@
     "    HIGH_SCHOOL_FIELD_NAME,\n",
     "]\n",
     "\n",
-    "fields_min_max = [f\"{field}{MIN_MAX_FIELD_SUFFIX}\" for field in fields_to_use_in_score]\n",
+    "fields_min_max = [\n",
+    "    f\"{field}{MIN_MAX_FIELD_SUFFIX}\" for field in fields_to_use_in_score_d_and_e\n",
+    "]\n",
     "fields_percentile = [\n",
-    "    f\"{field}{PERCENTILE_FIELD_SUFFIX}\" for field in fields_to_use_in_score\n",
+    "    f\"{field}{PERCENTILE_FIELD_SUFFIX}\" for field in fields_to_use_in_score_d_and_e\n",
     "]\n",
     "\n",
     "# Calculate \"Score D\", which uses min-max normalization\n",
@@ -498,6 +524,32 @@
     "print(df[\"Score E\"].describe())"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a02e5bac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Graph distributions\n",
+    "df.hist(\n",
+    "    column=fields_min_max, layout=(len(fields_min_max), 1), figsize=(10, 30), bins=30\n",
+    ")\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0e608c8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate correlations\n",
+    "df[fields_min_max].corr()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/score/ipython/scoring_comparison.ipynb b/score/ipython/scoring_comparison.ipynb
index fc315009..64733e10 100644
--- a/score/ipython/scoring_comparison.ipynb
+++ b/score/ipython/scoring_comparison.ipynb
@@ -4,21 +4,40 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "54615cef",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "# Before running this script as it currently stands, you'll need to run two notebooks:\n",
-    "# 1. ejscreen_etl.ipynb\n",
-    "# 2. score_calc_0.1.ipynb\n",
+    "# Before running this script as it currently stands, you'll need to run these notebooks (in any order):\n",
+    "# * score_calc.ipynb\n",
+    "# * calenviroscreen_etl.ipynb\n",
+    "# * hud_recap_etl.ipynb\n",
     "\n",
+    "import collections\n",
+    "import functools\n",
+    "import IPython\n",
     "import numpy as np\n",
+    "import os\n",
     "import pandas as pd\n",
-    "from pathlib import Path\n",
+    "import pathlib\n",
+    "import pypandoc\n",
     "import requests\n",
+    "import string\n",
+    "import sys\n",
+    "import typing\n",
+    "import us\n",
     "import zipfile\n",
+    "\n",
     "from datetime import datetime\n",
     "from tqdm.notebook import tqdm_notebook\n",
     "\n",
+    "module_path = os.path.abspath(os.path.join(\"..\"))\n",
+    "if module_path not in sys.path:\n",
+    "    sys.path.append(module_path)\n",
+    "\n",
+    "from utils import remove_all_from_dir, get_excel_column_name\n",
+    "\n",
     "# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
     "tqdm_notebook.pandas()"
    ]
@@ -27,86 +46,77 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "49a63129",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "# Suppress scientific notation in pandas (this shows up for census tract IDs)\n",
     "pd.options.display.float_format = \"{:.2f}\".format\n",
     "\n",
     "# Set some global parameters\n",
-    "DATA_DIR = Path.cwd().parent / \"data\"\n",
-    "TEMP_DATA_DIR = Path.cwd().parent / \"data\" / \"tmp\"\n",
-    "# None of these numbers are final, but just for the purposes of comparison.\n",
-    "CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD = 75\n",
+    "DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
+    "TEMP_DATA_DIR = pathlib.Path.cwd().parent / \"data\" / \"tmp\"\n",
+    "COMPARISON_OUTPUTS_DIR = TEMP_DATA_DIR / \"comparison_outputs\"\n",
+    "\n",
+    "# Make the dirs if they don't exist\n",
+    "TEMP_DATA_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "COMPARISON_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
     "CEJST_PRIORITY_COMMUNITY_THRESHOLD = 0.75\n",
     "\n",
     "# Name fields using variables. (This makes it easy to reference the same fields frequently without using strings\n",
     "# and introducing the risk of misspelling the field name.)\n",
-    "CENSUS_BLOCK_GROUP_ID_FIELD = \"census_block_group_id\"\n",
-    "CENSUS_BLOCK_GROUP_POPULATION_FIELD = \"census_block_group_population\"\n",
-    "CENSUS_TRACT_ID_FIELD = \"census_tract_id\"\n",
-    "CALENVIROSCREEN_SCORE_FIELD = \"calenviroscreen_score\"\n",
-    "CALENVIROSCREEN_PERCENTILE_FIELD = \"calenviroscreen_percentile\"\n",
-    "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD = \"calenviroscreen_priority_community\"\n",
     "\n",
-    "# Note: we are pretending the EJSCREEN's low income percent is the actual score for now as a placeholder.\n",
+    "GEOID_FIELD_NAME = \"GEOID10\"\n",
+    "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n",
+    "GEOID_STATE_FIELD_NAME = \"GEOID10_STATE\"\n",
+    "CENSUS_BLOCK_GROUP_POPULATION_FIELD = \"Total population\"\n",
+    "\n",
     "CEJST_SCORE_FIELD = \"cejst_score\"\n",
     "CEJST_PERCENTILE_FIELD = \"cejst_percentile\"\n",
     "CEJST_PRIORITY_COMMUNITY_FIELD = \"cejst_priority_community\"\n",
     "\n",
-    "# Comparison field names\n",
-    "any_tract_has_at_least_one_cbg = \"Tract has at least one CEJST CBG?\"\n",
-    "tract_has_at_least_one_cbg = \"CES Tract has at least one CEJST CBG?\"\n",
-    "tract_has_100_percent_cbg = \"CES Tract has 100% CEJST CBGs?\"\n",
-    "non_ces_tract_has_at_least_one_cbg = \"Non-CES Tract has at least one CEJST CBG?\"\n",
-    "non_ces_tract_has_100_percent_cbg = \"Non-CES Tract has 100% CEJST CBGs?\""
+    "# Define some suffixes\n",
+    "POPULATION_SUFFIX = \" (priority population)\""
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "2b26dccf",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "# Load CEJST score data\n",
     "cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"usa.csv\"\n",
+    "cejst_df = pd.read_csv(cejst_data_path, dtype={GEOID_FIELD_NAME: \"string\"})\n",
     "\n",
-    "cejst_df = pd.read_csv(cejst_data_path)\n",
+    "# score_used = \"Score A\"\n",
     "\n",
-    "cejst_df.head()\n",
-    "\n",
-    "# Rename unclear name \"id\" to \"census_block_group_id\", as well as other renamings.\n",
-    "\n",
-    "score_used = \"Score A\"\n",
-    "\n",
-    "cejst_df.rename(\n",
-    "    columns={\n",
-    "        \"GEOID10\": CENSUS_BLOCK_GROUP_ID_FIELD,\n",
-    "        \"Total population\": CENSUS_BLOCK_GROUP_POPULATION_FIELD,\n",
-    "        score_used: CEJST_SCORE_FIELD,\n",
-    "        f\"{score_used} (percentile)\": CEJST_PERCENTILE_FIELD,\n",
-    "    },\n",
-    "    inplace=True,\n",
-    "    errors=\"raise\",\n",
-    ")\n",
-    "\n",
-    "# Calculate the top K% of prioritized communities\n",
-    "cejst_df[CEJST_PRIORITY_COMMUNITY_FIELD] = (\n",
-    "    cejst_df[CEJST_PERCENTILE_FIELD] >= CEJST_PRIORITY_COMMUNITY_THRESHOLD\n",
-    ")\n",
+    "# # Rename unclear name \"id\" to \"census_block_group_id\", as well as other renamings.\n",
+    "# cejst_df.rename(\n",
+    "#     columns={\n",
+    "#         \"Total population\": CENSUS_BLOCK_GROUP_POPULATION_FIELD,\n",
+    "#         score_used: CEJST_SCORE_FIELD,\n",
+    "#         f\"{score_used} (percentile)\": CEJST_PERCENTILE_FIELD,\n",
+    "#     },\n",
+    "#     inplace=True,\n",
+    "#     errors=\"raise\",\n",
+    "# )\n",
     "\n",
     "# Create the CBG's Census Tract ID by dropping the last number from the FIPS CODE of the CBG.\n",
     "# The CBG ID is the last one character.\n",
     "# For more information, see https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html.\n",
-    "cejst_df.loc[:, CENSUS_TRACT_ID_FIELD] = (\n",
-    "    cejst_df.loc[:, CENSUS_BLOCK_GROUP_ID_FIELD].astype(str).str[:-1].astype(np.int64)\n",
+    "cejst_df.loc[:, GEOID_TRACT_FIELD_NAME] = (\n",
+    "    cejst_df.loc[:, GEOID_FIELD_NAME].astype(str).str[:-1]\n",
     ")\n",
     "\n",
-    "# Remove all non-California data\n",
-    "cejst_df = cejst_df.loc[\n",
-    "    cejst_df[CENSUS_BLOCK_GROUP_ID_FIELD].astype(str).str[0] == \"6\", :\n",
-    "]\n",
+    "cejst_df.loc[:, GEOID_STATE_FIELD_NAME] = (\n",
+    "    cejst_df.loc[:, GEOID_FIELD_NAME].astype(str).str[0:2]\n",
+    ")\n",
     "\n",
     "cejst_df.head()"
    ]
@@ -114,65 +124,26 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ec6b27e3",
-   "metadata": {},
+   "id": "08962382",
+   "metadata": {
+    "scrolled": false
+   },
    "outputs": [],
    "source": [
-    "# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:\n",
-    "# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip\n",
+    "# Load CalEnviroScreen 4.0\n",
+    "CALENVIROSCREEN_SCORE_FIELD = \"calenviroscreen_score\"\n",
+    "CALENVIROSCREEN_PERCENTILE_FIELD = \"calenviroscreen_percentile\"\n",
+    "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD = \"calenviroscreen_priority_community\"\n",
     "\n",
-    "download = requests.get(\n",
-    "    \"https://justice40-data.s3.amazonaws.com/CalEnviroScreen/CalEnviroScreen_4.0_2021.zip\",\n",
-    "    verify=False,\n",
-    ")\n",
-    "file_contents = download.content\n",
-    "zip_file_path = TEMP_DATA_DIR\n",
-    "zip_file = open(zip_file_path / \"downloaded.zip\", \"wb\")\n",
-    "zip_file.write(file_contents)\n",
-    "zip_file.close()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bdf08971",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Extract zip\n",
-    "print(zip_file_path)\n",
-    "with zipfile.ZipFile(zip_file_path / \"downloaded.zip\", \"r\") as zip_ref:\n",
-    "    zip_ref.extractall(zip_file_path)\n",
-    "calenviroscreen_4_csv_name = \"CalEnviroScreen_4.0_2021.csv\"\n",
-    "calenviroscreen_data_path = TEMP_DATA_DIR.joinpath(calenviroscreen_4_csv_name)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "29c14b29",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load comparison index (CalEnviroScreen 4)\n",
-    "\n",
-    "calenviroscreen_df = pd.read_csv(calenviroscreen_data_path)\n",
-    "\n",
-    "calenviroscreen_df.rename(\n",
-    "    columns={\n",
-    "        \"Census Tract\": CENSUS_TRACT_ID_FIELD,\n",
-    "        \"DRAFT CES 4.0 Score\": CALENVIROSCREEN_SCORE_FIELD,\n",
-    "        \"DRAFT CES 4.0 Percentile\": CALENVIROSCREEN_PERCENTILE_FIELD,\n",
-    "    },\n",
-    "    inplace=True,\n",
+    "calenviroscreen_data_path = DATA_DIR / \"dataset\" / \"calenviroscreen4\" / \"data06.csv\"\n",
+    "calenviroscreen_df = pd.read_csv(\n",
+    "    calenviroscreen_data_path, dtype={GEOID_TRACT_FIELD_NAME: \"string\"}\n",
     ")\n",
     "\n",
-    "\n",
-    "# Calculate the top K% of prioritized communities\n",
-    "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD] = (\n",
-    "    calenviroscreen_df[CALENVIROSCREEN_PERCENTILE_FIELD]\n",
-    "    >= CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD\n",
-    ")\n",
+    "# Convert priority community field to a bool.\n",
+    "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD] = calenviroscreen_df[\n",
+    "    CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD\n",
+    "].astype(bool)\n",
     "\n",
     "calenviroscreen_df.head()"
    ]
@@ -180,222 +151,729 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "813e5656",
-   "metadata": {},
+   "id": "42bd28d4",
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "# Join CalEnviroScreen and CEJST data.\n",
+    "# Load HUD data\n",
+    "hud_recap_data_path = DATA_DIR / \"dataset\" / \"hud_recap\" / \"usa.csv\"\n",
+    "hud_recap_df = pd.read_csv(\n",
+    "    hud_recap_data_path, dtype={GEOID_TRACT_FIELD_NAME: \"string\"}\n",
+    ")\n",
+    "\n",
+    "hud_recap_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d77cd872",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Join all dataframes that use tracts\n",
+    "census_tract_dfs = [calenviroscreen_df, hud_recap_df]\n",
+    "\n",
+    "census_tract_df = functools.reduce(\n",
+    "    lambda left, right: pd.merge(\n",
+    "        left=left, right=right, on=GEOID_TRACT_FIELD_NAME, how=\"outer\"\n",
+    "    ),\n",
+    "    census_tract_dfs,\n",
+    ")\n",
+    "\n",
+    "if census_tract_df[GEOID_TRACT_FIELD_NAME].str.len().unique() != [11]:\n",
+    "    raise ValueError(\"Some of the census tract data has the wrong length.\")\n",
+    "\n",
+    "if len(census_tract_df) > 74134:\n",
+    "    raise ValueError(\"Too many rows in the join.\")\n",
+    "\n",
+    "census_tract_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "813e5656",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Join tract indices and CEJST data.\n",
     "# Note: we're joining on the census *tract*, so there will be multiple CBG entries joined to the same census tract row from CES,\n",
     "# creating multiple rows of the same CES data.\n",
-    "\n",
-    "# For simplicity, we'll only keep certain columns from each data frame.\n",
-    "cejst_columns_to_keep = [\n",
-    "    CENSUS_BLOCK_GROUP_ID_FIELD,\n",
-    "    CENSUS_TRACT_ID_FIELD,\n",
-    "    CENSUS_BLOCK_GROUP_POPULATION_FIELD,\n",
-    "    CEJST_SCORE_FIELD,\n",
-    "    CEJST_PERCENTILE_FIELD,\n",
-    "    CEJST_PRIORITY_COMMUNITY_FIELD,\n",
-    "]\n",
-    "\n",
-    "calenviroscreen_columns_to_keep = [\n",
-    "    CENSUS_TRACT_ID_FIELD,\n",
-    "    CALENVIROSCREEN_SCORE_FIELD,\n",
-    "    CALENVIROSCREEN_PERCENTILE_FIELD,\n",
-    "    CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD,\n",
-    "]\n",
-    "\n",
-    "merged_df = cejst_df.loc[:, cejst_columns_to_keep].merge(\n",
-    "    calenviroscreen_df.loc[:, calenviroscreen_columns_to_keep],\n",
+    "merged_df = cejst_df.merge(\n",
+    "    census_tract_df,\n",
     "    how=\"left\",\n",
-    "    on=CENSUS_TRACT_ID_FIELD,\n",
+    "    on=GEOID_TRACT_FIELD_NAME,\n",
     ")\n",
     "\n",
+    "\n",
+    "if len(merged_df) > 220333:\n",
+    "    raise ValueError(\"Too many rows in the join.\")\n",
+    "\n",
     "merged_df.head()\n",
     "\n",
+    "\n",
     "# merged_df.to_csv(\n",
-    "#     path_or_buf=TEMP_DATA_DIR / \"merged.csv\",\n",
-    "#     na_rep=\"\",\n",
-    "#     index=False\n",
+    "#     path_or_buf=COMPARISON_OUTPUTS_DIR / \"merged.csv\", na_rep=\"\", index=False\n",
     "# )"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "939baea4",
+   "id": "8a801121",
    "metadata": {
     "scrolled": true
    },
    "outputs": [],
    "source": [
-    "# Create analysis\n",
-    "def calculate_comparison(frame):\n",
-    "    # Keep all the CES values at the Census Tract Level\n",
-    "    df = frame.loc[\n",
-    "        frame.index[0],\n",
-    "        [\n",
-    "            CENSUS_TRACT_ID_FIELD,\n",
-    "            CALENVIROSCREEN_SCORE_FIELD,\n",
-    "            CALENVIROSCREEN_PERCENTILE_FIELD,\n",
-    "            CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD,\n",
-    "        ],\n",
-    "    ]\n",
+    "cejst_priority_communities_fields = [\n",
+    "    \"Score A (top 25th percentile)\",\n",
+    "    \"Score B (top 25th percentile)\",\n",
+    "    \"Score C (top 25th percentile)\",\n",
+    "    \"Score D (top 25th percentile)\",\n",
+    "    \"Score E (top 25th percentile)\",\n",
+    "]\n",
     "\n",
-    "    # Convenience constant for whether the tract is or is not a CalEnviroScreen priority community.\n",
-    "    is_a_ces_priority_tract = frame.loc[\n",
-    "        frame.index[0], [CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD]\n",
-    "    ][0]\n",
-    "\n",
-    "    # Recall that NaN values are not falsy, so we need to check if `is_a_ces_priority_tract` is True.\n",
-    "    is_a_ces_priority_tract = is_a_ces_priority_tract is True\n",
-    "\n",
-    "    # Calculate whether the tract (whether or not it is a CES priority tract) includes CBGs that are priority\n",
-    "    # according to the current CEJST score.\n",
-    "    df[any_tract_has_at_least_one_cbg] = (\n",
-    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].sum() > 0\n",
-    "    )\n",
-    "\n",
-    "    # Calculate comparison\n",
-    "    # A CES priority tract has at least one CEJST priority CBG.\n",
-    "    df[tract_has_at_least_one_cbg] = (\n",
-    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].sum() > 0\n",
-    "        if is_a_ces_priority_tract\n",
-    "        else None\n",
-    "    )\n",
-    "\n",
-    "    # A CES priority tract has all of its contained CBGs as CEJST priority CBGs.\n",
-    "    df[tract_has_100_percent_cbg] = (\n",
-    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].mean() == 1\n",
-    "        if is_a_ces_priority_tract\n",
-    "        else None\n",
-    "    )\n",
-    "\n",
-    "    # Calculate the inverse\n",
-    "    # A tract that is _not_ a CES priority has at least one CEJST priority CBG.\n",
-    "    df[non_ces_tract_has_at_least_one_cbg] = (\n",
-    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].sum() > 0\n",
-    "        if not is_a_ces_priority_tract\n",
-    "        else None\n",
-    "    )\n",
-    "\n",
-    "    # A tract that is _not_ a CES priority has all of its contained CBGs as CEJST priority CBGs.\n",
-    "    df[non_ces_tract_has_100_percent_cbg] = (\n",
-    "        frame.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].mean() == 1\n",
-    "        if not is_a_ces_priority_tract\n",
-    "        else None\n",
-    "    )\n",
-    "\n",
-    "    return df\n",
-    "\n",
-    "\n",
-    "# Group all data by the census tract.\n",
-    "grouped_df = merged_df.groupby(CENSUS_TRACT_ID_FIELD)\n",
-    "\n",
-    "# Run the comparison function on the groups.\n",
-    "comparison_df = grouped_df.progress_apply(calculate_comparison)\n",
-    "\n",
-    "# Sort descending by highest CES Score for convenience when viewing output file\n",
-    "comparison_df.sort_values(\n",
-    "    by=[CALENVIROSCREEN_PERCENTILE_FIELD], ascending=False, inplace=True\n",
-    ")\n",
-    "\n",
-    "# Write comparison to CSV.\n",
-    "comparison_df.to_csv(\n",
-    "    path_or_buf=TEMP_DATA_DIR / \"Comparison Output.csv\", na_rep=\"\", index=False\n",
-    ")\n",
-    "\n",
-    "print(comparison_df.head())"
+    "comparison_priority_communities_fields = [\n",
+    "    \"calenviroscreen_priority_community\",\n",
+    "    \"hud_recap_priority_community\",\n",
+    "]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "85709225",
+   "id": "9fef0da9",
    "metadata": {
     "scrolled": true
    },
    "outputs": [],
    "source": [
-    "# Prepare some constants for use in the following Markdown cell.\n",
-    "total_cbgs_ca_only = len(cejst_df)\n",
-    "cejst_cbgs_ca_only = cejst_df.loc[:, CEJST_PRIORITY_COMMUNITY_FIELD].sum()\n",
-    "cejst_cbgs_ca_only_percent = f\"{cejst_cbgs_ca_only / total_cbgs_ca_only:.0%}\"\n",
+    "def get_state_distributions(\n",
+    "    df: pd.DataFrame, priority_communities_fields: typing.List[str]\n",
+    ") -> pd.DataFrame:\n",
+    "    \"\"\"For each boolean field of priority communities, calculate distribution across states and territories.\"\"\"\n",
     "\n",
-    "total_tracts_count = len(comparison_df)\n",
-    "ces_tracts_count = comparison_df.loc[:, CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD].sum()\n",
-    "ces_tracts_count_percent = f\"{ces_tracts_count / total_tracts_count:.0%}\"\n",
-    "non_ces_tracts_count = total_tracts_count - ces_tracts_count\n",
+    "    # Ensure each field is boolean.\n",
+    "    for priority_communities_field in priority_communities_fields:\n",
+    "        if df[priority_communities_field].dtype != bool:\n",
+    "            print(f\"Converting {priority_communities_field} to boolean.\")\n",
     "\n",
-    "total_tracts_count = len(comparison_df[CENSUS_TRACT_ID_FIELD])\n",
-    "cejst_tracts_count = comparison_df.loc[:, any_tract_has_at_least_one_cbg].sum()\n",
-    "cejst_tracts_count_percent = f\"{cejst_tracts_count / total_tracts_count:.0%}\"\n",
+    "        # Calculate the population included as priority communities per CBG. Will either be 0 or the population.\n",
+    "        df[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = (\n",
+    "            df[priority_communities_field] * df[CENSUS_BLOCK_GROUP_POPULATION_FIELD]\n",
+    "        )\n",
     "\n",
-    "# CES stats\n",
-    "at_least_one_sum = comparison_df.loc[:, tract_has_at_least_one_cbg].sum()\n",
-    "at_least_one_sum_percent = f\"{at_least_one_sum / ces_tracts_count:.0%}\"\n",
+    "    def calculate_state_comparison(frame: pd.DataFrame) -> pd.DataFrame:\n",
+    "        \"\"\"\n",
+    "        This method will be applied to a `group_by` object. Inherits some parameters from outer scope.\n",
+    "        \"\"\"\n",
+    "        state_id = frame[GEOID_STATE_FIELD_NAME].unique()[0]\n",
     "\n",
-    "all_100_sum = comparison_df.loc[:, tract_has_100_percent_cbg].sum()\n",
-    "all_100_sum_percent = f\"{all_100_sum / ces_tracts_count:.0%}\"\n",
+    "        summary_dict = {}\n",
+    "        summary_dict[GEOID_STATE_FIELD_NAME] = state_id\n",
+    "        summary_dict[\"State name\"] = us.states.lookup(state_id).name\n",
+    "        summary_dict[\"Total CBGs in state\"] = len(frame)\n",
+    "        summary_dict[\"Total population in state\"] = frame[\n",
+    "            CENSUS_BLOCK_GROUP_POPULATION_FIELD\n",
+    "        ].sum()\n",
     "\n",
-    "# Non-CES stats:\n",
-    "non_ces_at_least_one_sum = comparison_df.loc[\n",
-    "    :, non_ces_tract_has_at_least_one_cbg\n",
-    "].sum()\n",
-    "non_ces_at_least_one_sum_percent = (\n",
-    "    f\"{non_ces_at_least_one_sum / non_ces_tracts_count:.0%}\"\n",
+    "        for priority_communities_field in priority_communities_fields:\n",
+    "            summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = frame[\n",
+    "                f\"{priority_communities_field}{POPULATION_SUFFIX}\"\n",
+    "            ].sum()\n",
+    "\n",
+    "            summary_dict[f\"{priority_communities_field} (total CBGs)\"] = frame[\n",
+    "                f\"{priority_communities_field}\"\n",
+    "            ].sum()\n",
+    "\n",
+    "            # Calculate some combinations of other variables.\n",
+    "            summary_dict[f\"{priority_communities_field} (percent CBGs)\"] = (\n",
+    "                summary_dict[f\"{priority_communities_field} (total CBGs)\"]\n",
+    "                / summary_dict[\"Total CBGs in state\"]\n",
+    "            )\n",
+    "\n",
+    "            summary_dict[f\"{priority_communities_field} (percent population)\"] = (\n",
+    "                summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"]\n",
+    "                / summary_dict[\"Total population in state\"]\n",
+    "            )\n",
+    "\n",
+    "        df = pd.DataFrame(summary_dict, index=[0])\n",
+    "\n",
+    "        return df\n",
+    "\n",
+    "    grouped_df = df.groupby(GEOID_STATE_FIELD_NAME)\n",
+    "\n",
+    "    # Run the comparison function on the groups.\n",
+    "    state_distribution_df = grouped_df.progress_apply(calculate_state_comparison)\n",
+    "\n",
+    "    return state_distribution_df\n",
+    "\n",
+    "\n",
+    "def write_state_distribution_excel(\n",
+    "    state_distribution_df: pd.DataFrame, file_path: pathlib.PosixPath\n",
+    ") -> None:\n",
+    "    \"\"\"Write the dataframe to excel with special formatting.\"\"\"\n",
+    "    # Create a Pandas Excel writer using XlsxWriter as the engine.\n",
+    "    writer = pd.ExcelWriter(file_path, engine=\"xlsxwriter\")\n",
+    "\n",
+    "    # Convert the dataframe to an XlsxWriter Excel object. We also turn off the\n",
+    "    # index column at the left of the output dataframe.\n",
+    "    state_distribution_df.to_excel(writer, sheet_name=\"Sheet1\", index=False)\n",
+    "\n",
+    "    # Get the xlsxwriter workbook and worksheet objects.\n",
+    "    workbook = writer.book\n",
+    "    worksheet = writer.sheets[\"Sheet1\"]\n",
+    "    worksheet.autofilter(\n",
+    "        0, 0, state_distribution_df.shape[0], state_distribution_df.shape[1]\n",
+    "    )\n",
+    "\n",
+    "    for column in state_distribution_df.columns:\n",
+    "        # Special formatting for columns that capture the percent of population considered priority.\n",
+    "        if \"(percent population)\" in column:\n",
+    "            # Turn the column index into excel ranges (e.g., column #95 is \"CR\" and the range may be \"CR2:CR53\").\n",
+    "            column_index = state_distribution_df.columns.get_loc(column)\n",
+    "            column_character = get_excel_column_name(column_index)\n",
+    "            column_ranges = (\n",
+    "                f\"{column_character}2:{column_character}{len(state_distribution_df)+1}\"\n",
+    "            )\n",
+    "\n",
+    "            # Add green to red conditional formatting.\n",
+    "            worksheet.conditional_format(\n",
+    "                column_ranges,\n",
+    "                # Min: green, max: red.\n",
+    "                {\n",
+    "                    \"type\": \"2_color_scale\",\n",
+    "                    \"min_color\": \"#00FF7F\",\n",
+    "                    \"max_color\": \"#C82538\",\n",
+    "                },\n",
+    "            )\n",
+    "\n",
+    "            # TODO: text wrapping not working, fix.\n",
+    "            text_wrap = workbook.add_format({\"text_wrap\": True})\n",
+    "\n",
+    "            # Make these columns wide enough that you can read them.\n",
+    "            worksheet.set_column(\n",
+    "                f\"{column_character}:{column_character}\", 40, text_wrap\n",
+    "            )\n",
+    "\n",
+    "    writer.save()\n",
+    "\n",
+    "\n",
+    "state_distribution_df = get_state_distributions(\n",
+    "    df=merged_df,\n",
+    "    priority_communities_fields=cejst_priority_communities_fields\n",
+    "    + comparison_priority_communities_fields,\n",
     ")\n",
     "\n",
-    "non_ces_all_100_sum = comparison_df.loc[:, non_ces_tract_has_100_percent_cbg].sum()\n",
-    "non_ces_all_100_sum_percent = f\"{non_ces_all_100_sum / non_ces_tracts_count:.0%}\"\n",
+    "state_distribution_df.to_csv(\n",
+    "    path_or_buf=COMPARISON_OUTPUTS_DIR / \"Priority CBGs by state.csv\",\n",
+    "    na_rep=\"\",\n",
+    "    index=False,\n",
+    ")\n",
     "\n",
-    "# Note, for the following Markdown cell to render the variables properly, follow the steps at\n",
-    "# \"Activating variable-enabled Markdown for Jupyter notebooks\" within `score/README.md`."
+    "write_state_distribution_excel(\n",
+    "    state_distribution_df=state_distribution_df,\n",
+    "    file_path=COMPARISON_OUTPUTS_DIR / \"Priority CBGs by state.xlsx\",\n",
+    ")\n",
+    "\n",
+    "state_distribution_df.head()"
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "0c534966",
-   "metadata": {
-    "variables": {
-     " total_tracts_count": "8057",
-     "all_100_sum": "1168",
-     "all_100_sum_percent": "59%",
-     "at_least_one_sum": "1817",
-     "at_least_one_sum_percent": "92%",
-     "cejst_cbgs_ca_only": "6987",
-     "cejst_cbgs_ca_only_percent": "30%",
-     "cejst_tracts_count": "3516",
-     "cejst_tracts_count_percent": "44%",
-     "ces_tracts_count": "1983",
-     "ces_tracts_count_percent": "25%",
-     "datetime.today().strftime('%Y-%m-%d')": "2021-06-28",
-     "non_ces_all_100_sum": "438",
-     "non_ces_all_100_sum_percent": "7%",
-     "non_ces_at_least_one_sum": "1699",
-     "non_ces_at_least_one_sum_percent": "28%",
-     "score_used": "Score A",
-     "total_cbgs_ca_only": "23212"
-    }
-   },
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d46667cf",
+   "metadata": {},
+   "outputs": [],
    "source": [
-    "# Summary of findings for {{score_used}}\n",
+    "# This cell defines a couple of comparison functions. It does not run them.\n",
     "\n",
-    "(Calculated on {{datetime.today().strftime('%Y-%m-%d')}})\n",
+    "# Define a namedtuple for column names, which need to be shared between multiple parts of this comparison pipeline.\n",
+    "# Named tuples are useful here because they provide guarantees that for each instance, all properties are defined and\n",
+    "# can be accessed as properties (rather than as strings).\n",
+    "\n",
+    "# Note: if you'd like to add a field used throughout the comparison process, add it in three places.\n",
+    "# For an example `new_field`,\n",
+    "# 1. in this namedtuple, add the field as a string in `field_names` (e.g., `field_names=[..., \"new_field\"]`)\n",
+    "# 2. in the function `get_comparison_field_names`, define how the field name should be created from input data\n",
+    "#     (e.g., `...new_field=f\"New field compares {method_a_name} to {method_b_name}\"`)\n",
+    "# 3. In the function `get_comparison_markdown_content`, add some reporting on the new field to the markdown content.\n",
+    "#     (e.g., `The statistics indicate that {calculation_based_on_new_field} percent of census tracts are different between scores.`)\n",
+    "ComparisonFieldNames = collections.namedtuple(\n",
+    "    typename=\"ComparisonFieldNames\",\n",
+    "    field_names=[\n",
+    "        \"any_tract_has_at_least_one_method_a_cbg\",\n",
+    "        \"method_b_tract_has_at_least_one_method_a_cbg\",\n",
+    "        \"method_b_tract_has_100_percent_method_a_cbg\",\n",
+    "        \"method_b_non_priority_tract_has_at_least_one_method_a_cbg\",\n",
+    "        \"method_b_non_priority_tract_has_100_percent_method_a_cbg\",\n",
+    "    ],\n",
+    ")\n",
+    "\n",
+    "# Define a namedtuple for indices.\n",
+    "Index = collections.namedtuple(\n",
+    "    typename=\"Index\",\n",
+    "    field_names=[\n",
+    "        \"method_name\",\n",
+    "        \"priority_communities_field\",\n",
+    "        # Note: this field only used by indices defined at the census tract level.\n",
+    "        \"other_census_tract_fields_to_keep\",\n",
+    "    ],\n",
+    ")\n",
+    "\n",
+    "\n",
+    "def get_comparison_field_names(\n",
+    "    method_a_name: str,\n",
+    "    method_b_name: str,\n",
+    ") -> ComparisonFieldNames:\n",
+    "    comparison_field_names = ComparisonFieldNames(\n",
+    "        any_tract_has_at_least_one_method_a_cbg=(\n",
+    "            f\"Any tract has at least one {method_a_name} Priority CBG?\"\n",
+    "        ),\n",
+    "        method_b_tract_has_at_least_one_method_a_cbg=(\n",
+    "            f\"{method_b_name} priority tract has at least one {method_a_name} CBG?\"\n",
+    "        ),\n",
+    "        method_b_tract_has_100_percent_method_a_cbg=(\n",
+    "            f\"{method_b_name} tract has 100% {method_a_name} priority CBGs?\"\n",
+    "        ),\n",
+    "        method_b_non_priority_tract_has_at_least_one_method_a_cbg=(\n",
+    "            f\"Non-priority {method_b_name} tract has at least one {method_a_name} priority CBG?\"\n",
+    "        ),\n",
+    "        method_b_non_priority_tract_has_100_percent_method_a_cbg=(\n",
+    "            f\"Non-priority {method_b_name} tract has 100% {method_a_name} priority CBGs?\"\n",
+    "        ),\n",
+    "    )\n",
+    "    return comparison_field_names\n",
+    "\n",
+    "\n",
+    "def get_df_with_only_shared_states(\n",
+    "    df: pd.DataFrame,\n",
+    "    field_a: str,\n",
+    "    field_b: str,\n",
+    "    state_field=GEOID_STATE_FIELD_NAME,\n",
+    ") -> pd.DataFrame:\n",
+    "    \"\"\"\n",
+    "    Useful for looking at shared geographies across two fields.\n",
+    "\n",
+    "    For a data frame and two fields, return a data frame only for states where there are non-null\n",
+    "    values for both fields in that state (or territory).\n",
+    "\n",
+    "    This is useful, for example, when running a comparison of CalEnviroScreen (only in California) against\n",
+    "    a draft score that's national, and returning only the data for California for the entire data frame.\n",
+    "    \"\"\"\n",
+    "    field_a_states = df.loc[df[field_a].notnull(), state_field].unique()\n",
+    "    field_b_states = df.loc[df[field_b].notnull(), state_field].unique()\n",
+    "\n",
+    "    shared_states = list(set(field_a_states) & set(field_b_states))\n",
+    "\n",
+    "    df = df.loc[df[state_field].isin(shared_states), :]\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_comparison_df(\n",
+    "    df: pd.DataFrame,\n",
+    "    method_a_priority_census_block_groups_field: str,\n",
+    "    method_b_priority_census_tracts_field: str,\n",
+    "    other_census_tract_fields_to_keep: typing.Optional[typing.List[str]],\n",
+    "    comparison_field_names: ComparisonFieldNames,\n",
+    "    output_dir: pathlib.PosixPath,\n",
+    ") -> None:\n",
+    "    \"\"\"Produces a comparison report for any two given boolean columns representing priority fields.\n",
+    "\n",
+    "    Args:\n",
+    "      df: a pandas dataframe including the data for this comparison.\n",
+    "      method_a_priority_census_block_groups_field: the name of a boolean column in `df`, such as the CEJST priority\n",
+    "        community field that defines communities at the level of census block groups (CBGs).\n",
+    "      method_b_priority_census_tracts_field: the name of a boolean column in `df`, such as the CalEnviroScreen priority\n",
+    "        community field that defines communities at the level of census tracts.\n",
+    "      other_census_tract_fields_to_keep (optional): a list of field names to preserve at the census tract level\n",
+    "\n",
+    "    Returns:\n",
+    "      comparison_df: a pandas dataframe with one row per census tract, holding the results of this comparison\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def calculate_comparison(frame: pd.DataFrame) -> pd.DataFrame:\n",
+    "        \"\"\"\n",
+    "        This method will be applied to a `group_by` object.\n",
+    "\n",
+    "        Note: It inherits from outer scope `method_a_priority_census_block_groups_field`, `method_b_priority_census_tracts_field`,\n",
+    "        and `other_census_tract_fields_to_keep`.\n",
+    "        \"\"\"\n",
+    "        # Keep all the tract values at the Census Tract Level\n",
+    "        for field in other_census_tract_fields_to_keep:\n",
+    "            if len(frame[field].unique()) != 1:\n",
+    "                raise ValueError(\n",
+    "                    f\"There are different values per CBG for field {field}.\"\n",
+    "                    \"`other_census_tract_fields_to_keep` can only be used for fields at the census tract level.\"\n",
+    "                )\n",
+    "\n",
+    "        df = frame.loc[\n",
+    "            frame.index[0],\n",
+    "            [\n",
+    "                GEOID_TRACT_FIELD_NAME,\n",
+    "                method_b_priority_census_tracts_field,\n",
+    "            ]\n",
+    "            + other_census_tract_fields_to_keep,\n",
+    "        ]\n",
+    "\n",
+    "        # Convenience constant for whether the tract is or is not a method B priority community.\n",
+    "        is_a_method_b_priority_tract = frame.loc[\n",
+    "            frame.index[0], [method_b_priority_census_tracts_field]\n",
+    "        ][0]\n",
+    "\n",
+    "        # Recall that NaN values are not falsy, so we need to check if `is_a_method_b_priority_tract` is True.\n",
+    "        is_a_method_b_priority_tract = is_a_method_b_priority_tract is True\n",
+    "\n",
+    "        # Calculate whether the tract (whether or not it is a comparison priority tract) includes CBGs that are priority\n",
+    "        # according to the current CBG score.\n",
+    "        df[comparison_field_names.any_tract_has_at_least_one_method_a_cbg] = (\n",
+    "            frame.loc[:, method_a_priority_census_block_groups_field].sum() > 0\n",
+    "        )\n",
+    "\n",
+    "        # Calculate comparison\n",
+    "        # A comparison priority tract has at least one CBG that is a priority CBG.\n",
+    "        df[comparison_field_names.method_b_tract_has_at_least_one_method_a_cbg] = (\n",
+    "            frame.loc[:, method_a_priority_census_block_groups_field].sum() > 0\n",
+    "            if is_a_method_b_priority_tract\n",
+    "            else None\n",
+    "        )\n",
+    "\n",
+    "        # A comparison priority tract has all of its contained CBGs as method A priority CBGs.\n",
+    "        df[comparison_field_names.method_b_tract_has_100_percent_method_a_cbg] = (\n",
+    "            frame.loc[:, method_a_priority_census_block_groups_field].mean() == 1\n",
+    "            if is_a_method_b_priority_tract\n",
+    "            else None\n",
+    "        )\n",
+    "\n",
+    "        # Calculate the inverse\n",
+    "        # A tract that is _not_ a comparison priority has at least one method A priority CBG.\n",
+    "        df[\n",
+    "            comparison_field_names.method_b_non_priority_tract_has_at_least_one_method_a_cbg\n",
+    "        ] = (\n",
+    "            frame.loc[:, method_a_priority_census_block_groups_field].sum() > 0\n",
+    "            if not is_a_method_b_priority_tract\n",
+    "            else None\n",
+    "        )\n",
+    "\n",
+    "        # A tract that is _not_ a comparison priority has all of its contained CBGs as method A priority CBGs.\n",
+    "        df[\n",
+    "            comparison_field_names.method_b_non_priority_tract_has_100_percent_method_a_cbg\n",
+    "        ] = (\n",
+    "            frame.loc[:, method_a_priority_census_block_groups_field].mean() == 1\n",
+    "            if not is_a_method_b_priority_tract\n",
+    "            else None\n",
+    "        )\n",
+    "\n",
+    "        return df\n",
+    "\n",
+    "    # Group all data by the census tract.\n",
+    "    grouped_df = df.groupby(GEOID_TRACT_FIELD_NAME)\n",
+    "\n",
+    "    # Run the comparison function on the groups.\n",
+    "    comparison_df = grouped_df.progress_apply(calculate_comparison)\n",
+    "\n",
+    "    return comparison_df\n",
+    "\n",
+    "\n",
+    "def get_comparison_markdown_content(\n",
+    "    original_df: pd.DataFrame,\n",
+    "    comparison_df: pd.DataFrame,\n",
+    "    comparison_field_names: ComparisonFieldNames,\n",
+    "    method_a_name: str,\n",
+    "    method_b_name: str,\n",
+    "    method_a_priority_census_block_groups_field: str,\n",
+    "    method_b_priority_census_tracts_field: str,\n",
+    "    state_field: str = GEOID_STATE_FIELD_NAME,\n",
+    ") -> str:\n",
+    "    # Prepare some constants for use in the following Markdown content.\n",
+    "    total_cbgs = len(original_df)\n",
+    "\n",
+    "    # List of all states/territories in their FIPS codes:\n",
+    "    state_ids = sorted(original_df[state_field].unique())\n",
+    "    state_names = \", \".join([us.states.lookup(state_id).name for state_id in state_ids])\n",
+    "\n",
+    "    # Note: using squeeze throughout to reduce the result of `sum()` to a scalar.\n",
+    "    # TODO: investigate why sums are sometimes series and sometimes scalar.\n",
+    "    method_a_priority_cbgs = (\n",
+    "        original_df.loc[:, method_a_priority_census_block_groups_field].sum().squeeze()\n",
+    "    )\n",
+    "    method_a_priority_cbgs_percent = f\"{method_a_priority_cbgs / total_cbgs:.0%}\"\n",
+    "\n",
+    "    total_tracts_count = len(comparison_df)\n",
+    "\n",
+    "    method_b_priority_tracts_count = comparison_df.loc[\n",
+    "        :, method_b_priority_census_tracts_field\n",
+    "    ].sum()\n",
+    "\n",
+    "    method_b_priority_tracts_count_percent = (\n",
+    "        f\"{method_b_priority_tracts_count / total_tracts_count:.0%}\"\n",
+    "    )\n",
+    "    method_b_non_priority_tracts_count = (\n",
+    "        total_tracts_count - method_b_priority_tracts_count\n",
+    "    )\n",
+    "\n",
+    "    method_a_tracts_count = (\n",
+    "        comparison_df.loc[\n",
+    "            :, comparison_field_names.any_tract_has_at_least_one_method_a_cbg\n",
+    "        ]\n",
+    "        .sum()\n",
+    "        .squeeze()\n",
+    "    )\n",
+    "    method_a_tracts_count_percent = f\"{method_a_tracts_count / total_tracts_count:.0%}\"\n",
+    "\n",
+    "    # Method B priority tract stats (how many of them contain Method A priority CBGs)\n",
+    "    method_b_tracts_with_at_least_one_method_a_cbg = comparison_df.loc[\n",
+    "        :, comparison_field_names.method_b_tract_has_at_least_one_method_a_cbg\n",
+    "    ].sum()\n",
+    "    method_b_tracts_with_at_least_one_method_a_cbg_percent = f\"{method_b_tracts_with_at_least_one_method_a_cbg / method_b_priority_tracts_count:.0%}\"\n",
+    "\n",
+    "    method_b_tracts_with_at_100_percent_method_a_cbg = comparison_df.loc[\n",
+    "        :, comparison_field_names.method_b_tract_has_100_percent_method_a_cbg\n",
+    "    ].sum()\n",
+    "    method_b_tracts_with_at_100_percent_method_a_cbg_percent = f\"{method_b_tracts_with_at_100_percent_method_a_cbg / method_b_priority_tracts_count:.0%}\"\n",
+    "\n",
+    "    # Method B non-priority tract stats (how many of them contain Method A priority CBGs)\n",
+    "    method_b_non_priority_tracts_with_at_least_one_method_a_cbg = comparison_df.loc[\n",
+    "        :,\n",
+    "        comparison_field_names.method_b_non_priority_tract_has_at_least_one_method_a_cbg,\n",
+    "    ].sum()\n",
+    "\n",
+    "    method_b_non_priority_tracts_with_at_least_one_method_a_cbg_percent = f\"{method_b_non_priority_tracts_with_at_least_one_method_a_cbg / method_b_non_priority_tracts_count:.0%}\"\n",
+    "\n",
+    "    method_b_non_priority_tracts_with_100_percent_method_a_cbg = comparison_df.loc[\n",
+    "        :,\n",
+    "        comparison_field_names.method_b_non_priority_tract_has_100_percent_method_a_cbg,\n",
+    "    ].sum()\n",
+    "    method_b_non_priority_tracts_with_100_percent_method_a_cbg_percent = f\"{method_b_non_priority_tracts_with_100_percent_method_a_cbg / method_b_non_priority_tracts_count:.0%}\"\n",
+    "\n",
+    "    # Create markdown content for comparisons.\n",
+    "    markdown_content = f\"\"\"\n",
+    "# {method_a_name} compared to {method_b_name}\n",
+    "\n",
+    "(This report was calculated on {datetime.today().strftime('%Y-%m-%d')}.)\n",
+    "\n",
+    "This report analyzes the following US states and territories: {state_names}.\n",
     "\n",
     "Recall that census tracts contain one or more census block groups, with up to nine census block groups per tract.\n",
     "\n",
-    "There are {{ces_tracts_count}} census tracts designated as Disadvantaged Communities by CalEnviroScreen 4.0, out of {{total_tracts_count}} total tracts ({{ces_tracts_count_percent}}). \n",
+    "Within the geographic area analyzed, there are {method_b_priority_tracts_count} census tracts designated as priority communities by {method_b_name}, out of {total_tracts_count} total tracts ({method_b_priority_tracts_count_percent}). \n",
     "\n",
-    "Within California, there are {{cejst_cbgs_ca_only}} census block groups considered as priority communities by the current version of the CEJST score used in this analysis, out of {{total_cbgs_ca_only}} CBGs in the state ({{cejst_cbgs_ca_only_percent}}). They occupy {{cejst_tracts_count}} ({{cejst_tracts_count_percent}}) of all the census tracts in California.\n",
+    "Within the geographic region analyzed, there are {method_a_priority_cbgs} census block groups considered as priority communities by {method_a_name}, out of {total_cbgs} CBGs ({method_a_priority_cbgs_percent}). They occupy {method_a_tracts_count} census tracts ({method_a_tracts_count_percent}) of the geographic area analyzed.\n",
     "\n",
-    "Out of every CalEnviroScreen Disadvantaged Community census tract, {{at_least_one_sum}} ({{at_least_one_sum_percent}}) of these census tracts have at least one census block group within them that is considered a priority community by the current version of the CEJST score.\n",
+    "Out of every {method_b_name} priority census tract, {method_b_tracts_with_at_least_one_method_a_cbg} ({method_b_tracts_with_at_least_one_method_a_cbg_percent}) of these census tracts have at least one census block group within them that is considered a priority community by {method_a_name}.\n",
     "\n",
-    "Out of every CalEnviroScreen Disadvantaged Community census tract, {{all_100_sum}} ({{all_100_sum_percent}}) of these census tracts have 100% of the included census block groups within them considered priority communities by the current version of the CEJST score.\n",
+    "Out of every {method_b_name} priority census tract, {method_b_tracts_with_at_100_percent_method_a_cbg} ({method_b_tracts_with_at_100_percent_method_a_cbg_percent}) of these census tracts have 100% of the included census block groups within them considered priority communities by {method_a_name}.\n",
     "\n",
-    "Out of every census tract in California that is __not__ marked as a CalEnviroScreen Disadvantaged Community, {{non_ces_at_least_one_sum}} ({{non_ces_at_least_one_sum_percent}}) of these census tracts have at least one census block group within them that is considered a priority community by the current version of the CEJST score.\n",
+    "Out of every census tract that is __not__ marked as a priority community by {method_b_name}, {method_b_non_priority_tracts_with_at_least_one_method_a_cbg} ({method_b_non_priority_tracts_with_at_least_one_method_a_cbg_percent}) of these census tracts have at least one census block group within them that is considered a priority community by the current version of the CEJST score.\n",
     "\n",
-    "Out of every census tract in California that is __not__ marked as a CalEnviroScreen Disadvantaged Community, {{non_ces_all_100_sum}} ({{non_ces_all_100_sum_percent}}) of these census tracts have 100% of the included census block groups within them considered priority communities by the current version of the CEJST score."
+    "Out of every census tract that is __not__ marked as a priority community by {method_b_name}, {method_b_non_priority_tracts_with_100_percent_method_a_cbg} ({method_b_non_priority_tracts_with_100_percent_method_a_cbg_percent}) of these census tracts have 100% of the included census block groups within them considered priority communities by the current version of the CEJST score.\n",
+    "\"\"\"\n",
+    "\n",
+    "    return markdown_content\n",
+    "\n",
+    "\n",
+    "def write_markdown_and_docx_content(\n",
+    "    markdown_content: str, file_dir: pathlib.PosixPath, file_name_without_extension: str\n",
+    ") -> pathlib.PosixPath:\n",
+    "    \"\"\"Write Markdown content to both .md and .docx files.\"\"\"\n",
+    "    # Set the file paths for both files.\n",
+    "    markdown_file_path = file_dir / f\"{file_name_without_extension}.md\"\n",
+    "    docx_file_path = file_dir / f\"{file_name_without_extension}.docx\"\n",
+    "\n",
+    "    # Write the markdown content to file.\n",
+    "    with open(markdown_file_path, \"w\") as text_file:\n",
+    "        text_file.write(markdown_content)\n",
+    "\n",
+    "    # Convert markdown file to Word doc.\n",
+    "    pypandoc.convert_file(\n",
+    "        source_file=str(markdown_file_path),\n",
+    "        to=\"docx\",\n",
+    "        outputfile=str(docx_file_path),\n",
+    "        extra_args=[],\n",
+    "    )\n",
+    "\n",
+    "    return docx_file_path\n",
+    "\n",
+    "\n",
+    "def execute_comparison(\n",
+    "    df: pd.DataFrame,\n",
+    "    method_a_name: str,\n",
+    "    method_b_name: str,\n",
+    "    method_a_priority_census_block_groups_field: str,\n",
+    "    method_b_priority_census_tracts_field: str,\n",
+    "    other_census_tract_fields_to_keep: typing.Optional[typing.List[str]],\n",
+    ") -> pathlib.PosixPath:\n",
+    "    \"\"\"Execute an individual comparison by creating the data frame and writing the report.\n",
+    "\n",
+    "    Args:\n",
+    "      df: a pandas dataframe including the data for this comparison.\n",
+    "      method_a_priority_census_block_groups_field: the name of a boolean column in `df`, such as the CEJST priority\n",
+    "        community field that defines communities at the level of census block groups (CBGs).\n",
+    "      method_b_priority_census_tracts_field: the name of a boolean column in `df`, such as the CalEnviroScreen priority\n",
+    "        community field that defines communities at the level of census tracts.\n",
+    "      other_census_tract_fields_to_keep (optional): a list of field names to preserve at the census tract level\n",
+    "\n",
+    "    Returns:\n",
+    "      comparison_docx_file_path: the path of the .docx comparison report written for this comparison\n",
+    "\n",
+    "    \"\"\"\n",
+    "    comparison_field_names = get_comparison_field_names(\n",
+    "        method_a_name=method_a_name, method_b_name=method_b_name\n",
+    "    )\n",
+    "\n",
+    "    # Create or use a directory for outputs grouped by Method A.\n",
+    "    output_dir = COMPARISON_OUTPUTS_DIR / method_a_name\n",
+    "    output_dir.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "    df_with_only_shared_states = get_df_with_only_shared_states(\n",
+    "        df=df,\n",
+    "        field_a=method_a_priority_census_block_groups_field,\n",
+    "        field_b=method_b_priority_census_tracts_field,\n",
+    "    )\n",
+    "\n",
+    "    comparison_df = get_comparison_df(\n",
+    "        df=df_with_only_shared_states,\n",
+    "        method_a_priority_census_block_groups_field=method_a_priority_census_block_groups_field,\n",
+    "        method_b_priority_census_tracts_field=method_b_priority_census_tracts_field,\n",
+    "        comparison_field_names=comparison_field_names,\n",
+    "        other_census_tract_fields_to_keep=other_census_tract_fields_to_keep,\n",
+    "        output_dir=output_dir,\n",
+    "    )\n",
+    "\n",
+    "    # Choose output path\n",
+    "    file_path = (\n",
+    "        output_dir / f\"Comparison Output - {method_a_name} and {method_b_name}.csv\"\n",
+    "    )\n",
+    "\n",
+    "    # Write comparison to CSV.\n",
+    "    comparison_df.to_csv(\n",
+    "        path_or_buf=file_path,\n",
+    "        na_rep=\"\",\n",
+    "        index=False,\n",
+    "    )\n",
+    "\n",
+    "    markdown_content = get_comparison_markdown_content(\n",
+    "        original_df=df_with_only_shared_states,\n",
+    "        comparison_df=comparison_df,\n",
+    "        comparison_field_names=comparison_field_names,\n",
+    "        method_a_name=method_a_name,\n",
+    "        method_b_name=method_b_name,\n",
+    "        method_a_priority_census_block_groups_field=method_a_priority_census_block_groups_field,\n",
+    "        method_b_priority_census_tracts_field=method_b_priority_census_tracts_field,\n",
+    "    )\n",
+    "\n",
+    "    comparison_docx_file_path = write_markdown_and_docx_content(\n",
+    "        markdown_content=markdown_content,\n",
+    "        file_dir=output_dir,\n",
+    "        file_name_without_extension=f\"Comparison report - {method_a_name} and {method_b_name}\",\n",
+    "    )\n",
+    "\n",
+    "    return comparison_docx_file_path\n",
+    "\n",
+    "\n",
+    "def execute_comparisons(\n",
+    "    df: pd.DataFrame,\n",
+    "    census_block_group_indices: typing.List[Index],\n",
+    "    census_tract_indices: typing.List[Index],\n",
+    "):\n",
+    "    \"\"\"Create multiple comparison reports.\"\"\"\n",
+    "    comparison_docx_file_paths = []\n",
+    "    for cbg_index in census_block_group_indices:\n",
+    "        for census_tract_index in census_tract_indices:\n",
+    "            print(\n",
+    "                f\"Running comparisons for {cbg_index.method_name} against {census_tract_index.method_name}...\"\n",
+    "            )\n",
+    "\n",
+    "            comparison_docx_file_path = execute_comparison(\n",
+    "                df=df,\n",
+    "                method_a_name=cbg_index.method_name,\n",
+    "                method_b_name=census_tract_index.method_name,\n",
+    "                method_a_priority_census_block_groups_field=cbg_index.priority_communities_field,\n",
+    "                method_b_priority_census_tracts_field=census_tract_index.priority_communities_field,\n",
+    "                other_census_tract_fields_to_keep=census_tract_index.other_census_tract_fields_to_keep,\n",
+    "            )\n",
+    "\n",
+    "            comparison_docx_file_paths.append(comparison_docx_file_path)\n",
+    "\n",
+    "    return comparison_docx_file_paths"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "48d9bf6b",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Actually execute the functions\n",
+    "\n",
+    "# # California only\n",
+    "# cal_df = merged_df[merged_df[GEOID_TRACT_FIELD_NAME].astype(str).str[0:2] == \"06\"]\n",
+    "# # cal_df = cal_df[0:1000]\n",
+    "# print(len(cal_df))\n",
+    "\n",
+    "census_block_group_indices = [\n",
+    "    Index(\n",
+    "        method_name=\"Score A\",\n",
+    "        priority_communities_field=\"Score A (top 25th percentile)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    #     Index(\n",
+    "    #         method_name=\"Score B\",\n",
+    "    #         priority_communities_field=\"Score B (top 25th percentile)\",\n",
+    "    #         other_census_tract_fields_to_keep=[],\n",
+    "    #     ),\n",
+    "    Index(\n",
+    "        method_name=\"Score C\",\n",
+    "        priority_communities_field=\"Score C (top 25th percentile)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"Score D\",\n",
+    "        priority_communities_field=\"Score D (top 25th percentile)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    #     Index(\n",
+    "    #         method_name=\"Score E\",\n",
+    "    #         priority_communities_field=\"Score E (top 25th percentile)\",\n",
+    "    #         other_census_tract_fields_to_keep=[],\n",
+    "    #     ),\n",
+    "]\n",
+    "\n",
+    "census_tract_indices = [\n",
+    "    Index(\n",
+    "        method_name=\"CalEnviroScreen 4.0\",\n",
+    "        priority_communities_field=\"calenviroscreen_priority_community\",\n",
+    "        other_census_tract_fields_to_keep=[\n",
+    "            CALENVIROSCREEN_SCORE_FIELD,\n",
+    "            CALENVIROSCREEN_PERCENTILE_FIELD,\n",
+    "        ],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"HUD RECAP\",\n",
+    "        priority_communities_field=\"hud_recap_priority_community\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "file_paths = execute_comparisons(\n",
+    "    df=merged_df,\n",
+    "    census_block_group_indices=census_block_group_indices,\n",
+    "    census_tract_indices=census_tract_indices,\n",
+    ")\n",
+    "\n",
+    "print(file_paths)"
    ]
   }
  ],
diff --git a/score/requirements.txt b/score/requirements.txt
index a3e543b4..3fd8170b 100644
Binary files a/score/requirements.txt and b/score/requirements.txt differ
diff --git a/score/utils.py b/score/utils.py
index 34b4beaa..dea5a3a6 100644
--- a/score/utils.py
+++ b/score/utils.py
@@ -74,3 +74,39 @@ def unzip_file_from_url(
 
     # cleanup temporary file
     os.remove(zip_file_path)
+
+
+def get_excel_column_name(index: int) -> str:
+    """Map a zero-based column index to its Excel column label.
+
+    Excel labels columns in bijective base 26: "A".."Z", then "AA".."ZZ",
+    then "AAA", and so on. E.g., index 0 is "A", index 26 is "AA", and
+    index 95 is "CR".
+
+    The label is computed arithmetically, so every non-negative index is
+    supported rather than only those covered by a fixed lookup table.
+
+    Args:
+        index: Zero-based position of the column.
+
+    Returns:
+        The Excel column label for that position.
+
+    Raises:
+        IndexError: If ``index`` is negative.
+    """
+    if index < 0:
+        raise IndexError(
+            f"Excel column index must be non-negative, got {index}"
+        )
+
+    # Work with a 1-based position: bijective base 26 has no zero digit, so
+    # each step subtracts 1 before splitting off the least-significant letter.
+    remaining = index + 1
+    letters = []
+    while remaining > 0:
+        remaining, offset = divmod(remaining - 1, 26)
+        letters.append(chr(ord("A") + offset))
+
+    # Letters were generated least-significant first, so reverse them.
+    return "".join(reversed(letters))