diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
new file mode 100644
index 00000000..bb66c5c7
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@@ -0,0 +1,4798 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "71c4acd0",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import collections\n",
+ "import functools\n",
+ "import IPython\n",
+ "import itertools\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import pandas as pd\n",
+ "import pathlib\n",
+ "import pypandoc\n",
+ "import requests\n",
+ "import string\n",
+ "import sys\n",
+ "import time\n",
+ "import typing\n",
+ "import us\n",
+ "import zipfile\n",
+ "\n",
+ "from datetime import datetime\n",
+ "from tqdm.notebook import tqdm_notebook\n",
+ "\n",
+ "module_path = os.path.abspath(os.path.join(\"../..\"))\n",
+ "if module_path not in sys.path:\n",
+ " sys.path.append(module_path)\n",
+ "\n",
+ "from data_pipeline.utils import remove_all_from_dir, get_excel_column_name\n",
+ "from data_pipeline.etl.base import ExtractTransformLoad\n",
+ "from data_pipeline.etl.sources.census.etl_utils import get_state_information\n",
+ "from data_pipeline.etl.sources.ejscreen_areas_of_concern.etl import (\n",
+ " EJSCREENAreasOfConcernETL,\n",
+ ")\n",
+ "\n",
+ "\n",
+ "from data_pipeline.score import field_names\n",
+ "\n",
+ "%load_ext lab_black\n",
+ "# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
+ "tqdm_notebook.pandas()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "2ce3170c",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Suppress scientific notation in pandas (this shows up for census tract IDs)\n",
+ "pd.options.display.float_format = \"{:.2f}\".format\n",
+ "\n",
+ "# Set some global parameters\n",
+ "DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
+ "TEMP_DATA_DIR = DATA_DIR / \"tmp\"\n",
+ "\n",
+ "time_str = time.strftime(\"%Y%m%d-%H%M%S\")\n",
+ "\n",
+ "COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\" / time_str\n",
+ "\n",
+ "# Make the dirs if they don't exist\n",
+ "TEMP_DATA_DIR.mkdir(parents=True, exist_ok=True)\n",
+ "COMPARISON_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "CEJST_PRIORITY_COMMUNITY_THRESHOLD = 0.75\n",
+ "\n",
+ "# Name fields using variables. (This makes it easy to reference the same fields frequently without using strings\n",
+ "# and introducing the risk of misspelling the field name.)\n",
+ "GEOID_STATE_FIELD_NAME = \"GEOID10_STATE\"\n",
+ "COUNTRY_FIELD_NAME = \"Country\"\n",
+ "\n",
+ "# Define some suffixes\n",
+ "POPULATION_SUFFIX = \" (priority population)\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8bd39090",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/Cellar/jupyterlab/3.2.8/libexec/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3251: DtypeWarning: Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.\n",
+ " exec(code_obj, self.user_global_ns, self.user_ns)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Persistent Poverty Census Tract | \n",
+ " Housing burden (percent) | \n",
+ " Total population | \n",
+ " Median household income (% of state median household income) | \n",
+ " Current asthma among adults aged greater than or equal to 18 years | \n",
+ " Coronary heart disease among adults aged greater than or equal to 18 years | \n",
+ " Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years | \n",
+ " Current lack of health insurance among adults aged 18-64 years | \n",
+ " Diagnosed diabetes among adults aged greater than or equal to 18 years | \n",
+ " ... | \n",
+ " Score D (top 25th percentile) | \n",
+ " Score D (top 30th percentile) | \n",
+ " Score D (top 35th percentile) | \n",
+ " Score D (top 40th percentile) | \n",
+ " Score E (percentile) | \n",
+ " Score E (top 25th percentile) | \n",
+ " Score E (top 30th percentile) | \n",
+ " Score E (top 35th percentile) | \n",
+ " Score E (top 40th percentile) | \n",
+ " GEOID10_STATE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01073001100 | \n",
+ " True | \n",
+ " 0.28 | \n",
+ " 4897.00 | \n",
+ " 0.73 | \n",
+ " 11.20 | \n",
+ " 7.20 | \n",
+ " 6.70 | \n",
+ " 16.60 | \n",
+ " 19.30 | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " 0.33 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01073001400 | \n",
+ " True | \n",
+ " 0.18 | \n",
+ " 1906.00 | \n",
+ " 0.71 | \n",
+ " 11.10 | \n",
+ " 9.10 | \n",
+ " 7.30 | \n",
+ " 21.40 | \n",
+ " 22.40 | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " True | \n",
+ " 0.73 | \n",
+ " False | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 01073002000 | \n",
+ " False | \n",
+ " 0.44 | \n",
+ " 4215.00 | \n",
+ " 0.54 | \n",
+ " 13.50 | \n",
+ " 9.50 | \n",
+ " 6.10 | \n",
+ " 25.40 | \n",
+ " 22.80 | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 0.93 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 01073003802 | \n",
+ " False | \n",
+ " 0.41 | \n",
+ " 5149.00 | \n",
+ " 0.77 | \n",
+ " 12.00 | \n",
+ " 6.60 | \n",
+ " 5.60 | \n",
+ " 20.90 | \n",
+ " 18.60 | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 0.76 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 01073004000 | \n",
+ " True | \n",
+ " 0.47 | \n",
+ " 2621.00 | \n",
+ " 0.37 | \n",
+ " 13.10 | \n",
+ " 10.00 | \n",
+ " 6.30 | \n",
+ " 24.50 | \n",
+ " 25.00 | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 0.95 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 554 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Persistent Poverty Census Tract Housing burden (percent) \\\n",
+ "0 01073001100 True 0.28 \n",
+ "1 01073001400 True 0.18 \n",
+ "2 01073002000 False 0.44 \n",
+ "3 01073003802 False 0.41 \n",
+ "4 01073004000 True 0.47 \n",
+ "\n",
+ " Total population \\\n",
+ "0 4897.00 \n",
+ "1 1906.00 \n",
+ "2 4215.00 \n",
+ "3 5149.00 \n",
+ "4 2621.00 \n",
+ "\n",
+ " Median household income (% of state median household income) \\\n",
+ "0 0.73 \n",
+ "1 0.71 \n",
+ "2 0.54 \n",
+ "3 0.77 \n",
+ "4 0.37 \n",
+ "\n",
+ " Current asthma among adults aged greater than or equal to 18 years \\\n",
+ "0 11.20 \n",
+ "1 11.10 \n",
+ "2 13.50 \n",
+ "3 12.00 \n",
+ "4 13.10 \n",
+ "\n",
+ " Coronary heart disease among adults aged greater than or equal to 18 years \\\n",
+ "0 7.20 \n",
+ "1 9.10 \n",
+ "2 9.50 \n",
+ "3 6.60 \n",
+ "4 10.00 \n",
+ "\n",
+ " Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years \\\n",
+ "0 6.70 \n",
+ "1 7.30 \n",
+ "2 6.10 \n",
+ "3 5.60 \n",
+ "4 6.30 \n",
+ "\n",
+ " Current lack of health insurance among adults aged 18-64 years \\\n",
+ "0 16.60 \n",
+ "1 21.40 \n",
+ "2 25.40 \n",
+ "3 20.90 \n",
+ "4 24.50 \n",
+ "\n",
+ " Diagnosed diabetes among adults aged greater than or equal to 18 years \\\n",
+ "0 19.30 \n",
+ "1 22.40 \n",
+ "2 22.80 \n",
+ "3 18.60 \n",
+ "4 25.00 \n",
+ "\n",
+ " ... Score D (top 25th percentile) Score D (top 30th percentile) \\\n",
+ "0 ... False False \n",
+ "1 ... False False \n",
+ "2 ... True True \n",
+ "3 ... True True \n",
+ "4 ... True True \n",
+ "\n",
+ " Score D (top 35th percentile) Score D (top 40th percentile) \\\n",
+ "0 False False \n",
+ "1 True True \n",
+ "2 True True \n",
+ "3 True True \n",
+ "4 True True \n",
+ "\n",
+ " Score E (percentile) Score E (top 25th percentile) \\\n",
+ "0 0.33 False \n",
+ "1 0.73 False \n",
+ "2 0.93 True \n",
+ "3 0.76 True \n",
+ "4 0.95 True \n",
+ "\n",
+ " Score E (top 30th percentile) Score E (top 35th percentile) \\\n",
+ "0 False False \n",
+ "1 True True \n",
+ "2 True True \n",
+ "3 True True \n",
+ "4 True True \n",
+ "\n",
+ " Score E (top 40th percentile) GEOID10_STATE \n",
+ "0 False 01 \n",
+ "1 True 01 \n",
+ "2 True 01 \n",
+ "3 True 01 \n",
+ "4 True 01 \n",
+ "\n",
+ "[5 rows x 554 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load CEJST score data\n",
+ "cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa.csv\"\n",
+ "cejst_df = pd.read_csv(\n",
+ " cejst_data_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "# Create the state ID by taking the first two digits of the tract's FIPS code.\n",
+ "# For more information, see https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html.\n",
+ "cejst_df.loc[:, GEOID_STATE_FIELD_NAME] = (\n",
+ " cejst_df.loc[:, ExtractTransformLoad.GEOID_TRACT_FIELD_NAME].astype(str).str[0:2]\n",
+ ")\n",
+ "\n",
+ "cejst_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "a251a0fb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "EJSCREEN areas of concern data does not exist locally. Not attempting to load data into comparison tool.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load EJSCREEN Areas of Concern data.\n",
+ "\n",
+ "# EJSCREEN Areas of Concern (AoC) are loaded only if the source file is available locally.\n",
+ "# Note: this data is provided privately and is not currently publicly available.\n",
+ "# So that this notebook runs whether or not the person running it has access to that data,\n",
+ "# `EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists` checks whether the source file exists.\n",
+ "# If it does exist, the data is loaded into `ejscreen_areas_of_concern_df` below.\n",
+ "# If it does not exist, `ejscreen_areas_of_concern_df` stays `None`, and later code should\n",
+ "# not reference this data.\n",
+ "ejscreen_areas_of_concern_df: pd.DataFrame = None\n",
+ "\n",
+ "if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
+ " print(\"Loading EJSCREEN Areas of Concern data for score pipeline.\")\n",
+ " ejscreen_areas_of_concern_csv = (\n",
+ " DATA_DIR / \"dataset\" / \"ejscreen_areas_of_concern\" / \"usa.csv\"\n",
+ " )\n",
+ " ejscreen_areas_of_concern_df = pd.read_csv(\n",
+ " ejscreen_areas_of_concern_csv,\n",
+ " dtype={ExtractTransformLoad.GEOID_FIELD_NAME: \"string\"},\n",
+ " low_memory=False,\n",
+ " )\n",
+ "else:\n",
+ " print(\n",
+ " \"EJSCREEN areas of concern data does not exist locally. Not attempting to load data into comparison tool.\"\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "e43a9e23",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Persistent Poverty Census Tract | \n",
+ " Housing burden (percent) | \n",
+ " Total population | \n",
+ " Median household income (% of state median household income) | \n",
+ " Current asthma among adults aged greater than or equal to 18 years | \n",
+ " Coronary heart disease among adults aged greater than or equal to 18 years | \n",
+ " Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years | \n",
+ " Current lack of health insurance among adults aged 18-64 years | \n",
+ " Diagnosed diabetes among adults aged greater than or equal to 18 years | \n",
+ " ... | \n",
+ " Score D (top 25th percentile) | \n",
+ " Score D (top 30th percentile) | \n",
+ " Score D (top 35th percentile) | \n",
+ " Score D (top 40th percentile) | \n",
+ " Score E (percentile) | \n",
+ " Score E (top 25th percentile) | \n",
+ " Score E (top 30th percentile) | \n",
+ " Score E (top 35th percentile) | \n",
+ " Score E (top 40th percentile) | \n",
+ " GEOID10_STATE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01073001100 | \n",
+ " True | \n",
+ " 0.28 | \n",
+ " 4897.00 | \n",
+ " 0.73 | \n",
+ " 11.20 | \n",
+ " 7.20 | \n",
+ " 6.70 | \n",
+ " 16.60 | \n",
+ " 19.30 | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " 0.33 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01073001400 | \n",
+ " True | \n",
+ " 0.18 | \n",
+ " 1906.00 | \n",
+ " 0.71 | \n",
+ " 11.10 | \n",
+ " 9.10 | \n",
+ " 7.30 | \n",
+ " 21.40 | \n",
+ " 22.40 | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " True | \n",
+ " 0.73 | \n",
+ " False | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 01073002000 | \n",
+ " False | \n",
+ " 0.44 | \n",
+ " 4215.00 | \n",
+ " 0.54 | \n",
+ " 13.50 | \n",
+ " 9.50 | \n",
+ " 6.10 | \n",
+ " 25.40 | \n",
+ " 22.80 | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 0.93 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 01073003802 | \n",
+ " False | \n",
+ " 0.41 | \n",
+ " 5149.00 | \n",
+ " 0.77 | \n",
+ " 12.00 | \n",
+ " 6.60 | \n",
+ " 5.60 | \n",
+ " 20.90 | \n",
+ " 18.60 | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 0.76 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 01073004000 | \n",
+ " True | \n",
+ " 0.47 | \n",
+ " 2621.00 | \n",
+ " 0.37 | \n",
+ " 13.10 | \n",
+ " 10.00 | \n",
+ " 6.30 | \n",
+ " 24.50 | \n",
+ " 25.00 | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 0.95 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " 01 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 554 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Persistent Poverty Census Tract Housing burden (percent) \\\n",
+ "0 01073001100 True 0.28 \n",
+ "1 01073001400 True 0.18 \n",
+ "2 01073002000 False 0.44 \n",
+ "3 01073003802 False 0.41 \n",
+ "4 01073004000 True 0.47 \n",
+ "\n",
+ " Total population \\\n",
+ "0 4897.00 \n",
+ "1 1906.00 \n",
+ "2 4215.00 \n",
+ "3 5149.00 \n",
+ "4 2621.00 \n",
+ "\n",
+ " Median household income (% of state median household income) \\\n",
+ "0 0.73 \n",
+ "1 0.71 \n",
+ "2 0.54 \n",
+ "3 0.77 \n",
+ "4 0.37 \n",
+ "\n",
+ " Current asthma among adults aged greater than or equal to 18 years \\\n",
+ "0 11.20 \n",
+ "1 11.10 \n",
+ "2 13.50 \n",
+ "3 12.00 \n",
+ "4 13.10 \n",
+ "\n",
+ " Coronary heart disease among adults aged greater than or equal to 18 years \\\n",
+ "0 7.20 \n",
+ "1 9.10 \n",
+ "2 9.50 \n",
+ "3 6.60 \n",
+ "4 10.00 \n",
+ "\n",
+ " Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years \\\n",
+ "0 6.70 \n",
+ "1 7.30 \n",
+ "2 6.10 \n",
+ "3 5.60 \n",
+ "4 6.30 \n",
+ "\n",
+ " Current lack of health insurance among adults aged 18-64 years \\\n",
+ "0 16.60 \n",
+ "1 21.40 \n",
+ "2 25.40 \n",
+ "3 20.90 \n",
+ "4 24.50 \n",
+ "\n",
+ " Diagnosed diabetes among adults aged greater than or equal to 18 years \\\n",
+ "0 19.30 \n",
+ "1 22.40 \n",
+ "2 22.80 \n",
+ "3 18.60 \n",
+ "4 25.00 \n",
+ "\n",
+ " ... Score D (top 25th percentile) Score D (top 30th percentile) \\\n",
+ "0 ... False False \n",
+ "1 ... False False \n",
+ "2 ... True True \n",
+ "3 ... True True \n",
+ "4 ... True True \n",
+ "\n",
+ " Score D (top 35th percentile) Score D (top 40th percentile) \\\n",
+ "0 False False \n",
+ "1 True True \n",
+ "2 True True \n",
+ "3 True True \n",
+ "4 True True \n",
+ "\n",
+ " Score E (percentile) Score E (top 25th percentile) \\\n",
+ "0 0.33 False \n",
+ "1 0.73 False \n",
+ "2 0.93 True \n",
+ "3 0.76 True \n",
+ "4 0.95 True \n",
+ "\n",
+ " Score E (top 30th percentile) Score E (top 35th percentile) \\\n",
+ "0 False False \n",
+ "1 True True \n",
+ "2 True True \n",
+ "3 True True \n",
+ "4 True True \n",
+ "\n",
+ " Score E (top 40th percentile) GEOID10_STATE \n",
+ "0 False 01 \n",
+ "1 True 01 \n",
+ "2 True 01 \n",
+ "3 True 01 \n",
+ "4 True 01 \n",
+ "\n",
+ "[5 rows x 554 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Merge EJSCREEN AoCs into CEJST data.\n",
+ "# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
+ "if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
+ " # Intended behavior: if available, merge EJSCREEN AoC data into `cejst_df`.\n",
+ " # TODO: When we get AoC data at the tract level, re-enable the merge below.\n",
+ " # It is commented out for now to avoid merging CBG-level (census block group) areas of concern onto the tract-level CEJST data.\n",
+ " # cejst_df = cejst_df.merge(\n",
+ " # ejscreen_areas_of_concern_df, on=GEOID_FIELD_NAME, how=\"outer\"\n",
+ " # )\n",
+ " pass\n",
+ "else:\n",
+ " pass\n",
+ "\n",
+ "cejst_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "38c0dc2f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "~~~~Analysis for field `Percent enrollment in college or graduate school`~~~~\n",
+ "count 73298.00\n",
+ "mean 0.08\n",
+ "std 0.09\n",
+ "min 0.00\n",
+ "25% 0.04\n",
+ "50% 0.06\n",
+ "75% 0.09\n",
+ "max 1.00\n",
+ "Name: Percent enrollment in college or graduate school, dtype: float64\n",
+ "\n",
+ "There are 1.16% of values missing.\n",
+ "\n",
+ "Quantile at 0.9 is 0.12186292304275304\n",
+ "AxesSubplot(0.125,0.125;0.775x0.755)\n",
+ "\n",
+ "~~~~Analysis for field `Percent individuals age 25 or over with less than high school degree`~~~~\n",
+ "count 73280.00\n",
+ "mean 0.13\n",
+ "std 0.10\n",
+ "min 0.00\n",
+ "25% 0.05\n",
+ "50% 0.10\n",
+ "75% 0.17\n",
+ "max 1.00\n",
+ "Name: Percent individuals age 25 or over with less than high school degree, dtype: float64\n",
+ "\n",
+ "There are 1.19% of values missing.\n",
+ "\n",
+ "Quantile at 0.9 is 0.2693215167829206\n",
+ "AxesSubplot(0.125,0.125;0.775x0.755)\n",
+ "\n",
+ "~~~~Analysis for field `Median household income as a percent of area median income`~~~~\n",
+ "count 68232.00\n",
+ "mean 1.01\n",
+ "std 0.43\n",
+ "min 0.04\n",
+ "25% 0.72\n",
+ "50% 0.94\n",
+ "75% 1.21\n",
+ "max 4.46\n",
+ "Name: Median household income as a percent of area median income, dtype: float64\n",
+ "\n",
+ "There are 7.99% of values missing.\n",
+ "\n",
+ "Quantile at 0.9 is 1.5445838485220498\n",
+ "AxesSubplot(0.125,0.125;0.775x0.755)\n",
+ "\n",
+ "~~~~Analysis for field `Percent of individuals below 200% Federal Poverty Line`~~~~\n",
+ "count 73149.00\n",
+ "mean 0.33\n",
+ "std 0.19\n",
+ "min 0.00\n",
+ "25% 0.18\n",
+ "50% 0.30\n",
+ "75% 0.45\n",
+ "max 1.00\n",
+ "Name: Percent of individuals below 200% Federal Poverty Line, dtype: float64\n",
+ "\n",
+ "There are 1.36% of values missing.\n",
+ "\n",
+ "Quantile at 0.9 is 0.6000566482486579\n",
+ "AxesSubplot(0.125,0.125;0.775x0.755)\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAT4klEQVR4nO3df4xd5X3n8fcn5kdQKAEKO0tstEaKRUpBEDICV1mtZkEFQ6oaqSkCpcGK2FjrEESlaLuw/3hJijbVKg0homit4MW0LBSljbAiUq9FuKoq1QRoKA44iFkCsi2ItzE/OomSyNnv/jGP1TueO/b19czc+fF+SVdzzvc859znPrbn43Puc89NVSFJWt7eN+wOSJKGzzCQJBkGkiTDQJKEYSBJAk4adgcGdc4559Tq1asH2venP/0pH/jAB2a3Q4uY4zGV4zGV4zHdYh2T559//p+q6txe2xZtGKxevZrnnntuoH07nQ5jY2Oz26FFzPGYyvGYyvGYbrGOSZI3ZtrmZSJJkmEgSTIMJEkYBpIkDANJEn2GQZIzk3wzyQ+T7EnyW0nOTrIzyavt51mtbZLcl2Q8yYtJLu86zobW/tUkG7rqH0uyu+1zX5LM/kuVJM2k3zODrwF/U1UfAS4F9gB3Ak9V1RrgqbYOcB2wpj02Ag8AJDkb2AxcCVwBbD4cIK3NZ7v2W3diL0uSdDyOGQZJPgj8O+BBgKr6ZVW9A6wHtrVm24Ab2vJ64OGatAs4M8l5wLXAzqo6WFVvAzuBdW3bGVW1qybvp/1w17EkSfOgnw+dXQD8X+B/JrkUeB64Axipqjdbm7eAkba8Etjbtf++VjtafV+P+jRJNjJ5tsHIyAidTqeP7k83MTEx8L5LkeMxleMxleMx3VIck37C4CTgcuD2qnomydf4l0tCAFRVJZnzb8mpqi3AFoDR0dEa9BOAX3/kCb7ydz/tue31L39i0O4tWov105RzxfGYyvGYbimOST/vGewD9lXVM239m0yGw4/bJR7azwNt+37g/K79V7Xa0eqretQlSfPkmGFQVW8Be5Nc2EpXAy8D24HDM4I2AE+05e3ALW1W0Vrg3XY5aQdwTZKz2hvH1wA72rb3kqxts4hu6TqWJGke9HujutuBR5KcArwGfIbJIHk8ya3AG8CNre2TwPXAOPCz1paqOpjkS8Czrd0Xq+pgW/4c8BBwGvCd9pAkzZO+wqCqXgBGe2y6ukfbAm6b4Thbga096s8BF/fTF0nS7PMTyJIkw0CSZBhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAk0WcYJHk9ye4kLyR5rtXOTrIzyavt51mtniT3JRlP8mKSy7uOs6G1fzXJhq76x9rxx9u+me0XKkma2fGcGfz7qrqsqkbb+p3AU1W1BniqrQNcB6xpj43AAzAZHsBm4ErgCmDz4QBpbT7btd+6gV+RJOm4nchlovXAtra8Dbihq/5wTdoFnJnkPOBaYGdVHayqt4GdwLq27Yyq2lVVBTzcdSxJ0jw4qc92BfzvJAX8j6raAoxU1Ztt+1vASFteCezt2ndfqx2tvq9HfZokG5k822BkZIROp9Nn96caOQ2+cMmhntsGPeZiNjExsSxf90wcj6kcj+mW4pj0Gwb/tqr2J/lXwM4kP+zeWFXVgmJOtRDaAjA6OlpjY2MDHefrjzzBV3b3fumvf2qwYy5mnU6HQcdyKXI8pnI8pluKY9LXZaKq2t9+HgC+xeQ1/x+3Szy0nwda8/3A+V27r2q1o9VX9ahLkubJMcMgyQeS/NrhZeAa4AfAduDwjKANwBNteTtwS5tVtBZ4t11O2gFck+Ss9sbxNcCOtu29JGvbLKJbuo4lSZoH/VwmGgG+1WZ7ngT8r6r6myTPAo8nuRV4A7ix
tX8SuB4YB34GfAagqg4m+RLwbGv3xao62JY/BzwEnAZ8pz0kSfPkmGFQVa8Bl/ao/wS4uke9gNtmONZWYGuP+nPAxX30V5I0B/wEsiTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJI4jjBIsiLJ95N8u61fkOSZJONJ/jLJKa1+alsfb9tXdx3jrlZ/Jcm1XfV1rTae5M5ZfH2SpD4cz5nBHcCervU/Ab5aVR8G3gZubfVbgbdb/autHUkuAm4CfhNYB/xZC5gVwP3AdcBFwM2trSRpnvQVBklWAZ8AvtHWA1wFfLM12Qbc0JbXt3Xa9qtb+/XAY1X1i6r6ETAOXNEe41X1WlX9EnistZUkzZOT+mx3L/BHwK+19V8H3qmqQ219H7CyLa8E9gJU1aEk77b2K4FdXcfs3mfvEfUre3UiyUZgI8DIyAidTqfP7k81chp84ZJDPbcNeszFbGJiYlm+7pk4HlM5HtMtxTE5Zhgk+R3gQFU9n2Rsznt0FFW1BdgCMDo6WmNjg3Xn6488wVd2937pr39qsGMuZp1Oh0HHcilyPKZyPKZbimPSz5nBx4HfTXI98H7gDOBrwJlJTmpnB6uA/a39fuB8YF+Sk4APAj/pqh/Wvc9MdUnSPDjmewZVdVdVraqq1Uy+AfzdqvoU8DTwydZsA/BEW97e1mnbv1tV1eo3tdlGFwBrgO8BzwJr2uykU9pzbJ+VVydJ6ku/7xn08p+Bx5L8MfB94MFWfxD48yTjwEEmf7lTVS8leRx4GTgE3FZVvwJI8nlgB7AC2FpVL51AvyRJx+m4wqCqOkCnLb/G5EygI9v8HPj9Gfa/B7inR/1J4Mnj6Yskafb4CWRJkmEgSTIMJEkYBpIkDANJEic2tXTRuuR9P+L192+eYeu789oXSVoIPDOQJBkGkiTDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEmijzBI8v4k30vyj0leSnJ3q1+Q5Jkk40n+MskprX5qWx9v21d3HeuuVn8lybVd9XWtNp7kzjl4nZKko+jnzOAXwFVVdSlwGbAuyVrgT4CvVtWHgbeBW1v7W4G3W/2rrR1JLgJuAn4TWAf8WZIVSVYA9wPXARcBN7e2kqR5cswwqEkTbfXk9ijgKuCbrb4NuKEtr2/rtO1XJ0mrP1ZVv6iqHwHjwBXtMV5Vr1XVL4HHWltJ0jw5qZ9G7X/vzwMfZvJ/8f8HeKeqDrUm+4CVbXklsBegqg4leRf49Vbf1XXY7n32HlG/coZ+bAQ2AoyMjNDpdPrp/jQTp36IzoV399444DEXs4mJiYHHcilyPKZyPKZbimPSVxhU1a+Ay5KcCXwL+Mhcduoo/dgCbAEYHR2tsbGxgY7TefRexl7Z3Hvjze8O2LvFq9PpMOhYLkWOx1SOx3RLcUyOazZRVb0DPA38FnBmksNhsgrY35b3A+cDtO0fBH7SXT9in5nqkqR50s9sonPbGQFJTgN+G9jDZCh8sjXbADzRlre3ddr271ZVtfpNbbbRBcAa4HvAs8CaNjvpFCbfZN4+C69NktSnfi4TnQdsa+8bvA94vKq+neRl4LEkfwx8H3iwtX8Q+PMk48BBJn+5U1UvJXkceBk4BNzWLj+R5PPADmAFsLWqXpq1VyhJOqZjhkFVvQh8tEf9NSZnAh1Z/znw+zMc6x7gnh71J4En++jvknHJtkuG8ry7N+weyvNKWtj8BLIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJos/vQNbS0et7FDadvonbt90+p8/r9yhIC5tnBpIkw0CSZBhIkjAMJEkYBpIkDANJ
Ek4tne6/fnCG+rvz2w9JmkeeGUiSDANJUh9hkOT8JE8neTnJS0nuaPWzk+xM8mr7eVarJ8l9ScaTvJjk8q5jbWjtX02yoav+sSS72z73JclcvFhJUm/9nBkcAr5QVRcBa4HbklwE3Ak8VVVrgKfaOsB1wJr22Ag8AJPhAWwGrgSuADYfDpDW5rNd+6078ZcmSerXMcOgqt6sqn9oy/8M7AFWAuuBba3ZNuCGtrweeLgm7QLOTHIecC2ws6oOVtXbwE5gXdt2RlXtqqoCHu46liRpHhzXewZJVgMfBZ4BRqrqzbbpLWCkLa8E9nbttq/Vjlbf16MuSZonfU8tTXI68FfAH1bVe92X9auqktQc9O/IPmxk8tITIyMjdDqdgY4zceqH6Fx49/HtNOBzzWTT6Ztm9Xgn4twV5855fwb9sxqGiYmJRdXfueZ4TLcUx6SvMEhyMpNB8EhV/XUr/zjJeVX1ZrvUc6DV9wPnd+2+qtX2A2NH1DutvqpH+2mqaguwBWB0dLTGxsZ6NTumzqP3MvbK5uPb6ebZ/ZzBXN8y+nhsOn0TD0w8MKfPsfv3Fs8trDudDoP+3VqKHI/pluKY9DObKMCDwJ6q+tOuTduBwzOCNgBPdNVvabOK1gLvtstJO4BrkpzV3ji+BtjRtr2XZG17rlu6jiVJmgf9nBl8HPg0sDvJC632X4AvA48nuRV4A7ixbXsSuB4YB34GfAagqg4m+RLwbGv3xao62JY/BzwEnAZ8pz0kSfPkmGFQVX8HzDTv/+oe7Qu4bYZjbQW29qg/B1x8rL5IkuaGn0CWJBkGkiTDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CSxHF8n8Fyt+cjvzHjtt/44Z557IkkzT7PDCRJhoEkyctE0+x57EPD7oIkzTvPDCRJhoEkyTCQJOF7BrPCaaeSFjvPDCRJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRJ9hEGSrUkOJPlBV+3sJDuTvNp+ntXqSXJfkvEkLya5vGufDa39q0k2dNU/lmR32+e+JJntFylJOrp+zgweAtYdUbsTeKqq1gBPtXWA64A17bEReAAmwwPYDFwJXAFsPhwgrc1nu/Y78rkkSXPsmGFQVX8LHDyivB7Y1pa3ATd01R+uSbuAM5OcB1wL7Kyqg1X1NrATWNe2nVFVu6qqgIe7jiVJmieD3o5ipKrebMtvASNteSWwt6vdvlY7Wn1fj3pPSTYyecbByMgInU5noM5PnPohOhfe3XPbz28/eaBjzuSN++/vWf/v/Ede+9cL44rYuSvOZdPpm+b0OQb9sxqGiYmJRdXfueZ4TLcUx+SE701UVZWkZqMzfTzXFmALwOjoaI2NjQ10nM6j9zL2yuae2+bz+wz+010L49ZQm07fxAMTD8zpc+z+vd1zevzZ1Ol0GPTv1lLkeEy3FMdk0NlEP26XeGg/D7T6fuD8rnarWu1o9VU96pKkeTRoGGwHDs8I2gA80VW/pc0qWgu82y4n7QCuSXJWe+P4GmBH2/ZekrVtFtEtXceSJM2TY16nSPIoMAack2Qfk7OCvgw8nuRW4A3gxtb8SeB6YBz4GfAZgKo6mORLwLOt3Rer6vCb0p9jcsbSacB32kOSNI+OGQZVdfMMm67u0baA22Y4zlZga4/6c8DFx+qHJGnu+AlkSZJhIEnyay81Ty7ZdslQnnf3hsUzpVUaJs8MJEmGgSTJMJAk4XsGQ/X4fzs047YbF8itKiQtD8vyN87PD548r/cgkqSFzstEkiTDQJJkGEiSMAwkSRgGkiQMA0kSy3Rq6WLgZxAkzSfPDCRJhoEkyTCQJGEYSJIwDCRJGAaSJJxaqiVukK/b3HT6Jm7fdvsJPa9ft6nFxjBYhGb6DIKfP5A0KC8TSZIMA0mSYSBJYgG9Z5BkHfA1YAXwjar68pC7tOh4PyNJg1oQvyGSrADuB34b2Ac8m2R7Vb083J5JgxlkFtNscSaTBrEgwgC4
AhivqtcAkjwGrAcMg1lytLOGp++Zx45ozs12EM3GVNu5ZPjNjlTVsPtAkk8C66rqP7T1TwNXVtXnj2i3EdjYVi8EXhnwKc8B/mnAfZcix2Mqx2Mqx2O6xTom/6aqzu21YaGcGfSlqrYAW070OEmeq6rRWejSkuB4TOV4TOV4TLcUx2ShzCbaD5zftb6q1SRJ82ChhMGzwJokFyQ5BbgJ2D7kPknSsrEgLhNV1aEknwd2MDm1dGtVvTSHT3nCl5qWGMdjKsdjKsdjuiU3JgviDWRJ0nAtlMtEkqQhMgwkScsrDJKsS/JKkvEkdw67P8OWZGuSA0l+MOy+LARJzk/ydJKXk7yU5I5h92mYkrw/yfeS/GMbj7uH3aeFIMmKJN9P8u1h92U2LZsw6LrlxXXARcDNSS4abq+G7iFg3bA7sYAcAr5QVRcBa4HblvnfkV8AV1XVpcBlwLoka4fbpQXhDmDPsDsx25ZNGNB1y4uq+iVw+JYXy1ZV/S1wcNj9WCiq6s2q+oe2/M9M/oNfOdxeDU9NmmirJ7fHsp5xkmQV8AngG8Puy2xbTmGwEtjbtb6PZfwPXUeXZDXwUeCZIXdlqNolkReAA8DOqlrW4wHcC/wR8P+G3I9Zt5zCQOpLktOBvwL+sKreG3Z/hqmqflVVlzF5V4Arklw85C4NTZLfAQ5U1fPD7stcWE5h4C0vdExJTmYyCB6pqr8edn8Wiqp6B3ia5f0e08eB303yOpOXma9K8hfD7dLsWU5h4C0vdFRJAjwI7KmqPx12f4YtyblJzmzLpzH5fSM/HGqnhqiq7qqqVVW1msnfH9+tqj8YcrdmzbIJg6o6BBy+5cUe4PE5vuXFgpfkUeDvgQuT7Ety67D7NGQfBz7N5P/4XmiP64fdqSE6D3g6yYtM/mdqZ1UtqemU+hfejkKStHzODCRJMzMMJEmGgSTJMJAkYRhIkjAMJEkYBpIk4P8DGslSAnwyOSMAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Analyze one field at a time (useful for setting thresholds)\n",
+ "\n",
+ "quantile = 0.90\n",
+ "\n",
+ "for field in [\n",
+ " field_names.COLLEGE_ATTENDANCE_FIELD,\n",
+ " field_names.HIGH_SCHOOL_ED_FIELD,\n",
+ " field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n",
+ " field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n",
+ "]:\n",
+ " print(f\"\\n~~~~Analysis for field `{field}`~~~~\")\n",
+ " print(cejst_df[field].describe())\n",
+ " print(\n",
+ " f\"\\nThere are {cejst_df[field].isnull().sum() * 100 / len(cejst_df):.2f}% of values missing.\"\n",
+ " )\n",
+ " print(\n",
+ " f\"\\nQuantile at {quantile} is {np.nanquantile(a=cejst_df[field], q=quantile)}\"\n",
+ " )\n",
+ " print(cejst_df[field].hist())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "8c3e462c",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Total Population | \n",
+ " California County | \n",
+ " ZIP | \n",
+ " Nearby City \\n(to help approximate location only) | \n",
+ " Longitude | \n",
+ " Latitude | \n",
+ " calenviroscreen_score | \n",
+ " calenviroscreen_percentile | \n",
+ " DRAFT CES 4.0\\nPercentile Range | \n",
+ " ... | \n",
+ " Poverty | \n",
+ " Poverty Pctl | \n",
+ " Unemployment | \n",
+ " Unemployment Pctl | \n",
+ " Housing Burden | \n",
+ " Housing Burden Pctl | \n",
+ " Pop. Char. | \n",
+ " Pop. Char. Score | \n",
+ " Pop. Char. Pctl | \n",
+ " calenviroscreen_priority_community | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 06019001100 | \n",
+ " 2760 | \n",
+ " Fresno | \n",
+ " 93706 | \n",
+ " Fresno | \n",
+ " -119.78 | \n",
+ " 36.71 | \n",
+ " 94.61 | \n",
+ " 100.00 | \n",
+ " 95-100% (highest scores) | \n",
+ " ... | \n",
+ " 76.60 | \n",
+ " 98.43 | \n",
+ " 16.20 | \n",
+ " 97.15 | \n",
+ " 30.70 | \n",
+ " 90.61 | \n",
+ " 93.73 | \n",
+ " 9.72 | \n",
+ " 99.87 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 06077000700 | \n",
+ " 4177 | \n",
+ " San Joaquin | \n",
+ " 95206 | \n",
+ " Stockton | \n",
+ " -121.29 | \n",
+ " 37.94 | \n",
+ " 90.83 | \n",
+ " 99.99 | \n",
+ " 95-100% (highest scores) | \n",
+ " ... | \n",
+ " 70.60 | \n",
+ " 96.43 | \n",
+ " 18.50 | \n",
+ " 98.45 | \n",
+ " 35.20 | \n",
+ " 95.61 | \n",
+ " 93.40 | \n",
+ " 9.68 | \n",
+ " 99.84 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 06077000100 | \n",
+ " 4055 | \n",
+ " San Joaquin | \n",
+ " 95202 | \n",
+ " Stockton | \n",
+ " -121.29 | \n",
+ " 37.95 | \n",
+ " 85.75 | \n",
+ " 99.97 | \n",
+ " 95-100% (highest scores) | \n",
+ " ... | \n",
+ " 81.80 | \n",
+ " 99.50 | \n",
+ " 17.90 | \n",
+ " 98.17 | \n",
+ " 36.40 | \n",
+ " 96.51 | \n",
+ " 95.71 | \n",
+ " 9.92 | \n",
+ " 99.97 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 06071001600 | \n",
+ " 5527 | \n",
+ " San Bernardino | \n",
+ " 91761 | \n",
+ " Ontario | \n",
+ " -117.62 | \n",
+ " 34.06 | \n",
+ " 83.56 | \n",
+ " 99.96 | \n",
+ " 95-100% (highest scores) | \n",
+ " ... | \n",
+ " 67.10 | \n",
+ " 94.82 | \n",
+ " 6.70 | \n",
+ " 57.20 | \n",
+ " 32.10 | \n",
+ " 92.65 | \n",
+ " 80.59 | \n",
+ " 8.36 | \n",
+ " 93.06 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 06037204920 | \n",
+ " 2639 | \n",
+ " Los Angeles | \n",
+ " 90023 | \n",
+ " Los Angeles | \n",
+ " -118.20 | \n",
+ " 34.02 | \n",
+ " 82.90 | \n",
+ " 99.95 | \n",
+ " 95-100% (highest scores) | \n",
+ " ... | \n",
+ " 64.90 | \n",
+ " 93.51 | \n",
+ " 5.60 | \n",
+ " 43.81 | \n",
+ " 25.00 | \n",
+ " 77.95 | \n",
+ " 83.95 | \n",
+ " 8.70 | \n",
+ " 95.78 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 59 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Total Population California County ZIP \\\n",
+ "0 06019001100 2760 Fresno 93706 \n",
+ "1 06077000700 4177 San Joaquin 95206 \n",
+ "2 06077000100 4055 San Joaquin 95202 \n",
+ "3 06071001600 5527 San Bernardino 91761 \n",
+ "4 06037204920 2639 Los Angeles 90023 \n",
+ "\n",
+ " Nearby City \\n(to help approximate location only) Longitude Latitude \\\n",
+ "0 Fresno -119.78 36.71 \n",
+ "1 Stockton -121.29 37.94 \n",
+ "2 Stockton -121.29 37.95 \n",
+ "3 Ontario -117.62 34.06 \n",
+ "4 Los Angeles -118.20 34.02 \n",
+ "\n",
+ " calenviroscreen_score calenviroscreen_percentile \\\n",
+ "0 94.61 100.00 \n",
+ "1 90.83 99.99 \n",
+ "2 85.75 99.97 \n",
+ "3 83.56 99.96 \n",
+ "4 82.90 99.95 \n",
+ "\n",
+ " DRAFT CES 4.0\\nPercentile Range ... Poverty Poverty Pctl Unemployment \\\n",
+ "0 95-100% (highest scores) ... 76.60 98.43 16.20 \n",
+ "1 95-100% (highest scores) ... 70.60 96.43 18.50 \n",
+ "2 95-100% (highest scores) ... 81.80 99.50 17.90 \n",
+ "3 95-100% (highest scores) ... 67.10 94.82 6.70 \n",
+ "4 95-100% (highest scores) ... 64.90 93.51 5.60 \n",
+ "\n",
+ " Unemployment Pctl Housing Burden Housing Burden Pctl Pop. Char. \\\n",
+ "0 97.15 30.70 90.61 93.73 \n",
+ "1 98.45 35.20 95.61 93.40 \n",
+ "2 98.17 36.40 96.51 95.71 \n",
+ "3 57.20 32.10 92.65 80.59 \n",
+ "4 43.81 25.00 77.95 83.95 \n",
+ "\n",
+ " Pop. Char. Score Pop. Char. Pctl calenviroscreen_priority_community \n",
+ "0 9.72 99.87 True \n",
+ "1 9.68 99.84 True \n",
+ "2 9.92 99.97 True \n",
+ "3 8.36 93.06 True \n",
+ "4 8.70 95.78 True \n",
+ "\n",
+ "[5 rows x 59 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load CalEnviroScreen 4.0\n",
+ "CALENVIROSCREEN_SCORE_FIELD = \"calenviroscreen_score\"\n",
+ "CALENVIROSCREEN_PERCENTILE_FIELD = \"calenviroscreen_percentile\"\n",
+ "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD = \"calenviroscreen_priority_community\"\n",
+ "\n",
+ "calenviroscreen_data_path = DATA_DIR / \"dataset\" / \"calenviroscreen4\" / \"data06.csv\"\n",
+ "calenviroscreen_df = pd.read_csv(\n",
+ " calenviroscreen_data_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "# Convert priority community field to a bool.\n",
+ "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD] = calenviroscreen_df[\n",
+ " CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD\n",
+ "].astype(bool)\n",
+ "\n",
+ "calenviroscreen_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "b1ac2854-80c8-42a8-85e8-84c5684bbe43",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Mapping for Environmental Justice Final Percentile | \n",
+ " Mapping for Environmental Justice Final Score | \n",
+ " Mapping for Environmental Justice Priority Community | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 51097950400 | \n",
+ " 48.12 | \n",
+ " 22.70 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 51550020001 | \n",
+ " 46.20 | \n",
+ " 22.17 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 51041100206 | \n",
+ " 72.51 | \n",
+ " 32.90 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 51059481000 | \n",
+ " 74.87 | \n",
+ " 34.25 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 51059451400 | \n",
+ " 96.41 | \n",
+ " 57.83 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Mapping for Environmental Justice Final Percentile \\\n",
+ "0 51097950400 48.12 \n",
+ "1 51550020001 46.20 \n",
+ "2 51041100206 72.51 \n",
+ "3 51059481000 74.87 \n",
+ "4 51059451400 96.41 \n",
+ "\n",
+ " Mapping for Environmental Justice Final Score \\\n",
+ "0 22.70 \n",
+ "1 22.17 \n",
+ "2 32.90 \n",
+ "3 34.25 \n",
+ "4 57.83 \n",
+ "\n",
+ " Mapping for Environmental Justice Priority Community \n",
+ "0 False \n",
+ "1 False \n",
+ "2 False \n",
+ "3 False \n",
+ "4 True "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load Mapping for EJ data\n",
+ "mapping_for_ej_path = DATA_DIR / \"dataset\" / \"mapping_for_ej\" / \"co_va.csv\"\n",
+ "\n",
+ "mapping_for_ej_df = pd.read_csv(\n",
+ " mapping_for_ej_path,\n",
+ " dtype={\n",
+ " ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\",\n",
+ " field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD: \"bool\",\n",
+ " },\n",
+ ")\n",
+ "\n",
+ "mapping_for_ej_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d8ec43dc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Individuals in Poverty (percent) (1990) | \n",
+ " Individuals in Poverty (percent) (2000) | \n",
+ " Individuals in Poverty (percent) (2010) | \n",
+ " Persistent Poverty, Tract Level | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01001020100 | \n",
+ " 0.10 | \n",
+ " 0.13 | \n",
+ " 0.09 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01001020200 | \n",
+ " 0.20 | \n",
+ " 0.23 | \n",
+ " 0.11 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 01001020300 | \n",
+ " 0.11 | \n",
+ " 0.08 | \n",
+ " 0.12 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 01001020400 | \n",
+ " 0.07 | \n",
+ " 0.05 | \n",
+ " 0.03 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 01001020500 | \n",
+ " 0.06 | \n",
+ " 0.04 | \n",
+ " 0.08 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 73071 | \n",
+ " 55079990000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 73072 | \n",
+ " 55083990000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 73073 | \n",
+ " 55089990000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 73074 | \n",
+ " 55101990000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 73075 | \n",
+ " 55117990000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
73076 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Individuals in Poverty (percent) (1990) \\\n",
+ "0 01001020100 0.10 \n",
+ "1 01001020200 0.20 \n",
+ "2 01001020300 0.11 \n",
+ "3 01001020400 0.07 \n",
+ "4 01001020500 0.06 \n",
+ "... ... ... \n",
+ "73071 55079990000 NaN \n",
+ "73072 55083990000 NaN \n",
+ "73073 55089990000 NaN \n",
+ "73074 55101990000 NaN \n",
+ "73075 55117990000 NaN \n",
+ "\n",
+ " Individuals in Poverty (percent) (2000) \\\n",
+ "0 0.13 \n",
+ "1 0.23 \n",
+ "2 0.08 \n",
+ "3 0.05 \n",
+ "4 0.04 \n",
+ "... ... \n",
+ "73071 NaN \n",
+ "73072 NaN \n",
+ "73073 NaN \n",
+ "73074 NaN \n",
+ "73075 NaN \n",
+ "\n",
+ " Individuals in Poverty (percent) (2010) \\\n",
+ "0 0.09 \n",
+ "1 0.11 \n",
+ "2 0.12 \n",
+ "3 0.03 \n",
+ "4 0.08 \n",
+ "... ... \n",
+ "73071 NaN \n",
+ "73072 NaN \n",
+ "73073 NaN \n",
+ "73074 NaN \n",
+ "73075 NaN \n",
+ "\n",
+ " Persistent Poverty, Tract Level \n",
+ "0 False \n",
+ "1 False \n",
+ "2 False \n",
+ "3 False \n",
+ "4 False \n",
+ "... ... \n",
+ "73071 False \n",
+ "73072 False \n",
+ "73073 False \n",
+ "73074 False \n",
+ "73075 False \n",
+ "\n",
+ "[73076 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load persistent poverty data\n",
+ "persistent_poverty_path = DATA_DIR / \"dataset\" / \"persistent_poverty\" / \"usa.csv\"\n",
+ "persistent_poverty_df = pd.read_csv(\n",
+ " persistent_poverty_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "# The column \"Persistent Poverty Census Tract\" appears in both the score file (at the CBG level) and this tract file,\n",
+ "# so rename it here to make the tract-level values easy to access directly.\n",
+ "\n",
+ "PERSISTENT_POVERTY_TRACT_LEVEL_FIELD = \"Persistent Poverty, Tract Level\"\n",
+ "PERSISTENT_POVERTY_CBG_LEVEL_FIELD = \"Persistent Poverty Census Tract\"\n",
+ "\n",
+ "persistent_poverty_df.rename(\n",
+ " columns={PERSISTENT_POVERTY_CBG_LEVEL_FIELD: PERSISTENT_POVERTY_TRACT_LEVEL_FIELD},\n",
+ " inplace=True,\n",
+ " errors=\"raise\",\n",
+ ")\n",
+ "\n",
+ "persistent_poverty_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "81826d29",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Percent of tract that is HOLC Grade D | \n",
+ " Tract is >20% HOLC Grade D | \n",
+ " Tract is >50% HOLC Grade D | \n",
+ " Tract is >75% HOLC Grade D | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01073000100 | \n",
+ " 0.42 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01073000300 | \n",
+ " 0.93 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 01073000400 | \n",
+ " 0.36 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 01073000500 | \n",
+ " 0.65 | \n",
+ " True | \n",
+ " True | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 01073000700 | \n",
+ " 0.41 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 7273 | \n",
+ " 55139001100 | \n",
+ " 0.29 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 7274 | \n",
+ " 55139001200 | \n",
+ " 0.42 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 7275 | \n",
+ " 55139001300 | \n",
+ " 0.16 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 7276 | \n",
+ " 55139001400 | \n",
+ " 0.04 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 7277 | \n",
+ "# Load EPA RSEI (Risk-Screening Environmental Indicators) data\n",
+ " 0.10 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7278 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Percent of tract that is HOLC Grade D \\\n",
+ "0 01073000100 0.42 \n",
+ "1 01073000300 0.93 \n",
+ "2 01073000400 0.36 \n",
+ "3 01073000500 0.65 \n",
+ "4 01073000700 0.41 \n",
+ "... ... ... \n",
+ "7273 55139001100 0.29 \n",
+ "7274 55139001200 0.42 \n",
+ "7275 55139001300 0.16 \n",
+ "7276 55139001400 0.04 \n",
+ "7277 55139001500 0.10 \n",
+ "\n",
+ " Tract is >20% HOLC Grade D Tract is >50% HOLC Grade D \\\n",
+ "0 True False \n",
+ "1 True True \n",
+ "2 True False \n",
+ "3 True True \n",
+ "4 True False \n",
+ "... ... ... \n",
+ "7273 True False \n",
+ "7274 True False \n",
+ "7275 False False \n",
+ "7276 False False \n",
+ "7277 False False \n",
+ "\n",
+ " Tract is >75% HOLC Grade D \n",
+ "0 False \n",
+ "1 True \n",
+ "2 False \n",
+ "3 False \n",
+ "4 False \n",
+ "... ... \n",
+ "7273 False \n",
+ "7274 False \n",
+ "7275 False \n",
+ "7276 False \n",
+ "7277 False \n",
+ "\n",
+ "[7278 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load mapping inequality data\n",
+ "HOLC_FACTORS = [\n",
+ " field_names.HOLC_GRADE_D_TRACT_20_PERCENT_FIELD,\n",
+ " field_names.HOLC_GRADE_D_TRACT_50_PERCENT_FIELD,\n",
+ " field_names.HOLC_GRADE_D_TRACT_75_PERCENT_FIELD,\n",
+ "]\n",
+ "mapping_inequality_path = DATA_DIR / \"dataset\" / \"mapping_inequality\" / \"usa.csv\"\n",
+ "mapping_inequality_df = pd.read_csv(\n",
+ " mapping_inequality_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "mapping_inequality_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "fceb3136",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " SVI - Socioeconomic Index (percentile) | \n",
+ " SVI - Household Composition Index (percentile) | \n",
+ " SVI- Minority Status/Language Index (percentile) | \n",
+ " SVI- Housing Type/Transportation Index (percentile) | \n",
+ " Overall rank for Social Vulnerability Indices (percentile) | \n",
+ " At or above 90 for overall percentile ranking according to Social Vulnerability Indices | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01015981901 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01015981902 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 01015981903 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 01097003605 | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.27 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 01097990000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT SVI - Socioeconomic Index (percentile) \\\n",
+ "0 01015981901 NaN \n",
+ "1 01015981902 NaN \n",
+ "2 01015981903 NaN \n",
+ "3 01097003605 NaN \n",
+ "4 01097990000 NaN \n",
+ "\n",
+ " SVI - Household Composition Index (percentile) \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 0.00 \n",
+ "4 NaN \n",
+ "\n",
+ " SVI- Minority Status/Language Index (percentile) \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 0.27 \n",
+ "4 NaN \n",
+ "\n",
+ " SVI- Housing Type/Transportation Index (percentile) \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " Overall rank for Social Vulnerability Indices (percentile) \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " At or above 90 for overall percentile ranking according to Social Vulnerability Indices \n",
+ "0 False \n",
+ "1 False \n",
+ "2 False \n",
+ "3 False \n",
+ "4 False "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cdc_svi_index_path = DATA_DIR / \"dataset\" / \"cdc_svi_index\" / \"usa.csv\"\n",
+ "cdc_svi_index_df = pd.read_csv(\n",
+ " cdc_svi_index_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "cdc_svi_index_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "0c290efa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Maryland Environmental Justice Score | \n",
+ " Maryland EJSCREEN Priority Community | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1379 | \n",
+ " 24027601107 | \n",
+ " 0.66 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 1380 | \n",
+ " 24027602201 | \n",
+ " 0.30 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1381 | \n",
+ " 24027605504 | \n",
+ " 0.28 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1382 | \n",
+ " 24027606904 | \n",
+ " 0.43 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1383 | \n",
+ " 24027606906 | \n",
+ " 0.54 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Maryland Environmental Justice Score \\\n",
+ "1379 24027601107 0.66 \n",
+ "1380 24027602201 0.30 \n",
+ "1381 24027605504 0.28 \n",
+ "1382 24027606904 0.43 \n",
+ "1383 24027606906 0.54 \n",
+ "\n",
+ " Maryland EJSCREEN Priority Community \n",
+ "1379 True \n",
+ "1380 False \n",
+ "1381 False \n",
+ "1382 False \n",
+ "1383 False "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load Maryland EJSCREEN\n",
+ "maryland_ejscreen_data_path = (\n",
+ " DATA_DIR / \"dataset\" / \"maryland_ejscreen\" / \"maryland.csv\"\n",
+ ")\n",
+ "maryland_ejscreen_df = pd.read_csv(\n",
+ " maryland_ejscreen_data_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "maryland_ejscreen_df.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "605af1ff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Energy-related alternative definition of communities | \n",
+ " Coal employment | \n",
+ " Outage Events | \n",
+ " Homelessness | \n",
+ " Disabled population | \n",
+ " Outage Duration | \n",
+ " Job Access | \n",
+ " Fossil energy employment | \n",
+ " Food Desert | \n",
+ " Incomplete Plumbing | \n",
+ " Non-grid-connected heating fuel | \n",
+ " Parks | \n",
+ " Greater than 30 min commute | \n",
+ " Internet Access | \n",
+ " Mobile Home | \n",
+ " Single Parent | \n",
+ " Transportation Costs | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 35029000500 | \n",
+ " True | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.22 | \n",
+ " 0.00 | \n",
+ " -1.40 | \n",
+ " 0.02 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 0.36 | \n",
+ " 0.00 | \n",
+ " 0.25 | \n",
+ " 30.70 | \n",
+ " 0.53 | \n",
+ " 0.75 | \n",
+ " 45.00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 48311950100 | \n",
+ " False | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.17 | \n",
+ " 0.00 | \n",
+ " -1.30 | \n",
+ " 0.01 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.04 | \n",
+ " -1.00 | \n",
+ " 0.31 | \n",
+ " 21.90 | \n",
+ " 0.33 | \n",
+ " 0.18 | \n",
+ " 34.00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 48131950500 | \n",
+ " True | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.29 | \n",
+ " 0.00 | \n",
+ " -1.50 | \n",
+ " 0.01 | \n",
+ " 1.00 | \n",
+ " 0.02 | \n",
+ " 0.15 | \n",
+ " 0.00 | \n",
+ " 0.54 | \n",
+ " 33.90 | \n",
+ " 0.15 | \n",
+ " 0.45 | \n",
+ " 40.00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 48247950200 | \n",
+ " False | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.01 | \n",
+ " 0.22 | \n",
+ " 1140.00 | \n",
+ " -3.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.01 | \n",
+ " 0.00 | \n",
+ " 0.33 | \n",
+ " 29.40 | \n",
+ " 0.11 | \n",
+ " 0.53 | \n",
+ " 37.00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 48247950400 | \n",
+ " True | \n",
+ " 0.00 | \n",
+ " 1.00 | \n",
+ " 0.01 | \n",
+ " 0.22 | \n",
+ " 1140.00 | \n",
+ " -2.70 | \n",
+ " 0.01 | \n",
+ " 1.00 | \n",
+ " 0.01 | \n",
+ " 0.03 | \n",
+ " 0.00 | \n",
+ " 0.36 | \n",
+ " 53.10 | \n",
+ " 0.11 | \n",
+ " 0.78 | \n",
+ " 41.00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 73051 | \n",
+ " 55111000500 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.12 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.30 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.10 | \n",
+ " 0.32 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 73052 | \n",
+ " 55111000700 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.11 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.01 | \n",
+ " 0.07 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.25 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 73053 | \n",
+ " 55111000800 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.14 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.01 | \n",
+ " 0.46 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.05 | \n",
+ " 0.22 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 73054 | \n",
+ " 55111000900 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.11 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.03 | \n",
+ " 0.78 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.02 | \n",
+ " 0.25 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 73055 | \n",
+ " 55117990000 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
73056 rows × 18 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Energy-related alternative definition of communities \\\n",
+ "0 35029000500 True \n",
+ "1 48311950100 False \n",
+ "2 48131950500 True \n",
+ "3 48247950200 False \n",
+ "4 48247950400 True \n",
+ "... ... ... \n",
+ "73051 55111000500 False \n",
+ "73052 55111000700 False \n",
+ "73053 55111000800 False \n",
+ "73054 55111000900 False \n",
+ "73055 55117990000 False \n",
+ "\n",
+ " Coal employment Outage Events Homelessness Disabled population \\\n",
+ "0 0.00 0.00 0.01 0.22 \n",
+ "1 0.00 0.00 0.01 0.17 \n",
+ "2 0.00 0.00 0.01 0.29 \n",
+ "3 0.00 1.00 0.01 0.22 \n",
+ "4 0.00 1.00 0.01 0.22 \n",
+ "... ... ... ... ... \n",
+ "73051 NaN NaN NaN 0.12 \n",
+ "73052 NaN NaN NaN 0.11 \n",
+ "73053 NaN NaN NaN 0.14 \n",
+ "73054 NaN NaN NaN 0.11 \n",
+ "73055 NaN NaN NaN 0.00 \n",
+ "\n",
+ " Outage Duration Job Access Fossil energy employment Food Desert \\\n",
+ "0 0.00 -1.40 0.02 1.00 \n",
+ "1 0.00 -1.30 0.01 1.00 \n",
+ "2 0.00 -1.50 0.01 1.00 \n",
+ "3 1140.00 -3.00 0.01 0.00 \n",
+ "4 1140.00 -2.70 0.01 1.00 \n",
+ "... ... ... ... ... \n",
+ "73051 NaN NaN NaN NaN \n",
+ "73052 NaN NaN NaN NaN \n",
+ "73053 NaN NaN NaN NaN \n",
+ "73054 NaN NaN NaN NaN \n",
+ "73055 NaN NaN NaN NaN \n",
+ "\n",
+ " Incomplete Plumbing Non-grid-connected heating fuel Parks \\\n",
+ "0 0.00 0.36 0.00 \n",
+ "1 0.02 0.04 -1.00 \n",
+ "2 0.02 0.15 0.00 \n",
+ "3 0.00 0.01 0.00 \n",
+ "4 0.01 0.03 0.00 \n",
+ "... ... ... ... \n",
+ "73051 0.00 0.30 NaN \n",
+ "73052 0.01 0.07 NaN \n",
+ "73053 0.01 0.46 NaN \n",
+ "73054 0.03 0.78 NaN \n",
+ "73055 0.00 0.00 NaN \n",
+ "\n",
+ " Greater than 30 min commute Internet Access Mobile Home \\\n",
+ "0 0.25 30.70 0.53 \n",
+ "1 0.31 21.90 0.33 \n",
+ "2 0.54 33.90 0.15 \n",
+ "3 0.33 29.40 0.11 \n",
+ "4 0.36 53.10 0.11 \n",
+ "... ... ... ... \n",
+ "73051 NaN NaN 0.10 \n",
+ "73052 NaN NaN 0.00 \n",
+ "73053 NaN NaN 0.05 \n",
+ "73054 NaN NaN 0.02 \n",
+ "73055 NaN NaN 0.00 \n",
+ "\n",
+ " Single Parent Transportation Costs \n",
+ "0 0.75 45.00 \n",
+ "1 0.18 34.00 \n",
+ "2 0.45 40.00 \n",
+ "3 0.53 37.00 \n",
+ "4 0.78 41.00 \n",
+ "... ... ... \n",
+ "73051 0.32 NaN \n",
+ "73052 0.25 NaN \n",
+ "73053 0.22 NaN \n",
+ "73054 0.25 NaN \n",
+ "73055 0.00 NaN \n",
+ "\n",
+ "[73056 rows x 18 columns]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load alternative energy-related definition\n",
+ "energy_definition_alternative_draft_path = (\n",
+ " DATA_DIR / \"dataset\" / \"energy_definition_alternative_draft\" / \"usa.csv\"\n",
+ ")\n",
+ "energy_definition_alternative_draft_df = pd.read_csv(\n",
+ " energy_definition_alternative_draft_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "energy_definition_alternative_draft_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "fe4a2939",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Michigan EJSCREEN Score Field | \n",
+ " Michigan EJSCREEN Percentile Field | \n",
+ " Michigan EJSCREEN Priority Community | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 26081003900 | \n",
+ " 93.99 | \n",
+ " 1.00 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 26077000202 | \n",
+ " 91.95 | \n",
+ " 1.00 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 26077000300 | \n",
+ " 90.17 | \n",
+ " 1.00 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 26081003800 | \n",
+ " 89.50 | \n",
+ " 1.00 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 26081004000 | \n",
+ " 87.74 | \n",
+ " 1.00 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Michigan EJSCREEN Score Field \\\n",
+ "0 26081003900 93.99 \n",
+ "1 26077000202 91.95 \n",
+ "2 26077000300 90.17 \n",
+ "3 26081003800 89.50 \n",
+ "4 26081004000 87.74 \n",
+ "\n",
+ " Michigan EJSCREEN Percentile Field Michigan EJSCREEN Priority Community \n",
+ "0 1.00 True \n",
+ "1 1.00 True \n",
+ "2 1.00 True \n",
+ "3 1.00 True \n",
+ "4 1.00 True "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load Michigan EJSCREEN\n",
+ "michigan_ejscreen_data_path = (\n",
+ " DATA_DIR / \"dataset\" / \"michigan_ejscreen\" / \"michigan_ejscreen.csv\"\n",
+ ")\n",
+ "michigan_ejscreen_df = pd.read_csv(\n",
+ " michigan_ejscreen_data_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "michigan_ejscreen_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "b39342aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Number of facilities affecting the tract | \n",
+ " Number of releases affecting the tract | \n",
+ " Number of chemicals affecting the tract | \n",
+ " Average toxicity-weighted concentration of the cells in the tract | \n",
+ " RSEI Risk Score | \n",
+ " RSEI Risk Score (Cancer toxicity weights) | \n",
+ " RSEI Risk Score (Noncancer toxicity weights) | \n",
+ " Sum of the population of the cells in the tract | \n",
+ " At or above 75 for overall percentile for the RSEI score | \n",
+ " RSEI Risk Score (percentile) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 36047023200 | \n",
+ " 193 | \n",
+ " 904 | \n",
+ " 96 | \n",
+ " 485.18 | \n",
+ " 759.71 | \n",
+ " 610.32 | \n",
+ " 154.85 | \n",
+ " 6199.22 | \n",
+ " False | \n",
+ " 0.53 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 36029007202 | \n",
+ " 84 | \n",
+ " 376 | \n",
+ " 81 | \n",
+ " 227.37 | \n",
+ " 79.50 | \n",
+ " 63.05 | \n",
+ " 14.81 | \n",
+ " 1601.87 | \n",
+ " False | \n",
+ " 0.21 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 42003429201 | \n",
+ " 144 | \n",
+ " 1624 | \n",
+ " 251 | \n",
+ " 2705.16 | \n",
+ " 2553.95 | \n",
+ " 2447.72 | \n",
+ " 113.75 | \n",
+ " 4527.60 | \n",
+ " False | \n",
+ " 0.74 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 17201004100 | \n",
+ " 80 | \n",
+ " 365 | \n",
+ " 57 | \n",
+ " 3626.62 | \n",
+ " 3984.97 | \n",
+ " 3969.13 | \n",
+ " 27.06 | \n",
+ " 5766.72 | \n",
+ " True | \n",
+ " 0.82 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 47065011203 | \n",
+ " 79 | \n",
+ " 358 | \n",
+ " 79 | \n",
+ " 1133.40 | \n",
+ " 1885.51 | \n",
+ " 1410.31 | \n",
+ " 447.77 | \n",
+ " 7820.66 | \n",
+ " False | \n",
+ " 0.69 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Number of facilities affecting the tract \\\n",
+ "0 36047023200 193 \n",
+ "1 36029007202 84 \n",
+ "2 42003429201 144 \n",
+ "3 17201004100 80 \n",
+ "4 47065011203 79 \n",
+ "\n",
+ " Number of releases affecting the tract \\\n",
+ "0 904 \n",
+ "1 376 \n",
+ "2 1624 \n",
+ "3 365 \n",
+ "4 358 \n",
+ "\n",
+ " Number of chemicals affecting the tract \\\n",
+ "0 96 \n",
+ "1 81 \n",
+ "2 251 \n",
+ "3 57 \n",
+ "4 79 \n",
+ "\n",
+ " Average toxicity-weighted concentration of the cells in the tract \\\n",
+ "0 485.18 \n",
+ "1 227.37 \n",
+ "2 2705.16 \n",
+ "3 3626.62 \n",
+ "4 1133.40 \n",
+ "\n",
+ " RSEI Risk Score RSEI Risk Score (Cancer toxicity weights) \\\n",
+ "0 759.71 610.32 \n",
+ "1 79.50 63.05 \n",
+ "2 2553.95 2447.72 \n",
+ "3 3984.97 3969.13 \n",
+ "4 1885.51 1410.31 \n",
+ "\n",
+ " RSEI Risk Score (Noncancer toxicity weights) \\\n",
+ "0 154.85 \n",
+ "1 14.81 \n",
+ "2 113.75 \n",
+ "3 27.06 \n",
+ "4 447.77 \n",
+ "\n",
+ " Sum of the population of the cells in the tract \\\n",
+ "0 6199.22 \n",
+ "1 1601.87 \n",
+ "2 4527.60 \n",
+ "3 5766.72 \n",
+ "4 7820.66 \n",
+ "\n",
+ " At or above 75 for overall percentile for the RSEI score \\\n",
+ "0 False \n",
+ "1 False \n",
+ "2 False \n",
+ "3 True \n",
+ "4 False \n",
+ "\n",
+ " RSEI Risk Score (percentile) \n",
+ "0 0.53 \n",
+ "1 0.21 \n",
+ "2 0.74 \n",
+ "3 0.82 \n",
+ "4 0.69 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load EPA RSEI EJSCREEN\n",
+ "epa_rsei_data_path = DATA_DIR / \"dataset\" / \"epa_rsei\" / \"usa.csv\"\n",
+ "epa_rsei_df = pd.read_csv(\n",
+ " epa_rsei_data_path,\n",
+ " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
+ ")\n",
+ "\n",
+ "epa_rsei_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "65659c26",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " Persistent Poverty Census Tract | \n",
+ " Housing burden (percent) | \n",
+ " Total population | \n",
+ " Median household income (% of state median household income) | \n",
+ " Current asthma among adults aged greater than or equal to 18 years | \n",
+ " Coronary heart disease among adults aged greater than or equal to 18 years | \n",
+ " Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years | \n",
+ " Current lack of health insurance among adults aged 18-64 years | \n",
+ " Diagnosed diabetes among adults aged greater than or equal to 18 years | \n",
+ " ... | \n",
+ " Michigan EJSCREEN Priority Community | \n",
+ " SVI - Socioeconomic Index (percentile) | \n",
+ " SVI - Household Composition Index (percentile) | \n",
+ " SVI- Minority Status/Language Index (percentile) | \n",
+ " SVI- Housing Type/Transportation Index (percentile) | \n",
+ " Overall rank for Social Vulnerability Indices (percentile) | \n",
+ " At or above 90 for overall percentile ranking according to Social Vulnerability Indices | \n",
+ " Mapping for Environmental Justice Final Percentile | \n",
+ " Mapping for Environmental Justice Final Score | \n",
+ " Mapping for Environmental Justice Priority Community | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 01073001100 | \n",
+ " True | \n",
+ " 0.28 | \n",
+ " 4897.00 | \n",
+ " 0.73 | \n",
+ " 11.20 | \n",
+ " 7.20 | \n",
+ " 6.70 | \n",
+ " 16.60 | \n",
+ " 19.30 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0.62 | \n",
+ " 0.98 | \n",
+ " 0.50 | \n",
+ " 0.40 | \n",
+ " 0.69 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01073001400 | \n",
+ " True | \n",
+ " 0.18 | \n",
+ " 1906.00 | \n",
+ " 0.71 | \n",
+ " 11.10 | \n",
+ " 9.10 | \n",
+ " 7.30 | \n",
+ " 21.40 | \n",
+ " 22.40 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0.80 | \n",
+ " 0.57 | \n",
+ " 0.85 | \n",
+ " 0.23 | \n",
+ " 0.68 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 01073002000 | \n",
+ " False | \n",
+ " 0.44 | \n",
+ " 4215.00 | \n",
+ " 0.54 | \n",
+ " 13.50 | \n",
+ " 9.50 | \n",
+ " 6.10 | \n",
+ " 25.40 | \n",
+ " 22.80 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0.95 | \n",
+ " 0.82 | \n",
+ " 0.73 | \n",
+ " 0.96 | \n",
+ " 0.97 | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 01073003802 | \n",
+ " False | \n",
+ " 0.41 | \n",
+ " 5149.00 | \n",
+ " 0.77 | \n",
+ " 12.00 | \n",
+ " 6.60 | \n",
+ " 5.60 | \n",
+ " 20.90 | \n",
+ " 18.60 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0.85 | \n",
+ " 0.81 | \n",
+ " 0.50 | \n",
+ " 0.49 | \n",
+ " 0.76 | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 01073004000 | \n",
+ " True | \n",
+ " 0.47 | \n",
+ " 2621.00 | \n",
+ " 0.37 | \n",
+ " 13.10 | \n",
+ " 10.00 | \n",
+ " 6.30 | \n",
+ " 24.50 | \n",
+ " 25.00 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0.96 | \n",
+ " 0.71 | \n",
+ " 0.86 | \n",
+ " 0.85 | \n",
+ " 0.95 | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 661 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT Persistent Poverty Census Tract Housing burden (percent) \\\n",
+ "0 01073001100 True 0.28 \n",
+ "1 01073001400 True 0.18 \n",
+ "2 01073002000 False 0.44 \n",
+ "3 01073003802 False 0.41 \n",
+ "4 01073004000 True 0.47 \n",
+ "\n",
+ " Total population \\\n",
+ "0 4897.00 \n",
+ "1 1906.00 \n",
+ "2 4215.00 \n",
+ "3 5149.00 \n",
+ "4 2621.00 \n",
+ "\n",
+ " Median household income (% of state median household income) \\\n",
+ "0 0.73 \n",
+ "1 0.71 \n",
+ "2 0.54 \n",
+ "3 0.77 \n",
+ "4 0.37 \n",
+ "\n",
+ " Current asthma among adults aged greater than or equal to 18 years \\\n",
+ "0 11.20 \n",
+ "1 11.10 \n",
+ "2 13.50 \n",
+ "3 12.00 \n",
+ "4 13.10 \n",
+ "\n",
+ " Coronary heart disease among adults aged greater than or equal to 18 years \\\n",
+ "0 7.20 \n",
+ "1 9.10 \n",
+ "2 9.50 \n",
+ "3 6.60 \n",
+ "4 10.00 \n",
+ "\n",
+ " Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years \\\n",
+ "0 6.70 \n",
+ "1 7.30 \n",
+ "2 6.10 \n",
+ "3 5.60 \n",
+ "4 6.30 \n",
+ "\n",
+ " Current lack of health insurance among adults aged 18-64 years \\\n",
+ "0 16.60 \n",
+ "1 21.40 \n",
+ "2 25.40 \n",
+ "3 20.90 \n",
+ "4 24.50 \n",
+ "\n",
+ " Diagnosed diabetes among adults aged greater than or equal to 18 years \\\n",
+ "0 19.30 \n",
+ "1 22.40 \n",
+ "2 22.80 \n",
+ "3 18.60 \n",
+ "4 25.00 \n",
+ "\n",
+ " ... Michigan EJSCREEN Priority Community \\\n",
+ "0 ... NaN \n",
+ "1 ... NaN \n",
+ "2 ... NaN \n",
+ "3 ... NaN \n",
+ "4 ... NaN \n",
+ "\n",
+ " SVI - Socioeconomic Index (percentile) \\\n",
+ "0 0.62 \n",
+ "1 0.80 \n",
+ "2 0.95 \n",
+ "3 0.85 \n",
+ "4 0.96 \n",
+ "\n",
+ " SVI - Household Composition Index (percentile) \\\n",
+ "0 0.98 \n",
+ "1 0.57 \n",
+ "2 0.82 \n",
+ "3 0.81 \n",
+ "4 0.71 \n",
+ "\n",
+ " SVI- Minority Status/Language Index (percentile) \\\n",
+ "0 0.50 \n",
+ "1 0.85 \n",
+ "2 0.73 \n",
+ "3 0.50 \n",
+ "4 0.86 \n",
+ "\n",
+ " SVI- Housing Type/Transportation Index (percentile) \\\n",
+ "0 0.40 \n",
+ "1 0.23 \n",
+ "2 0.96 \n",
+ "3 0.49 \n",
+ "4 0.85 \n",
+ "\n",
+ " Overall rank for Social Vulnerability Indices (percentile) \\\n",
+ "0 0.69 \n",
+ "1 0.68 \n",
+ "2 0.97 \n",
+ "3 0.76 \n",
+ "4 0.95 \n",
+ "\n",
+ " At or above 90 for overall percentile ranking according to Social Vulnerability Indices \\\n",
+ "0 False \n",
+ "1 False \n",
+ "2 True \n",
+ "3 False \n",
+ "4 True \n",
+ "\n",
+ " Mapping for Environmental Justice Final Percentile \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " Mapping for Environmental Justice Final Score \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " Mapping for Environmental Justice Priority Community \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ "[5 rows x 661 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Join all dataframes that use tracts\n",
+ "census_tract_dfs = [\n",
+ " cejst_df,\n",
+ " calenviroscreen_df,\n",
+ " persistent_poverty_df,\n",
+ " mapping_inequality_df,\n",
+ " epa_rsei_df,\n",
+ " maryland_ejscreen_df,\n",
+ " energy_definition_alternative_draft_df,\n",
+ " michigan_ejscreen_df,\n",
+ " cdc_svi_index_df,\n",
+ " mapping_for_ej_df,\n",
+ "]\n",
+ "\n",
+ "merged_df = functools.reduce(\n",
+ " lambda left, right: pd.merge(\n",
+ " left=left,\n",
+ " right=right,\n",
+ " on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME,\n",
+ " how=\"outer\",\n",
+ " ),\n",
+ " census_tract_dfs,\n",
+ ")\n",
+ "\n",
+ "tract_values = merged_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME].str.len().unique()\n",
+ "if any(tract_values != [11]):\n",
+ " print(tract_values)\n",
+ " raise ValueError(\"Some of the census tract data has the wrong length.\")\n",
+ "\n",
+ "if len(merged_df) > ExtractTransformLoad.EXPECTED_MAX_CENSUS_TRACTS:\n",
+ " raise ValueError(f\"Too many rows in the join: {len(merged_df)}.\")\n",
+ "\n",
+ "merged_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "2de78f71",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Tract is >20% HOLC Grade D | \n",
+ " Tract is >50% HOLC Grade D | \n",
+ " Tract is >75% HOLC Grade D | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " True | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Tract is >20% HOLC Grade D Tract is >50% HOLC Grade D \\\n",
+ "0 True False \n",
+ "1 True True \n",
+ "2 False False \n",
+ "3 True False \n",
+ "4 True False \n",
+ "\n",
+ " Tract is >75% HOLC Grade D \n",
+ "0 False \n",
+ "1 True \n",
+ "2 False \n",
+ "3 False \n",
+ "4 False "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Special handling for HOLC.\n",
+ "# Fill in the null HOLC values as `False`. Otherwise the comparison tool will not run comparisons in states\n",
+ "# without HOLC scores, and for HOLC, we'd like to see it across the whole US.\n",
+ "for holc_factor in HOLC_FACTORS:\n",
+ " merged_df[holc_factor] = merged_df[holc_factor].fillna(False)\n",
+ "\n",
+ "merged_df[HOLC_FACTORS].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "980c0f66",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Define a namedtuple for indices.\n",
+ "Index = collections.namedtuple(\n",
+ " typename=\"Index\",\n",
+ " field_names=[\n",
+ " \"method_name\",\n",
+ " \"priority_communities_field\",\n",
+ " ],\n",
+ ")\n",
+ "\n",
+ "# Define the indices used for CEJST scoring (`census_block_group_indices`) as well as comparison\n",
+ "# (`census_tract_indices`).\n",
+ "definition_l_factors = [\n",
+ " field_names.L_CLIMATE,\n",
+ " field_names.L_ENERGY,\n",
+ " field_names.L_TRANSPORTATION,\n",
+ " field_names.L_HOUSING,\n",
+ " field_names.L_POLLUTION,\n",
+ " field_names.L_WATER,\n",
+ " field_names.L_HEALTH,\n",
+ " field_names.L_WORKFORCE,\n",
+ " # Also include a combined factor for all the non-workforce elements.\n",
+ " field_names.L_NON_WORKFORCE,\n",
+ "]\n",
+ "\n",
+ "definition_m_factors = [\n",
+ " field_names.M_CLIMATE,\n",
+ " field_names.M_ENERGY,\n",
+ " field_names.M_TRANSPORTATION,\n",
+ " field_names.M_HOUSING,\n",
+ " field_names.M_POLLUTION,\n",
+ " field_names.M_WATER,\n",
+ " field_names.M_HEALTH,\n",
+ " field_names.M_WORKFORCE,\n",
+ " # Also include a combined factor for all the non-workforce elements.\n",
+ " field_names.M_NON_WORKFORCE,\n",
+ "]\n",
+ "\n",
+ "census_tract_indices = (\n",
+ " [\n",
+ " Index(\n",
+ " method_name=\"Definition M\",\n",
+ " priority_communities_field=field_names.SCORE_M_COMMUNITIES,\n",
+ " ),\n",
+ " ]\n",
+ " + [\n",
+ " Index(\n",
+ " method_name=\"Definition L\",\n",
+ " priority_communities_field=field_names.SCORE_L_COMMUNITIES,\n",
+ " ),\n",
+ " ]\n",
+ " # Insert indices for each of the factors from Definition M.\n",
+ " # Note: since these involve no renaming, we write them using list comprehension.\n",
+ " + [\n",
+ " Index(\n",
+ " method_name=factor,\n",
+ " priority_communities_field=factor,\n",
+ " )\n",
+ " for factor in definition_m_factors\n",
+ " ]\n",
+ " # Insert indices for each of the factors from Definition L.\n",
+ " # Note: since these involve no renaming, we write them using list comprehension.\n",
+ " + [\n",
+ " Index(\n",
+ " method_name=factor,\n",
+ " priority_communities_field=factor,\n",
+ " )\n",
+ " for factor in definition_l_factors\n",
+ " ]\n",
+ " + [\n",
+ " Index(\n",
+ " # Note: we're renaming Score G as NMTC Modified for clarity, since that's what Score G is under the hood.\n",
+ " method_name=\"NMTC Modified\",\n",
+ " priority_communities_field=field_names.SCORE_G_COMMUNITIES,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"NMTC\",\n",
+ " priority_communities_field=\"NMTC (communities)\",\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Score C\",\n",
+ " priority_communities_field=\"Score C (top 25th percentile)\",\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Score D (30th percentile)\",\n",
+ " priority_communities_field=\"Score D (top 30th percentile)\",\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Score D (25th percentile)\",\n",
+ " priority_communities_field=\"Score D (top 25th percentile)\",\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Score F\",\n",
+ " priority_communities_field=field_names.SCORE_F_COMMUNITIES,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"CalEnviroScreen 4.0\",\n",
+ " priority_communities_field=\"calenviroscreen_priority_community\",\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Mapping for EJ\",\n",
+ " priority_communities_field=field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"EPA RSEI Aggregate Microdata\",\n",
+ " priority_communities_field=field_names.EPA_RSEI_SCORE_THRESHOLD_FIELD,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Persistent Poverty\",\n",
+ " priority_communities_field=PERSISTENT_POVERTY_TRACT_LEVEL_FIELD,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Maryland EJSCREEN\",\n",
+ " priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=field_names.ENERGY_RELATED_COMMUNITIES_DEFINITION_ALTERNATIVE,\n",
+ " priority_communities_field=field_names.ENERGY_RELATED_COMMUNITIES_DEFINITION_ALTERNATIVE,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"CDC SVI Index\",\n",
+ " priority_communities_field=field_names.CDC_SVI_INDEX_THEMES_PRIORITY_COMMUNITY,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"Michigan EJSCREEN\",\n",
+ " priority_communities_field=field_names.MICHIGAN_EJSCREEN_PRIORITY_COMMUNITY_FIELD,\n",
+ " ),\n",
+ " ]\n",
+ " # Insert indices for each of the HOLC factors.\n",
+ " # Note: since these involve no renaming, we write them using list comprehension.\n",
+ " + [\n",
+ " Index(\n",
+ " method_name=factor,\n",
+ " priority_communities_field=factor,\n",
+ " )\n",
+ " for factor in HOLC_FACTORS\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "\n",
+ "ejscreen_areas_of_concern_census_block_group_indices = [\n",
+ " Index(\n",
+ " method_name=\"EJSCREEN Areas of Concern, National, 80th percentile\",\n",
+ " priority_communities_field=field_names.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_80TH_PERCENTILE_COMMUNITIES_FIELD,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"EJSCREEN Areas of Concern, National, 90th percentile\",\n",
+ " priority_communities_field=field_names.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_90TH_PERCENTILE_COMMUNITIES_FIELD,\n",
+ " ),\n",
+ " Index(\n",
+ " method_name=\"EJSCREEN Areas of Concern, National, 95th percentile\",\n",
+ " priority_communities_field=field_names.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_95TH_PERCENTILE_COMMUNITIES_FIELD,\n",
+ " ),\n",
+ "]\n",
+ "\n",
+    "# Before EJSCREEN AoC indicators are included, check whether or not the EJSCREEN AoC data is available locally.\n",
+ "if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
+ " # Add EJSCREEN AoCs to all of the CBG indices.\n",
+ " # TODO: When we get AoC data at the tract level, fix this.\n",
+ " # Right now commenting this out to avoid merging CBG-level areas of concern on a tract-level CEJST definition.\n",
+ " # census_block_group_indices.extend(\n",
+ " # ejscreen_areas_of_concern_census_block_group_indices\n",
+ " # )\n",
+ " pass\n",
+ "else:\n",
+ " pass\n",
+ "\n",
+ "# These fields will be used for statistical comparisons.\n",
+ "comparison_fields = [\n",
+ " field_names.POVERTY_LESS_THAN_100_FPL_FIELD,\n",
+ " field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n",
+ " field_names.COLLEGE_ATTENDANCE_FIELD,\n",
+ " field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n",
+ " field_names.LINGUISTIC_ISO_FIELD,\n",
+ " field_names.UNEMPLOYMENT_FIELD,\n",
+ " field_names.HIGH_SCHOOL_ED_FIELD,\n",
+ " field_names.MEDIAN_INCOME_FIELD,\n",
+ " field_names.URBAN_HEURISTIC_FIELD,\n",
+ " field_names.LIFE_EXPECTANCY_FIELD,\n",
+ " field_names.HEALTH_INSURANCE_FIELD,\n",
+ " field_names.PHYS_HEALTH_NOT_GOOD_FIELD,\n",
+ " field_names.DIABETES_FIELD,\n",
+ " field_names.LOW_READING_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "4b510cb1",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Converting calenviroscreen_priority_community to boolean.\n",
+ "Converting Mapping for Environmental Justice Priority Community to boolean.\n",
+ "Converting At or above 75 for overall percentile for the RSEI score to boolean.\n",
+ "Converting Persistent Poverty, Tract Level to boolean.\n",
+ "Converting Maryland EJSCREEN Priority Community to boolean.\n",
+ "Converting Energy-related alternative definition of communities to boolean.\n",
+ "Converting At or above 90 for overall percentile ranking according to Social Vulnerability Indices to boolean.\n",
+ "Converting Michigan EJSCREEN Priority Community to boolean.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "98501c61ec76447fa7b2e30d230c5886",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b6c45a7c99a3499a9806a5395f6bfe8c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/56 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "cb93489fbec44e30af9013f63a650c02",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "54640ab6c9974bddaf5f7d3a06e5624c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/14 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2e85423d07404cf98daf2ef0c024b5c1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/2 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def get_state_distributions(\n",
+ " df: pd.DataFrame, priority_communities_fields: typing.List[str]\n",
+ ") -> pd.DataFrame:\n",
+    "    \"\"\"For each boolean field of priority communities, calculate distribution across the country, states and territories, regions, divisions, and urban/rural areas.\"\"\"\n",
+ "\n",
+    "    # For each field: flag non-boolean dtypes (this only prints a notice; no conversion happens here) and add its population column.\n",
+ " for priority_communities_field in priority_communities_fields:\n",
+ " if df[priority_communities_field].dtype != bool:\n",
+ " print(f\"Converting {priority_communities_field} to boolean.\")\n",
+ "\n",
+ " # Calculate the population included as priority communities per tract. Will either be 0 or the population.\n",
+ " df[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = (\n",
+ " df[priority_communities_field]\n",
+ " * df[field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010]\n",
+ " )\n",
+ "\n",
+ " def calculate_state_comparison(\n",
+ " frame: pd.DataFrame, geography_field: str\n",
+ " ) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+    "        Applied to each group of a pandas `groupby` object (via `progress_apply`); returns a one-row summary dataframe. Reads `priority_communities_fields` from the enclosing scope.\n",
+ "\n",
+ " \"\"\"\n",
+ " summary_dict = {}\n",
+ " summary_dict[COUNTRY_FIELD_NAME] = frame[COUNTRY_FIELD_NAME].unique()[0]\n",
+ "\n",
+ " summary_dict[\"Analysis grouped by\"] = geography_field\n",
+ "\n",
+ " if geography_field == COUNTRY_FIELD_NAME:\n",
+ " summary_dict[GEOID_STATE_FIELD_NAME] = \"00\"\n",
+ " summary_dict[\"Geography name\"] = \"(Entire USA)\"\n",
+ "\n",
+ " if geography_field == GEOID_STATE_FIELD_NAME:\n",
+ " state_id = frame[GEOID_STATE_FIELD_NAME].unique()[0]\n",
+ " summary_dict[GEOID_STATE_FIELD_NAME] = state_id\n",
+ " summary_dict[\"Geography name\"] = us.states.lookup(state_id).name\n",
+ "\n",
+ " # Also add region information\n",
+ " region_id = frame[\"region\"].unique()[0]\n",
+ " summary_dict[\"region\"] = region_id\n",
+ "\n",
+ " if geography_field == \"region\":\n",
+ " region_id = frame[\"region\"].unique()[0]\n",
+ " summary_dict[\"region\"] = region_id\n",
+ " summary_dict[\"Geography name\"] = region_id\n",
+ "\n",
+ " if geography_field == \"division\":\n",
+ " division_id = frame[\"division\"].unique()[0]\n",
+ " summary_dict[\"division\"] = division_id\n",
+ " summary_dict[\"Geography name\"] = division_id\n",
+ "\n",
+ " total_tracts_in_geography = len(frame)\n",
+ " total_population_in_geography = frame[\n",
+ " field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010\n",
+ " ].sum()\n",
+ "\n",
+ " if geography_field == field_names.URBAN_HEURISTIC_FIELD:\n",
+ " urban_flag = frame[field_names.URBAN_HEURISTIC_FIELD].unique()[0]\n",
+ " summary_dict[\"Urban vs Rural\"] = \"Urban\" if urban_flag else \"Rural\"\n",
+ " summary_dict[\"Geography name\"] = summary_dict[\"Urban vs Rural\"]\n",
+ "\n",
+ " for priority_communities_field in priority_communities_fields:\n",
+ " summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = frame[\n",
+ " f\"{priority_communities_field}{POPULATION_SUFFIX}\"\n",
+ " ].sum()\n",
+ "\n",
+ " summary_dict[f\"{priority_communities_field} (total tracts)\"] = frame[\n",
+ " f\"{priority_communities_field}\"\n",
+ " ].sum()\n",
+ "\n",
+ " # Calculate some combinations of other variables.\n",
+ " summary_dict[f\"{priority_communities_field} (percent tracts)\"] = (\n",
+ " summary_dict[f\"{priority_communities_field} (total tracts)\"]\n",
+ " / total_tracts_in_geography\n",
+ " )\n",
+ "\n",
+ " summary_dict[f\"{priority_communities_field} (percent population)\"] = (\n",
+ " summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"]\n",
+ " / total_population_in_geography\n",
+ " )\n",
+ "\n",
+ " unwanted_keys = [\n",
+ " f\"{priority_communities_field}{POPULATION_SUFFIX}\",\n",
+ " f\"{priority_communities_field} (total tracts)\",\n",
+ " ]\n",
+ "\n",
+ " # Remove unneeded columns:\n",
+ " for unwanted_key in unwanted_keys:\n",
+ " del summary_dict[unwanted_key]\n",
+ "\n",
+ " df = pd.DataFrame(summary_dict, index=[0])\n",
+ "\n",
+ " return df\n",
+ "\n",
+ " # Add a field for country so we can do aggregations across the entire country.\n",
+ " df[COUNTRY_FIELD_NAME] = \"USA\"\n",
+ "\n",
+    "    # First, run the comparison for the whole country\n",
+ " usa_grouped_df = df.groupby(COUNTRY_FIELD_NAME)\n",
+ "\n",
+ " # Run the comparison function on the groups.\n",
+ " usa_distribution_df = usa_grouped_df.progress_apply(\n",
+ " lambda frame: calculate_state_comparison(\n",
+ " frame, geography_field=COUNTRY_FIELD_NAME\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " # Next, run the comparison by state\n",
+ " state_grouped_df = df.groupby(GEOID_STATE_FIELD_NAME)\n",
+ "\n",
+ " # Run the comparison function on the groups.\n",
+ " state_distribution_df = state_grouped_df.progress_apply(\n",
+ " lambda frame: calculate_state_comparison(\n",
+ " frame, geography_field=GEOID_STATE_FIELD_NAME\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " # Next, run the comparison by region\n",
+ " region_grouped_df = df.groupby(\"region\")\n",
+ "\n",
+ " # Run the comparison function on the groups.\n",
+ " region_distribution_df = region_grouped_df.progress_apply(\n",
+ " lambda frame: calculate_state_comparison(frame, geography_field=\"region\")\n",
+ " )\n",
+ "\n",
+ " # Next, run the comparison by division\n",
+ " division_grouped_df = df.groupby(\"division\")\n",
+ "\n",
+ " # Run the comparison function on the groups.\n",
+ " division_distribution_df = division_grouped_df.progress_apply(\n",
+ " lambda frame: calculate_state_comparison(frame, geography_field=\"division\")\n",
+ " )\n",
+ "\n",
+ " # Next, run the comparison by urban/rural\n",
+ " urban_grouped_df = df.groupby(field_names.URBAN_HEURISTIC_FIELD)\n",
+ "\n",
+ " # Run the comparison function on the groups.\n",
+ " urban_grouped_df = urban_grouped_df.progress_apply(\n",
+ " lambda frame: calculate_state_comparison(\n",
+ " frame, geography_field=field_names.URBAN_HEURISTIC_FIELD\n",
+ " )\n",
+ " )\n",
+ "\n",
+    "    # Combine the five groupings (country, state, region, division, urban/rural) into one dataframe\n",
+ " combined_df = pd.concat(\n",
+ " [\n",
+ " usa_distribution_df,\n",
+ " state_distribution_df,\n",
+ " region_distribution_df,\n",
+ " division_distribution_df,\n",
+ " urban_grouped_df,\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " return combined_df\n",
+ "\n",
+ "\n",
+ "def write_state_distribution_excel(\n",
+ " state_distribution_df: pd.DataFrame, file_path: pathlib.PosixPath\n",
+ ") -> None:\n",
+ " \"\"\"Write the dataframe to excel with special formatting.\"\"\"\n",
+ " # Create a Pandas Excel writer using XlsxWriter as the engine.\n",
+ " writer = pd.ExcelWriter(file_path, engine=\"xlsxwriter\")\n",
+ "\n",
+ " # Convert the dataframe to an XlsxWriter Excel object. We also turn off the\n",
+ " # index column at the left of the output dataframe.\n",
+ " state_distribution_df.to_excel(writer, sheet_name=\"Sheet1\", index=False)\n",
+ "\n",
+ " # Get the xlsxwriter workbook and worksheet objects.\n",
+ " workbook = writer.book\n",
+ " worksheet = writer.sheets[\"Sheet1\"]\n",
+ " worksheet.autofilter(\n",
+ " 0, 0, state_distribution_df.shape[0], state_distribution_df.shape[1]\n",
+ " )\n",
+ "\n",
+ " # Set a width parameter for all columns\n",
+ " # Note: this is parameterized because every call to `set_column` requires setting the width.\n",
+ " column_width = 15\n",
+ "\n",
+ " for column in state_distribution_df.columns:\n",
+ " # Turn the column index into excel ranges (e.g., column #95 is \"CR\" and the range may be \"CR2:CR53\").\n",
+ " column_index = state_distribution_df.columns.get_loc(column)\n",
+ " column_character = get_excel_column_name(column_index)\n",
+ "\n",
+ " # Set all columns to larger width\n",
+ " worksheet.set_column(f\"{column_character}:{column_character}\", column_width)\n",
+ "\n",
+ " # Special formatting for all percent columns\n",
+ " # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n",
+ " if \"percent \" in column or \"(percent)\" in column:\n",
+ " # Make these columns percentages.\n",
+ " percentage_format = workbook.add_format({\"num_format\": \"0%\"})\n",
+ " worksheet.set_column(\n",
+ " f\"{column_character}:{column_character}\",\n",
+ " column_width,\n",
+ " percentage_format,\n",
+ " )\n",
+ "\n",
+ " # Special formatting for columns that capture the percent of population considered priority.\n",
+ " if \"(percent population)\" in column:\n",
+ " column_ranges = (\n",
+ " f\"{column_character}2:{column_character}{len(state_distribution_df)+1}\"\n",
+ " )\n",
+ "\n",
+ " # Add green to red conditional formatting.\n",
+ " worksheet.conditional_format(\n",
+ " column_ranges,\n",
+ " # Min: green, max: red.\n",
+ " {\n",
+ " \"type\": \"2_color_scale\",\n",
+ " \"min_color\": \"#00FF7F\",\n",
+ " \"max_color\": \"#C82538\",\n",
+ " },\n",
+ " )\n",
+ "\n",
+ " header_format = workbook.add_format(\n",
+ " {\"bold\": True, \"text_wrap\": True, \"valign\": \"bottom\"}\n",
+ " )\n",
+ "\n",
+ " # Overwrite both the value and the format of each header cell\n",
+ " # This is because xlsxwriter / pandas has a known bug where it can't wrap text for a dataframe.\n",
+ " # See https://stackoverflow.com/questions/42562977/xlsxwriter-text-wrap-not-working.\n",
+ " for col_num, value in enumerate(state_distribution_df.columns.values):\n",
+ " worksheet.write(0, col_num, value, header_format)\n",
+ "\n",
+ " writer.save()\n",
+ "\n",
+ "\n",
+ "fields_to_analyze = [index.priority_communities_field for index in census_tract_indices]\n",
+ "\n",
+    "# Convert all index fields to boolean, treating missing values as False (nulls are filled with 0 before the cast)\n",
+ "for field_to_analyze in fields_to_analyze:\n",
+ " if \"Areas of Concern\" in field_to_analyze:\n",
+ " print(f\"Converting {field_to_analyze} to boolean.\")\n",
+ "\n",
+ " merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(value=0)\n",
+ " merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n",
+ "\n",
+ "\n",
+ "state_fips_codes = get_state_information(DATA_DIR)\n",
+ "\n",
+ "merged_with_state_information_df = merged_df.merge(\n",
+ " right=state_fips_codes, left_on=GEOID_STATE_FIELD_NAME, right_on=\"fips\"\n",
+ ")\n",
+ "\n",
+ "state_distribution_df = get_state_distributions(\n",
+ " df=merged_with_state_information_df,\n",
+ " priority_communities_fields=fields_to_analyze,\n",
+ ")\n",
+ "\n",
+ "file_prefix = \"Priority Tracts – Different geographic groupings\"\n",
+ "\n",
+ "state_distribution_df.to_csv(\n",
+ " path_or_buf=COMPARISON_OUTPUTS_DIR / f\"{file_prefix}.csv\",\n",
+ " na_rep=\"\",\n",
+ " index=False,\n",
+ ")\n",
+ "\n",
+ "write_state_distribution_excel(\n",
+ " state_distribution_df=state_distribution_df,\n",
+ " file_path=COMPARISON_OUTPUTS_DIR / f\"{file_prefix}.xlsx\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "2bcbcabf",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Basic stats for Definition M\n",
+ "Basic stats for Definition L\n",
+ "Basic stats for Climate Factor (Definition M)\n",
+ "Basic stats for Energy Factor (Definition M)\n",
+ "Basic stats for Transportation Factor (Definition M)\n",
+ "Basic stats for Housing Factor (Definition M)\n",
+ "Basic stats for Pollution Factor (Definition M)\n",
+ "Basic stats for Water Factor (Definition M)\n",
+ "Basic stats for Health Factor (Definition M)\n",
+ "Basic stats for Workforce Factor (Definition M)\n",
+ "Basic stats for Any Non-Workforce Factor (Definition M)\n",
+ "Basic stats for Climate Factor (Definition L)\n",
+ "Basic stats for Energy Factor (Definition L)\n",
+ "Basic stats for Transportation Factor (Definition L)\n",
+ "Basic stats for Housing Factor (Definition L)\n",
+ "Basic stats for Pollution Factor (Definition L)\n",
+ "Basic stats for Water Factor (Definition L)\n",
+ "Basic stats for Health Factor (Definition L)\n",
+ "Basic stats for Workforce Factor (Definition L)\n",
+ "Basic stats for Any Non-Workforce Factor (Definition L)\n",
+ "Basic stats for NMTC Modified\n",
+ "Basic stats for NMTC\n",
+ "Basic stats for Score C\n",
+ "Basic stats for Score D (30th percentile)\n",
+ "Basic stats for Score D (25th percentile)\n",
+ "Basic stats for Score F\n",
+ "Basic stats for CalEnviroScreen 4.0\n",
+ "Basic stats for Mapping for EJ\n",
+ "Basic stats for EPA RSEI Aggregate Microdata\n",
+ "Basic stats for Persistent Poverty\n",
+ "Basic stats for Maryland EJSCREEN\n",
+ "Basic stats for Energy-related alternative definition of communities\n",
+ "Basic stats for CDC SVI Index\n",
+ "Basic stats for Michigan EJSCREEN\n",
+ "Basic stats for Tract is >20% HOLC Grade D\n",
+ "Basic stats for Tract is >50% HOLC Grade D\n",
+ "Basic stats for Tract is >75% HOLC Grade D\n"
+ ]
+ }
+ ],
+ "source": [
+ "directory = COMPARISON_OUTPUTS_DIR / \"tracts_basic_stats\"\n",
+ "directory.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "# TODO: this Excel-writing function is extremely similar to other Excel-writing functions in this notebook.\n",
+ "# Refactor to use the same Excel-writing function.\n",
+ "def write_basic_stats_excel(\n",
+ " basic_stats_df: pd.DataFrame, file_path: pathlib.PosixPath\n",
+ ") -> None:\n",
+ " \"\"\"Write the dataframe to excel with special formatting.\"\"\"\n",
+ " # Create a Pandas Excel writer using XlsxWriter as the engine.\n",
+ " writer = pd.ExcelWriter(file_path, engine=\"xlsxwriter\")\n",
+ "\n",
+ " # Convert the dataframe to an XlsxWriter Excel object. We also turn off the\n",
+ " # index column at the left of the output dataframe.\n",
+ " basic_stats_df.to_excel(writer, sheet_name=\"Sheet1\", index=False)\n",
+ "\n",
+ " # Get the xlsxwriter workbook and worksheet objects.\n",
+ " workbook = writer.book\n",
+ " worksheet = writer.sheets[\"Sheet1\"]\n",
+ " worksheet.autofilter(0, 0, basic_stats_df.shape[0], basic_stats_df.shape[1])\n",
+ "\n",
+ " # Set a width parameter for all columns\n",
+ " # Note: this is parameterized because every call to `set_column` requires setting the width.\n",
+ " column_width = 15\n",
+ "\n",
+ " for column in basic_stats_df.columns:\n",
+ " # Turn the column index into excel ranges (e.g., column #95 is \"CR\" and the range may be \"CR2:CR53\").\n",
+ " column_index = basic_stats_df.columns.get_loc(column)\n",
+ " column_character = get_excel_column_name(column_index)\n",
+ "\n",
+ " # Set all columns to larger width\n",
+ " worksheet.set_column(f\"{column_character}:{column_character}\", column_width)\n",
+ "\n",
+ " # Add green to red conditional formatting.\n",
+ " column_ranges = f\"{column_character}2:{column_character}{len(basic_stats_df)+1}\"\n",
+ " worksheet.conditional_format(\n",
+ " column_ranges,\n",
+ " # Min: green, max: red.\n",
+ " {\n",
+ " \"type\": \"2_color_scale\",\n",
+ " \"min_color\": \"#00FF7F\",\n",
+ " \"max_color\": \"#C82538\",\n",
+ " },\n",
+ " )\n",
+ "\n",
+ " # Special formatting for all percent columns\n",
+ " # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n",
+ " if \"percent \" in column or \"(percent)\" in column or \"Percent \" in column:\n",
+ " # Make these columns percentages.\n",
+ " percentage_format = workbook.add_format({\"num_format\": \"0%\"})\n",
+ " worksheet.set_column(\n",
+ " f\"{column_character}:{column_character}\",\n",
+ " column_width,\n",
+ " percentage_format,\n",
+ " )\n",
+ "\n",
+ " header_format = workbook.add_format(\n",
+ " {\"bold\": True, \"text_wrap\": True, \"valign\": \"bottom\"}\n",
+ " )\n",
+ "\n",
+ " # Overwrite both the value and the format of each header cell\n",
+ " # This is because xlsxwriter / pandas has a known bug where it can't wrap text for a dataframe.\n",
+ " # See https://stackoverflow.com/questions/42562977/xlsxwriter-text-wrap-not-working.\n",
+ " for col_num, value in enumerate(basic_stats_df.columns.values):\n",
+ " worksheet.write(0, col_num, value, header_format)\n",
+ "\n",
+ " writer.save()\n",
+ "\n",
+ "\n",
+ "for index in census_tract_indices:\n",
+ " print(f\"Basic stats for {index.method_name}\")\n",
+ " temp_df = merged_df\n",
+ " temp_df[index.priority_communities_field] = (\n",
+ " temp_df[index.priority_communities_field] == True\n",
+ " )\n",
+ "\n",
+ " grouped_df = temp_df.groupby(index.priority_communities_field).mean().reset_index()\n",
+ " result_df = grouped_df[[index.priority_communities_field] + comparison_fields]\n",
+ " result_df.to_csv(directory / f\"{index.method_name} Basic Stats.csv\", index=False)\n",
+ " write_basic_stats_excel(\n",
+ " basic_stats_df=result_df,\n",
+ " file_path=directory / f\"{index.method_name} Basic Stats.xlsx\",\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "d1eec560",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Definition L', priority_communities_field='Definition L (communities)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Definition M', priority_communities_field='Definition M (communities)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Definition L', priority_communities_field='Definition L (communities)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Climate Factor (Definition M)', priority_communities_field='Climate Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy Factor (Definition M)', priority_communities_field='Energy Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition M)', priority_communities_field='Transportation Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Housing Factor (Definition M)', priority_communities_field='Housing Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition M)', priority_communities_field='Pollution Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Water Factor (Definition M)', priority_communities_field='Water Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Health Factor (Definition M)', priority_communities_field='Health Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition M)', priority_communities_field='Workforce Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition M)', priority_communities_field='Any Non-Workforce Factor (Definition M)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Climate Factor (Definition L)', priority_communities_field='Climate Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy Factor (Definition L)', priority_communities_field='Energy Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Transportation Factor (Definition L)', priority_communities_field='Transportation Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Housing Factor (Definition L)', priority_communities_field='Housing Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Pollution Factor (Definition L)', priority_communities_field='Pollution Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Water Factor (Definition L)', priority_communities_field='Water Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Health Factor (Definition L)', priority_communities_field='Health Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Workforce Factor (Definition L)', priority_communities_field='Workforce Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Any Non-Workforce Factor (Definition L)', priority_communities_field='Any Non-Workforce Factor (Definition L)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='NMTC', priority_communities_field='NMTC (communities)').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='NMTC Modified', priority_communities_field='Score G (communities)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='NMTC', priority_communities_field='NMTC (communities)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score C', priority_communities_field='Score C (top 25th percentile)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score D (30th percentile)', priority_communities_field='Score D (top 30th percentile)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Score F', priority_communities_field='Score F (communities)').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score D (25th percentile)', priority_communities_field='Score D (top 25th percentile)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Score F', priority_communities_field='Score F (communities)') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='CalEnviroScreen 4.0', priority_communities_field='calenviroscreen_priority_community') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Mapping for EJ', priority_communities_field='Mapping for Environmental Justice Priority Community') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='EPA RSEI Aggregate Microdata', priority_communities_field='At or above 75 for overall percentile for the RSEI score') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Persistent Poverty', priority_communities_field='Persistent Poverty, Tract Level') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community') and Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities').\n",
+ "Comparing Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Maryland EJSCREEN', priority_communities_field='Maryland EJSCREEN Priority Community') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities') and Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices').\n",
+ "Comparing Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Energy-related alternative definition of communities', priority_communities_field='Energy-related alternative definition of communities') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices') and Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community').\n",
+ "Comparing Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='CDC SVI Index', priority_communities_field='At or above 90 for overall percentile ranking according to Social Vulnerability Indices') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community') and Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D').\n",
+ "Comparing Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Michigan EJSCREEN', priority_communities_field='Michigan EJSCREEN Priority Community') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D') and Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D').\n",
+ "Comparing Index(method_name='Tract is >20% HOLC Grade D', priority_communities_field='Tract is >20% HOLC Grade D') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n",
+ "Comparing Index(method_name='Tract is >50% HOLC Grade D', priority_communities_field='Tract is >50% HOLC Grade D') and Index(method_name='Tract is >75% HOLC Grade D', priority_communities_field='Tract is >75% HOLC Grade D').\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Compare census tract scores to each other, running secondary analysis on\n",
+ "# characteristics of census tracts prioritized by one but not the other.\n",
+ "def get_census_tracts_score_comparison_df(\n",
+ " df: pd.DataFrame,\n",
+ " method_a_priority_census_tracts_field: str,\n",
+ " method_b_priority_census_tracts_field: str,\n",
+ " comparison_fields: typing.List[str],\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"Compare tract scores to each other.\n",
+ "\n",
+ " This comparison method analyzes characteristics of census tracts, grouped by whether or not they are\n",
+ " prioritized by Method A and/or Method B.\n",
+ "\n",
+ " E.g., it might show that tracts prioritized by A but not B have a higher average income,\n",
+ " or that tracts prioritized by B but not A have a lower percent of unemployed people.\n",
+ " \"\"\"\n",
+ " fields_to_group_by = [\n",
+ " method_a_priority_census_tracts_field,\n",
+ " method_b_priority_census_tracts_field,\n",
+ " ]\n",
+ "\n",
+ " df_subset = df[fields_to_group_by + comparison_fields]\n",
+ "\n",
+ " grouped_df = df_subset.groupby(\n",
+ " fields_to_group_by,\n",
+ " dropna=False,\n",
+ " )\n",
+ "\n",
+ " # Take the mean of all fields.\n",
+ " comparison_df = grouped_df.mean()\n",
+ "\n",
+ " # Also add in the count of census tracts.\n",
+ " count_field_name = \"Count of census tracts\"\n",
+ " comparison_df[count_field_name] = grouped_df.size().to_frame(count_field_name)\n",
+ "\n",
+ " comparison_df = comparison_df.reset_index()\n",
+ "\n",
+ " criteria_description_field_name = \"Description of criteria\"\n",
+ " comparison_df[criteria_description_field_name] = comparison_df.apply(\n",
+ " func=lambda row: f\"Tracts that are {'not ' if row[method_a_priority_census_tracts_field] is False else ''}\"\n",
+ " + f\"prioritized by {method_a_priority_census_tracts_field} \"\n",
+ " + f\"and are {'not ' if row[method_b_priority_census_tracts_field] is False else ''}\"\n",
+ " + f\"prioritized by {method_b_priority_census_tracts_field}\",\n",
+ " axis=1,\n",
+ " )\n",
+ "\n",
+ " # Put criteria description column first.\n",
+ " columns_to_put_first = (\n",
+ " [criteria_description_field_name] + fields_to_group_by + [count_field_name]\n",
+ " )\n",
+ " new_column_order = columns_to_put_first + [\n",
+ " col for col in comparison_df.columns if col not in columns_to_put_first\n",
+ " ]\n",
+ "\n",
+ " comparison_df = comparison_df[new_column_order]\n",
+ "\n",
+ " # Rename fields to reflect the mean aggregation\n",
+ " comparison_df.rename(\n",
+ " mapper={\n",
+ " comparison_field: f\"{comparison_field} (mean of tracts)\"\n",
+ " for comparison_field in comparison_fields\n",
+ " },\n",
+ " axis=1,\n",
+ " inplace=True,\n",
+ " )\n",
+ "\n",
+ " return comparison_df\n",
+ "\n",
+ "\n",
+ "def write_census_tracts_score_comparison_excel(\n",
+ " census_tracts_score_comparison_df: pd.DataFrame,\n",
+ " file_path: pathlib.PosixPath,\n",
+ ") -> None:\n",
+ " \"\"\"Write the dataframe to excel with special formatting.\"\"\"\n",
+ " # Create a Pandas Excel writer using XlsxWriter as the engine.\n",
+ " writer = pd.ExcelWriter(file_path, engine=\"xlsxwriter\")\n",
+ "\n",
+ " # Convert the dataframe to an XlsxWriter Excel object. We also turn off the\n",
+ " # index column at the left of the output dataframe.\n",
+ " census_tracts_score_comparison_df.to_excel(writer, sheet_name=\"Sheet1\", index=False)\n",
+ "\n",
+ " # Get the xlsxwriter workbook and worksheet objects.\n",
+ " workbook = writer.book\n",
+ " worksheet = writer.sheets[\"Sheet1\"]\n",
+ " worksheet.autofilter(\n",
+ " 0,\n",
+ " 0,\n",
+ " census_tracts_score_comparison_df.shape[0],\n",
+ " census_tracts_score_comparison_df.shape[1],\n",
+ " )\n",
+ "\n",
+ " # Set a width parameter for all columns\n",
+ " # Note: this is parameterized because every call to `set_column` requires setting the width.\n",
+ " column_width = 15\n",
+ "\n",
+ " for column in census_tracts_score_comparison_df.columns:\n",
+ " # Turn the column index into excel ranges (e.g., column #95 is \"CR\" and the range may be \"CR2:CR53\").\n",
+ " column_index = census_tracts_score_comparison_df.columns.get_loc(column)\n",
+ " column_character = get_excel_column_name(column_index)\n",
+ "\n",
+ " # Set all columns to larger width\n",
+ " worksheet.set_column(f\"{column_character}:{column_character}\", column_width)\n",
+ "\n",
+ " # Add green to red conditional formatting.\n",
+ " column_ranges = f\"{column_character}2:{column_character}{len(census_tracts_score_comparison_df)+1}\"\n",
+ " worksheet.conditional_format(\n",
+ " column_ranges,\n",
+ " # Min: green, max: red.\n",
+ " {\n",
+ " \"type\": \"2_color_scale\",\n",
+ " \"min_color\": \"#00FF7F\",\n",
+ " \"max_color\": \"#C82538\",\n",
+ " },\n",
+ " )\n",
+ "\n",
+ " # Special formatting for all percent columns\n",
+ " # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n",
+ " if \"percent \" in column or \"(percent)\" in column or \"Percent \" in column:\n",
+ " # Make these columns percentages.\n",
+ " percentage_format = workbook.add_format({\"num_format\": \"0%\"})\n",
+ " worksheet.set_column(\n",
+ " f\"{column_character}:{column_character}\",\n",
+ " column_width,\n",
+ " percentage_format,\n",
+ " )\n",
+ "\n",
+ " header_format = workbook.add_format(\n",
+ " {\"bold\": True, \"text_wrap\": True, \"valign\": \"bottom\"}\n",
+ " )\n",
+ "\n",
+ " # Overwrite both the value and the format of each header cell\n",
+ " # This is because xlsxwriter / pandas has a known bug where it can't wrap text for a dataframe.\n",
+ " # See https://stackoverflow.com/questions/42562977/xlsxwriter-text-wrap-not-working.\n",
+ " for col_num, value in enumerate(census_tracts_score_comparison_df.columns.values):\n",
+ " worksheet.write(0, col_num, value, header_format)\n",
+ "\n",
+ " writer.save()\n",
+ "\n",
+ "\n",
+ "def compare_census_tracts_scores(\n",
+ " df: pd.DataFrame,\n",
+ " index_a: Index,\n",
+ " index_b: Index,\n",
+ " output_dir: pathlib.PosixPath,\n",
+ " comparison_fields: typing.List[str],\n",
+ "):\n",
+ " # Secondary comparison DF\n",
+ " census_tracts_score_comparison_df = get_census_tracts_score_comparison_df(\n",
+ " df=df,\n",
+ " method_a_priority_census_tracts_field=index_a.priority_communities_field,\n",
+ " method_b_priority_census_tracts_field=index_b.priority_communities_field,\n",
+ " comparison_fields=comparison_fields,\n",
+ " )\n",
+ "\n",
+ " # Write secondary comparison to both CSV and Excel files.\n",
+ " file_name_part = f\"Census tracts comparison output - {index_a.method_name} and {index_b.method_name}\"\n",
+ " output_dir.mkdir(parents=True, exist_ok=True)\n",
+ " file_path = output_dir / (file_name_part + \".csv\")\n",
+ " file_path_xlsx = output_dir / (file_name_part + \".xlsx\")\n",
+ "\n",
+ " census_tracts_score_comparison_df.to_csv(\n",
+ " path_or_buf=file_path,\n",
+ " na_rep=\"\",\n",
+ " index=False,\n",
+ " )\n",
+ "\n",
+ " write_census_tracts_score_comparison_excel(\n",
+ " census_tracts_score_comparison_df=census_tracts_score_comparison_df,\n",
+ " file_path=file_path_xlsx,\n",
+ " )\n",
+ "\n",
+ "\n",
+ "for (index_a, index_b) in itertools.combinations(census_tract_indices, 2):\n",
+ " print(f\"Comparing {index_a} and {index_b}.\")\n",
+ " compare_census_tracts_scores(\n",
+ " df=merged_df,\n",
+ " index_a=index_a,\n",
+ " index_b=index_b,\n",
+ " comparison_fields=comparison_fields,\n",
+ " output_dir=COMPARISON_OUTPUTS_DIR / \"census_tracts_score_comparisons\",\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "48005fad",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/z6/9czv8cpx2hvcycd6slvfp4ph0000gq/T/ipykernel_35872/2594933895.py:125: RuntimeWarning: invalid value encountered in long_scalars\n",
+ " f\"{true_true_census_tracts} ({true_true_census_tracts / total_census_tracts:.0%}) \"\n",
+ "/var/folders/z6/9czv8cpx2hvcycd6slvfp4ph0000gq/T/ipykernel_35872/2594933895.py:128: RuntimeWarning: invalid value encountered in long_scalars\n",
+ " f\"{true_false_census_tracts} ({true_false_census_tracts / total_census_tracts:.0%}) \"\n",
+ "/var/folders/z6/9czv8cpx2hvcycd6slvfp4ph0000gq/T/ipykernel_35872/2594933895.py:131: RuntimeWarning: invalid value encountered in long_scalars\n",
+ " f\"{false_true_census_tracts} ({false_true_census_tracts / total_census_tracts:.0%}) \"\n",
+ "/var/folders/z6/9czv8cpx2hvcycd6slvfp4ph0000gq/T/ipykernel_35872/2594933895.py:134: RuntimeWarning: invalid value encountered in long_scalars\n",
+ " f\"{false_false_census_tracts} ({false_false_census_tracts / total_census_tracts:.0%}) \"\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/Users/emmausds/Desktop/justice40-tool/data/data-pipeline/data_pipeline/data/comparison_outputs/20220217-115053/Comparison report - All census tract indices.docx')"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def write_markdown_and_docx_content(\n",
+ " markdown_content: str,\n",
+ " file_dir: pathlib.PosixPath,\n",
+ " file_name_without_extension: str,\n",
+ ") -> pathlib.PosixPath:\n",
+ " \"\"\"Write Markdown content to both .md and .docx files.\"\"\"\n",
+ " # Set the file paths for both files.\n",
+ " markdown_file_path = file_dir / f\"{file_name_without_extension}.md\"\n",
+ " docx_file_path = file_dir / f\"{file_name_without_extension}.docx\"\n",
+ "\n",
+ " # Write the markdown content to file.\n",
+ " with open(markdown_file_path, \"w\") as text_file:\n",
+ " text_file.write(markdown_content)\n",
+ "\n",
+ " # Convert markdown file to Word doc.\n",
+ " pypandoc.convert_file(\n",
+ " source_file=str(markdown_file_path),\n",
+ " to=\"docx\",\n",
+ " outputfile=str(docx_file_path),\n",
+ " extra_args=[],\n",
+ " )\n",
+ "\n",
+ " return docx_file_path\n",
+ "\n",
+ "\n",
+ "def get_markdown_comparing_census_tract_indices(\n",
+ " census_tract_indices=typing.List[Index],\n",
+ " df=pd.DataFrame,\n",
+ " state_field=GEOID_STATE_FIELD_NAME,\n",
+ ") -> str:\n",
+ " \"\"\"Generate a Markdown string of analysis of multiple census tract indices.\"\"\"\n",
+ " count_field_name = \"Count of census tracts\"\n",
+ "\n",
+ " # Create markdown content for comparisons.\n",
+ " markdown_content = f\"\"\"\n",
+ "# Comparing multiple indices at the census tract level\n",
+ "\n",
+ "(This report was calculated on {datetime.today().strftime('%Y-%m-%d')}.)\n",
+ "\n",
+ "This report compares the following indices: {\", \".join([index.method_name for index in census_tract_indices])}.\n",
+ "\n",
+ "\"\"\"\n",
+ "\n",
+ " for (index1, index2) in itertools.combinations(census_tract_indices, 2):\n",
+ " # First, find out geographic overlap in indices by finding all state and territory\n",
+ " # names where both indices are not null.\n",
+ " df_subset_for_states = df[\n",
+ " [\n",
+ " state_field,\n",
+ " index1.priority_communities_field,\n",
+ " index2.priority_communities_field,\n",
+ " ]\n",
+ " ]\n",
+ "\n",
+ " df_subset_for_states = df_subset_for_states.dropna()\n",
+ "\n",
+ " # Sorted list of all states/territories (identified by their FIPS codes) that remain after dropping nulls:\n",
+ " # TODO: move \"This report analyzes the following US states and territories\" inside the comparison?\n",
+ " state_ids = sorted(df_subset_for_states[state_field].unique())\n",
+ " this_comparison_state_names = \", \".join(\n",
+ " [us.states.lookup(state_id).name for state_id in state_ids]\n",
+ " )\n",
+ "\n",
+ " # Count the census tracts for each combination of values of the priority communities fields of index1 and index2.\n",
+ " count_df = (\n",
+ " df.groupby(\n",
+ " [\n",
+ " index1.priority_communities_field,\n",
+ " index2.priority_communities_field,\n",
+ " ]\n",
+ " )[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]\n",
+ " .count()\n",
+ " .reset_index(name=count_field_name)\n",
+ " )\n",
+ "\n",
+ " total_census_tracts = count_df[count_field_name].sum()\n",
+ "\n",
+ " # Each of the following selections returns a series, which may be empty.\n",
+ " true_true_census_tracts_series = count_df.loc[\n",
+ " count_df[index1.priority_communities_field]\n",
+ " & count_df[index2.priority_communities_field],\n",
+ " count_field_name,\n",
+ " ]\n",
+ " true_false_census_tracts_series = count_df.loc[\n",
+ " count_df[index1.priority_communities_field]\n",
+ " & ~count_df[index2.priority_communities_field],\n",
+ " count_field_name,\n",
+ " ]\n",
+ " false_true_census_tracts_series = count_df.loc[\n",
+ " ~count_df[index1.priority_communities_field]\n",
+ " & count_df[index2.priority_communities_field],\n",
+ " count_field_name,\n",
+ " ]\n",
+ " false_false_census_tracts_series = count_df.loc[\n",
+ " ~count_df[index1.priority_communities_field]\n",
+ " & ~count_df[index2.priority_communities_field],\n",
+ " count_field_name,\n",
+ " ]\n",
+ "\n",
+ " # Convert each series to a scalar value, defaulting to 0 if no data exists for that pairing.\n",
+ " true_true_census_tracts = (\n",
+ " true_true_census_tracts_series.iloc[0]\n",
+ " if len(true_true_census_tracts_series) > 0\n",
+ " else 0\n",
+ " )\n",
+ " true_false_census_tracts = (\n",
+ " true_false_census_tracts_series.iloc[0]\n",
+ " if len(true_false_census_tracts_series) > 0\n",
+ " else 0\n",
+ " )\n",
+ " false_true_census_tracts = (\n",
+ " false_true_census_tracts_series.iloc[0]\n",
+ " if len(false_true_census_tracts_series) > 0\n",
+ " else 0\n",
+ " )\n",
+ " false_false_census_tracts = (\n",
+ " false_false_census_tracts_series.iloc[0]\n",
+ " if len(false_false_census_tracts_series) > 0\n",
+ " else 0\n",
+ " )\n",
+ "\n",
+ " markdown_content += (\n",
+ " \"*** \\n\\n\"\n",
+ " \"There are \"\n",
+ " f\"{true_true_census_tracts} ({true_true_census_tracts / total_census_tracts:.0%}) \"\n",
+ " f\"census tracts that are both {index1.method_name} priority communities and {index2.method_name} priority communities.\\n\\n\"\n",
+ " \"There are \"\n",
+ " f\"{true_false_census_tracts} ({true_false_census_tracts / total_census_tracts:.0%}) \"\n",
+ " f\"census tracts that are {index1.method_name} priority communities but not {index2.method_name} priority communities.\\n\\n\"\n",
+ " \"There are \"\n",
+ " f\"{false_true_census_tracts} ({false_true_census_tracts / total_census_tracts:.0%}) \"\n",
+ " f\"census tracts that are not {index1.method_name} priority communities but are {index2.method_name} priority communities.\\n\\n\"\n",
+ " \"There are \"\n",
+ " f\"{false_false_census_tracts} ({false_false_census_tracts / total_census_tracts:.0%}) \"\n",
+ " f\"census tracts that are neither {index1.method_name} priority communities nor {index2.method_name} priority communities.\\n\\n\"\n",
+ " f\"This comparison analyzed the following US states and territories: {this_comparison_state_names}.\\n\\n\"\n",
+ " \"\\n\\n\"\n",
+ " )\n",
+ "\n",
+ " return markdown_content\n",
+ "\n",
+ "\n",
+ "def get_comparison_census_tract_indices(\n",
+ " census_tract_indices=typing.List[Index],\n",
+ " df=pd.DataFrame,\n",
+ " state_field=GEOID_STATE_FIELD_NAME,\n",
+ ") -> pathlib.PosixPath:\n",
+ " markdown_content = get_markdown_comparing_census_tract_indices(\n",
+ " census_tract_indices=census_tract_indices,\n",
+ " df=df,\n",
+ " )\n",
+ "\n",
+ " comparison_docx_file_path = write_markdown_and_docx_content(\n",
+ " markdown_content=markdown_content,\n",
+ " file_dir=COMPARISON_OUTPUTS_DIR,\n",
+ " file_name_without_extension=f\"Comparison report - All census tract indices\",\n",
+ " )\n",
+ "\n",
+ " return comparison_docx_file_path\n",
+ "\n",
+ "\n",
+ "# Compare multiple scores at the census tract level\n",
+ "get_comparison_census_tract_indices(\n",
+ " census_tract_indices=census_tract_indices,\n",
+ " df=merged_with_state_information_df,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "7d095ebd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Note: this is helpful because this file is long-running, so it alerts the user when the\n",
+ "# data analysis is done. Can be removed when converted into scripts. -LMB.\n",
+ "import os\n",
+ "\n",
+ "os.system(\"say 'data analysis is written.'\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "40821ff3-0a06-4881-a33c-f9db487ac3b2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}