From 78615e9b1ac63cb4ea44d5ffc08acbdeb984e70f Mon Sep 17 00:00:00 2001 From: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com> Date: Thu, 17 Jun 2021 18:12:39 -0400 Subject: [PATCH] ACS data baked in for map (#153) * starting etl for score * projection fix * projection flags * proper ejscreen etl csv generation * failing CSV merge -- investigating * checkpoint * some etl changes * completed ticket * small typo --- .gitignore | 2 + score/__init__.py | 0 score/data/dataset/ejscreen_2020/__init__.py | 0 score/data/score/geojson/__init__.py | 0 score/data/tmp/__init__.py | 0 score/ipython/ejscreen_etl.ipynb | 406 ++++++------------- score/ipython/score_calc_0.1.ipynb | 100 +++++ score/scripts/__init__.py | 0 score/scripts/download_cbg.py | 115 +++--- score/scripts/generate_mbtiles.py | 34 +- score/scripts/utils.py | 20 + 11 files changed, 321 insertions(+), 356 deletions(-) create mode 100644 score/__init__.py create mode 100644 score/data/dataset/ejscreen_2020/__init__.py create mode 100644 score/data/score/geojson/__init__.py create mode 100644 score/data/tmp/__init__.py create mode 100644 score/ipython/score_calc_0.1.ipynb create mode 100644 score/scripts/__init__.py create mode 100644 score/scripts/utils.py diff --git a/.gitignore b/.gitignore index 6d4c3ea1..12dd1862 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,5 @@ cython_debug/ score/data/census score/data/tiles score/data/tmp +score/data/dataset +score/data/score diff --git a/score/__init__.py b/score/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/data/dataset/ejscreen_2020/__init__.py b/score/data/dataset/ejscreen_2020/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/data/score/geojson/__init__.py b/score/data/score/geojson/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/data/tmp/__init__.py b/score/data/tmp/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/ipython/ejscreen_etl.ipynb b/score/ipython/ejscreen_etl.ipynb index 3b24556b..5a95a935 100644 --- a/score/ipython/ejscreen_etl.ipynb +++ b/score/ipython/ejscreen_etl.ipynb @@ -2,41 +2,27 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, - "id": "f4d63367", + "execution_count": 1, + "id": "20aa3891", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", + "import requests\n", + "import zipfile\n", + "import numpy as np\n", + "import pandas as pd\n", + "import csv\n", "\n", - "data_path = Path.cwd().parent / \"data\" / \"tmp\"" + "data_path = Path.cwd().parent / \"data\"\n", + "fips_csv_path = data_path / \"fips_states_2010.csv\"\n", + "csv_path = data_path / \"dataset\" / \"ejscreen_2020\"" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "0e6eb55e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "WindowsPath('C:/opt/justice40-tool/score/data/tmp')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_path" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "a1431996", + "execution_count": 3, + "id": "67a58c24", "metadata": {}, "outputs": [ { @@ -49,304 +35,144 @@ } ], "source": [ - "import requests\n", "download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n", "file_contents = download.content\n", - "zip_file_path = data_path / \"downloaded.zip\"\n", - "zip_file = open(zip_file_path, \"wb\")\n", + "zip_file_path = data_path / \"tmp\"\n", + 
"zip_file = open(zip_file_path / \"downloaded.zip\", \"wb\")\n", "zip_file.write(file_contents)\n", "zip_file.close()" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "bc5f3466", + "execution_count": 4, + "id": "cc3fb9ec", "metadata": {}, "outputs": [], "source": [ - "import zipfile\n", - "with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n", - " zip_ref.extractall(data_path)\n", - "ejscreen_csv = data_path / \"EJSCREEN_2020_StatePctile.csv\"" + "with zipfile.ZipFile(zip_file_path / \"downloaded.zip\", \"r\") as zip_ref:\n", + " zip_ref.extractall(zip_file_path)\n", + "ejscreen_csv = data_path / \"tmp\" / \"EJSCREEN_2020_StatePctile.csv\"" ] }, { "cell_type": "code", - "execution_count": 22, - "id": "392ccb67", + "execution_count": 5, + "id": "b25738bb", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
OBJECTIDIDSTATE_NAMEST_ABBREVREGIONACSTOTPOPD_PM25_2B_PM25_D2P_PM25_D2D_OZONE_2...T_PNPLT_PNPL_D2T_PRMPT_PRMP_D2T_PTSDFT_PTSDF_D2T_PWDIST_PWDIS_D2Shape_LengthShape_Area
0110010201001AlabamaAL4636-492.025529412652.0-1866.38637046...0.071 facilities/km distance (79%ile)40%ile0.085 facilities/km distance (23%ile)53%ile0.59 facilities/km distance (57%ile)38%ileNoneNone13443.1552066.040790e+06
1210010201002AlabamaAL41287-2053.08341364430.0-7787.90260177...0.064 facilities/km distance (76%ile)19%ile0.074 facilities/km distance (17%ile)42%ile0.45 facilities/km distance (52%ile)23%ileNoneNone11917.0895987.834160e+06
2310010202001AlabamaAL48101846.12693767875.07002.78371663...0.069 facilities/km distance (78%ile)85%ile0.078 facilities/km distance (20%ile)67%ile0.65 facilities/km distance (59%ile)77%ileNoneNone7770.9151212.900774e+06
3410010202002AlabamaAL412181392.07530488872.05280.46153188...0.076 facilities/km distance (81%ile)83%ile0.087 facilities/km distance (24%ile)66%ile1 facilities/km distance (69%ile)78%ileNoneNone6506.8047841.793332e+06
4510010203001AlabamaAL42641-769.374640358548.0-2911.8926061...0.074 facilities/km distance (80%ile)32%ile0.08 facilities/km distance (21%ile)51%ile1.2 facilities/km distance (74%ile)24%ileNoneNone11070.3678485.461602e+06
\n", - "

5 rows × 124 columns

\n", - "
" - ], - "text/plain": [ - " OBJECTID ID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n", - "0 1 10010201001 Alabama AL 4 636 \n", - "1 2 10010201002 Alabama AL 4 1287 \n", - "2 3 10010202001 Alabama AL 4 810 \n", - "3 4 10010202002 Alabama AL 4 1218 \n", - "4 5 10010203001 Alabama AL 4 2641 \n", - "\n", - " D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n", - "0 -492.025529412 6 52.0 -1866.38637046 ... \n", - "1 -2053.08341364 4 30.0 -7787.90260177 ... \n", - "2 1846.12693767 8 75.0 7002.78371663 ... \n", - "3 1392.07530488 8 72.0 5280.46153188 ... \n", - "4 -769.374640358 5 48.0 -2911.8926061 ... \n", - "\n", - " T_PNPL T_PNPL_D2 \\\n", - "0 0.071 facilities/km distance (79%ile) 40%ile \n", - "1 0.064 facilities/km distance (76%ile) 19%ile \n", - "2 0.069 facilities/km distance (78%ile) 85%ile \n", - "3 0.076 facilities/km distance (81%ile) 83%ile \n", - "4 0.074 facilities/km distance (80%ile) 32%ile \n", - "\n", - " T_PRMP T_PRMP_D2 \\\n", - "0 0.085 facilities/km distance (23%ile) 53%ile \n", - "1 0.074 facilities/km distance (17%ile) 42%ile \n", - "2 0.078 facilities/km distance (20%ile) 67%ile \n", - "3 0.087 facilities/km distance (24%ile) 66%ile \n", - "4 0.08 facilities/km distance (21%ile) 51%ile \n", - "\n", - " T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n", - "0 0.59 facilities/km distance (57%ile) 38%ile None None \n", - "1 0.45 facilities/km distance (52%ile) 23%ile None None \n", - "2 0.65 facilities/km distance (59%ile) 77%ile None None \n", - "3 1 facilities/km distance (69%ile) 78%ile None None \n", - "4 1.2 facilities/km distance (74%ile) 24%ile None None \n", - "\n", - " Shape_Length Shape_Area \n", - "0 13443.155206 6.040790e+06 \n", - "1 11917.089598 7.834160e+06 \n", - "2 7770.915121 2.900774e+06 \n", - "3 6506.804784 1.793332e+06 \n", - "4 11070.367848 5.461602e+06 \n", - "\n", - "[5 rows x 124 columns]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "df = pd.read_csv(ejscreen_csv, low_memory=False)\n", - "df.head()" + "df = pd.read_csv(ejscreen_csv, dtype={'ID': 'string'}, low_memory=False)" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "0ce9e22a", + "execution_count": 6, + "id": "e6994f2d", + "metadata": {}, + "outputs": [], + "source": [ + "df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9fa2077a", + "metadata": {}, + "outputs": [], + "source": [ + "# write nationwide csv\n", + "df.to_csv(csv_path / f\"usa.csv\", index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5e5cc12a", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Generating data01 csv\n", + "Generating data02 csv\n", + "Generating data04 csv\n", + "Generating data05 csv\n", + "Generating data06 csv\n", + "Generating data08 csv\n", + "Generating data09 csv\n", + "Generating data10 csv\n", + "Generating data11 csv\n", + "Generating data12 csv\n", + "Generating data13 csv\n", + "Generating data15 csv\n", + "Generating data16 csv\n", + "Generating data17 csv\n", + "Generating data18 csv\n", + "Generating data19 csv\n", + "Generating data20 csv\n", + "Generating data21 csv\n", + "Generating data22 csv\n", + "Generating data23 csv\n", + "Generating data24 csv\n", + "Generating data25 csv\n", + 
"Generating data26 csv\n", + "Generating data27 csv\n", + "Generating data28 csv\n", + "Generating data29 csv\n", + "Generating data30 csv\n", + "Generating data31 csv\n", + "Generating data32 csv\n", + "Generating data33 csv\n", + "Generating data34 csv\n", + "Generating data35 csv\n", + "Generating data36 csv\n", + "Generating data37 csv\n", + "Generating data38 csv\n", + "Generating data39 csv\n", + "Generating data40 csv\n", + "Generating data41 csv\n", + "Generating data42 csv\n", + "Generating data44 csv\n", + "Generating data45 csv\n", + "Generating data46 csv\n", + "Generating data47 csv\n", + "Generating data48 csv\n", + "Generating data49 csv\n", + "Generating data50 csv\n", + "Generating data51 csv\n", + "Generating data53 csv\n", + "Generating data54 csv\n", + "Generating data55 csv\n", + "Generating data56 csv\n" + ] } ], "source": [ - "df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]\n", - "df.head()\n", - "df.count" + "# write per state csvs\n", + "with open(fips_csv_path) as csv_file:\n", + " csv_reader = csv.reader(csv_file, delimiter=\",\")\n", + " line_count = 0\n", + "\n", + " for row in csv_reader:\n", + " if line_count == 0:\n", + " line_count += 1\n", + " else:\n", + " fips = row[0].strip()\n", + " print(f\"Generating data{fips} csv\")\n", + " df1 = df[df.ID.str[:2] == fips]\n", + " # we need to name the file data01.csv for ogr2ogr csv merge to work\n", + " df1.to_csv(csv_path / f\"data{fips}.csv\", index = False)" ] }, { "cell_type": "code", "execution_count": null, - "id": "e051623b", + "id": "2674fb20", "metadata": {}, "outputs": [], "source": [] diff --git a/score/ipython/score_calc_0.1.ipynb b/score/ipython/score_calc_0.1.ipynb new file mode 100644 index 00000000..781bac64 --- /dev/null +++ b/score/ipython/score_calc_0.1.ipynb @@ -0,0 +1,100 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "a664f981", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[WindowsPath('C:/opt/justice40-tool/score/scripts'), WindowsPath('C:/opt/justice40-tool/score/scripts'), WindowsPath('C:/opt/justice40-tool/score/scripts'), 'C:\\\\opt\\\\justice40-tool\\\\score\\\\ipython', 'C:\\\\Python39\\\\python39.zip', 'C:\\\\Python39\\\\DLLs', 'C:\\\\Python39\\\\lib', 'C:\\\\Python39', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv', '', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\win32', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\win32\\\\lib', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\Pythonwin', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\j\\\\.ipython']\n" + ] + }, + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'utils'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mutils\u001b[0m 
\u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mdata_path\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mPath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcwd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;34m\"data\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'utils'" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import pandas as pd\n", + "import csv\n", + "import sys\n", + "\n", + "script_path = Path.cwd().parent / \"scripts\"\n", + "sys.path.insert(0, script_path)\n", + "print(sys.path)\n", + "\n", + "from utils import *\n", + "\n", + "data_path = Path.cwd().parent / \"data\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1b750f0e", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'get_state_fips_codes' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# store all fips codes in list\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mfips_state_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_state_fips_codes\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mfips_state_list\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mNameError\u001b[0m: name 'get_state_fips_codes' is not defined" + ] + } + ], + "source": [ + "# store all fips codes in list\n", + "fips_state_list = get_state_fips_codes\n", + "fips_state_list" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7df430cb", + "metadata": {}, + "outputs": [], + "source": [ + "# EJSCreen ETL Load\n", + "csv_path = data_path / \"dataset\" / \"ejscreen_2020\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/score/scripts/__init__.py b/score/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/scripts/download_cbg.py b/score/scripts/download_cbg.py index ff58451c..b091d431 100644 --- a/score/scripts/download_cbg.py +++ b/score/scripts/download_cbg.py @@ -5,70 +5,64 @@ import os import json from pathlib import Path +from utils import get_state_fips_codes + data_path = Path.cwd() / "data" with requests.Session() as s: # the fips_states_2010.csv is generated from data here # https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html - fips_csv_path = data_path / "fips_states_2010.csv" - with open(fips_csv_path) as csv_file: - csv_reader = csv.reader(csv_file, delimiter=",") - line_count = 0 - for row in csv_reader: - if line_count == 0: - line_count += 1 + state_fips_codes = 
get_state_fips_codes() + for fips in state_fips_codes: + # check if file exists + shp_file_path = data_path.joinpath( + "census", "shp", fips, f"tl_2010_{fips}_bg10.shp" + ) + if not os.path.isfile(shp_file_path): + print(f"downloading {fips}") + + # 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/ + # But using 2010 for now + cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip" + download = s.get(cbg_state_url) + file_contents = download.content + zip_file_path = data_path / "census" / "downloaded.zip" + zip_file = open(zip_file_path, "wb") + zip_file.write(file_contents) + zip_file.close() + + print(f"extracting {fips}") + + with zipfile.ZipFile(zip_file_path, "r") as zip_ref: + shp_dir_path = data_path / "census" / "shp" / fips + zip_ref.extractall(shp_dir_path) + + geojson_dir_path = data_path.joinpath( + "census", + "geojson", + ) + if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")): + # ogr2ogr + print(f"encoding GeoJSON for {fips}") + + # PWD is different for Windows + if os.name == "nt": + pwd = "%cd%" else: - fips = row[0].strip() - - # check if file exists - shp_file_path = data_path.joinpath( - "census", "shp", fips, f"tl_2010_{fips}_bg10.shp" - ) - if not os.path.isfile(shp_file_path): - print(f"downloading {row[1]}") - - # 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/ - # But using 2010 for now - cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip" - download = s.get(cbg_state_url) - file_contents = download.content - zip_file_path = data_path / "census" / "downloaded.zip" - zip_file = open(zip_file_path, "wb") - zip_file.write(file_contents) - zip_file.close() - - print(f"extracting {row[1]}") - - with zipfile.ZipFile(zip_file_path, "r") as zip_ref: - shp_dir_path = data_path / "census" / "shp" / fips - zip_ref.extractall(shp_dir_path) - - geojson_dir_path = data_path.joinpath( - "census", - "geojson", - ) - if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")): - # ogr2ogr - print(f"encoding GeoJSON for {row[1]}") - - # PWD is different for Windows - if os.name == "nt": - pwd = "%cd%" - else: - pwd = "${PWD}" - cmd = ( - 'docker run --rm -it -v "' - + pwd - + '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/' - + fips - + ".json /home/data/census/shp/" - + fips - + "/tl_2010_" - + fips - + "_bg10.shp" - ) - print(cmd) - os.system(cmd) + pwd = "${PWD}" + cmd = ( + 'docker run --rm -it -v "' + + pwd + + '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/' + + fips + + ".json /home/data/census/shp/" + + fips + + "/tl_2010_" + + fips + + "_bg10.shp" + ) + print(cmd) + os.system(cmd) # generate CBG CSV table for pandas ## load in memory @@ -87,10 +81,7 @@ with requests.Session() as s: cbg_per_state_list[geoid10_state_id] = [] cbg_per_state_list[geoid10_state_id].append(geoid10) - csv_dir_path = data_path.joinpath( - "census", - "csv", - ) + csv_dir_path = data_path / "census" / "csv" ## write to individual state csv for state_id in cbg_per_state_list: geoid10_list = cbg_per_state_list[state_id] diff --git a/score/scripts/generate_mbtiles.py b/score/scripts/generate_mbtiles.py index 22678438..ad85ea76 100644 --- a/score/scripts/generate_mbtiles.py +++ b/score/scripts/generate_mbtiles.py @@ -2,6 +2,8 @@ import os from pathlib import Path import shutil +from utils import get_state_fips_codes + data_path = Path.cwd() / "data" # remove 
existing mbtiles file @@ -14,17 +16,41 @@ mvt_tiles_path = data_path / "tiles" / "mvt" if os.path.exists(mvt_tiles_path): shutil.rmtree(mvt_tiles_path) +# Merge scores into json +# TODO: for this first pass, just merging ACS EJScren indicators +# Per https://github.com/usds/justice40-tool/issues/102 + +if os.name == "nt": + pwd = "%cd%" +else: + pwd = "${PWD}" + +state_fips_codes = get_state_fips_codes() +for fips in state_fips_codes: + cmd = ( + 'docker run --rm -v "' + + pwd + + '"/:/home ' + + "osgeo/gdal:alpine-small-latest ogr2ogr -f GeoJSON " + + f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN '/home/data/dataset/ejscreen_2020/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" " + + f"/home/data/score/geojson/{fips}.json /home/data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf" + ) + print(cmd) + os.system(cmd) + # get a list of all json files to plug in the docker commands below # (workaround since *.json doesn't seem to work) geojson_list = "" -geojson_path = data_path / "census" / "geojson" +geojson_path = data_path / "score" / "geojson" for file in os.listdir(geojson_path): if file.endswith(".json"): - geojson_list += f"/home/data/census/geojson/{file} " + geojson_list += f"/home/data/score/geojson/{file} " if geojson_list == "": - print("No GeoJson files found. Please run download_cbg.py first") + print("No GeoJson files found. Please run scripts/download_cbg.py first") + +# generate mbtiles file # PWD is different for Windows if os.name == "nt": pwd = "%cd%" @@ -33,7 +59,7 @@ else: cmd = ( 'docker run --rm -it -v "' + pwd - + '"/:/home klokantech/tippecanoe tippecanoe -s_srs EPSG:4269 -t_srs EPSG:4326 --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --drop-densest-as-needed --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 ' + + '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 ' + geojson_list ) print(cmd) diff --git a/score/scripts/utils.py b/score/scripts/utils.py new file mode 100644 index 00000000..48b656db --- /dev/null +++ b/score/scripts/utils.py @@ -0,0 +1,20 @@ +# common usage functions +import csv +from pathlib import Path + + +def get_state_fips_codes(): + data_path = Path.cwd() / "data" + fips_csv_path = data_path / "fips_states_2010.csv" + fips_state_list = [] + with open(fips_csv_path) as csv_file: + csv_reader = csv.reader(csv_file, delimiter=",") + line_count = 0 + + for row in csv_reader: + if line_count == 0: + line_count += 1 + else: + fips = row[0].strip() + fips_state_list.append(fips) + return fips_state_list
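
A rough sketch of the per-state flow this patch wires up, for orientation only: it is not part of the commit, and it simply strings together the pieces the diff adds (get_state_fips_codes() from score/scripts/utils.py, the EJSCREEN column selection from ejscreen_etl.ipynb, and the data{fips}.csv naming that the ogr2ogr join in generate_mbtiles.py expects). It assumes it runs from the score/ directory with score/scripts on the import path, that fips_states_2010.csv is already in score/data, and that the extracted EJSCREEN CSV sits in data/tmp as in the notebook.

    from pathlib import Path
    import pandas as pd

    from utils import get_state_fips_codes  # added by this patch in score/scripts/utils.py

    data_path = Path.cwd() / "data"
    csv_path = data_path / "dataset" / "ejscreen_2020"

    # Keep ID as a string so the 2-digit state FIPS prefix (leading zeros included) survives.
    df = pd.read_csv(
        data_path / "tmp" / "EJSCREEN_2020_StatePctile.csv",
        dtype={"ID": "string"},
        low_memory=False,
    )[["ID", "ACSTOTPOP", "LESSHSPCT", "LOWINCPCT"]]

    # Nationwide file, then one data{fips}.csv per state; the data{fips} name is what
    # the LEFT JOIN built in generate_mbtiles.py references as its CSV layer.
    df.to_csv(csv_path / "usa.csv", index=False)
    for fips in get_state_fips_codes():
        df[df.ID.str[:2] == fips].to_csv(csv_path / f"data{fips}.csv", index=False)

From there, download_cbg.py fetches the 2010 block-group shapefiles per state and generate_mbtiles.py joins each data{fips}.csv onto its shapefile before tippecanoe builds block2010.mbtiles.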