diff --git a/.gitignore b/.gitignore
index 85e43381..6d4c3ea1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -131,3 +131,4 @@ cython_debug/
# temporary census data
score/data/census
score/data/tiles
+score/data/tmp
diff --git a/score/etl/__init__.py b/score/etl/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/etl/datasets/__init__.py b/score/etl/datasets/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/etl/datasets/ejscreen_2020.py b/score/etl/datasets/ejscreen_2020.py
new file mode 100644
index 00000000..12f433f1
--- /dev/null
+++ b/score/etl/datasets/ejscreen_2020.py
@@ -0,0 +1 @@
+# Source data: https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip
diff --git a/score/ipython/ejscreen_etl.ipynb b/score/ipython/ejscreen_etl.ipynb
new file mode 100644
index 00000000..3b24556b
--- /dev/null
+++ b/score/ipython/ejscreen_etl.ipynb
@@ -0,0 +1,376 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "f4d63367",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "\n",
+ "data_path = Path.cwd().parent / \"data\" / \"tmp\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0e6eb55e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "WindowsPath('C:/opt/justice40-tool/score/data/tmp')"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "a1431996",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'gaftp.epa.gov'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n",
+ "file_contents = download.content\n",
+ "zip_file_path = data_path / \"downloaded.zip\"\n",
+ "zip_file = open(zip_file_path, \"wb\")\n",
+ "zip_file.write(file_contents)\n",
+ "zip_file.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "bc5f3466",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import zipfile\n",
+ "with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
+ " zip_ref.extractall(data_path)\n",
+ "ejscreen_csv = data_path / \"EJSCREEN_2020_StatePctile.csv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "392ccb67",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " OBJECTID | \n",
+ " ID | \n",
+ " STATE_NAME | \n",
+ " ST_ABBREV | \n",
+ " REGION | \n",
+ " ACSTOTPOP | \n",
+ " D_PM25_2 | \n",
+ " B_PM25_D2 | \n",
+ " P_PM25_D2 | \n",
+ " D_OZONE_2 | \n",
+ " ... | \n",
+ " T_PNPL | \n",
+ " T_PNPL_D2 | \n",
+ " T_PRMP | \n",
+ " T_PRMP_D2 | \n",
+ " T_PTSDF | \n",
+ " T_PTSDF_D2 | \n",
+ " T_PWDIS | \n",
+ " T_PWDIS_D2 | \n",
+ " Shape_Length | \n",
+ " Shape_Area | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 10010201001 | \n",
+ " Alabama | \n",
+ " AL | \n",
+ " 4 | \n",
+ " 636 | \n",
+ " -492.025529412 | \n",
+ " 6 | \n",
+ " 52.0 | \n",
+ " -1866.38637046 | \n",
+ " ... | \n",
+ " 0.071 facilities/km distance (79%ile) | \n",
+ " 40%ile | \n",
+ " 0.085 facilities/km distance (23%ile) | \n",
+ " 53%ile | \n",
+ " 0.59 facilities/km distance (57%ile) | \n",
+ " 38%ile | \n",
+ " None | \n",
+ " None | \n",
+ " 13443.155206 | \n",
+ " 6.040790e+06 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 10010201002 | \n",
+ " Alabama | \n",
+ " AL | \n",
+ " 4 | \n",
+ " 1287 | \n",
+ " -2053.08341364 | \n",
+ " 4 | \n",
+ " 30.0 | \n",
+ " -7787.90260177 | \n",
+ " ... | \n",
+ " 0.064 facilities/km distance (76%ile) | \n",
+ " 19%ile | \n",
+ " 0.074 facilities/km distance (17%ile) | \n",
+ " 42%ile | \n",
+ " 0.45 facilities/km distance (52%ile) | \n",
+ " 23%ile | \n",
+ " None | \n",
+ " None | \n",
+ " 11917.089598 | \n",
+ " 7.834160e+06 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 10010202001 | \n",
+ " Alabama | \n",
+ " AL | \n",
+ " 4 | \n",
+ " 810 | \n",
+ " 1846.12693767 | \n",
+ " 8 | \n",
+ " 75.0 | \n",
+ " 7002.78371663 | \n",
+ " ... | \n",
+ " 0.069 facilities/km distance (78%ile) | \n",
+ " 85%ile | \n",
+ " 0.078 facilities/km distance (20%ile) | \n",
+ " 67%ile | \n",
+ " 0.65 facilities/km distance (59%ile) | \n",
+ " 77%ile | \n",
+ " None | \n",
+ " None | \n",
+ " 7770.915121 | \n",
+ " 2.900774e+06 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 10010202002 | \n",
+ " Alabama | \n",
+ " AL | \n",
+ " 4 | \n",
+ " 1218 | \n",
+ " 1392.07530488 | \n",
+ " 8 | \n",
+ " 72.0 | \n",
+ " 5280.46153188 | \n",
+ " ... | \n",
+ " 0.076 facilities/km distance (81%ile) | \n",
+ " 83%ile | \n",
+ " 0.087 facilities/km distance (24%ile) | \n",
+ " 66%ile | \n",
+ " 1 facilities/km distance (69%ile) | \n",
+ " 78%ile | \n",
+ " None | \n",
+ " None | \n",
+ " 6506.804784 | \n",
+ " 1.793332e+06 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 10010203001 | \n",
+ " Alabama | \n",
+ " AL | \n",
+ " 4 | \n",
+ " 2641 | \n",
+ " -769.374640358 | \n",
+ " 5 | \n",
+ " 48.0 | \n",
+ " -2911.8926061 | \n",
+ " ... | \n",
+ " 0.074 facilities/km distance (80%ile) | \n",
+ " 32%ile | \n",
+ " 0.08 facilities/km distance (21%ile) | \n",
+ " 51%ile | \n",
+ " 1.2 facilities/km distance (74%ile) | \n",
+ " 24%ile | \n",
+ " None | \n",
+ " None | \n",
+ " 11070.367848 | \n",
+ " 5.461602e+06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 124 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " OBJECTID ID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
+ "0 1 10010201001 Alabama AL 4 636 \n",
+ "1 2 10010201002 Alabama AL 4 1287 \n",
+ "2 3 10010202001 Alabama AL 4 810 \n",
+ "3 4 10010202002 Alabama AL 4 1218 \n",
+ "4 5 10010203001 Alabama AL 4 2641 \n",
+ "\n",
+ " D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n",
+ "0 -492.025529412 6 52.0 -1866.38637046 ... \n",
+ "1 -2053.08341364 4 30.0 -7787.90260177 ... \n",
+ "2 1846.12693767 8 75.0 7002.78371663 ... \n",
+ "3 1392.07530488 8 72.0 5280.46153188 ... \n",
+ "4 -769.374640358 5 48.0 -2911.8926061 ... \n",
+ "\n",
+ " T_PNPL T_PNPL_D2 \\\n",
+ "0 0.071 facilities/km distance (79%ile) 40%ile \n",
+ "1 0.064 facilities/km distance (76%ile) 19%ile \n",
+ "2 0.069 facilities/km distance (78%ile) 85%ile \n",
+ "3 0.076 facilities/km distance (81%ile) 83%ile \n",
+ "4 0.074 facilities/km distance (80%ile) 32%ile \n",
+ "\n",
+ " T_PRMP T_PRMP_D2 \\\n",
+ "0 0.085 facilities/km distance (23%ile) 53%ile \n",
+ "1 0.074 facilities/km distance (17%ile) 42%ile \n",
+ "2 0.078 facilities/km distance (20%ile) 67%ile \n",
+ "3 0.087 facilities/km distance (24%ile) 66%ile \n",
+ "4 0.08 facilities/km distance (21%ile) 51%ile \n",
+ "\n",
+ " T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n",
+ "0 0.59 facilities/km distance (57%ile) 38%ile None None \n",
+ "1 0.45 facilities/km distance (52%ile) 23%ile None None \n",
+ "2 0.65 facilities/km distance (59%ile) 77%ile None None \n",
+ "3 1 facilities/km distance (69%ile) 78%ile None None \n",
+ "4 1.2 facilities/km distance (74%ile) 24%ile None None \n",
+ "\n",
+ " Shape_Length Shape_Area \n",
+ "0 13443.155206 6.040790e+06 \n",
+ "1 11917.089598 7.834160e+06 \n",
+ "2 7770.915121 2.900774e+06 \n",
+ "3 6506.804784 1.793332e+06 \n",
+ "4 11070.367848 5.461602e+06 \n",
+ "\n",
+ "[5 rows x 124 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "df = pd.read_csv(ejscreen_csv, low_memory=False)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "0ce9e22a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]\n",
+ "df.head()\n",
+ "df.count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e051623b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/score/scripts/download_cbg.py b/score/scripts/download_cbg.py
index dbeefeb5..ff58451c 100644
--- a/score/scripts/download_cbg.py
+++ b/score/scripts/download_cbg.py
@@ -10,7 +10,7 @@ data_path = Path.cwd() / "data"
with requests.Session() as s:
# the fips_states_2010.csv is generated from data here
# https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
- fips_csv_path = data_path.joinpath("fips_states_2010.csv")
+ fips_csv_path = data_path / "fips_states_2010.csv"
with open(fips_csv_path) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
line_count = 0
@@ -27,10 +27,12 @@ with requests.Session() as s:
if not os.path.isfile(shp_file_path):
print(f"downloading {row[1]}")
+ # TIGER 2020 block-group data is available at https://www2.census.gov/geo/tiger/TIGER2020/BG/,
+ # but this script still downloads the 2010 files for now.
cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
download = s.get(cbg_state_url)
file_contents = download.content
- zip_file_path = data_path.joinpath("census", "downloaded.zip")
+ zip_file_path = data_path / "census" / "downloaded.zip"
zip_file = open(zip_file_path, "wb")
zip_file.write(file_contents)
zip_file.close()
@@ -38,7 +40,7 @@ with requests.Session() as s:
print(f"extracting {row[1]}")
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
- shp_dir_path = data_path.joinpath("census", "shp", fips)
+ shp_dir_path = data_path / "census" / "shp" / fips
zip_ref.extractall(shp_dir_path)
geojson_dir_path = data_path.joinpath(
diff --git a/score/scripts/generate_mbtiles.py b/score/scripts/generate_mbtiles.py
index 036d077e..22678438 100644
--- a/score/scripts/generate_mbtiles.py
+++ b/score/scripts/generate_mbtiles.py
@@ -22,8 +22,6 @@ for file in os.listdir(geojson_path):
if file.endswith(".json"):
geojson_list += f"/home/data/census/geojson/{file} "
-breakpoint()
-
if geojson_list == "":
print("No GeoJson files found. Please run download_cbg.py first")
@@ -35,7 +33,7 @@ else:
cmd = (
'docker run --rm -it -v "'
+ pwd
- + '"/:/home klokantech/tippecanoe tippecanoe -zg -o /home/data/tiles/block2010.mbtiles --drop-densest-as-needed --extend-zooms-if-still-dropping -l cbg2010 '
+ + '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
+ geojson_list
)
print(cmd)
@@ -51,7 +49,7 @@ else:
cmd = (
'docker run --rm -it -v "'
+ pwd
- + '"/:/home klokantech/tippecanoe tippecanoe --no-tile-compression -zg -e /home/data/tiles/mvt /home/data/census/geojson/01.json '
+ + '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed --no-tile-compression -zg -e /home/data/tiles/mvt '
+ geojson_list
)
print(cmd)