From 78615e9b1ac63cb4ea44d5ffc08acbdeb984e70f Mon Sep 17 00:00:00 2001
From: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Date: Thu, 17 Jun 2021 18:12:39 -0400
Subject: [PATCH] ACS data baked in for map (#153)
* starting etl for score
* projection fix
* projection flags
* proper ejscreen etl csv generation
* failing CSV merge -- investigating
* checkpoint
* some etl changes
* completed ticket
* small typo
---
.gitignore | 2 +
score/__init__.py | 0
score/data/dataset/ejscreen_2020/__init__.py | 0
score/data/score/geojson/__init__.py | 0
score/data/tmp/__init__.py | 0
score/ipython/ejscreen_etl.ipynb | 406 ++++++-------------
score/ipython/score_calc_0.1.ipynb | 100 +++++
score/scripts/__init__.py | 0
score/scripts/download_cbg.py | 115 +++---
score/scripts/generate_mbtiles.py | 34 +-
score/scripts/utils.py | 20 +
11 files changed, 321 insertions(+), 356 deletions(-)
create mode 100644 score/__init__.py
create mode 100644 score/data/dataset/ejscreen_2020/__init__.py
create mode 100644 score/data/score/geojson/__init__.py
create mode 100644 score/data/tmp/__init__.py
create mode 100644 score/ipython/score_calc_0.1.ipynb
create mode 100644 score/scripts/__init__.py
create mode 100644 score/scripts/utils.py
diff --git a/.gitignore b/.gitignore
index 6d4c3ea1..12dd1862 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,5 @@ cython_debug/
score/data/census
score/data/tiles
score/data/tmp
+score/data/dataset
+score/data/score
diff --git a/score/__init__.py b/score/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/data/dataset/ejscreen_2020/__init__.py b/score/data/dataset/ejscreen_2020/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/data/score/geojson/__init__.py b/score/data/score/geojson/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/data/tmp/__init__.py b/score/data/tmp/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/ipython/ejscreen_etl.ipynb b/score/ipython/ejscreen_etl.ipynb
index 3b24556b..5a95a935 100644
--- a/score/ipython/ejscreen_etl.ipynb
+++ b/score/ipython/ejscreen_etl.ipynb
@@ -2,41 +2,27 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 6,
- "id": "f4d63367",
+ "execution_count": 1,
+ "id": "20aa3891",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
+ "import requests\n",
+ "import zipfile\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import csv\n",
"\n",
- "data_path = Path.cwd().parent / \"data\" / \"tmp\""
+ "data_path = Path.cwd().parent / \"data\"\n",
+ "fips_csv_path = data_path / \"fips_states_2010.csv\"\n",
+ "csv_path = data_path / \"dataset\" / \"ejscreen_2020\""
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "0e6eb55e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "WindowsPath('C:/opt/justice40-tool/score/data/tmp')"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "data_path"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "a1431996",
+ "execution_count": 3,
+ "id": "67a58c24",
"metadata": {},
"outputs": [
{
@@ -49,304 +35,144 @@
}
],
"source": [
- "import requests\n",
"download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n",
"file_contents = download.content\n",
- "zip_file_path = data_path / \"downloaded.zip\"\n",
- "zip_file = open(zip_file_path, \"wb\")\n",
+ "zip_file_path = data_path / \"tmp\"\n",
+ "zip_file = open(zip_file_path / \"downloaded.zip\", \"wb\")\n",
"zip_file.write(file_contents)\n",
"zip_file.close()"
]
},
{
"cell_type": "code",
- "execution_count": 20,
- "id": "bc5f3466",
+ "execution_count": 4,
+ "id": "cc3fb9ec",
"metadata": {},
"outputs": [],
"source": [
- "import zipfile\n",
- "with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
- " zip_ref.extractall(data_path)\n",
- "ejscreen_csv = data_path / \"EJSCREEN_2020_StatePctile.csv\""
+ "with zipfile.ZipFile(zip_file_path / \"downloaded.zip\", \"r\") as zip_ref:\n",
+ " zip_ref.extractall(zip_file_path)\n",
+ "ejscreen_csv = data_path / \"tmp\" / \"EJSCREEN_2020_StatePctile.csv\""
]
},
{
"cell_type": "code",
- "execution_count": 22,
- "id": "392ccb67",
+ "execution_count": 5,
+ "id": "b25738bb",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " OBJECTID | \n",
- " ID | \n",
- " STATE_NAME | \n",
- " ST_ABBREV | \n",
- " REGION | \n",
- " ACSTOTPOP | \n",
- " D_PM25_2 | \n",
- " B_PM25_D2 | \n",
- " P_PM25_D2 | \n",
- " D_OZONE_2 | \n",
- " ... | \n",
- " T_PNPL | \n",
- " T_PNPL_D2 | \n",
- " T_PRMP | \n",
- " T_PRMP_D2 | \n",
- " T_PTSDF | \n",
- " T_PTSDF_D2 | \n",
- " T_PWDIS | \n",
- " T_PWDIS_D2 | \n",
- " Shape_Length | \n",
- " Shape_Area | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " 10010201001 | \n",
- " Alabama | \n",
- " AL | \n",
- " 4 | \n",
- " 636 | \n",
- " -492.025529412 | \n",
- " 6 | \n",
- " 52.0 | \n",
- " -1866.38637046 | \n",
- " ... | \n",
- " 0.071 facilities/km distance (79%ile) | \n",
- " 40%ile | \n",
- " 0.085 facilities/km distance (23%ile) | \n",
- " 53%ile | \n",
- " 0.59 facilities/km distance (57%ile) | \n",
- " 38%ile | \n",
- " None | \n",
- " None | \n",
- " 13443.155206 | \n",
- " 6.040790e+06 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 2 | \n",
- " 10010201002 | \n",
- " Alabama | \n",
- " AL | \n",
- " 4 | \n",
- " 1287 | \n",
- " -2053.08341364 | \n",
- " 4 | \n",
- " 30.0 | \n",
- " -7787.90260177 | \n",
- " ... | \n",
- " 0.064 facilities/km distance (76%ile) | \n",
- " 19%ile | \n",
- " 0.074 facilities/km distance (17%ile) | \n",
- " 42%ile | \n",
- " 0.45 facilities/km distance (52%ile) | \n",
- " 23%ile | \n",
- " None | \n",
- " None | \n",
- " 11917.089598 | \n",
- " 7.834160e+06 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3 | \n",
- " 10010202001 | \n",
- " Alabama | \n",
- " AL | \n",
- " 4 | \n",
- " 810 | \n",
- " 1846.12693767 | \n",
- " 8 | \n",
- " 75.0 | \n",
- " 7002.78371663 | \n",
- " ... | \n",
- " 0.069 facilities/km distance (78%ile) | \n",
- " 85%ile | \n",
- " 0.078 facilities/km distance (20%ile) | \n",
- " 67%ile | \n",
- " 0.65 facilities/km distance (59%ile) | \n",
- " 77%ile | \n",
- " None | \n",
- " None | \n",
- " 7770.915121 | \n",
- " 2.900774e+06 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 4 | \n",
- " 10010202002 | \n",
- " Alabama | \n",
- " AL | \n",
- " 4 | \n",
- " 1218 | \n",
- " 1392.07530488 | \n",
- " 8 | \n",
- " 72.0 | \n",
- " 5280.46153188 | \n",
- " ... | \n",
- " 0.076 facilities/km distance (81%ile) | \n",
- " 83%ile | \n",
- " 0.087 facilities/km distance (24%ile) | \n",
- " 66%ile | \n",
- " 1 facilities/km distance (69%ile) | \n",
- " 78%ile | \n",
- " None | \n",
- " None | \n",
- " 6506.804784 | \n",
- " 1.793332e+06 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 5 | \n",
- " 10010203001 | \n",
- " Alabama | \n",
- " AL | \n",
- " 4 | \n",
- " 2641 | \n",
- " -769.374640358 | \n",
- " 5 | \n",
- " 48.0 | \n",
- " -2911.8926061 | \n",
- " ... | \n",
- " 0.074 facilities/km distance (80%ile) | \n",
- " 32%ile | \n",
- " 0.08 facilities/km distance (21%ile) | \n",
- " 51%ile | \n",
- " 1.2 facilities/km distance (74%ile) | \n",
- " 24%ile | \n",
- " None | \n",
- " None | \n",
- " 11070.367848 | \n",
- " 5.461602e+06 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 124 columns
\n",
- "
"
- ],
- "text/plain": [
- " OBJECTID ID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
- "0 1 10010201001 Alabama AL 4 636 \n",
- "1 2 10010201002 Alabama AL 4 1287 \n",
- "2 3 10010202001 Alabama AL 4 810 \n",
- "3 4 10010202002 Alabama AL 4 1218 \n",
- "4 5 10010203001 Alabama AL 4 2641 \n",
- "\n",
- " D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n",
- "0 -492.025529412 6 52.0 -1866.38637046 ... \n",
- "1 -2053.08341364 4 30.0 -7787.90260177 ... \n",
- "2 1846.12693767 8 75.0 7002.78371663 ... \n",
- "3 1392.07530488 8 72.0 5280.46153188 ... \n",
- "4 -769.374640358 5 48.0 -2911.8926061 ... \n",
- "\n",
- " T_PNPL T_PNPL_D2 \\\n",
- "0 0.071 facilities/km distance (79%ile) 40%ile \n",
- "1 0.064 facilities/km distance (76%ile) 19%ile \n",
- "2 0.069 facilities/km distance (78%ile) 85%ile \n",
- "3 0.076 facilities/km distance (81%ile) 83%ile \n",
- "4 0.074 facilities/km distance (80%ile) 32%ile \n",
- "\n",
- " T_PRMP T_PRMP_D2 \\\n",
- "0 0.085 facilities/km distance (23%ile) 53%ile \n",
- "1 0.074 facilities/km distance (17%ile) 42%ile \n",
- "2 0.078 facilities/km distance (20%ile) 67%ile \n",
- "3 0.087 facilities/km distance (24%ile) 66%ile \n",
- "4 0.08 facilities/km distance (21%ile) 51%ile \n",
- "\n",
- " T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n",
- "0 0.59 facilities/km distance (57%ile) 38%ile None None \n",
- "1 0.45 facilities/km distance (52%ile) 23%ile None None \n",
- "2 0.65 facilities/km distance (59%ile) 77%ile None None \n",
- "3 1 facilities/km distance (69%ile) 78%ile None None \n",
- "4 1.2 facilities/km distance (74%ile) 24%ile None None \n",
- "\n",
- " Shape_Length Shape_Area \n",
- "0 13443.155206 6.040790e+06 \n",
- "1 11917.089598 7.834160e+06 \n",
- "2 7770.915121 2.900774e+06 \n",
- "3 6506.804784 1.793332e+06 \n",
- "4 11070.367848 5.461602e+06 \n",
- "\n",
- "[5 rows x 124 columns]"
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "df = pd.read_csv(ejscreen_csv, low_memory=False)\n",
- "df.head()"
+ "df = pd.read_csv(ejscreen_csv, dtype={'ID': 'string'}, low_memory=False)"
]
},
{
"cell_type": "code",
- "execution_count": 32,
- "id": "0ce9e22a",
+ "execution_count": 6,
+ "id": "e6994f2d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "9fa2077a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# write nationwide csv\n",
+ "df.to_csv(csv_path / f\"usa.csv\", index = False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "5e5cc12a",
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Generating data01 csv\n",
+ "Generating data02 csv\n",
+ "Generating data04 csv\n",
+ "Generating data05 csv\n",
+ "Generating data06 csv\n",
+ "Generating data08 csv\n",
+ "Generating data09 csv\n",
+ "Generating data10 csv\n",
+ "Generating data11 csv\n",
+ "Generating data12 csv\n",
+ "Generating data13 csv\n",
+ "Generating data15 csv\n",
+ "Generating data16 csv\n",
+ "Generating data17 csv\n",
+ "Generating data18 csv\n",
+ "Generating data19 csv\n",
+ "Generating data20 csv\n",
+ "Generating data21 csv\n",
+ "Generating data22 csv\n",
+ "Generating data23 csv\n",
+ "Generating data24 csv\n",
+ "Generating data25 csv\n",
+ "Generating data26 csv\n",
+ "Generating data27 csv\n",
+ "Generating data28 csv\n",
+ "Generating data29 csv\n",
+ "Generating data30 csv\n",
+ "Generating data31 csv\n",
+ "Generating data32 csv\n",
+ "Generating data33 csv\n",
+ "Generating data34 csv\n",
+ "Generating data35 csv\n",
+ "Generating data36 csv\n",
+ "Generating data37 csv\n",
+ "Generating data38 csv\n",
+ "Generating data39 csv\n",
+ "Generating data40 csv\n",
+ "Generating data41 csv\n",
+ "Generating data42 csv\n",
+ "Generating data44 csv\n",
+ "Generating data45 csv\n",
+ "Generating data46 csv\n",
+ "Generating data47 csv\n",
+ "Generating data48 csv\n",
+ "Generating data49 csv\n",
+ "Generating data50 csv\n",
+ "Generating data51 csv\n",
+ "Generating data53 csv\n",
+ "Generating data54 csv\n",
+ "Generating data55 csv\n",
+ "Generating data56 csv\n"
+ ]
}
],
"source": [
- "df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]\n",
- "df.head()\n",
- "df.count"
+ "# write per state csvs\n",
+ "with open(fips_csv_path) as csv_file:\n",
+ " csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
+ " line_count = 0\n",
+ "\n",
+ " for row in csv_reader:\n",
+ " if line_count == 0:\n",
+ " line_count += 1\n",
+ " else:\n",
+ " fips = row[0].strip()\n",
+ " print(f\"Generating data{fips} csv\")\n",
+ " df1 = df[df.ID.str[:2] == fips]\n",
+ " # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
+ " df1.to_csv(csv_path / f\"data{fips}.csv\", index = False)"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e051623b",
+ "id": "2674fb20",
"metadata": {},
"outputs": [],
"source": []
diff --git a/score/ipython/score_calc_0.1.ipynb b/score/ipython/score_calc_0.1.ipynb
new file mode 100644
index 00000000..781bac64
--- /dev/null
+++ b/score/ipython/score_calc_0.1.ipynb
@@ -0,0 +1,100 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a664f981",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[WindowsPath('C:/opt/justice40-tool/score/scripts'), WindowsPath('C:/opt/justice40-tool/score/scripts'), WindowsPath('C:/opt/justice40-tool/score/scripts'), 'C:\\\\opt\\\\justice40-tool\\\\score\\\\ipython', 'C:\\\\Python39\\\\python39.zip', 'C:\\\\Python39\\\\DLLs', 'C:\\\\Python39\\\\lib', 'C:\\\\Python39', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv', '', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\win32', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\win32\\\\lib', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\Pythonwin', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\j\\\\.ipython']\n"
+ ]
+ },
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'utils'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mutils\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mdata_path\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mPath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcwd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;34m\"data\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'utils'"
+ ]
+ }
+ ],
+ "source": [
+ "from pathlib import Path\n",
+ "import pandas as pd\n",
+ "import csv\n",
+ "import sys\n",
+ "# sys.path entries must be str; Path objects are ignored by the import system\n",
+ "script_path = Path.cwd().parent / \"scripts\"\n",
+ "sys.path.insert(0, str(script_path))\n",
+ "print(sys.path)\n",
+ "\n",
+ "from utils import get_state_fips_codes\n",
+ "\n",
+ "data_path = Path.cwd().parent / \"data\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "1b750f0e",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'get_state_fips_codes' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# store all fips codes in list\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mfips_state_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_state_fips_codes\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mfips_state_list\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'get_state_fips_codes' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "# store all fips codes in list (call the helper; do not bind the function object)\n",
+ "fips_state_list = get_state_fips_codes()\n",
+ "fips_state_list"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "7df430cb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# EJSCreen ETL Load\n",
+ "csv_path = data_path / \"dataset\" / \"ejscreen_2020\""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/score/scripts/__init__.py b/score/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/score/scripts/download_cbg.py b/score/scripts/download_cbg.py
index ff58451c..b091d431 100644
--- a/score/scripts/download_cbg.py
+++ b/score/scripts/download_cbg.py
@@ -5,70 +5,64 @@ import os
import json
from pathlib import Path
+from utils import get_state_fips_codes
+
data_path = Path.cwd() / "data"
with requests.Session() as s:
# the fips_states_2010.csv is generated from data here
# https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
- fips_csv_path = data_path / "fips_states_2010.csv"
- with open(fips_csv_path) as csv_file:
- csv_reader = csv.reader(csv_file, delimiter=",")
- line_count = 0
- for row in csv_reader:
- if line_count == 0:
- line_count += 1
+ state_fips_codes = get_state_fips_codes()
+ for fips in state_fips_codes:
+ # skip the download when this state's shapefile is already on disk
+ shp_file_path = data_path.joinpath(
+ "census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
+ )
+ if not os.path.isfile(shp_file_path):
+ print(f"downloading {fips}")
+
+ # 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/
+ # But using 2010 for now
+ cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
+ download = s.get(cbg_state_url)
+ file_contents = download.content
+ zip_file_path = data_path / "census" / "downloaded.zip"
+ zip_file = open(zip_file_path, "wb")
+ zip_file.write(file_contents)
+ zip_file.close()
+
+ print(f"extracting {fips}")
+
+ with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
+ shp_dir_path = data_path / "census" / "shp" / fips
+ zip_ref.extractall(shp_dir_path)
+
+ geojson_dir_path = data_path.joinpath(
+ "census",
+ "geojson",
+ )
+ if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
+ # convert the shapefile to GeoJSON with ogr2ogr (run through docker)
+ print(f"encoding GeoJSON for {fips}")
+
+ # PWD is different for Windows
+ if os.name == "nt":
+ pwd = "%cd%"
else:
- fips = row[0].strip()
-
- # check if file exists
- shp_file_path = data_path.joinpath(
- "census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
- )
- if not os.path.isfile(shp_file_path):
- print(f"downloading {row[1]}")
-
- # 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/
- # But using 2010 for now
- cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
- download = s.get(cbg_state_url)
- file_contents = download.content
- zip_file_path = data_path / "census" / "downloaded.zip"
- zip_file = open(zip_file_path, "wb")
- zip_file.write(file_contents)
- zip_file.close()
-
- print(f"extracting {row[1]}")
-
- with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
- shp_dir_path = data_path / "census" / "shp" / fips
- zip_ref.extractall(shp_dir_path)
-
- geojson_dir_path = data_path.joinpath(
- "census",
- "geojson",
- )
- if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
- # ogr2ogr
- print(f"encoding GeoJSON for {row[1]}")
-
- # PWD is different for Windows
- if os.name == "nt":
- pwd = "%cd%"
- else:
- pwd = "${PWD}"
- cmd = (
- 'docker run --rm -it -v "'
- + pwd
- + '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/'
- + fips
- + ".json /home/data/census/shp/"
- + fips
- + "/tl_2010_"
- + fips
- + "_bg10.shp"
- )
- print(cmd)
- os.system(cmd)
+ pwd = "${PWD}"
+ cmd = (
+ 'docker run --rm -it -v "'
+ + pwd
+ + '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/'
+ + fips
+ + ".json /home/data/census/shp/"
+ + fips
+ + "/tl_2010_"
+ + fips
+ + "_bg10.shp"
+ )
+ print(cmd)
+ os.system(cmd)
# generate CBG CSV table for pandas
## load in memory
@@ -87,10 +81,7 @@ with requests.Session() as s:
cbg_per_state_list[geoid10_state_id] = []
cbg_per_state_list[geoid10_state_id].append(geoid10)
- csv_dir_path = data_path.joinpath(
- "census",
- "csv",
- )
+ csv_dir_path = data_path / "census" / "csv"
## write to individual state csv
for state_id in cbg_per_state_list:
geoid10_list = cbg_per_state_list[state_id]
diff --git a/score/scripts/generate_mbtiles.py b/score/scripts/generate_mbtiles.py
index 22678438..ad85ea76 100644
--- a/score/scripts/generate_mbtiles.py
+++ b/score/scripts/generate_mbtiles.py
@@ -2,6 +2,8 @@ import os
from pathlib import Path
import shutil
+from utils import get_state_fips_codes
+
data_path = Path.cwd() / "data"
# remove existing mbtiles file
@@ -14,17 +16,41 @@ mvt_tiles_path = data_path / "tiles" / "mvt"
if os.path.exists(mvt_tiles_path):
shutil.rmtree(mvt_tiles_path)
+# Merge scores into json
+# TODO: for this first pass, just merging ACS EJSCREEN indicators
+# Per https://github.com/usds/justice40-tool/issues/102
+
+if os.name == "nt":  # working-directory placeholder for the docker -v mount differs on Windows
+ pwd = "%cd%"
+else:
+ pwd = "${PWD}"
+
+state_fips_codes = get_state_fips_codes()
+for fips in state_fips_codes:  # one ogr2ogr run per state FIPS code
+ cmd = (  # LEFT JOINs the state's block-group layer to its EJSCREEN csv on GEOID10 = ID
+ 'docker run --rm -v "'
+ + pwd
+ + '"/:/home '
+ + "osgeo/gdal:alpine-small-latest ogr2ogr -f GeoJSON "
+ + f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN '/home/data/dataset/ejscreen_2020/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" "
+ + f"/home/data/score/geojson/{fips}.json /home/data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf"
+ )
+ print(cmd)
+ os.system(cmd)  # NOTE(review): the exit status is not checked
+
# get a list of all json files to plug in the docker commands below
# (workaround since *.json doesn't seem to work)
geojson_list = ""
-geojson_path = data_path / "census" / "geojson"
+geojson_path = data_path / "score" / "geojson"
for file in os.listdir(geojson_path):
if file.endswith(".json"):
- geojson_list += f"/home/data/census/geojson/{file} "
+ geojson_list += f"/home/data/score/geojson/{file} "
if geojson_list == "":
- print("No GeoJson files found. Please run download_cbg.py first")
+ print("No GeoJson files found. Please run scripts/download_cbg.py first")
+
+# generate mbtiles file
# PWD is different for Windows
if os.name == "nt":
pwd = "%cd%"
@@ -33,7 +59,7 @@ else:
cmd = (
'docker run --rm -it -v "'
+ pwd
- + '"/:/home klokantech/tippecanoe tippecanoe -s_srs EPSG:4269 -t_srs EPSG:4326 --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --drop-densest-as-needed --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
+ + '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
+ geojson_list
)
print(cmd)
diff --git a/score/scripts/utils.py b/score/scripts/utils.py
new file mode 100644
index 00000000..48b656db
--- /dev/null
+++ b/score/scripts/utils.py
@@ -0,0 +1,20 @@
+# common usage functions
+import csv
+from pathlib import Path
+
+
+def get_state_fips_codes(data_path=None):
+    """Return the state FIPS codes listed in fips_states_2010.csv.
+
+    data_path is the directory holding the CSV; when omitted it is "data"
+    under the current working directory. The first CSV row is treated as a
+    header and skipped, and blank rows are ignored.
+    """
+    if data_path is None:
+        data_path = Path.cwd() / "data"
+    fips_csv_path = Path(data_path) / "fips_states_2010.csv"
+    # newline="" is how the csv module expects its input file to be opened
+    with open(fips_csv_path, newline="") as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=",")
+        next(csv_reader, None)  # skip the header row
+        return [row[0].strip() for row in csv_reader if row]