mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
ACS data baked in for map (#153)
* starting etl for score * projection fix * projection flags * proper ejscreen etl csv generation * failing CSV merge -- investigating * checkpoint * some etl changes * completed ticket * small typo
This commit is contained in:
parent
eed9bd311d
commit
78615e9b1a
11 changed files with 321 additions and 356 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -132,3 +132,5 @@ cython_debug/
|
||||||
score/data/census
|
score/data/census
|
||||||
score/data/tiles
|
score/data/tiles
|
||||||
score/data/tmp
|
score/data/tmp
|
||||||
|
score/data/dataset
|
||||||
|
score/data/score
|
||||||
|
|
0
score/__init__.py
Normal file
0
score/__init__.py
Normal file
0
score/data/dataset/ejscreen_2020/__init__.py
Normal file
0
score/data/dataset/ejscreen_2020/__init__.py
Normal file
0
score/data/score/geojson/__init__.py
Normal file
0
score/data/score/geojson/__init__.py
Normal file
0
score/data/tmp/__init__.py
Normal file
0
score/data/tmp/__init__.py
Normal file
|
@ -2,41 +2,27 @@
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 1,
|
||||||
"id": "f4d63367",
|
"id": "20aa3891",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
|
"import requests\n",
|
||||||
|
"import zipfile\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
"\n",
|
"\n",
|
||||||
"data_path = Path.cwd().parent / \"data\" / \"tmp\""
|
"data_path = Path.cwd().parent / \"data\"\n",
|
||||||
|
"fips_csv_path = data_path / \"fips_states_2010.csv\"\n",
|
||||||
|
"csv_path = data_path / \"dataset\" / \"ejscreen_2020\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 3,
|
||||||
"id": "0e6eb55e",
|
"id": "67a58c24",
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"WindowsPath('C:/opt/justice40-tool/score/data/tmp')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"data_path"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 15,
|
|
||||||
"id": "a1431996",
|
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -49,304 +35,144 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import requests\n",
|
|
||||||
"download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n",
|
"download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n",
|
||||||
"file_contents = download.content\n",
|
"file_contents = download.content\n",
|
||||||
"zip_file_path = data_path / \"downloaded.zip\"\n",
|
"zip_file_path = data_path / \"tmp\"\n",
|
||||||
"zip_file = open(zip_file_path, \"wb\")\n",
|
"zip_file = open(zip_file_path / \"downloaded.zip\", \"wb\")\n",
|
||||||
"zip_file.write(file_contents)\n",
|
"zip_file.write(file_contents)\n",
|
||||||
"zip_file.close()"
|
"zip_file.close()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 4,
|
||||||
"id": "bc5f3466",
|
"id": "cc3fb9ec",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import zipfile\n",
|
"with zipfile.ZipFile(zip_file_path / \"downloaded.zip\", \"r\") as zip_ref:\n",
|
||||||
"with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
|
" zip_ref.extractall(zip_file_path)\n",
|
||||||
" zip_ref.extractall(data_path)\n",
|
"ejscreen_csv = data_path / \"tmp\" / \"EJSCREEN_2020_StatePctile.csv\""
|
||||||
"ejscreen_csv = data_path / \"EJSCREEN_2020_StatePctile.csv\""
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 5,
|
||||||
"id": "392ccb67",
|
"id": "b25738bb",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>OBJECTID</th>\n",
|
|
||||||
" <th>ID</th>\n",
|
|
||||||
" <th>STATE_NAME</th>\n",
|
|
||||||
" <th>ST_ABBREV</th>\n",
|
|
||||||
" <th>REGION</th>\n",
|
|
||||||
" <th>ACSTOTPOP</th>\n",
|
|
||||||
" <th>D_PM25_2</th>\n",
|
|
||||||
" <th>B_PM25_D2</th>\n",
|
|
||||||
" <th>P_PM25_D2</th>\n",
|
|
||||||
" <th>D_OZONE_2</th>\n",
|
|
||||||
" <th>...</th>\n",
|
|
||||||
" <th>T_PNPL</th>\n",
|
|
||||||
" <th>T_PNPL_D2</th>\n",
|
|
||||||
" <th>T_PRMP</th>\n",
|
|
||||||
" <th>T_PRMP_D2</th>\n",
|
|
||||||
" <th>T_PTSDF</th>\n",
|
|
||||||
" <th>T_PTSDF_D2</th>\n",
|
|
||||||
" <th>T_PWDIS</th>\n",
|
|
||||||
" <th>T_PWDIS_D2</th>\n",
|
|
||||||
" <th>Shape_Length</th>\n",
|
|
||||||
" <th>Shape_Area</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>10010201001</td>\n",
|
|
||||||
" <td>Alabama</td>\n",
|
|
||||||
" <td>AL</td>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>636</td>\n",
|
|
||||||
" <td>-492.025529412</td>\n",
|
|
||||||
" <td>6</td>\n",
|
|
||||||
" <td>52.0</td>\n",
|
|
||||||
" <td>-1866.38637046</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>0.071 facilities/km distance (79%ile)</td>\n",
|
|
||||||
" <td>40%ile</td>\n",
|
|
||||||
" <td>0.085 facilities/km distance (23%ile)</td>\n",
|
|
||||||
" <td>53%ile</td>\n",
|
|
||||||
" <td>0.59 facilities/km distance (57%ile)</td>\n",
|
|
||||||
" <td>38%ile</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>13443.155206</td>\n",
|
|
||||||
" <td>6.040790e+06</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>2</td>\n",
|
|
||||||
" <td>10010201002</td>\n",
|
|
||||||
" <td>Alabama</td>\n",
|
|
||||||
" <td>AL</td>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>1287</td>\n",
|
|
||||||
" <td>-2053.08341364</td>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>30.0</td>\n",
|
|
||||||
" <td>-7787.90260177</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>0.064 facilities/km distance (76%ile)</td>\n",
|
|
||||||
" <td>19%ile</td>\n",
|
|
||||||
" <td>0.074 facilities/km distance (17%ile)</td>\n",
|
|
||||||
" <td>42%ile</td>\n",
|
|
||||||
" <td>0.45 facilities/km distance (52%ile)</td>\n",
|
|
||||||
" <td>23%ile</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>11917.089598</td>\n",
|
|
||||||
" <td>7.834160e+06</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>2</th>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>10010202001</td>\n",
|
|
||||||
" <td>Alabama</td>\n",
|
|
||||||
" <td>AL</td>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>810</td>\n",
|
|
||||||
" <td>1846.12693767</td>\n",
|
|
||||||
" <td>8</td>\n",
|
|
||||||
" <td>75.0</td>\n",
|
|
||||||
" <td>7002.78371663</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>0.069 facilities/km distance (78%ile)</td>\n",
|
|
||||||
" <td>85%ile</td>\n",
|
|
||||||
" <td>0.078 facilities/km distance (20%ile)</td>\n",
|
|
||||||
" <td>67%ile</td>\n",
|
|
||||||
" <td>0.65 facilities/km distance (59%ile)</td>\n",
|
|
||||||
" <td>77%ile</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>7770.915121</td>\n",
|
|
||||||
" <td>2.900774e+06</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>3</th>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>10010202002</td>\n",
|
|
||||||
" <td>Alabama</td>\n",
|
|
||||||
" <td>AL</td>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>1218</td>\n",
|
|
||||||
" <td>1392.07530488</td>\n",
|
|
||||||
" <td>8</td>\n",
|
|
||||||
" <td>72.0</td>\n",
|
|
||||||
" <td>5280.46153188</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>0.076 facilities/km distance (81%ile)</td>\n",
|
|
||||||
" <td>83%ile</td>\n",
|
|
||||||
" <td>0.087 facilities/km distance (24%ile)</td>\n",
|
|
||||||
" <td>66%ile</td>\n",
|
|
||||||
" <td>1 facilities/km distance (69%ile)</td>\n",
|
|
||||||
" <td>78%ile</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>6506.804784</td>\n",
|
|
||||||
" <td>1.793332e+06</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>4</th>\n",
|
|
||||||
" <td>5</td>\n",
|
|
||||||
" <td>10010203001</td>\n",
|
|
||||||
" <td>Alabama</td>\n",
|
|
||||||
" <td>AL</td>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>2641</td>\n",
|
|
||||||
" <td>-769.374640358</td>\n",
|
|
||||||
" <td>5</td>\n",
|
|
||||||
" <td>48.0</td>\n",
|
|
||||||
" <td>-2911.8926061</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>0.074 facilities/km distance (80%ile)</td>\n",
|
|
||||||
" <td>32%ile</td>\n",
|
|
||||||
" <td>0.08 facilities/km distance (21%ile)</td>\n",
|
|
||||||
" <td>51%ile</td>\n",
|
|
||||||
" <td>1.2 facilities/km distance (74%ile)</td>\n",
|
|
||||||
" <td>24%ile</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>None</td>\n",
|
|
||||||
" <td>11070.367848</td>\n",
|
|
||||||
" <td>5.461602e+06</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"<p>5 rows × 124 columns</p>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" OBJECTID ID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
|
|
||||||
"0 1 10010201001 Alabama AL 4 636 \n",
|
|
||||||
"1 2 10010201002 Alabama AL 4 1287 \n",
|
|
||||||
"2 3 10010202001 Alabama AL 4 810 \n",
|
|
||||||
"3 4 10010202002 Alabama AL 4 1218 \n",
|
|
||||||
"4 5 10010203001 Alabama AL 4 2641 \n",
|
|
||||||
"\n",
|
|
||||||
" D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n",
|
|
||||||
"0 -492.025529412 6 52.0 -1866.38637046 ... \n",
|
|
||||||
"1 -2053.08341364 4 30.0 -7787.90260177 ... \n",
|
|
||||||
"2 1846.12693767 8 75.0 7002.78371663 ... \n",
|
|
||||||
"3 1392.07530488 8 72.0 5280.46153188 ... \n",
|
|
||||||
"4 -769.374640358 5 48.0 -2911.8926061 ... \n",
|
|
||||||
"\n",
|
|
||||||
" T_PNPL T_PNPL_D2 \\\n",
|
|
||||||
"0 0.071 facilities/km distance (79%ile) 40%ile \n",
|
|
||||||
"1 0.064 facilities/km distance (76%ile) 19%ile \n",
|
|
||||||
"2 0.069 facilities/km distance (78%ile) 85%ile \n",
|
|
||||||
"3 0.076 facilities/km distance (81%ile) 83%ile \n",
|
|
||||||
"4 0.074 facilities/km distance (80%ile) 32%ile \n",
|
|
||||||
"\n",
|
|
||||||
" T_PRMP T_PRMP_D2 \\\n",
|
|
||||||
"0 0.085 facilities/km distance (23%ile) 53%ile \n",
|
|
||||||
"1 0.074 facilities/km distance (17%ile) 42%ile \n",
|
|
||||||
"2 0.078 facilities/km distance (20%ile) 67%ile \n",
|
|
||||||
"3 0.087 facilities/km distance (24%ile) 66%ile \n",
|
|
||||||
"4 0.08 facilities/km distance (21%ile) 51%ile \n",
|
|
||||||
"\n",
|
|
||||||
" T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n",
|
|
||||||
"0 0.59 facilities/km distance (57%ile) 38%ile None None \n",
|
|
||||||
"1 0.45 facilities/km distance (52%ile) 23%ile None None \n",
|
|
||||||
"2 0.65 facilities/km distance (59%ile) 77%ile None None \n",
|
|
||||||
"3 1 facilities/km distance (69%ile) 78%ile None None \n",
|
|
||||||
"4 1.2 facilities/km distance (74%ile) 24%ile None None \n",
|
|
||||||
"\n",
|
|
||||||
" Shape_Length Shape_Area \n",
|
|
||||||
"0 13443.155206 6.040790e+06 \n",
|
|
||||||
"1 11917.089598 7.834160e+06 \n",
|
|
||||||
"2 7770.915121 2.900774e+06 \n",
|
|
||||||
"3 6506.804784 1.793332e+06 \n",
|
|
||||||
"4 11070.367848 5.461602e+06 \n",
|
|
||||||
"\n",
|
|
||||||
"[5 rows x 124 columns]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 22,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"import numpy as np\n",
|
"df = pd.read_csv(ejscreen_csv, dtype={'ID': 'string'}, low_memory=False)"
|
||||||
"import pandas as pd\n",
|
|
||||||
"df = pd.read_csv(ejscreen_csv, low_memory=False)\n",
|
|
||||||
"df.head()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": 6,
|
||||||
"id": "0ce9e22a",
|
"id": "e6994f2d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "9fa2077a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# write nationwide csv\n",
|
||||||
|
"df.to_csv(csv_path / f\"usa.csv\", index = False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "5e5cc12a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"name": "stdout",
|
||||||
"text/plain": [
|
"output_type": "stream",
|
||||||
"<bound method DataFrame.count of ID ACSTOTPOP LESSHSPCT LOWINCPCT\n",
|
"text": [
|
||||||
"0 10010201001 636 0.208134 0.385220\n",
|
"Generating data01 csv\n",
|
||||||
"1 10010201002 1287 0.040678 0.163170\n",
|
"Generating data02 csv\n",
|
||||||
"2 10010202001 810 0.135563 0.501247\n",
|
"Generating data04 csv\n",
|
||||||
"3 10010202002 1218 0.192000 0.393701\n",
|
"Generating data05 csv\n",
|
||||||
"4 10010203001 2641 0.125473 0.308217\n",
|
"Generating data06 csv\n",
|
||||||
"... ... ... ... ...\n",
|
"Generating data08 csv\n",
|
||||||
"220328 721537506011 699 0.391389 0.902718\n",
|
"Generating data09 csv\n",
|
||||||
"220329 721537506012 2432 0.185852 0.783717\n",
|
"Generating data10 csv\n",
|
||||||
"220330 721537506013 976 0.018116 0.776639\n",
|
"Generating data11 csv\n",
|
||||||
"220331 721537506021 1707 0.375422 0.867377\n",
|
"Generating data12 csv\n",
|
||||||
"220332 721537506022 804 0.162791 0.942786\n",
|
"Generating data13 csv\n",
|
||||||
"\n",
|
"Generating data15 csv\n",
|
||||||
"[220333 rows x 4 columns]>"
|
"Generating data16 csv\n",
|
||||||
]
|
"Generating data17 csv\n",
|
||||||
},
|
"Generating data18 csv\n",
|
||||||
"execution_count": 32,
|
"Generating data19 csv\n",
|
||||||
"metadata": {},
|
"Generating data20 csv\n",
|
||||||
"output_type": "execute_result"
|
"Generating data21 csv\n",
|
||||||
|
"Generating data22 csv\n",
|
||||||
|
"Generating data23 csv\n",
|
||||||
|
"Generating data24 csv\n",
|
||||||
|
"Generating data25 csv\n",
|
||||||
|
"Generating data26 csv\n",
|
||||||
|
"Generating data27 csv\n",
|
||||||
|
"Generating data28 csv\n",
|
||||||
|
"Generating data29 csv\n",
|
||||||
|
"Generating data30 csv\n",
|
||||||
|
"Generating data31 csv\n",
|
||||||
|
"Generating data32 csv\n",
|
||||||
|
"Generating data33 csv\n",
|
||||||
|
"Generating data34 csv\n",
|
||||||
|
"Generating data35 csv\n",
|
||||||
|
"Generating data36 csv\n",
|
||||||
|
"Generating data37 csv\n",
|
||||||
|
"Generating data38 csv\n",
|
||||||
|
"Generating data39 csv\n",
|
||||||
|
"Generating data40 csv\n",
|
||||||
|
"Generating data41 csv\n",
|
||||||
|
"Generating data42 csv\n",
|
||||||
|
"Generating data44 csv\n",
|
||||||
|
"Generating data45 csv\n",
|
||||||
|
"Generating data46 csv\n",
|
||||||
|
"Generating data47 csv\n",
|
||||||
|
"Generating data48 csv\n",
|
||||||
|
"Generating data49 csv\n",
|
||||||
|
"Generating data50 csv\n",
|
||||||
|
"Generating data51 csv\n",
|
||||||
|
"Generating data53 csv\n",
|
||||||
|
"Generating data54 csv\n",
|
||||||
|
"Generating data55 csv\n",
|
||||||
|
"Generating data56 csv\n"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]\n",
|
"# write per state csvs\n",
|
||||||
"df.head()\n",
|
"with open(fips_csv_path) as csv_file:\n",
|
||||||
"df.count"
|
" csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
|
||||||
|
" line_count = 0\n",
|
||||||
|
"\n",
|
||||||
|
" for row in csv_reader:\n",
|
||||||
|
" if line_count == 0:\n",
|
||||||
|
" line_count += 1\n",
|
||||||
|
" else:\n",
|
||||||
|
" fips = row[0].strip()\n",
|
||||||
|
" print(f\"Generating data{fips} csv\")\n",
|
||||||
|
" df1 = df[df.ID.str[:2] == fips]\n",
|
||||||
|
" # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
|
||||||
|
" df1.to_csv(csv_path / f\"data{fips}.csv\", index = False)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "e051623b",
|
"id": "2674fb20",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
|
|
100
score/ipython/score_calc_0.1.ipynb
Normal file
100
score/ipython/score_calc_0.1.ipynb
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "a664f981",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[WindowsPath('C:/opt/justice40-tool/score/scripts'), WindowsPath('C:/opt/justice40-tool/score/scripts'), WindowsPath('C:/opt/justice40-tool/score/scripts'), 'C:\\\\opt\\\\justice40-tool\\\\score\\\\ipython', 'C:\\\\Python39\\\\python39.zip', 'C:\\\\Python39\\\\DLLs', 'C:\\\\Python39\\\\lib', 'C:\\\\Python39', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv', '', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\win32', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\win32\\\\lib', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\Pythonwin', 'c:\\\\opt\\\\justice40-tool\\\\score\\\\venv\\\\lib\\\\site-packages\\\\IPython\\\\extensions', 'C:\\\\Users\\\\j\\\\.ipython']\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ename": "ModuleNotFoundError",
|
||||||
|
"evalue": "No module named 'utils'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"\u001b[1;32m<ipython-input-3-e0c1285d1cc1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 10\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mutils\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mdata_path\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mPath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcwd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;34m\"data\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||||
|
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'utils'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from pathlib import Path\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import csv\n",
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"script_path = Path.cwd().parent / \"scripts\"\n",
|
||||||
|
"sys.path.insert(0, script_path)\n",
|
||||||
|
"print(sys.path)\n",
|
||||||
|
"\n",
|
||||||
|
"from utils import *\n",
|
||||||
|
"\n",
|
||||||
|
"data_path = Path.cwd().parent / \"data\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "1b750f0e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "NameError",
|
||||||
|
"evalue": "name 'get_state_fips_codes' is not defined",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"\u001b[1;32m<ipython-input-2-fec7b31c5df6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# store all fips codes in list\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mfips_state_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_state_fips_codes\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mfips_state_list\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||||
|
"\u001b[1;31mNameError\u001b[0m: name 'get_state_fips_codes' is not defined"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# store all fips codes in list\n",
|
||||||
|
"fips_state_list = get_state_fips_codes\n",
|
||||||
|
"fips_state_list"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "7df430cb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# EJSCreen ETL Load\n",
|
||||||
|
"csv_path = data_path / \"dataset\" / \"ejscreen_2020\""
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
0
score/scripts/__init__.py
Normal file
0
score/scripts/__init__.py
Normal file
|
@ -5,70 +5,64 @@ import os
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from utils import get_state_fips_codes
|
||||||
|
|
||||||
data_path = Path.cwd() / "data"
|
data_path = Path.cwd() / "data"
|
||||||
|
|
||||||
with requests.Session() as s:
|
with requests.Session() as s:
|
||||||
# the fips_states_2010.csv is generated from data here
|
# the fips_states_2010.csv is generated from data here
|
||||||
# https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
|
# https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
|
||||||
fips_csv_path = data_path / "fips_states_2010.csv"
|
state_fips_codes = get_state_fips_codes()
|
||||||
with open(fips_csv_path) as csv_file:
|
for fips in state_fips_codes:
|
||||||
csv_reader = csv.reader(csv_file, delimiter=",")
|
# check if file exists
|
||||||
line_count = 0
|
shp_file_path = data_path.joinpath(
|
||||||
for row in csv_reader:
|
"census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
|
||||||
if line_count == 0:
|
)
|
||||||
line_count += 1
|
if not os.path.isfile(shp_file_path):
|
||||||
|
print(f"downloading {row[1]}")
|
||||||
|
|
||||||
|
# 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/
|
||||||
|
# But using 2010 for now
|
||||||
|
cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
|
||||||
|
download = s.get(cbg_state_url)
|
||||||
|
file_contents = download.content
|
||||||
|
zip_file_path = data_path / "census" / "downloaded.zip"
|
||||||
|
zip_file = open(zip_file_path, "wb")
|
||||||
|
zip_file.write(file_contents)
|
||||||
|
zip_file.close()
|
||||||
|
|
||||||
|
print(f"extracting {row[1]}")
|
||||||
|
|
||||||
|
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
|
||||||
|
shp_dir_path = data_path / "census" / "shp" / fips
|
||||||
|
zip_ref.extractall(shp_dir_path)
|
||||||
|
|
||||||
|
geojson_dir_path = data_path.joinpath(
|
||||||
|
"census",
|
||||||
|
"geojson",
|
||||||
|
)
|
||||||
|
if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
|
||||||
|
# ogr2ogr
|
||||||
|
print(f"encoding GeoJSON for {row[1]}")
|
||||||
|
|
||||||
|
# PWD is different for Windows
|
||||||
|
if os.name == "nt":
|
||||||
|
pwd = "%cd%"
|
||||||
else:
|
else:
|
||||||
fips = row[0].strip()
|
pwd = "${PWD}"
|
||||||
|
cmd = (
|
||||||
# check if file exists
|
'docker run --rm -it -v "'
|
||||||
shp_file_path = data_path.joinpath(
|
+ pwd
|
||||||
"census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
|
+ '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/'
|
||||||
)
|
+ fips
|
||||||
if not os.path.isfile(shp_file_path):
|
+ ".json /home/data/census/shp/"
|
||||||
print(f"downloading {row[1]}")
|
+ fips
|
||||||
|
+ "/tl_2010_"
|
||||||
# 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/
|
+ fips
|
||||||
# But using 2010 for now
|
+ "_bg10.shp"
|
||||||
cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
|
)
|
||||||
download = s.get(cbg_state_url)
|
print(cmd)
|
||||||
file_contents = download.content
|
os.system(cmd)
|
||||||
zip_file_path = data_path / "census" / "downloaded.zip"
|
|
||||||
zip_file = open(zip_file_path, "wb")
|
|
||||||
zip_file.write(file_contents)
|
|
||||||
zip_file.close()
|
|
||||||
|
|
||||||
print(f"extracting {row[1]}")
|
|
||||||
|
|
||||||
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
|
|
||||||
shp_dir_path = data_path / "census" / "shp" / fips
|
|
||||||
zip_ref.extractall(shp_dir_path)
|
|
||||||
|
|
||||||
geojson_dir_path = data_path.joinpath(
|
|
||||||
"census",
|
|
||||||
"geojson",
|
|
||||||
)
|
|
||||||
if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
|
|
||||||
# ogr2ogr
|
|
||||||
print(f"encoding GeoJSON for {row[1]}")
|
|
||||||
|
|
||||||
# PWD is different for Windows
|
|
||||||
if os.name == "nt":
|
|
||||||
pwd = "%cd%"
|
|
||||||
else:
|
|
||||||
pwd = "${PWD}"
|
|
||||||
cmd = (
|
|
||||||
'docker run --rm -it -v "'
|
|
||||||
+ pwd
|
|
||||||
+ '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/'
|
|
||||||
+ fips
|
|
||||||
+ ".json /home/data/census/shp/"
|
|
||||||
+ fips
|
|
||||||
+ "/tl_2010_"
|
|
||||||
+ fips
|
|
||||||
+ "_bg10.shp"
|
|
||||||
)
|
|
||||||
print(cmd)
|
|
||||||
os.system(cmd)
|
|
||||||
|
|
||||||
# generate CBG CSV table for pandas
|
# generate CBG CSV table for pandas
|
||||||
## load in memory
|
## load in memory
|
||||||
|
@ -87,10 +81,7 @@ with requests.Session() as s:
|
||||||
cbg_per_state_list[geoid10_state_id] = []
|
cbg_per_state_list[geoid10_state_id] = []
|
||||||
cbg_per_state_list[geoid10_state_id].append(geoid10)
|
cbg_per_state_list[geoid10_state_id].append(geoid10)
|
||||||
|
|
||||||
csv_dir_path = data_path.joinpath(
|
csv_dir_path = data_path / "census" / "csv"
|
||||||
"census",
|
|
||||||
"csv",
|
|
||||||
)
|
|
||||||
## write to individual state csv
|
## write to individual state csv
|
||||||
for state_id in cbg_per_state_list:
|
for state_id in cbg_per_state_list:
|
||||||
geoid10_list = cbg_per_state_list[state_id]
|
geoid10_list = cbg_per_state_list[state_id]
|
||||||
|
|
|
@ -2,6 +2,8 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
from utils import get_state_fips_codes
|
||||||
|
|
||||||
data_path = Path.cwd() / "data"
|
data_path = Path.cwd() / "data"
|
||||||
|
|
||||||
# remove existing mbtiles file
|
# remove existing mbtiles file
|
||||||
|
@ -14,17 +16,41 @@ mvt_tiles_path = data_path / "tiles" / "mvt"
|
||||||
if os.path.exists(mvt_tiles_path):
|
if os.path.exists(mvt_tiles_path):
|
||||||
shutil.rmtree(mvt_tiles_path)
|
shutil.rmtree(mvt_tiles_path)
|
||||||
|
|
||||||
|
# Merge scores into json
|
||||||
|
# TODO: for this first pass, just merging ACS EJSCREEN indicators
|
||||||
|
# Per https://github.com/usds/justice40-tool/issues/102
|
||||||
|
|
||||||
|
if os.name == "nt":
|
||||||
|
pwd = "%cd%"
|
||||||
|
else:
|
||||||
|
pwd = "${PWD}"
|
||||||
|
|
||||||
|
state_fips_codes = get_state_fips_codes()
|
||||||
|
for fips in state_fips_codes:
|
||||||
|
cmd = (
|
||||||
|
'docker run --rm -v "'
|
||||||
|
+ pwd
|
||||||
|
+ '"/:/home '
|
||||||
|
+ "osgeo/gdal:alpine-small-latest ogr2ogr -f GeoJSON "
|
||||||
|
+ f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN '/home/data/dataset/ejscreen_2020/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" "
|
||||||
|
+ f"/home/data/score/geojson/{fips}.json /home/data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf"
|
||||||
|
)
|
||||||
|
print(cmd)
|
||||||
|
os.system(cmd)
|
||||||
|
|
||||||
# get a list of all json files to plug in the docker commands below
|
# get a list of all json files to plug in the docker commands below
|
||||||
# (workaround since *.json doesn't seem to work)
|
# (workaround since *.json doesn't seem to work)
|
||||||
geojson_list = ""
|
geojson_list = ""
|
||||||
geojson_path = data_path / "census" / "geojson"
|
geojson_path = data_path / "score" / "geojson"
|
||||||
for file in os.listdir(geojson_path):
|
for file in os.listdir(geojson_path):
|
||||||
if file.endswith(".json"):
|
if file.endswith(".json"):
|
||||||
geojson_list += f"/home/data/census/geojson/{file} "
|
geojson_list += f"/home/data/score/geojson/{file} "
|
||||||
|
|
||||||
if geojson_list == "":
|
if geojson_list == "":
|
||||||
print("No GeoJson files found. Please run download_cbg.py first")
|
print("No GeoJson files found. Please run scripts/download_cbg.py first")
|
||||||
|
|
||||||
|
|
||||||
|
# generate mbtiles file
|
||||||
# PWD is different for Windows
|
# PWD is different for Windows
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
pwd = "%cd%"
|
pwd = "%cd%"
|
||||||
|
@ -33,7 +59,7 @@ else:
|
||||||
cmd = (
|
cmd = (
|
||||||
'docker run --rm -it -v "'
|
'docker run --rm -it -v "'
|
||||||
+ pwd
|
+ pwd
|
||||||
+ '"/:/home klokantech/tippecanoe tippecanoe -s_srs EPSG:4269 -t_srs EPSG:4326 --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --drop-densest-as-needed --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
|
+ '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
|
||||||
+ geojson_list
|
+ geojson_list
|
||||||
)
|
)
|
||||||
print(cmd)
|
print(cmd)
|
||||||
|
|
20
score/scripts/utils.py
Normal file
20
score/scripts/utils.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# common usage functions
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def get_state_fips_codes():
    """Return the state FIPS codes listed in ``data/fips_states_2010.csv``.

    The CSV is looked up relative to the current working directory
    (``<cwd>/data/fips_states_2010.csv``). The first row is treated as a
    header and skipped; for every following row the first column, with
    surrounding whitespace removed, is taken as the FIPS code.

    Returns:
        list: FIPS code strings in file order. Blank rows and rows whose
        first column is empty are ignored.

    Raises:
        OSError: if the CSV file cannot be opened (for example
            FileNotFoundError when it does not exist under ``<cwd>/data``).
    """
    data_path = Path.cwd() / "data"
    fips_csv_path = data_path / "fips_states_2010.csv"
    fips_state_list = []

    # newline="" lets the csv module do its own newline handling, as its
    # documentation recommends for file objects passed to csv.reader.
    with open(fips_csv_path, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=",")

        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)

        for row in csv_reader:
            # csv.reader yields an empty list for blank lines, so guard
            # before indexing the first column.
            if not row:
                continue
            fips = row[0].strip()
            if fips:
                fips_state_list.append(fips)

    return fips_state_list
|
Loading…
Add table
Reference in a new issue