mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 18:14:19 -08:00
376 lines
12 KiB
Text
376 lines
12 KiB
Text
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "f4d63367",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"data_path = Path.cwd().parent / \"data\" / \"tmp\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "0e6eb55e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"WindowsPath('C:/opt/justice40-tool/score/data/tmp')"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data_path"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "a1431996",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'gaftp.epa.gov'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
|
||
" warnings.warn(\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import requests\n",
|
||
"download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n",
|
||
"file_contents = download.content\n",
|
||
"zip_file_path = data_path / \"downloaded.zip\"\n",
|
||
"zip_file = open(zip_file_path, \"wb\")\n",
|
||
"zip_file.write(file_contents)\n",
|
||
"zip_file.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "bc5f3466",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import zipfile\n",
|
||
"with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
|
||
" zip_ref.extractall(data_path)\n",
|
||
"ejscreen_csv = data_path / \"EJSCREEN_2020_StatePctile.csv\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "392ccb67",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>OBJECTID</th>\n",
|
||
" <th>ID</th>\n",
|
||
" <th>STATE_NAME</th>\n",
|
||
" <th>ST_ABBREV</th>\n",
|
||
" <th>REGION</th>\n",
|
||
" <th>ACSTOTPOP</th>\n",
|
||
" <th>D_PM25_2</th>\n",
|
||
" <th>B_PM25_D2</th>\n",
|
||
" <th>P_PM25_D2</th>\n",
|
||
" <th>D_OZONE_2</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>T_PNPL</th>\n",
|
||
" <th>T_PNPL_D2</th>\n",
|
||
" <th>T_PRMP</th>\n",
|
||
" <th>T_PRMP_D2</th>\n",
|
||
" <th>T_PTSDF</th>\n",
|
||
" <th>T_PTSDF_D2</th>\n",
|
||
" <th>T_PWDIS</th>\n",
|
||
" <th>T_PWDIS_D2</th>\n",
|
||
" <th>Shape_Length</th>\n",
|
||
" <th>Shape_Area</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>10010201001</td>\n",
|
||
" <td>Alabama</td>\n",
|
||
" <td>AL</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>636</td>\n",
|
||
" <td>-492.025529412</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>52.0</td>\n",
|
||
" <td>-1866.38637046</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.071 facilities/km distance (79%ile)</td>\n",
|
||
" <td>40%ile</td>\n",
|
||
" <td>0.085 facilities/km distance (23%ile)</td>\n",
|
||
" <td>53%ile</td>\n",
|
||
" <td>0.59 facilities/km distance (57%ile)</td>\n",
|
||
" <td>38%ile</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>13443.155206</td>\n",
|
||
" <td>6.040790e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>10010201002</td>\n",
|
||
" <td>Alabama</td>\n",
|
||
" <td>AL</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1287</td>\n",
|
||
" <td>-2053.08341364</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>-7787.90260177</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.064 facilities/km distance (76%ile)</td>\n",
|
||
" <td>19%ile</td>\n",
|
||
" <td>0.074 facilities/km distance (17%ile)</td>\n",
|
||
" <td>42%ile</td>\n",
|
||
" <td>0.45 facilities/km distance (52%ile)</td>\n",
|
||
" <td>23%ile</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>11917.089598</td>\n",
|
||
" <td>7.834160e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>10010202001</td>\n",
|
||
" <td>Alabama</td>\n",
|
||
" <td>AL</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>810</td>\n",
|
||
" <td>1846.12693767</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>75.0</td>\n",
|
||
" <td>7002.78371663</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.069 facilities/km distance (78%ile)</td>\n",
|
||
" <td>85%ile</td>\n",
|
||
" <td>0.078 facilities/km distance (20%ile)</td>\n",
|
||
" <td>67%ile</td>\n",
|
||
" <td>0.65 facilities/km distance (59%ile)</td>\n",
|
||
" <td>77%ile</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>7770.915121</td>\n",
|
||
" <td>2.900774e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>10010202002</td>\n",
|
||
" <td>Alabama</td>\n",
|
||
" <td>AL</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1218</td>\n",
|
||
" <td>1392.07530488</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>72.0</td>\n",
|
||
" <td>5280.46153188</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.076 facilities/km distance (81%ile)</td>\n",
|
||
" <td>83%ile</td>\n",
|
||
" <td>0.087 facilities/km distance (24%ile)</td>\n",
|
||
" <td>66%ile</td>\n",
|
||
" <td>1 facilities/km distance (69%ile)</td>\n",
|
||
" <td>78%ile</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>6506.804784</td>\n",
|
||
" <td>1.793332e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>10010203001</td>\n",
|
||
" <td>Alabama</td>\n",
|
||
" <td>AL</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2641</td>\n",
|
||
" <td>-769.374640358</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>48.0</td>\n",
|
||
" <td>-2911.8926061</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.074 facilities/km distance (80%ile)</td>\n",
|
||
" <td>32%ile</td>\n",
|
||
" <td>0.08 facilities/km distance (21%ile)</td>\n",
|
||
" <td>51%ile</td>\n",
|
||
" <td>1.2 facilities/km distance (74%ile)</td>\n",
|
||
" <td>24%ile</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>None</td>\n",
|
||
" <td>11070.367848</td>\n",
|
||
" <td>5.461602e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 124 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" OBJECTID ID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
|
||
"0 1 10010201001 Alabama AL 4 636 \n",
|
||
"1 2 10010201002 Alabama AL 4 1287 \n",
|
||
"2 3 10010202001 Alabama AL 4 810 \n",
|
||
"3 4 10010202002 Alabama AL 4 1218 \n",
|
||
"4 5 10010203001 Alabama AL 4 2641 \n",
|
||
"\n",
|
||
" D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n",
|
||
"0 -492.025529412 6 52.0 -1866.38637046 ... \n",
|
||
"1 -2053.08341364 4 30.0 -7787.90260177 ... \n",
|
||
"2 1846.12693767 8 75.0 7002.78371663 ... \n",
|
||
"3 1392.07530488 8 72.0 5280.46153188 ... \n",
|
||
"4 -769.374640358 5 48.0 -2911.8926061 ... \n",
|
||
"\n",
|
||
" T_PNPL T_PNPL_D2 \\\n",
|
||
"0 0.071 facilities/km distance (79%ile) 40%ile \n",
|
||
"1 0.064 facilities/km distance (76%ile) 19%ile \n",
|
||
"2 0.069 facilities/km distance (78%ile) 85%ile \n",
|
||
"3 0.076 facilities/km distance (81%ile) 83%ile \n",
|
||
"4 0.074 facilities/km distance (80%ile) 32%ile \n",
|
||
"\n",
|
||
" T_PRMP T_PRMP_D2 \\\n",
|
||
"0 0.085 facilities/km distance (23%ile) 53%ile \n",
|
||
"1 0.074 facilities/km distance (17%ile) 42%ile \n",
|
||
"2 0.078 facilities/km distance (20%ile) 67%ile \n",
|
||
"3 0.087 facilities/km distance (24%ile) 66%ile \n",
|
||
"4 0.08 facilities/km distance (21%ile) 51%ile \n",
|
||
"\n",
|
||
" T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n",
|
||
"0 0.59 facilities/km distance (57%ile) 38%ile None None \n",
|
||
"1 0.45 facilities/km distance (52%ile) 23%ile None None \n",
|
||
"2 0.65 facilities/km distance (59%ile) 77%ile None None \n",
|
||
"3 1 facilities/km distance (69%ile) 78%ile None None \n",
|
||
"4 1.2 facilities/km distance (74%ile) 24%ile None None \n",
|
||
"\n",
|
||
" Shape_Length Shape_Area \n",
|
||
"0 13443.155206 6.040790e+06 \n",
|
||
"1 11917.089598 7.834160e+06 \n",
|
||
"2 7770.915121 2.900774e+06 \n",
|
||
"3 6506.804784 1.793332e+06 \n",
|
||
"4 11070.367848 5.461602e+06 \n",
|
||
"\n",
|
||
"[5 rows x 124 columns]"
|
||
]
|
||
},
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"import pandas as pd\n",
|
||
"df = pd.read_csv(ejscreen_csv, low_memory=False)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "0ce9e22a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<bound method DataFrame.count of ID ACSTOTPOP LESSHSPCT LOWINCPCT\n",
|
||
"0 10010201001 636 0.208134 0.385220\n",
|
||
"1 10010201002 1287 0.040678 0.163170\n",
|
||
"2 10010202001 810 0.135563 0.501247\n",
|
||
"3 10010202002 1218 0.192000 0.393701\n",
|
||
"4 10010203001 2641 0.125473 0.308217\n",
|
||
"... ... ... ... ...\n",
|
||
"220328 721537506011 699 0.391389 0.902718\n",
|
||
"220329 721537506012 2432 0.185852 0.783717\n",
|
||
"220330 721537506013 976 0.018116 0.776639\n",
|
||
"220331 721537506021 1707 0.375422 0.867377\n",
|
||
"220332 721537506022 804 0.162791 0.942786\n",
|
||
"\n",
|
||
"[220333 rows x 4 columns]>"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]\n",
|
||
"df.head()\n",
|
||
"df.count"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e051623b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.0"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|