{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "f4d63367", "metadata": {}, "outputs": [], "source": [ "import zipfile\n", "from pathlib import Path\n", "\n", "import requests\n", "\n", "# Scratch directory for the downloaded archive and its extracted files,\n", "# located one level above the current working directory.\n", "data_path = Path.cwd().parent / \"data\" / \"tmp\"\n", "# Create it up front so the download cell can write into it on a fresh checkout.\n", "data_path.mkdir(parents=True, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": 7, "id": "0e6eb55e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "WindowsPath('C:/opt/justice40-tool/score/data/tmp')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_path" ] }, { "cell_type": "code", "execution_count": 15, "id": "a1431996", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'gaftp.epa.gov'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n", " warnings.warn(\n" ] } ], "source": [ "# NOTE(review): verify=False turns off TLS certificate validation for this request,\n", "# which is what triggers the InsecureRequestWarning. Presumably a workaround for a\n", "# certificate problem on this host - confirm, and drop it if the request succeeds\n", "# with verification enabled.\n", "download = requests.get(\n", "    \"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\",\n", "    verify=False,\n", "    timeout=60,  # seconds; without it a stalled connection blocks the kernel indefinitely\n", ")\n", "# Stop here on an HTTP error instead of saving an error page as the zip archive.\n", "download.raise_for_status()\n", "file_contents = download.content\n", "zip_file_path = data_path / \"downloaded.zip\"\n", "# The with-block closes the file even if the write raises.\n", "with open(zip_file_path, \"wb\") as zip_file:\n", "    zip_file.write(file_contents)" ] }, { "cell_type": "code", "execution_count": 20, "id": "bc5f3466", "metadata": {}, "outputs": [], "source": [ "# Unpack every member of the downloaded archive into data_path.\n", "with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n", "    zip_ref.extractall(data_path)\n", "# Path where the extracted CSV is expected to be after the unpack above.\n", "ejscreen_csv = data_path / \"EJSCREEN_2020_StatePctile.csv\"" ] }, { "cell_type": "code", "execution_count": 22, "id": "392ccb67", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OBJECTIDIDSTATE_NAMEST_ABBREVREGIONACSTOTPOPD_PM25_2B_PM25_D2P_PM25_D2D_OZONE_2...T_PNPLT_PNPL_D2T_PRMPT_PRMP_D2T_PTSDFT_PTSDF_D2T_PWDIST_PWDIS_D2Shape_LengthShape_Area
0110010201001AlabamaAL4636-492.025529412652.0-1866.38637046...0.071 facilities/km distance (79%ile)40%ile0.085 facilities/km distance (23%ile)53%ile0.59 facilities/km distance (57%ile)38%ileNoneNone13443.1552066.040790e+06
1210010201002AlabamaAL41287-2053.08341364430.0-7787.90260177...0.064 facilities/km distance (76%ile)19%ile0.074 facilities/km distance (17%ile)42%ile0.45 facilities/km distance (52%ile)23%ileNoneNone11917.0895987.834160e+06
2310010202001AlabamaAL48101846.12693767875.07002.78371663...0.069 facilities/km distance (78%ile)85%ile0.078 facilities/km distance (20%ile)67%ile0.65 facilities/km distance (59%ile)77%ileNoneNone7770.9151212.900774e+06
3410010202002AlabamaAL412181392.07530488872.05280.46153188...0.076 facilities/km distance (81%ile)83%ile0.087 facilities/km distance (24%ile)66%ile1 facilities/km distance (69%ile)78%ileNoneNone6506.8047841.793332e+06
4510010203001AlabamaAL42641-769.374640358548.0-2911.8926061...0.074 facilities/km distance (80%ile)32%ile0.08 facilities/km distance (21%ile)51%ile1.2 facilities/km distance (74%ile)24%ileNoneNone11070.3678485.461602e+06
\n", "

5 rows × 124 columns

\n", "
" ], "text/plain": [ " OBJECTID ID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n", "0 1 10010201001 Alabama AL 4 636 \n", "1 2 10010201002 Alabama AL 4 1287 \n", "2 3 10010202001 Alabama AL 4 810 \n", "3 4 10010202002 Alabama AL 4 1218 \n", "4 5 10010203001 Alabama AL 4 2641 \n", "\n", " D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n", "0 -492.025529412 6 52.0 -1866.38637046 ... \n", "1 -2053.08341364 4 30.0 -7787.90260177 ... \n", "2 1846.12693767 8 75.0 7002.78371663 ... \n", "3 1392.07530488 8 72.0 5280.46153188 ... \n", "4 -769.374640358 5 48.0 -2911.8926061 ... \n", "\n", " T_PNPL T_PNPL_D2 \\\n", "0 0.071 facilities/km distance (79%ile) 40%ile \n", "1 0.064 facilities/km distance (76%ile) 19%ile \n", "2 0.069 facilities/km distance (78%ile) 85%ile \n", "3 0.076 facilities/km distance (81%ile) 83%ile \n", "4 0.074 facilities/km distance (80%ile) 32%ile \n", "\n", " T_PRMP T_PRMP_D2 \\\n", "0 0.085 facilities/km distance (23%ile) 53%ile \n", "1 0.074 facilities/km distance (17%ile) 42%ile \n", "2 0.078 facilities/km distance (20%ile) 67%ile \n", "3 0.087 facilities/km distance (24%ile) 66%ile \n", "4 0.08 facilities/km distance (21%ile) 51%ile \n", "\n", " T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n", "0 0.59 facilities/km distance (57%ile) 38%ile None None \n", "1 0.45 facilities/km distance (52%ile) 23%ile None None \n", "2 0.65 facilities/km distance (59%ile) 77%ile None None \n", "3 1 facilities/km distance (69%ile) 78%ile None None \n", "4 1.2 facilities/km distance (74%ile) 24%ile None None \n", "\n", " Shape_Length Shape_Area \n", "0 13443.155206 6.040790e+06 \n", "1 11917.089598 7.834160e+06 \n", "2 7770.915121 2.900774e+06 \n", "3 6506.804784 1.793332e+06 \n", "4 11070.367848 5.461602e+06 \n", "\n", "[5 rows x 124 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "# Read ID as text rather than letting pandas infer an integer: integer parsing drops\n", "# any leading zero, and the Alabama rows were coming back as 11-digit numbers.\n", "# NOTE(review): presumably these are zero-padded census identifiers - confirm the\n", "# expected width against the EJSCREEN data dictionary.\n", "df = pd.read_csv(ejscreen_csv, low_memory=False, dtype={\"ID\": str})\n", "# Preview the first rows; the stored output predates the dtype change until re-run.\n", "df.head()" ] }, { "cell_type": "code", 
"execution_count": null, "id": "0ce9e22a", "metadata": {}, "outputs": [], "source": [ "# Narrow the frame to the four columns listed here.\n", "df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]\n", "# Only the last expression of a cell is rendered automatically, so the preview\n", "# has to be displayed explicitly.\n", "display(df.head())\n", "# count is a method: call it to get the number of non-null values per column\n", "# (without the parentheses the cell only shows the bound-method repr).\n", "df.count()" ] }, { "cell_type": "code", "execution_count": null, "id": "e051623b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 5 }