{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3f7c1a9e",
   "metadata": {},
   "source": [
    "# EJSCREEN 2020 state-percentile extract\n",
    "\n",
    "Downloads the EJSCREEN 2020 state-percentile CSV archive from `gaftp.epa.gov`, extracts it into `../data/tmp`, and writes the result to `../data/dataset/ejscreen_2020`:\n",
    "\n",
    "- `usa.csv`: every row of the downloaded file\n",
    "- `data<code>.csv`: one file per code listed in `fips_states_2010.csv`, holding the rows whose `ID` starts with that code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20aa3891",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import zipfile\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "\n",
    "# All inputs and outputs live under ../data relative to the working directory.\n",
    "data_path = Path.cwd().parent / \"data\"\n",
    "fips_csv_path = data_path / \"fips_states_2010.csv\"\n",
    "csv_path = data_path / \"dataset\" / \"ejscreen_2020\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67a58c24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): verify=False disables TLS certificate verification for this\n",
    "# download. It is kept unchanged here; confirm whether it is still required\n",
    "# for this host and remove it if not.\n",
    "download = requests.get(\n",
    "    \"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\",\n",
    "    verify=False,\n",
    "    timeout=60,  # do not hang the kernel forever if the server stalls\n",
    ")\n",
    "# Fail here on an HTTP error instead of saving an error page as the archive.\n",
    "download.raise_for_status()\n",
    "\n",
    "# The scratch directory may not exist yet on a fresh checkout.\n",
    "zip_file_path = data_path / \"tmp\"\n",
    "zip_file_path.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# write_bytes opens, writes and closes the file in one call.\n",
    "(zip_file_path / \"downloaded.zip\").write_bytes(download.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc3fb9ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unpack the archive next to the downloaded zip.\n",
    "with zipfile.ZipFile(zip_file_path / \"downloaded.zip\", \"r\") as zip_ref:\n",
    "    zip_ref.extractall(zip_file_path)\n",
    "ejscreen_csv = zip_file_path / \"EJSCREEN_2020_StatePctile.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b25738bb",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# ID is read as text because its first two characters are matched against\n",
    "# the codes from the FIPS file when the per-state files are written.\n",
    "df = pd.read_csv(ejscreen_csv, dtype={\"ID\": \"string\"}, low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fa2077a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write nationwide csv\n",
    "csv_path.mkdir(parents=True, exist_ok=True)\n",
    "df.to_csv(csv_path / \"usa.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e5cc12a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write per state csvs\n",
    "with open(fips_csv_path, newline=\"\") as csv_file:\n",
    "    csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
    "    next(csv_reader, None)  # skip the header row\n",
    "\n",
    "    for row in csv_reader:\n",
    "        if not row:\n",
    "            continue  # blank line in the FIPS file\n",
    "        fips = row[0].strip()\n",
    "        print(f\"Generating data{fips} csv\")\n",
    "        state_df = df[df.ID.str[:2] == fips]\n",
    "        # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
    "        state_df.to_csv(csv_path / f\"data{fips}.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}