{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20aa3891",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import csv\n",
    "import sys\n",
    "import os\n",
    "\n",
    "# Put the parent of the current working directory on sys.path so the\n",
    "# project-local `etl` and `utils` imports below can be resolved.\n",
    "module_path = os.path.abspath(os.path.join(\"..\"))\n",
    "if module_path not in sys.path:\n",
    "    sys.path.append(module_path)\n",
    "\n",
    "from etl.sources.census.etl_utils import get_state_fips_codes\n",
    "from utils import unzip_file_from_url, remove_all_from_dir\n",
    "\n",
    "# Working directories, relative to the parent of the current directory.\n",
    "DATA_PATH = Path.cwd().parent / \"data\"\n",
    "TMP_PATH = DATA_PATH / \"tmp\"\n",
    "# Source archive for the EJSCREEN 2020 state-percentile table.\n",
    "EJSCREEN_FTP_URL = (\n",
    "    \"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\"\n",
    ")\n",
    "# Path the CSV is read from once the archive has been fetched.\n",
    "EJSCREEN_CSV = TMP_PATH / \"EJSCREEN_2020_StatePctile.csv\"\n",
    "# Output directory for the nationwide and per-state CSV files.\n",
    "CSV_PATH = DATA_PATH / \"dataset\" / \"ejscreen_2020\"\n",
    "print(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc3fb9ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# download file from ejscreen ftp\n",
    "# NOTE(review): both path arguments are TMP_PATH -- presumably the download\n",
    "# and extraction directories; confirm against utils.unzip_file_from_url.\n",
    "unzip_file_from_url(EJSCREEN_FTP_URL, TMP_PATH, TMP_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b25738bb",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\n",
    "    EJSCREEN_CSV,\n",
    "    # Keep ID as text: the per-state split compares its first two\n",
    "    # characters against each state FIPS code.\n",
    "    dtype={\"ID\": \"string\"},\n",
    "    # EJSCREEN writes the word \"None\" for NA data.\n",
    "    na_values=[\"None\"],\n",
    "    low_memory=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fa2077a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write nationwide csv\n",
    "# Create the output directory (and any missing parents) before writing.\n",
    "CSV_PATH.mkdir(parents=True, exist_ok=True)\n",
    "df.to_csv(CSV_PATH / \"usa.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e5cc12a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write per state csvs\n",
    "for fips in get_state_fips_codes(DATA_PATH):\n",
    "    print(f\"Generating data{fips} csv\")\n",
    "    # Rows whose ID begins (first two characters) with this FIPS code.\n",
    "    state_df = df[df.ID.str[:2] == fips]\n",
    "    # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
    "    state_df.to_csv(CSV_PATH / f\"data{fips}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81b977f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# cleanup\n",
    "remove_all_from_dir(TMP_PATH)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}