{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c0a1f3e2",
   "metadata": {},
   "source": [
    "# Census ACS unemployment by block group\n",
    "\n",
    "Downloads ACS 5-year estimates (table B23025) at block-group level for every state/territory FIPS code listed in `fips_states_2010.csv`, computes the unemployed fraction of the civilian labor force, and writes `GEOID10` plus that fraction to `usa.csv` under `OUTPUT_PATH`.\n",
    "\n",
    "**TODO:** block groups with small samples currently keep a high-variance fraction instead of a missing value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0491828b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import censusdata\n",
    "import pandas as pd\n",
    "\n",
    "ACS_YEAR = 2019\n",
    "\n",
    "# All paths are resolved relative to the parent of the current working directory.\n",
    "DATA_PATH = Path.cwd().parent / \"data\"\n",
    "FIPS_CSV_PATH = DATA_PATH / \"fips_states_2010.csv\"\n",
    "OUTPUT_PATH = DATA_PATH / \"dataset\" / f\"census_acs_{ACS_YEAR}\"\n",
    "\n",
    "GEOID_FIELD_NAME = \"GEOID10\"\n",
    "UNEMPLOYED_FIELD_NAME = \"Unemployed Civilians (fraction)\"\n",
    "\n",
    "# ACS table B23025 variables used below (see the table printout in the next section\n",
    "# for the authoritative labels).\n",
    "UNEMPLOYED_VAR = \"B23025_005E\"  # numerator: unemployed\n",
    "CIVILIAN_LABOR_FORCE_VAR = \"B23025_003E\"  # denominator: civilian labor force\n",
    "\n",
    "# Some display settings to make pandas outputs more readable.\n",
    "pd.set_option(\"display.expand_frame_repr\", False)\n",
    "pd.set_option(\"display.precision\", 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d9e7b14",
   "metadata": {},
   "source": [
    "## Inspect ACS table B23025"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "654f25a1",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Following the tutorial at https://jtleider.github.io/censusdata/example1.html.\n",
    "# Full list of fields is at https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx\n",
    "censusdata.printtable(censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B23025\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e82a4c6f",
   "metadata": {},
   "source": [
    "## Download block-group data for every state/territory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8999cea4",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:\n",
    "    \"\"\"Build a FIPS/GEOID string from a censusdata `censusgeo` index entry.\n",
    "\n",
    "    Args:\n",
    "        censusgeo: A geography object as found in the index of a frame returned\n",
    "            by `censusdata.download`.\n",
    "\n",
    "    Returns:\n",
    "        The values of the (name, value) pairs from `censusgeo.params()`,\n",
    "        concatenated in the order they are returned.\n",
    "        NOTE(review): for block-group rows this is presumably\n",
    "        state + county + tract + block group -- confirm against the output.\n",
    "    \"\"\"\n",
    "    return \"\".join(value for (_, value) in censusgeo.params())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6be1c7d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the state/territory FIPS codes up front so the CSV file is closed\n",
    "# before the slow network downloads start.\n",
    "# `newline=\"\"` is what the csv module documentation recommends for reader files.\n",
    "with open(FIPS_CSV_PATH, newline=\"\") as csv_file:\n",
    "    csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
    "    next(csv_reader, None)  # Skip the header row.\n",
    "    # The FIPS code is the first column; blank lines and empty codes are skipped.\n",
    "    state_fips_codes = [\n",
    "        row[0].strip() for row in csv_reader if row and row[0].strip()\n",
    "    ]\n",
    "\n",
    "dfs = []\n",
    "for fips in state_fips_codes:\n",
    "    print(f\"Downloading data for state/territory with FIPS code {fips}\")\n",
    "\n",
    "    dfs.append(\n",
    "        censusdata.download(\n",
    "            src=\"acs5\",\n",
    "            year=ACS_YEAR,\n",
    "            geo=censusdata.censusgeo(\n",
    "                [(\"state\", fips), (\"county\", \"*\"), (\"block group\", \"*\")]\n",
    "            ),\n",
    "            var=[UNEMPLOYED_VAR, CIVILIAN_LABOR_FORCE_VAR],\n",
    "        )\n",
    "    )\n",
    "\n",
    "# Raises ValueError if no FIPS codes were found (nothing to concatenate).\n",
    "df = pd.concat(dfs)\n",
    "\n",
    "df[GEOID_FIELD_NAME] = df.index.to_series().apply(func=fips_from_censusdata_censusgeo)\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17b3d05a",
   "metadata": {},
   "source": [
    "## Compute the unemployed fraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "803cce31",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Calculate unemployment as a fraction (not a percentage) of the civilian labor force.\n",
    "# TODO: remove small-sample data that should be `None` instead of a high-variance fraction.\n",
    "df[UNEMPLOYED_FIELD_NAME] = df[UNEMPLOYED_VAR] / df[CIVILIAN_LABOR_FORCE_VAR]\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9a4f62c8",
   "metadata": {},
   "source": [
    "## Write the output CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a269bb1",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Create the output directory (and any missing parents) if it does not exist yet.\n",
    "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "columns_to_include = [GEOID_FIELD_NAME, UNEMPLOYED_FIELD_NAME]\n",
    "\n",
    "df[columns_to_include].to_csv(path_or_buf=OUTPUT_PATH / \"usa.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}