{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7e1a0c4",
   "metadata": {},
   "source": [
    "# State median household income (ACS 5-year)\n",
    "\n",
    "Downloads state-level ACS 5-year estimates with `censusdata` for every state FIPS code returned by `get_state_fips_codes`, then builds a table holding the state identifier (`GEOID2`) and the median household income variable (`B19013_001E`).\n",
    "\n",
    "The CSV export in the last cell is disabled by default; set `WRITE_OUTPUT_CSV = True` in the configuration cell to write it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4899d2ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import censusdata\n",
    "import pandas as pd\n",
    "\n",
    "# Add the directory two levels up to sys.path so that `data_pipeline` can be imported.\n",
    "module_path = os.path.abspath(os.path.join(\"../..\"))\n",
    "if module_path not in sys.path:\n",
    "    sys.path.append(module_path)\n",
    "\n",
    "from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes\n",
    "\n",
    "\n",
    "ACS_YEAR = 2010\n",
    "\n",
    "DATA_PATH = Path.cwd().parent / \"data\"\n",
    "FIPS_CSV_PATH = DATA_PATH / \"fips_states_2010.csv\"\n",
    "\n",
    "GEOID_FIELD_NAME = \"GEOID10\"\n",
    "UNEMPLOYED_FIELD_NAME = \"Unemployed Civilians (fraction)\"\n",
    "\n",
    "# Census variables requested for every state.\n",
    "# NOTE(review): only the median income variable is used by the export at the end of\n",
    "# this notebook; the two B23025_* variables are downloaded but never read - confirm\n",
    "# whether they are still needed.\n",
    "MEDIAN_INCOME_VARIABLE = \"B19013_001E\"\n",
    "ACS_VARIABLES = [\"B23025_005E\", \"B23025_003E\", MEDIAN_INCOME_VARIABLE]\n",
    "\n",
    "# Column names of the exported table.\n",
    "STATE_GEOID_FIELD_NAME = \"GEOID2\"\n",
    "STATE_MEDIAN_INCOME_FIELD_NAME = \"Median household income (State)\"\n",
    "\n",
    "# The export is opt-in so that running the notebook never writes files by accident.\n",
    "# NOTE(review): the file name says 2014 to 2019 while ACS_YEAR is 2010 - confirm the\n",
    "# intended name before enabling the export.\n",
    "WRITE_OUTPUT_CSV = False\n",
    "OUTPUT_CSV_PATH = (\n",
    "    DATA_PATH\n",
    "    / \"needs_to_be_moved_to_s3\"\n",
    "    / \"2014_to_2019_state_median_income.csv\"\n",
    ")\n",
    "\n",
    "# Some display settings to make pandas outputs more readable.\n",
    "pd.set_option(\"display.expand_frame_repr\", False)\n",
    "pd.set_option(\"display.precision\", 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4dd8feec",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Following the tutorial at https://jtleider.github.io/censusdata/example1.html.\n",
    "# Full list of fields is at https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx\n",
    "# censusdata.printtable(\n",
    "#     censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B25077\")\n",
    "# )\n",
    "\n",
    "censusdata.search(\n",
    "    src=\"acs5\", year=ACS_YEAR, field=\"label\", criterion=\"employment status\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3d29f5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:\n",
    "    \"\"\"Create a FIPS code from the proprietary censusgeo index.\n",
    "\n",
    "    Concatenates, in order, the value of every (key, value) pair returned by\n",
    "    `censusgeo.params()`; the keys themselves are ignored.\n",
    "    \"\"\"\n",
    "    return \"\".join(value for _, value in censusgeo.params())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b40afd3",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# One download per state FIPS code; the per-state frames are concatenated below.\n",
    "dfs = []\n",
    "for fips in get_state_fips_codes(DATA_PATH):\n",
    "    print(f\"Fetching data for fips {fips}\")\n",
    "    dfs.append(\n",
    "        censusdata.download(\n",
    "            src=\"acs5\",\n",
    "            year=ACS_YEAR,\n",
    "            # State level only: (\"county\", \"*\") and (\"block group\", \"*\") are not requested.\n",
    "            geo=censusdata.censusgeo([(\"state\", fips)]),\n",
    "            var=ACS_VARIABLES,\n",
    "        )\n",
    "    )\n",
    "\n",
    "df = pd.concat(dfs)\n",
    "\n",
    "# The downloaded frames are indexed by censusgeo objects; derive a plain string id from them.\n",
    "df[GEOID_FIELD_NAME] = df.index.to_series().apply(\n",
    "    func=fips_from_censusdata_censusgeo\n",
    ")\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "caa0b502",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "columns_to_include = [STATE_GEOID_FIELD_NAME, STATE_MEDIAN_INCOME_FIELD_NAME]\n",
    "\n",
    "# Build a new frame instead of renaming `df` in place, so that `df` still matches\n",
    "# what the previous cell displayed and this cell can be re-run safely.\n",
    "state_median_income_df = df.rename(\n",
    "    columns={\n",
    "        GEOID_FIELD_NAME: STATE_GEOID_FIELD_NAME,\n",
    "        MEDIAN_INCOME_VARIABLE: STATE_MEDIAN_INCOME_FIELD_NAME,\n",
    "    }\n",
    ")[columns_to_include]\n",
    "\n",
    "if WRITE_OUTPUT_CSV:\n",
    "    OUTPUT_CSV_PATH.parent.mkdir(parents=True, exist_ok=True)\n",
    "    state_median_income_df.to_csv(path_or_buf=OUTPUT_CSV_PATH, index=False)\n",
    "\n",
    "state_median_income_df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}