In [None]:
import pandas as pd
import censusdata
import csv
from pathlib import Path

# ACS release year; used for every download and in the output directory name.
ACS_YEAR = 2019

# Input and output locations, all resolved relative to the parent of the
# current working directory.
DATA_PATH = Path.cwd().parent.joinpath("data")
FIPS_CSV_PATH = DATA_PATH.joinpath("fips_states_2010.csv")
OUTPUT_PATH = DATA_PATH.joinpath("dataset", f"census_acs_{ACS_YEAR}")

# Column names used in the exported dataset.
GEOID_FIELD_NAME = "GEOID10"
UNEMPLOYED_FIELD_NAME = "Unemployed Civilians (fraction)"

# Pandas display tweaks: keep wide frames on one line and show two decimals.
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.precision", 2)

In [None]:
# Show the contents of ACS table B23025 for the configured year.
# Adapted from the censusdata tutorial: https://jtleider.github.io/censusdata/example1.html
# The complete field list lives in the ACS table shells workbook:
# https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx
censusdata.printtable(
    censusdata.censustable(
        src="acs5",
        year=ACS_YEAR,
        table="B23025",
    )
)

In [None]:
def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:
    """Build a FIPS code string from a censusgeo index entry.

    Concatenates, in the order given, the value of every ``(name, value)``
    pair returned by ``censusgeo.params()``; the names are ignored.
    """
    geo_values = (geo_value for _, geo_value in censusgeo.params())
    return "".join(geo_values)


# Download the two B23025 variables one state/territory at a time: each request
# below is scoped to a single state FIPS code read from the first CSV column.
dfs = []
# newline="" is what the csv module documents for files passed to csv.reader.
with open(FIPS_CSV_PATH, newline="") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    # Skip the header row; the default avoids StopIteration on an empty file.
    next(csv_reader, None)

    for row in csv_reader:
        # csv.reader yields [] for blank lines (e.g. a trailing newline at the
        # end of the file); skip those instead of failing on row[0].
        if not row or not row[0].strip():
            continue

        fips = row[0].strip()
        print(f"Downloading data for state/territory with FIPS code {fips}")

        dfs.append(
            censusdata.download(
                src="acs5",
                year=ACS_YEAR,
                # Every block group in every county of this state.
                geo=censusdata.censusgeo(
                    [("state", fips), ("county", "*"), ("block group", "*")]
                ),
                # NOTE(review): presumably the unemployed count and the civilian
                # labor force count — confirm against the B23025 table listing.
                var=["B23025_005E", "B23025_003E"],
            )
        )

# Stack the per-state frames into one national frame.
df = pd.concat(dfs)

# The index holds censusgeo objects; flatten each one into a plain string ID column.
df[GEOID_FIELD_NAME] = df.index.to_series().apply(func=fips_from_censusdata_censusgeo)

df.head()

In [None]:
# Derive the unemployment fraction as the ratio of the two downloaded B23025 counts.
# TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
df[UNEMPLOYED_FIELD_NAME] = df["B23025_005E"].div(df["B23025_003E"])

df.head()

In [None]:
# Export only the geography identifier and the derived unemployment fraction.
columns_to_include = [GEOID_FIELD_NAME, UNEMPLOYED_FIELD_NAME]

# DataFrame.to_csv does not create missing directories, so make sure the output
# directory exists first. exist_ok=True keeps this cell safe to re-run.
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

df[columns_to_include].to_csv(path_or_buf=OUTPUT_PATH / "usa.csv", index=False)