In [None]:
import pandas as pd
import censusdata
import csv
from pathlib import Path
import os
import sys

# Put the directory two levels above the working directory on sys.path so the
# project import that follows (data_pipeline) can be resolved from the notebook.
module_path = os.path.abspath("../..")
if module_path not in sys.path:
    sys.path.append(module_path)

from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes


# Year passed as `year` to the censusdata calls in the cells that follow.
ACS_YEAR = 2010

# Data directory: the "data" folder that sits next to the current working
# directory, plus the state FIPS CSV inside it.
DATA_PATH = Path.cwd().parent.joinpath("data")
FIPS_CSV_PATH = DATA_PATH.joinpath("fips_states_2010.csv")

# Column labels.
GEOID_FIELD_NAME = "GEOID10"
UNEMPLOYED_FIELD_NAME = "Unemployed Civilians (fraction)"

# Display tweaks for readable pandas output: do not wrap wide frames across
# several blocks, and show two decimal places.
pd.options.display.expand_frame_repr = False
pd.options.display.precision = 2

In [None]:
# Exploratory lookup of ACS variable names, following the censusdata tutorial:
# https://jtleider.github.io/censusdata/example1.html
# The full list of fields (2019 table shells) is published at:
# https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx
#
# To print the layout of a single table instead, run for example:
# censusdata.printtable(
#     censusdata.censustable(src="acs5", year=ACS_YEAR, table="B25077")
# )

# Search the "label" field of the acs5 variables for "employment status".
censusdata.search(
    src="acs5",
    year=ACS_YEAR,
    field="label",
    criterion="employment status",
)

In [None]:
def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:
    """Build a FIPS code string from a censusdata ``censusgeo`` index entry.

    Args:
        censusgeo: Geography object whose ``params()`` yields ``(key, value)``
            pairs.

    Returns:
        The values of all pairs concatenated in the order ``params()``
        returns them; the keys are ignored.
    """
    geo_codes = (code for _level, code in censusgeo.params())
    return "".join(geo_codes)


# Download the requested ACS variables one state at a time and collect the
# per-state frames so they can be concatenated once at the end.
dfs = []
for fips in get_state_fips_codes(DATA_PATH):
    print(f"Fetching data for fips {fips}")

    # Only the state level is requested here. The original left
    # ("county", "*"), ("block group", "*") commented out as extra entries
    # for this list.
    state_geo = censusdata.censusgeo([("state", fips)])

    # NOTE(review): only B19013_001E is referenced by the rename cell that
    # follows; the two B23025 columns are downloaded but not used in the
    # visible cells.
    state_df = censusdata.download(
        src="acs5",
        year=ACS_YEAR,
        geo=state_geo,
        var=["B23025_005E", "B23025_003E", "B19013_001E"],
    )
    dfs.append(state_df)

df = pd.concat(dfs)

# The frame is indexed by censusgeo objects; derive a plain string code from
# each index entry and store it as a regular column.
geoid_series = df.index.to_series().apply(func=fips_from_censusdata_censusgeo)
df[GEOID_FIELD_NAME] = geoid_series

df.head()

In [None]:
# Columns intended for the exported state median income file; used by the
# export snippet at the bottom of this cell.
columns_to_include = ["GEOID2", "Median household income (State)"]

# Rename through GEOID_FIELD_NAME rather than repeating its "GEOID10" literal,
# so the rename cannot silently become a no-op if the constant is changed.
# The result is re-bound to `df` instead of using inplace=True.
df = df.rename(
    columns={
        GEOID_FIELD_NAME: "GEOID2",
        "B19013_001E": "Median household income (State)",
    }
)

# To export the selected columns, build the path from DATA_PATH rather than a
# user-specific absolute path.
# NOTE(review): assumes DATA_PATH resolves to data_pipeline/data — confirm.
# NOTE(review): the file name says 2014-2019 while ACS_YEAR is 2010 — confirm.
# df[columns_to_include].to_csv(
#     path_or_buf=DATA_PATH
#     / "needs_to_be_moved_to_s3"
#     / "2014_to_2019_state_median_income.csv",
#     index=False,
# )