In [None]:
import pandas as pd
import csv
from pathlib import Path
import os
import sys

In [None]:
# Put the parent directory on the import path (only once) so that modules
# located there can be imported by the statements that follow.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from data_pipeline.utils import unzip_file_from_url
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes

In [None]:
# All locations are derived from the "data" directory that sits next to the
# current working directory's parent.
DATA_PATH = Path.cwd().parent.joinpath("data")
TMP_PATH: Path = DATA_PATH.joinpath("tmp")

# Inputs: state FIPS lookup table and the existing score file.
STATE_CSV = DATA_PATH.joinpath("census", "csv", "fips_states_2010.csv")
SCORE_CSV = DATA_PATH.joinpath("score", "csv", "usa.csv")

# Output: the score file enriched with county and state columns.
COUNTY_SCORE_CSV = DATA_PATH.joinpath("score", "csv", "usa-county.csv")

# Census gazetteer archive and the text file read from TMP_PATH after unzipping.
CENSUS_COUNTIES_ZIP_URL = (
    "https://www2.census.gov/geo/docs/maps-data/data/gazetteer/"
    "2020_Gazetteer/2020_Gaz_counties_national.zip"
)
CENSUS_COUNTIES_TXT = TMP_PATH.joinpath("2020_Gaz_counties_national.txt")

In [None]:
# Fetch the Census county gazetteer archive. TMP_PATH is passed twice;
# presumably these are the download and extraction directories -- confirm
# against data_pipeline.utils.unzip_file_from_url. The next cell expects
# CENSUS_COUNTIES_TXT to exist under TMP_PATH afterwards.
unzip_file_from_url(CENSUS_COUNTIES_ZIP_URL, TMP_PATH, TMP_PATH)

In [None]:
# Read the tab-separated county gazetteer, keep only the state abbreviation,
# county GEOID and county name, and give the columns readable names.
# GEOID and USPS are read as strings (presumably so codes keep leading zeros).
counties_df = (
    pd.read_csv(
        CENSUS_COUNTIES_TXT,
        sep="\t",
        dtype={"GEOID": "string", "USPS": "string"},
        low_memory=False,
    )
    .loc[:, ["USPS", "GEOID", "NAME"]]
    .rename(columns={"USPS": "State Abbreviation", "NAME": "County Name"})
)
counties_df.head()

In [None]:
# Read the state lookup table with FIPS code and abbreviation as strings, then
# rename its columns to the display names used in the merged output.
states_df = pd.read_csv(
    STATE_CSV,
    dtype={"fips": "string", "state_abbreviation": "string"},
).rename(
    columns={
        "fips": "State Code",
        "state_name": "State Name",
        "state_abbreviation": "State Abbreviation",
    }
)
states_df.head()

In [None]:
# Attach the state code and state name to every county by matching on the
# shared "State Abbreviation" column.
#
# DataFrame.join() without `on=` aligns the two frames on their row index, so
# counties would be paired with states by row position rather than by state,
# and any county row with no same-numbered state row would get NaN. A left
# merge on the key keeps every county row and fills the state columns from the
# matching state record. Because the key is shared, no duplicate abbreviation
# column is produced; the suffix only applies to other overlapping columns.
county_state_merged = counties_df.merge(
    states_df,
    on="State Abbreviation",
    how="left",
    suffixes=("", " Other"),
)
county_state_merged.head()

In [None]:
# Load the score file with GEOID10 kept as a string, and derive a GEOID column
# from its first five characters (the same column name that the county
# gazetteer frame carries).
score_df = pd.read_csv(SCORE_CSV, dtype={"GEOID10": "string"}).assign(
    GEOID=lambda frame: frame["GEOID10"].str.slice(stop=5)
)
score_df.head()

In [None]:
# Enrich every score row with its county and state columns by matching on the
# five-character "GEOID" key derived from GEOID10.
#
# DataFrame.join() without `on=` aligns on the row index, which would pair score
# rows with county rows by position and ignore GEOID entirely. A left merge on
# "GEOID" keeps every score row; rows whose GEOID has no county record get NaN
# in the county/state columns. Any other column name present in both frames
# keeps the original "_OTHER" suffix on the county side.
score_county_state_merged = score_df.merge(
    county_state_merged,
    on="GEOID",
    how="left",
    suffixes=("", "_OTHER"),
)
score_county_state_merged.head()

In [None]:
# Write the enriched score table to disk; the row index is not written.
score_county_state_merged.to_csv(path_or_buf=COUNTY_SCORE_CSV, index=False)