In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import csv
import sys
import os

# Resolve the parent of the current working directory to an absolute path and
# make sure it is on sys.path exactly once (presumably so the project-local
# imports that follow can be found -- confirm against the repo layout).
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from etl.sources.census.etl_utils import get_state_fips_codes
from utils import unzip_file_from_url, remove_all_from_dir

# Filesystem layout, all derived from the directory above the working directory.
DATA_PATH = Path.cwd().parent.joinpath("data")
TMP_PATH = DATA_PATH.joinpath("tmp")
CSV_PATH = DATA_PATH.joinpath("dataset", "hud_recap")

# Direct CSV download link for the HUD R/ECAP dataset.
HUD_RECAP_CSV_URL = "https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326"

# Defining some variable names (output column names used throughout the notebook)
GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME = "hud_recap_priority_community"

In [None]:
# Source dataset:
# https://hudgis-hud.opendata.arcgis.com/datasets/HUD::racially-or-ethnically-concentrated-areas-of-poverty-r-ecaps/about
df = pd.read_csv(
    HUD_RECAP_CSV_URL,
    # Read GEOID as text rather than letting pandas infer a numeric dtype.
    dtype={"GEOID": "string"},
)
df.head()

In [None]:
# Rename some fields.
# Reassign instead of using `inplace=True` so each step yields a fresh frame.
df = df.rename(
    columns={
        "GEOID": GEOID_TRACT_FIELD_NAME,
        # Interestingly, there's no data dictionary for the RECAP data that I could find.
        # However, this site (http://www.schousing.com/library/Tax%20Credit/2020/QAP%20Instructions%20(2).pdf)
        # suggests:
        # "If RCAP_Current for the tract in which the site is located is 1, the tract is an R/ECAP. If RCAP_Current is 0, it is not."
        "RCAP_Current": HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME,
    },
)

# Convert to boolean.
# A bare `.astype("bool")` maps NaN to True, which would mark tracts with a
# missing RCAP value as priority communities. Treat missing values as 0
# ("not an R/ECAP") before casting.
df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME] = (
    df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME].fillna(0).astype("bool")
)

# Only the last expression of a cell is rendered, so print the class balance
# explicitly; otherwise this sanity check is silently discarded.
print(df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME].value_counts())

df = df.sort_values(by=GEOID_TRACT_FIELD_NAME)
df.head()

In [None]:
# Make sure the output directory exists (creating parents as needed), then
# write the result to "usa.csv" inside it.
CSV_PATH.mkdir(parents=True, exist_ok=True)

# Only the tract ID and the R/ECAP flag are exported; every other column is
# left out of the file, and the row index is not written.
df.to_csv(
    CSV_PATH / "usa.csv",
    columns=[GEOID_TRACT_FIELD_NAME, HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME],
    index=False,
)