mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 01:11:18 -07:00
Update HUD Housing Burden (#1005)
* update paths
* size information added in extract function

Co-authored-by: Saran Ahluwalia <sarahluw@cisco.com>
This commit is contained in:
parent
524b822651
commit
df675b231a
1 changed file with 3 additions and 9 deletions
|
@ -9,7 +9,7 @@ class HudHousingETL(ExtractTransformLoad):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "hud_housing"
|
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "hud_housing"
|
||||||
self.GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
|
self.GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
|
||||||
self.HOUSING_FTP_URL = "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
|
self.HOUSING_FTP_URL = "https://www.huduser.gov/portal/datasets/cp/2014thru2018-140-csv.zip"
|
||||||
self.HOUSING_ZIP_FILE_DIR = self.TMP_PATH / "hud_housing"
|
self.HOUSING_ZIP_FILE_DIR = self.TMP_PATH / "hud_housing"
|
||||||
|
|
||||||
# We measure households earning less than 80% of HUD Area Median Family Income by county
|
# We measure households earning less than 80% of HUD Area Median Family Income by county
|
||||||
|
@ -32,7 +32,7 @@ class HudHousingETL(ExtractTransformLoad):
|
||||||
self.df: pd.DataFrame
|
self.df: pd.DataFrame
|
||||||
|
|
||||||
def extract(self) -> None:
|
def extract(self) -> None:
|
||||||
logger.info("Extracting HUD Housing Data")
|
logger.info("Extracting 1.09 GB HUD Housing Data")
|
||||||
super().extract(
|
super().extract(
|
||||||
self.HOUSING_FTP_URL,
|
self.HOUSING_FTP_URL,
|
||||||
self.HOUSING_ZIP_FILE_DIR,
|
self.HOUSING_ZIP_FILE_DIR,
|
||||||
|
@ -42,13 +42,7 @@ class HudHousingETL(ExtractTransformLoad):
|
||||||
logger.info("Transforming HUD Housing Data")
|
logger.info("Transforming HUD Housing Data")
|
||||||
|
|
||||||
# New file name:
|
# New file name:
|
||||||
tmp_csv_file_path = (
|
tmp_csv_file_path = self.HOUSING_ZIP_FILE_DIR / "140" / "Table8.csv"
|
||||||
self.HOUSING_ZIP_FILE_DIR
|
|
||||||
/ "2012thru2016-140-csv"
|
|
||||||
/ "2012thru2016-140-csv"
|
|
||||||
/ "140"
|
|
||||||
/ "Table8.csv"
|
|
||||||
)
|
|
||||||
self.df = pd.read_csv(
|
self.df = pd.read_csv(
|
||||||
filepath_or_buffer=tmp_csv_file_path,
|
filepath_or_buffer=tmp_csv_file_path,
|
||||||
encoding="latin-1",
|
encoding="latin-1",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue