Move Housing and Transportation Index to tracts (#903)

Update data download URL to use tract as focus, use tract field name,
and move this dataset to the tracts df list in etl_score.

Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
This commit is contained in:
Shelby Switzer 2021-11-17 16:00:10 -05:00 committed by lucasmbrown-usds
commit 0c8b32e679
2 changed files with 7 additions and 7 deletions

View file

@@ -11,7 +11,7 @@ logger = get_module_logger(__name__)
class HousingTransportationETL(ExtractTransformLoad):
def __init__(self):
self.HOUSING_FTP_URL = (
"https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid="
"https://htaindex.cnt.org/download/download.php?focus=tract&geoid="
)
self.OUTPUT_PATH = (
self.DATA_PATH / "dataset" / "housing_and_transportation_index"
@@ -33,7 +33,7 @@ class HousingTransportationETL(ExtractTransformLoad):
# New file name:
tmp_csv_file_path = (
zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
zip_file_dir / f"htaindex_data_tracts_{fips}.csv"
)
try:
@@ -50,10 +50,10 @@ class HousingTransportationETL(ExtractTransformLoad):
def transform(self) -> None:
logger.info("Transforming Housing and Transportation Data")
# Rename and reformat block group ID
self.df.rename(columns={"blkgrp": self.GEOID_FIELD_NAME}, inplace=True)
self.df[self.GEOID_FIELD_NAME] = self.df[
self.GEOID_FIELD_NAME
# Rename and reformat tract ID
self.df.rename(columns={"tract": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
self.df[self.GEOID_TRACT_FIELD_NAME] = self.df[
self.GEOID_TRACT_FIELD_NAME
].str.replace('"', "")
def load(self) -> None: