diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 505cba1e..aa10ebc4 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -226,7 +226,6 @@ class ScoreETL(ExtractTransformLoad): census_block_group_dfs = [ self.ejscreen_df, self.census_df, - self.housing_and_transportation_df, self.census_acs_median_incomes_df, self.national_risk_index_df, ] @@ -241,6 +240,7 @@ class ScoreETL(ExtractTransformLoad): self.doe_energy_burden_df, self.geocorr_urban_rural_df, self.persistent_poverty_df, + self.housing_and_transportation_df, ] census_tract_df = self._join_tract_dfs(census_tract_dfs) diff --git a/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py b/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py index 9e8986a8..dfba2a30 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py @@ -11,7 +11,7 @@ logger = get_module_logger(__name__) class HousingTransportationETL(ExtractTransformLoad): def __init__(self): self.HOUSING_FTP_URL = ( - "https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid=" + "https://htaindex.cnt.org/download/download.php?focus=tract&geoid=" ) self.OUTPUT_PATH = ( self.DATA_PATH / "dataset" / "housing_and_transportation_index" @@ -33,7 +33,7 @@ class HousingTransportationETL(ExtractTransformLoad): # New file name: tmp_csv_file_path = ( - zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv" + zip_file_dir / f"htaindex_data_tracts_{fips}.csv" ) try: @@ -50,10 +50,10 @@ class HousingTransportationETL(ExtractTransformLoad): def transform(self) -> None: logger.info("Transforming Housing and Transportation Data") - # Rename and reformat block group ID - self.df.rename(columns={"blkgrp": self.GEOID_FIELD_NAME}, inplace=True) - self.df[self.GEOID_FIELD_NAME] = self.df[ - self.GEOID_FIELD_NAME + # Rename and reformat tract ID + self.df.rename(columns={"tract": self.GEOID_TRACT_FIELD_NAME}, inplace=True) + self.df[self.GEOID_TRACT_FIELD_NAME] = self.df[ + self.GEOID_TRACT_FIELD_NAME ].str.replace('"', "") def load(self) -> None: