From 893758f1d42fac7b5ac441eccf733ab43914f3c3 Mon Sep 17 00:00:00 2001 From: Shelby Switzer Date: Thu, 18 Nov 2021 10:37:55 -0500 Subject: [PATCH] Use tract instead of block group when calling census API (#901) * Use tract instead of block group when calling census API * fixing merge conflicts Co-authored-by: Shelby Switzer Co-authored-by: lucasmbrown-usds --- data/data-pipeline/data_pipeline/etl/score/etl_score.py | 2 +- .../data_pipeline/etl/sources/census_acs/etl.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index aa10ebc4..0f32b6c1 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -225,7 +225,6 @@ class ScoreETL(ExtractTransformLoad): # Join all the data sources that use census block groups census_block_group_dfs = [ self.ejscreen_df, - self.census_df, self.census_acs_median_incomes_df, self.national_risk_index_df, ] @@ -234,6 +233,7 @@ class ScoreETL(ExtractTransformLoad): # Join all the data sources that use census tracts census_tract_dfs = [ + self.census_df, self.hud_housing_df, self.cdc_places_df, self.cdc_life_expectancy_df, diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 79ba1258..9728a8df 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -77,7 +77,7 @@ class CensusACSETL(ExtractTransformLoad): src="acs5", year=self.ACS_YEAR, geo=censusdata.censusgeo( - [("state", fips), ("county", "*"), ("block group", "*")] + [("state", fips), ("county", "*"), ("tract", "*")] ), var=[ # Emploment fields @@ -100,7 +100,7 @@ class CensusACSETL(ExtractTransformLoad): self.df = pd.concat(dfs) - self.df[self.GEOID_FIELD_NAME] = self.df.index.to_series().apply( + self.df[self.GEOID_TRACT_FIELD_NAME] = self.df.index.to_series().apply( func=self._fips_from_censusdata_censusgeo ) @@ -179,7 +179,7 @@ class CensusACSETL(ExtractTransformLoad): self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True) columns_to_include = [ - self.GEOID_FIELD_NAME, + self.GEOID_TRACT_FIELD_NAME, self.UNEMPLOYED_FIELD_NAME, self.LINGUISTIC_ISOLATION_FIELD_NAME, self.MEDIAN_INCOME_FIELD_NAME,