From 0d57dd572be027a2fc8b1625958ed68c4b900653 Mon Sep 17 00:00:00 2001 From: Lucas Merrill Brown Date: Thu, 16 Dec 2021 10:54:41 -0500 Subject: [PATCH] Stop swallowing Census API errors (#1051) --- .../etl/sources/census_acs/etl_utils.py | 42 ++++++++++--------- .../etl/sources/census_acs_2010/etl.py | 2 - 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_utils.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_utils.py index 2997e84c..08b8129f 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_utils.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_utils.py @@ -8,6 +8,8 @@ from data_pipeline.utils import get_module_logger logger = get_module_logger(__name__) +CENSUS_ACS_FIPS_CODES_TO_SKIP = ["60", "66", "69", "78"] + def _fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str: """Create a FIPS code from the proprietary censusgeo index.""" @@ -22,32 +24,34 @@ def retrieve_census_acs_data( tract_output_field_name: str, data_path_for_fips_codes: Path, acs_type="acs5", - raise_errors: bool = False, ) -> pd.DataFrame: """Retrieves and combines census ACS data for a given year.""" dfs = [] for fips in get_state_fips_codes(data_path_for_fips_codes): - logger.info( - f"Downloading data for state/territory with FIPS code {fips}" - ) - - try: - response = censusdata.download( - src=acs_type, - year=acs_year, - geo=censusdata.censusgeo( - [("state", fips), ("county", "*"), ("tract", "*")] - ), - var=variables, + if fips in CENSUS_ACS_FIPS_CODES_TO_SKIP: + logger.info( + f"Skipping download for state/territory with FIPS code {fips}" ) - dfs.append(response) - - except ValueError as e: - logger.error( - f"Could not download data for state/territory with FIPS code {fips}" + else: + logger.info( + f"Downloading data for state/territory with FIPS code {fips}" ) - if raise_errors: + try: + response = censusdata.download( + src=acs_type, + year=acs_year, + geo=censusdata.censusgeo( + [("state", fips), ("county", "*"), ("tract", "*")] + ), + var=variables, + ) + dfs.append(response) + + except ValueError as e: + logger.error( + f"Could not download data for state/territory with FIPS code {fips}" + ) raise e df = pd.concat(dfs) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs_2010/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs_2010/etl.py index 4095f60a..05d823a6 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs_2010/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs_2010/etl.py @@ -50,7 +50,6 @@ class CensusACS2010ETL(ExtractTransformLoad): ] self.EMPLOYMENT_LESS_THAN_HS_IN_LABOR_FORCE = ( - # TODO: FIX!!!!!! "B23006_005E" # Estimate!!Total!!Less than high school graduate!!In labor force!!Civilian ) @@ -115,7 +114,6 @@ class CensusACS2010ETL(ExtractTransformLoad): tract_output_field_name=self.GEOID_TRACT_FIELD_NAME, data_path_for_fips_codes=self.DATA_PATH, acs_type=self.ACS_TYPE, - raise_errors=False, ) def transform(self) -> None: