From a983a42b55a47cd95d64a1b180f54b46caaca151 Mon Sep 17 00:00:00 2001 From: Carlos Felix <63804190+carlosfelix2@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:55:34 -0500 Subject: [PATCH] Fixed Decennial 2020 tract removal bug --- .../data_pipeline/etl/sources/census_decennial/etl.py | 2 +- .../data_pipeline/tests/sources/census_decennial/test_etl.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py index 6794b45d..8d32ec98 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py @@ -128,7 +128,7 @@ class CensusDecennialETL(ExtractTransformLoad): > 0, DEC_FIELD_NAMES.TERRITORY_MEDIAN_INCOME, ] = territory["median_income"] - self.df_all = pd.concat([self.df_all, df]) + self.df_all = pd.concat([self.df_all, df], ignore_index=True) def _merge_tracts_2010_compatibility(self): """Merges tract 69120950200 to match 2010 tracts""" diff --git a/data/data-pipeline/data_pipeline/tests/sources/census_decennial/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/census_decennial/test_etl.py index feb3f8d9..0471733b 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/census_decennial/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/census_decennial/test_etl.py @@ -97,6 +97,7 @@ def test_load_data(extract_path_fixture: Path, territory_params_fixture): df = dec.df_all assert len(df) == 64 assert len(df.columns) == 30 + assert df.index.is_unique # Columns should not have any census variable names census_vars = list(DEC_TERRITORY_PARAMS[0]["xwalk"].keys()) + list(