diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py
index 5ebc8a55..9dee3915 100644
--- a/data/data-pipeline/data_pipeline/etl/base.py
+++ b/data/data-pipeline/data_pipeline/etl/base.py
@@ -164,9 +164,6 @@ class ExtractTransformLoad:
         for field in dataset_config["load_fields"]:
             cls.COLUMNS_TO_KEEP.append(field["long_name"])
             setattr(cls, field["df_field_name"], field["long_name"])
-
-            # set the constants for the class
-            setattr(cls, field["df_field_name"], field["long_name"])
         return dataset_config
 
     # This is a classmethod so it can be used by `get_data_frame` without
diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
index 70b8a285..bb24ba3e 100644
--- a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
@@ -59,13 +59,3 @@ class TestDOEEnergyBurdenETL(TestETL):
             data_path / "dataset" / "doe_energy_burden" / "usa.csv"
         )
         assert output_file_path == expected_output_file_path
-
-    def test_tract_id_lengths(self, mock_etl, mock_paths):
-        etl = self._setup_etl_instance_and_run_extract(
-            mock_etl=mock_etl, mock_paths=mock_paths
-        )
-        etl.transform()
-        etl.validate()
-        etl.load()
-        df = etl.get_data_frame()
-        assert (df[etl.GEOID_TRACT_FIELD_NAME].str.len() == 11).all()
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
index b2a5f44b..37b15f65 100644
--- a/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
@@ -150,3 +150,11 @@ class TestAbandondedLandMineETL(TestETL):
         assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
             self._FIXTURES_SHARED_TRACT_IDS
         )
+
+    def test_tract_id_lengths(self, mock_etl, mock_paths):
+        """Run the inherited check with add_tracts_for_geometries patched."""
+        with mock.patch(
+            "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+            new=_fake_add_tracts_for_geometries,
+        ):
+            super().test_tract_id_lengths(mock_etl, mock_paths)
diff --git a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
index 34a56083..f0d1e920 100644
--- a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
@@ -209,6 +209,17 @@ class TestETL:
 
         assert actual_file_path == expected_file_path
 
+    def test_tract_id_lengths(self, mock_etl, mock_paths):
+        """Check that every tract ID in the output is 11 characters long."""
+        etl = self._setup_etl_instance_and_run_extract(
+            mock_etl=mock_etl, mock_paths=mock_paths
+        )
+        etl.transform()
+        etl.validate()
+        etl.load()
+        df = etl.get_data_frame()
+        assert (df[etl.GEOID_TRACT_FIELD_NAME].str.len() == 11).all()
+
     def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
         """Check presence of necessary shared tract IDs.
         Note: We used shared census tract IDs so that later our tests can join all the
diff --git a/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py
index ce2b63c4..61e4ed2e 100644
--- a/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py
@@ -185,3 +185,11 @@ class TestUSArmyFUDSETL(TestETL):
         assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
             self._FIXTURES_SHARED_TRACT_IDS
         )
+
+    def test_tract_id_lengths(self, mock_etl, mock_paths):
+        """Run the inherited check with add_tracts_for_geometries patched."""
+        with mock.patch(
+            "data_pipeline.etl.sources.us_army_fuds.etl.add_tracts_for_geometries",
+            new=_fake_add_tracts_for_geometries,
+        ):
+            super().test_tract_id_lengths(mock_etl, mock_paths)