From 37258dd67300b87b49d77dabbb08958cf248ccb0 Mon Sep 17 00:00:00 2001 From: Jorge Escobar Date: Fri, 8 Apr 2022 14:41:15 -0400 Subject: [PATCH] passing tests --- data/data-pipeline/data_pipeline/etl/base.py | 18 +++++++++--------- .../etl/sources/national_risk_index/etl.py | 1 + .../data_pipeline/tests/conftest.py | 5 +++++ .../tests/sources/example/test_etl.py | 1 + 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py index 4e4d9792..56c080bc 100644 --- a/data/data-pipeline/data_pipeline/etl/base.py +++ b/data/data-pipeline/data_pipeline/etl/base.py @@ -43,6 +43,7 @@ class ExtractTransformLoad: DATA_PATH: pathlib.Path = APP_ROOT / "data" TMP_PATH: pathlib.Path = DATA_PATH / "tmp" CONTENT_CONFIG: pathlib.Path = APP_ROOT / "content" / "config" + DATASET_CONFIG: pathlib.Path = APP_ROOT / "etl" / "score" / "config" # Parameters GEOID_FIELD_NAME: str = "GEOID10" @@ -82,10 +83,11 @@ class ExtractTransformLoad: output_df: pd.DataFrame = None - def yaml_config_load(self): + @classmethod + def yaml_config_load(cls): # check if the class instance has score YAML definitions datasets_config = load_yaml_dict_from_file( - self.APP_ROOT / "etl" / "score" / "config" / "datasets.yml", + cls.DATASET_CONFIG / "datasets.yml", DatasetsConfig, ) @@ -94,21 +96,19 @@ class ExtractTransformLoad: dataset_config = next( item for item in datasets_config.get("datasets") - if item["module_name"] == "self.NAME" + if item["module_name"] == cls.NAME ) except StopIteration: # Note: it'd be nice to log the name of the dataframe, but that's not accessible in this scope. logger.error( - f"Exception encountered while extracting dataset config for dataset {self.NAME}" + f"Exception encountered while extracting dataset config for dataset {cls.NAME}" ) sys.exit() # set the fields - self.LAST_UPDATED_YEAR = dataset_config["last_updated_year"] - self.SOURCE_URL = dataset_config["source_url"] - self.INPUT_CSV = ( - self.get_tmp_path() / dataset_config["extracted_file_name"] - ) + cls.LAST_UPDATED_YEAR = dataset_config["last_updated_year"] + cls.SOURCE_URL = dataset_config["source_url"] + cls.INPUT_EXTRACTED_FILE_NAME = dataset_config["extracted_file_name"] # This is a classmethod so it can be used by `get_data_frame` without # needing to create an instance of the class. This is a use case in `etl_score`. diff --git a/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py b/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py index 1f57ca62..936b0a50 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/national_risk_index/etl.py @@ -67,6 +67,7 @@ class NationalRiskIndexETL(ExtractTransformLoad): self.CONTAINS_AGRIVALUE, ] + self.INPUT_CSV = self.get_tmp_path() / self.INPUT_EXTRACTED_FILE_NAME self.df: pd.DataFrame def extract(self) -> None: diff --git a/data/data-pipeline/data_pipeline/tests/conftest.py b/data/data-pipeline/data_pipeline/tests/conftest.py index f1dc63ac..5535a97e 100644 --- a/data/data-pipeline/data_pipeline/tests/conftest.py +++ b/data/data-pipeline/data_pipeline/tests/conftest.py @@ -52,3 +52,8 @@ def mock_etl(monkeypatch, mock_paths) -> None: data_path, tmp_path = mock_paths monkeypatch.setattr(ExtractTransformLoad, "DATA_PATH", data_path) monkeypatch.setattr(ExtractTransformLoad, "TMP_PATH", tmp_path) + monkeypatch.setattr( + ExtractTransformLoad, + "CONTENT_CONFIG", + Path.cwd() / "data_pipeline" / "score" / "config", + ) diff --git a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py index cac6dcfb..3dc7f8b2 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py @@ -148,6 +148,7 @@ class TestETL: etl = self._get_instance_of_etl_class() data_path, tmp_path = mock_paths + etl.__init__() actual_file_path = etl._get_output_file_path() expected_file_path = (