Refactor _setup_etl_instance_and_run_extract to base (#1518)

For the three classes we've done so far, a generic _setup_etl_instance_and_run_extract works fine. For the moment we can reuse the same setup method in the base class until we decide that future classes need more flexibility; even then, any subclass can always override it.
2025-02-23 10:04:18 -08:00 · 2022-08-02 10:24:47 -04:00 · 2022-08-02 10:24:47 -04:00 · 4e39be6748
commit 4e39be6748
parent e49cca005a
4 changed files with 26 additions and 100 deletions
--- a/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py
@ -1,7 +1,5 @@
 # pylint: disable=protected-access
-from unittest import mock
 import pathlib
-import requests

 from data_pipeline.etl.sources.child_opportunity_index.etl import (
    ChildOpportunityIndex,
@ -36,33 +34,6 @@ class TestChildOpportunityIndexETL(TestETL):
        """
        super().setup_method(_method=_method, filename=filename)

-    # XXX: Refactor since I just straight copied it out of NRI's
-    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
-        with mock.patch("data_pipeline.utils.requests") as requests_mock:
-            zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / "coi.zip"
-            tmp_path = mock_paths[1]
-
-            # Create mock response.
-            with open(zip_file_fixture_src, mode="rb") as file:
-                file_contents = file.read()
-            response_mock = requests.Response()
-            response_mock.status_code = 200
-            # pylint: disable=protected-access
-            response_mock._content = file_contents
-            # Return text fixture:
-            requests_mock.get = mock.MagicMock(return_value=response_mock)
-
-            # Instantiate the ETL class.
-            etl = ChildOpportunityIndex()
-
-            # Monkey-patch the temporary directory to the one used in the test
-            etl.TMP_PATH = tmp_path
-
-            # Run the extract method.
-            etl.extract()
-
-        return etl
-
    def test_init(self, mock_etl, mock_paths):
        """Tests that the ChildOpportunityIndexETL class was initialized
        correctly.
--- a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
@ -1,7 +1,5 @@
 # pylint: disable=protected-access
-from unittest import mock
 import pathlib
-import requests

 from data_pipeline.etl.sources.doe_energy_burden.etl import (
    DOEEnergyBurden,
@ -36,33 +34,6 @@ class TestDOEEnergyBurdenETL(TestETL):
        """
        super().setup_method(_method=_method, filename=filename)

-    # XXX: Refactor since I just straight copied it out of NRI's
-    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
-        with mock.patch("data_pipeline.utils.requests") as requests_mock:
-            zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
-            tmp_path = mock_paths[1]
-
-            # Create mock response.
-            with open(zip_file_fixture_src, mode="rb") as file:
-                file_contents = file.read()
-            response_mock = requests.Response()
-            response_mock.status_code = 200
-            # pylint: disable=protected-access
-            response_mock._content = file_contents
-            # Return text fixture:
-            requests_mock.get = mock.MagicMock(return_value=response_mock)
-
-            # Instantiate the ETL class.
-            etl = self._ETL_CLASS()
-
-            # Monkey-patch the temporary directory to the one used in the test
-            etl.TMP_PATH = tmp_path
-
-            # Run the extract method.
-            etl.extract()
-
-        return etl
-
    def test_init(self, mock_etl, mock_paths):
        """Tests that the ChildOpportunityIndexETL class was initialized
        correctly.
@ -71,10 +42,7 @@ class TestDOEEnergyBurdenETL(TestETL):
        etl = DOEEnergyBurden()
        data_path, _ = mock_paths
        assert etl.DATA_PATH == data_path
-        assert etl.COLUMNS_TO_KEEP == [
-            "GEOID10_TRACT",
-            "Energy burden"
-        ]
+        assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"]
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"
--- a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
@ -3,8 +3,10 @@ import copy
 import os
 import pathlib
 from typing import Type
+from unittest import mock
 import pytest

+import requests
 import numpy as np
 import pandas as pd

@ -98,17 +100,31 @@ class TestETL:
        In order to re-implement this method, usually it will involve a
        decent amount of work to monkeypatch `requests` or another method that's
        used to retrieve data in order to force that method to retrieve the fixture
-        data.
+        data. A basic version of that patching is included here for classes that can use it.
        """
-        # When running this in child classes, make sure the child class re-implements
-        # this method.
-        if self._ETL_CLASS is not ExampleETL:
-            raise NotImplementedError(
-                "Prepare and run extract method not defined for this class."
+        with mock.patch("data_pipeline.utils.requests") as requests_mock:
+            zip_file_fixture_src = (
+                self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
            )
+            tmp_path = mock_paths[1]

-        # The rest of this method applies for `ExampleETL` only.
-        etl = self._get_instance_of_etl_class()
+            # Create mock response.
+            with open(zip_file_fixture_src, mode="rb") as file:
+                file_contents = file.read()
+            response_mock = requests.Response()
+            response_mock.status_code = 200
+            # pylint: disable=protected-access
+            response_mock._content = file_contents
+            # Return text fixture:
+            requests_mock.get = mock.MagicMock(return_value=response_mock)
+
+            # Instantiate the ETL class.
+            etl = self._ETL_CLASS()
+
+            # Monkey-patch the temporary directory to the one used in the test
+            etl.TMP_PATH = tmp_path
+
+            # Run the extract method.
            etl.extract()

        return etl
--- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py
@ -36,35 +36,6 @@ class TestNationalRiskIndexETL(TestETL):
        """
        super().setup_method(_method=_method, filename=filename)

-    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
-        with mock.patch("data_pipeline.utils.requests") as requests_mock:
-            zip_file_fixture_src = (
-                self._DATA_DIRECTORY_FOR_TEST / "NRI_Table_CensusTracts.zip"
-            )
-            tmp_path = mock_paths[1]
-
-            # Create mock response.
-            with open(zip_file_fixture_src, mode="rb") as file:
-                file_contents = file.read()
-            response_mock = requests.Response()
-            response_mock.status_code = 200
-            # pylint: disable=protected-access
-            response_mock._content = file_contents
-
-            # Return text fixture:
-            requests_mock.get = mock.MagicMock(return_value=response_mock)
-
-            # Instantiate the ETL class.
-            etl = NationalRiskIndexETL()
-
-            # Monkey-patch the temporary directory to the one used in the test
-            etl.TMP_PATH = tmp_path
-
-            # Run the extract method.
-            etl.extract()
-
-        return etl
-
    def test_init(self, mock_etl, mock_paths):
        """Tests that the mock NationalRiskIndexETL class instance was
        initiliazed correctly.