Refactor _setup_etl_instance_and_run_extract to base (#1518)

For the three classes we've done so far, a generic _setup_etl_instance_and_run_extract works fine. For the moment we can reuse the same setup method in the base class until we decide future classes need more flexibility --- and they can always override it in a subclass.
2025-02-23 01:54:18 -08:00 · 2022-08-02 10:24:47 -04:00 · 2022-08-02 10:24:47 -04:00 · 4e39be6748
commit 4e39be6748
parent e49cca005a
4 changed files with 26 additions and 100 deletions
--- a/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py
@ -1,7 +1,5 @@
 # pylint: disable=protected-access
 from unittest import mock
 import pathlib
 import requests
 from data_pipeline.etl.sources.child_opportunity_index.etl import (
    ChildOpportunityIndex,
@ -36,33 +34,6 @@ class TestChildOpportunityIndexETL(TestETL):
        """
        super().setup_method(_method=_method, filename=filename)
    # XXX: Refactor since I just straight copied it out of NRI's
    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
        """Run ``ChildOpportunityIndex.extract`` against the ``coi.zip`` fixture.

        While ``data_pipeline.utils.requests`` is patched, its ``get`` returns a
        canned 200 response whose body is the bytes of the fixture zip. The
        ETL's ``TMP_PATH`` is replaced with ``mock_paths[1]`` before extraction.

        ``mock_etl`` is not referenced in the body (presumably a pytest fixture
        requested for its side effects -- confirm against conftest).

        Returns the ETL instance after ``extract()`` has run.
        """
        with mock.patch("data_pipeline.utils.requests") as patched_requests:
            fixture_zip = self._DATA_DIRECTORY_FOR_TEST / "coi.zip"
            scratch_dir = mock_paths[1]

            # Load the fixture bytes that the fake download will hand back.
            with open(fixture_zip, mode="rb") as fixture_handle:
                zip_bytes = fixture_handle.read()

            # Hand-build a successful response carrying those bytes.
            canned_response = requests.Response()
            canned_response.status_code = 200
            # pylint: disable=protected-access
            canned_response._content = zip_bytes

            # Every `requests.get` made through the patched module gets the
            # canned response instead of hitting the network.
            patched_requests.get = mock.MagicMock(return_value=canned_response)

            # Build the ETL, redirect its temp directory to the test's, and
            # extract while the patch is still active.
            etl = ChildOpportunityIndex()
            etl.TMP_PATH = scratch_dir
            etl.extract()

        return etl
    def test_init(self, mock_etl, mock_paths):
        """Tests that the ChildOpportunityIndexETL class was initialized
        correctly.
--- a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
@ -1,7 +1,5 @@
 # pylint: disable=protected-access
 from unittest import mock
 import pathlib
 import requests
 from data_pipeline.etl.sources.doe_energy_burden.etl import (
    DOEEnergyBurden,
@ -36,33 +34,6 @@ class TestDOEEnergyBurdenETL(TestETL):
        """
        super().setup_method(_method=_method, filename=filename)
    # XXX: Refactor since I just straight copied it out of NRI's
    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
        """Run the configured ETL's ``extract`` against the sample zip fixture.

        While ``data_pipeline.utils.requests`` is patched, its ``get`` returns a
        canned 200 response whose body is the bytes of
        ``self._SAMPLE_DATA_ZIP_FILE_NAME`` found under
        ``self._DATA_DIRECTORY_FOR_TEST``. The ETL (an instance of
        ``self._ETL_CLASS``) has its ``TMP_PATH`` replaced with
        ``mock_paths[1]`` before extraction.

        ``mock_etl`` is not referenced in the body (presumably a pytest fixture
        requested for its side effects -- confirm against conftest).

        Returns the ETL instance after ``extract()`` has run.
        """
        with mock.patch("data_pipeline.utils.requests") as patched_requests:
            fixture_zip = (
                self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
            )
            scratch_dir = mock_paths[1]

            # Load the fixture bytes that the fake download will hand back.
            with open(fixture_zip, mode="rb") as fixture_handle:
                zip_bytes = fixture_handle.read()

            # Hand-build a successful response carrying those bytes.
            canned_response = requests.Response()
            canned_response.status_code = 200
            # pylint: disable=protected-access
            canned_response._content = zip_bytes

            # Every `requests.get` made through the patched module gets the
            # canned response instead of hitting the network.
            patched_requests.get = mock.MagicMock(return_value=canned_response)

            # Build the ETL, redirect its temp directory to the test's, and
            # extract while the patch is still active.
            etl = self._ETL_CLASS()
            etl.TMP_PATH = scratch_dir
            etl.extract()

        return etl
    def test_init(self, mock_etl, mock_paths):
        """Tests that the ChildOpportunityIndexETL class was initialized
        correctly.
@ -71,10 +42,7 @@ class TestDOEEnergyBurdenETL(TestETL):
        etl = DOEEnergyBurden()
        data_path, _ = mock_paths
        assert etl.DATA_PATH == data_path
-        assert etl.COLUMNS_TO_KEEP == [
+        assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"]
            "GEOID10_TRACT",
            "Energy burden"
        ]
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"
--- a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py
@ -3,8 +3,10 @@ import copy
 import os
 import pathlib
 from typing import Type
 from unittest import mock
 import pytest
 import requests
 import numpy as np
 import pandas as pd
@ -98,18 +100,32 @@ class TestETL:
        In order to re-implement this method, usually it will involve a
        decent amount of work to monkeypatch `requests` or another method that's
        used to retrieve data in order to force that method to retrieve the fixture
-        data.
+        data. A basic version of that patching is included here for classes that can use it.
        """
-        # When running this in child classes, make sure the child class re-implements
+        with mock.patch("data_pipeline.utils.requests") as requests_mock:
-        # this method.
+            zip_file_fixture_src = (
-        if self._ETL_CLASS is not ExampleETL:
+                self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
            raise NotImplementedError(
                "Prepare and run extract method not defined for this class."
            )
            tmp_path = mock_paths[1]
-        # The rest of this method applies for `ExampleETL` only.
+            # Create mock response.
-        etl = self._get_instance_of_etl_class()
+            with open(zip_file_fixture_src, mode="rb") as file:
-        etl.extract()
+                file_contents = file.read()
            response_mock = requests.Response()
            response_mock.status_code = 200
            # pylint: disable=protected-access
            response_mock._content = file_contents
            # Return text fixture:
            requests_mock.get = mock.MagicMock(return_value=response_mock)
            # Instantiate the ETL class.
            etl = self._ETL_CLASS()
            # Monkey-patch the temporary directory to the one used in the test
            etl.TMP_PATH = tmp_path
            # Run the extract method.
            etl.extract()
        return etl
--- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py
@ -36,35 +36,6 @@ class TestNationalRiskIndexETL(TestETL):
        """
        super().setup_method(_method=_method, filename=filename)
    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
        """Run ``NationalRiskIndexETL.extract`` against the NRI zip fixture.

        While ``data_pipeline.utils.requests`` is patched, its ``get`` returns a
        canned 200 response whose body is the bytes of
        ``NRI_Table_CensusTracts.zip``. The ETL's ``TMP_PATH`` is replaced with
        ``mock_paths[1]`` before extraction.

        ``mock_etl`` is not referenced in the body (presumably a pytest fixture
        requested for its side effects -- confirm against conftest).

        Returns the ETL instance after ``extract()`` has run.
        """
        with mock.patch("data_pipeline.utils.requests") as patched_requests:
            fixture_zip = (
                self._DATA_DIRECTORY_FOR_TEST / "NRI_Table_CensusTracts.zip"
            )
            scratch_dir = mock_paths[1]

            # Load the fixture bytes that the fake download will hand back.
            with open(fixture_zip, mode="rb") as fixture_handle:
                zip_bytes = fixture_handle.read()

            # Hand-build a successful response carrying those bytes.
            canned_response = requests.Response()
            canned_response.status_code = 200
            # pylint: disable=protected-access
            canned_response._content = zip_bytes

            # Every `requests.get` made through the patched module gets the
            # canned response instead of hitting the network.
            patched_requests.get = mock.MagicMock(return_value=canned_response)

            # Build the ETL, redirect its temp directory to the test's, and
            # extract while the patch is still active.
            etl = NationalRiskIndexETL()
            etl.TMP_PATH = scratch_dir
            etl.extract()

        return etl
    def test_init(self, mock_etl, mock_paths):
        """Tests that the mock NationalRiskIndexETL class instance was
        initialized correctly.