From 4e39be67481cd0d555075c8cd49446fab5b5ff3a Mon Sep 17 00:00:00 2001 From: matt bowen Date: Tue, 2 Aug 2022 10:24:47 -0400 Subject: [PATCH] Refactor _setup_etl_instance_and_run_extract to base (#1518) For the three classes we've done so far, a generic _setup_etl_instance_and_run_extract works fine, so for the moment they can all reuse the same base-class setup method. If future classes need more flexibility, they can always override it in a subclass. --- .../child_opportunity_index/test_etl.py | 29 ---------------- .../sources/doe_energy_burden/test_etl.py | 34 +------------------ .../tests/sources/example/test_etl.py | 34 ++++++++++++++----- .../sources/national_risk_index/test_etl.py | 29 ---------------- 4 files changed, 26 insertions(+), 100 deletions(-) diff --git a/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py index 4d6affbc..f246a0c0 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/child_opportunity_index/test_etl.py @@ -1,7 +1,5 @@ # pylint: disable=protected-access -from unittest import mock import pathlib -import requests from data_pipeline.etl.sources.child_opportunity_index.etl import ( ChildOpportunityIndex, @@ -36,33 +34,6 @@ class TestChildOpportunityIndexETL(TestETL): """ super().setup_method(_method=_method, filename=filename) - # XXX: Refactor since I just straight copied it out of NRI's - def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths): - with mock.patch("data_pipeline.utils.requests") as requests_mock: - zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / "coi.zip" - tmp_path = mock_paths[1] - - # Create mock response. 
- with open(zip_file_fixture_src, mode="rb") as file: - file_contents = file.read() - response_mock = requests.Response() - response_mock.status_code = 200 - # pylint: disable=protected-access - response_mock._content = file_contents - # Return text fixture: - requests_mock.get = mock.MagicMock(return_value=response_mock) - - # Instantiate the ETL class. - etl = ChildOpportunityIndex() - - # Monkey-patch the temporary directory to the one used in the test - etl.TMP_PATH = tmp_path - - # Run the extract method. - etl.extract() - - return etl - def test_init(self, mock_etl, mock_paths): """Tests that the ChildOpportunityIndexETL class was initialized correctly. diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py index 548b356f..c3eaf667 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py @@ -1,7 +1,5 @@ # pylint: disable=protected-access -from unittest import mock import pathlib -import requests from data_pipeline.etl.sources.doe_energy_burden.etl import ( DOEEnergyBurden, @@ -36,33 +34,6 @@ class TestDOEEnergyBurdenETL(TestETL): """ super().setup_method(_method=_method, filename=filename) - # XXX: Refactor since I just straight copied it out of NRI's - def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths): - with mock.patch("data_pipeline.utils.requests") as requests_mock: - zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME - tmp_path = mock_paths[1] - - # Create mock response. 
- with open(zip_file_fixture_src, mode="rb") as file: - file_contents = file.read() - response_mock = requests.Response() - response_mock.status_code = 200 - # pylint: disable=protected-access - response_mock._content = file_contents - # Return text fixture: - requests_mock.get = mock.MagicMock(return_value=response_mock) - - # Instantiate the ETL class. - etl = self._ETL_CLASS() - - # Monkey-patch the temporary directory to the one used in the test - etl.TMP_PATH = tmp_path - - # Run the extract method. - etl.extract() - - return etl - def test_init(self, mock_etl, mock_paths): """Tests that the ChildOpportunityIndexETL class was initialized correctly. @@ -71,10 +42,7 @@ class TestDOEEnergyBurdenETL(TestETL): etl = DOEEnergyBurden() data_path, _ = mock_paths assert etl.DATA_PATH == data_path - assert etl.COLUMNS_TO_KEEP == [ - "GEOID10_TRACT", - "Energy burden" - ] + assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"] assert etl.GEOID_FIELD_NAME == "GEOID10" assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT" assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP" diff --git a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py index 5beb872c..713e07b6 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/example/test_etl.py @@ -3,8 +3,10 @@ import copy import os import pathlib from typing import Type +from unittest import mock import pytest +import requests import numpy as np import pandas as pd @@ -98,18 +100,32 @@ class TestETL: In order to re-implement this method, usually it will involve a decent amount of work to monkeypatch `requests` or another method that's used to retrieve data in order to force that method to retrieve the fixture - data. + data. A basic version of that patching is included here for classes that can use it. 
""" - # When running this in child classes, make sure the child class re-implements - # this method. - if self._ETL_CLASS is not ExampleETL: - raise NotImplementedError( - "Prepare and run extract method not defined for this class." + with mock.patch("data_pipeline.utils.requests") as requests_mock: + zip_file_fixture_src = ( + self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME ) + tmp_path = mock_paths[1] - # The rest of this method applies for `ExampleETL` only. - etl = self._get_instance_of_etl_class() - etl.extract() + # Create mock response. + with open(zip_file_fixture_src, mode="rb") as file: + file_contents = file.read() + response_mock = requests.Response() + response_mock.status_code = 200 + # pylint: disable=protected-access + response_mock._content = file_contents + # Return text fixture: + requests_mock.get = mock.MagicMock(return_value=response_mock) + + # Instantiate the ETL class. + etl = self._ETL_CLASS() + + # Monkey-patch the temporary directory to the one used in the test + etl.TMP_PATH = tmp_path + + # Run the extract method. + etl.extract() return etl diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py index f428565f..4892e40a 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py @@ -36,35 +36,6 @@ class TestNationalRiskIndexETL(TestETL): """ super().setup_method(_method=_method, filename=filename) - def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths): - with mock.patch("data_pipeline.utils.requests") as requests_mock: - zip_file_fixture_src = ( - self._DATA_DIRECTORY_FOR_TEST / "NRI_Table_CensusTracts.zip" - ) - tmp_path = mock_paths[1] - - # Create mock response. 
- with open(zip_file_fixture_src, mode="rb") as file: - file_contents = file.read() - response_mock = requests.Response() - response_mock.status_code = 200 - # pylint: disable=protected-access - response_mock._content = file_contents - - # Return text fixture: - requests_mock.get = mock.MagicMock(return_value=response_mock) - - # Instantiate the ETL class. - etl = NationalRiskIndexETL() - - # Monkey-patch the temporary directory to the one used in the test - etl.TMP_PATH = tmp_path - - # Run the extract method. - etl.extract() - - return etl - def test_init(self, mock_etl, mock_paths): """Tests that the mock NationalRiskIndexETL class instance was initiliazed correctly.