Refactor _setup_etl_instance_and_run_extract to base (#1518)

For the three classes we've done so far, a generic
_setup_etl_instance_and_run_extract works fine. For the moment we can
reuse the same setup method until we decide future classes need more
flexibility -- and they can always override it in a subclass.
This commit is contained in:
matt bowen 2022-08-02 10:24:47 -04:00
parent e49cca005a
commit 4e39be6748
4 changed files with 26 additions and 100 deletions

View file

@ -1,7 +1,5 @@
# pylint: disable=protected-access # pylint: disable=protected-access
from unittest import mock
import pathlib import pathlib
import requests
from data_pipeline.etl.sources.child_opportunity_index.etl import ( from data_pipeline.etl.sources.child_opportunity_index.etl import (
ChildOpportunityIndex, ChildOpportunityIndex,
@ -36,33 +34,6 @@ class TestChildOpportunityIndexETL(TestETL):
""" """
super().setup_method(_method=_method, filename=filename) super().setup_method(_method=_method, filename=filename)
# XXX: Refactor since I just straight copied it out of NRI's
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
with mock.patch("data_pipeline.utils.requests") as requests_mock:
zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / "coi.zip"
tmp_path = mock_paths[1]
# Create mock response.
with open(zip_file_fixture_src, mode="rb") as file:
file_contents = file.read()
response_mock = requests.Response()
response_mock.status_code = 200
# pylint: disable=protected-access
response_mock._content = file_contents
# Return text fixture:
requests_mock.get = mock.MagicMock(return_value=response_mock)
# Instantiate the ETL class.
etl = ChildOpportunityIndex()
# Monkey-patch the temporary directory to the one used in the test
etl.TMP_PATH = tmp_path
# Run the extract method.
etl.extract()
return etl
def test_init(self, mock_etl, mock_paths): def test_init(self, mock_etl, mock_paths):
"""Tests that the ChildOpportunityIndexETL class was initialized """Tests that the ChildOpportunityIndexETL class was initialized
correctly. correctly.

View file

@ -1,7 +1,5 @@
# pylint: disable=protected-access # pylint: disable=protected-access
from unittest import mock
import pathlib import pathlib
import requests
from data_pipeline.etl.sources.doe_energy_burden.etl import ( from data_pipeline.etl.sources.doe_energy_burden.etl import (
DOEEnergyBurden, DOEEnergyBurden,
@ -36,33 +34,6 @@ class TestDOEEnergyBurdenETL(TestETL):
""" """
super().setup_method(_method=_method, filename=filename) super().setup_method(_method=_method, filename=filename)
# XXX: Refactor since I just straight copied it out of NRI's
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
with mock.patch("data_pipeline.utils.requests") as requests_mock:
zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
tmp_path = mock_paths[1]
# Create mock response.
with open(zip_file_fixture_src, mode="rb") as file:
file_contents = file.read()
response_mock = requests.Response()
response_mock.status_code = 200
# pylint: disable=protected-access
response_mock._content = file_contents
# Return text fixture:
requests_mock.get = mock.MagicMock(return_value=response_mock)
# Instantiate the ETL class.
etl = self._ETL_CLASS()
# Monkey-patch the temporary directory to the one used in the test
etl.TMP_PATH = tmp_path
# Run the extract method.
etl.extract()
return etl
def test_init(self, mock_etl, mock_paths): def test_init(self, mock_etl, mock_paths):
"""Tests that the DOEEnergyBurden class was initialized """Tests that the DOEEnergyBurden class was initialized
correctly. correctly.
@ -71,10 +42,7 @@ class TestDOEEnergyBurdenETL(TestETL):
etl = DOEEnergyBurden() etl = DOEEnergyBurden()
data_path, _ = mock_paths data_path, _ = mock_paths
assert etl.DATA_PATH == data_path assert etl.DATA_PATH == data_path
assert etl.COLUMNS_TO_KEEP == [ assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"]
"GEOID10_TRACT",
"Energy burden"
]
assert etl.GEOID_FIELD_NAME == "GEOID10" assert etl.GEOID_FIELD_NAME == "GEOID10"
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT" assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP" assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"

View file

@ -3,8 +3,10 @@ import copy
import os import os
import pathlib import pathlib
from typing import Type from typing import Type
from unittest import mock
import pytest import pytest
import requests
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -98,18 +100,32 @@ class TestETL:
In order to re-implement this method, usually it will involve a In order to re-implement this method, usually it will involve a
decent amount of work to monkeypatch `requests` or another method that's decent amount of work to monkeypatch `requests` or another method that's
used to retrieve data in order to force that method to retrieve the fixture used to retrieve data in order to force that method to retrieve the fixture
data. data. A basic version of that patching is included here for classes that can use it.
""" """
# When running this in child classes, make sure the child class re-implements with mock.patch("data_pipeline.utils.requests") as requests_mock:
# this method. zip_file_fixture_src = (
if self._ETL_CLASS is not ExampleETL: self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
raise NotImplementedError(
"Prepare and run extract method not defined for this class."
) )
tmp_path = mock_paths[1]
# The rest of this method applies for `ExampleETL` only. # Create mock response.
etl = self._get_instance_of_etl_class() with open(zip_file_fixture_src, mode="rb") as file:
etl.extract() file_contents = file.read()
response_mock = requests.Response()
response_mock.status_code = 200
# pylint: disable=protected-access
response_mock._content = file_contents
# Return text fixture:
requests_mock.get = mock.MagicMock(return_value=response_mock)
# Instantiate the ETL class.
etl = self._ETL_CLASS()
# Monkey-patch the temporary directory to the one used in the test
etl.TMP_PATH = tmp_path
# Run the extract method.
etl.extract()
return etl return etl

View file

@ -36,35 +36,6 @@ class TestNationalRiskIndexETL(TestETL):
""" """
super().setup_method(_method=_method, filename=filename) super().setup_method(_method=_method, filename=filename)
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
with mock.patch("data_pipeline.utils.requests") as requests_mock:
zip_file_fixture_src = (
self._DATA_DIRECTORY_FOR_TEST / "NRI_Table_CensusTracts.zip"
)
tmp_path = mock_paths[1]
# Create mock response.
with open(zip_file_fixture_src, mode="rb") as file:
file_contents = file.read()
response_mock = requests.Response()
response_mock.status_code = 200
# pylint: disable=protected-access
response_mock._content = file_contents
# Return text fixture:
requests_mock.get = mock.MagicMock(return_value=response_mock)
# Instantiate the ETL class.
etl = NationalRiskIndexETL()
# Monkey-patch the temporary directory to the one used in the test
etl.TMP_PATH = tmp_path
# Run the extract method.
etl.extract()
return etl
def test_init(self, mock_etl, mock_paths): def test_init(self, mock_etl, mock_paths):
"""Tests that the mock NationalRiskIndexETL class instance was """Tests that the mock NationalRiskIndexETL class instance was
initialized correctly.