Refactor _setup_etl_instance_and_run_extract to base (#1518)

For the three classes we've done so far, a generic
_setup_etl_instance_and_run_extract works fine. For the moment we can
reuse the same setup method until we decide future classes need more
flexibility; those classes can always override it in a subclass.
This commit is contained in:
matt bowen 2022-08-02 10:24:47 -04:00
parent e49cca005a
commit 4e39be6748
4 changed files with 26 additions and 100 deletions

View file

@ -1,7 +1,5 @@
# pylint: disable=protected-access
from unittest import mock
import pathlib
import requests
from data_pipeline.etl.sources.child_opportunity_index.etl import (
ChildOpportunityIndex,
@ -36,33 +34,6 @@ class TestChildOpportunityIndexETL(TestETL):
"""
super().setup_method(_method=_method, filename=filename)
# XXX: Refactor since I just straight copied it out of NRI's
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
    """Create a ChildOpportunityIndex ETL and run ``extract`` against a fixture.

    While the ETL is built and extracted, ``data_pipeline.utils.requests`` is
    patched so that ``get`` returns a canned HTTP 200 response whose body is
    the bytes of the ``coi.zip`` file under ``self._DATA_DIRECTORY_FOR_TEST``.

    Args:
        mock_etl: Not referenced in the body; kept for the caller-facing
            signature.
        mock_paths: Indexable value; element 1 becomes the ETL's ``TMP_PATH``.

    Returns:
        The ETL instance after ``extract()`` has been called.
    """
    with mock.patch("data_pipeline.utils.requests") as patched_requests:
        fixture_zip = self._DATA_DIRECTORY_FOR_TEST / "coi.zip"
        scratch_dir = mock_paths[1]

        # Load the zipped fixture that stands in for the downloaded payload.
        with open(fixture_zip, mode="rb") as fixture_handle:
            payload = fixture_handle.read()

        # Hand-build a successful response carrying the fixture bytes.
        canned_response = requests.Response()
        canned_response.status_code = 200
        # pylint: disable=protected-access
        canned_response._content = payload

        # Every ``get`` on the patched module now yields the canned response.
        patched_requests.get = mock.MagicMock(return_value=canned_response)

        # Build the ETL, redirect its temp directory to the test one, and
        # run extract while the patch is still active.
        etl = ChildOpportunityIndex()
        etl.TMP_PATH = scratch_dir
        etl.extract()

    return etl
def test_init(self, mock_etl, mock_paths):
"""Tests that the ChildOpportunityIndexETL class was initialized
correctly.

View file

@ -1,7 +1,5 @@
# pylint: disable=protected-access
from unittest import mock
import pathlib
import requests
from data_pipeline.etl.sources.doe_energy_burden.etl import (
DOEEnergyBurden,
@ -36,33 +34,6 @@ class TestDOEEnergyBurdenETL(TestETL):
"""
super().setup_method(_method=_method, filename=filename)
# XXX: Refactor since I just straight copied it out of NRI's
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
    """Instantiate ``self._ETL_CLASS`` and run ``extract`` against a fixture.

    ``data_pipeline.utils.requests`` is patched for the duration of the call
    so that ``get`` returns a canned HTTP 200 response. The response body is
    the content of ``self._SAMPLE_DATA_ZIP_FILE_NAME`` read from
    ``self._DATA_DIRECTORY_FOR_TEST``.

    Args:
        mock_etl: Not referenced in the body; kept for the caller-facing
            signature.
        mock_paths: Indexable value; element 1 becomes the ETL's ``TMP_PATH``.

    Returns:
        The ETL instance after ``extract()`` has been called.
    """
    with mock.patch("data_pipeline.utils.requests") as patched_requests:
        sample_zip_path = (
            self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
        )
        temp_dir = mock_paths[1]

        # Read the sample archive that replaces the real download.
        with open(sample_zip_path, mode="rb") as zip_handle:
            zip_bytes = zip_handle.read()

        # Fake a successful HTTP response whose body is the archive.
        fake_response = requests.Response()
        fake_response.status_code = 200
        # pylint: disable=protected-access
        fake_response._content = zip_bytes

        # Route ``get`` on the patched module to the fake response.
        patched_requests.get = mock.MagicMock(return_value=fake_response)

        # Create the ETL under test and point its temp directory at the
        # test-provided one before extracting.
        etl = self._ETL_CLASS()
        etl.TMP_PATH = temp_dir
        etl.extract()

    return etl
def test_init(self, mock_etl, mock_paths):
"""Tests that the ChildOpportunityIndexETL class was initialized
correctly.
@ -71,10 +42,7 @@ class TestDOEEnergyBurdenETL(TestETL):
etl = DOEEnergyBurden()
data_path, _ = mock_paths
assert etl.DATA_PATH == data_path
assert etl.COLUMNS_TO_KEEP == [
"GEOID10_TRACT",
"Energy burden"
]
assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"]
assert etl.GEOID_FIELD_NAME == "GEOID10"
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"

View file

@ -3,8 +3,10 @@ import copy
import os
import pathlib
from typing import Type
from unittest import mock
import pytest
import requests
import numpy as np
import pandas as pd
@ -98,17 +100,31 @@ class TestETL:
In order to re-implement this method, usually it will involve a
decent amount of work to monkeypatch `requests` or another method that's
used to retrieve data in order to force that method to retrieve the fixture
data.
data. A basic version of that patching is included here for classes that can use it.
"""
# When running this in child classes, make sure the child class re-implements
# this method.
if self._ETL_CLASS is not ExampleETL:
raise NotImplementedError(
"Prepare and run extract method not defined for this class."
with mock.patch("data_pipeline.utils.requests") as requests_mock:
zip_file_fixture_src = (
self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
)
tmp_path = mock_paths[1]
# The rest of this method applies for `ExampleETL` only.
etl = self._get_instance_of_etl_class()
# Create mock response.
with open(zip_file_fixture_src, mode="rb") as file:
file_contents = file.read()
response_mock = requests.Response()
response_mock.status_code = 200
# pylint: disable=protected-access
response_mock._content = file_contents
# Return text fixture:
requests_mock.get = mock.MagicMock(return_value=response_mock)
# Instantiate the ETL class.
etl = self._ETL_CLASS()
# Monkey-patch the temporary directory to the one used in the test
etl.TMP_PATH = tmp_path
# Run the extract method.
etl.extract()
return etl

View file

@ -36,35 +36,6 @@ class TestNationalRiskIndexETL(TestETL):
"""
super().setup_method(_method=_method, filename=filename)
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
    """Create a NationalRiskIndexETL and run ``extract`` against a fixture.

    With ``data_pipeline.utils.requests`` patched, ``get`` returns a canned
    HTTP 200 response whose body is the bytes of
    ``NRI_Table_CensusTracts.zip`` from ``self._DATA_DIRECTORY_FOR_TEST``.

    Args:
        mock_etl: Not referenced in the body; kept for the caller-facing
            signature.
        mock_paths: Indexable value; element 1 becomes the ETL's ``TMP_PATH``.

    Returns:
        The ETL instance after ``extract()`` has been called.
    """
    with mock.patch("data_pipeline.utils.requests") as patched_requests:
        nri_zip = self._DATA_DIRECTORY_FOR_TEST / "NRI_Table_CensusTracts.zip"
        working_dir = mock_paths[1]

        # Pull the fixture archive into memory.
        with open(nri_zip, mode="rb") as archive:
            archive_bytes = archive.read()

        # Assemble a 200 response that carries the archive as its content.
        stub_response = requests.Response()
        stub_response.status_code = 200
        # pylint: disable=protected-access
        stub_response._content = archive_bytes

        # Make ``get`` on the patched module return the stub response.
        patched_requests.get = mock.MagicMock(return_value=stub_response)

        # Instantiate the ETL, swap in the test's temp directory, then
        # extract while the patch is in effect.
        etl = NationalRiskIndexETL()
        etl.TMP_PATH = working_dir
        etl.extract()

    return etl
def test_init(self, mock_etl, mock_paths):
"""Tests that the mock NationalRiskIndexETL class instance was
initialized correctly.