mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Refactor _setup_etl_instance_and_run_extract to base (#1518)
For the three classes we've done so far, a generic _setup_etl_instance_and_run_extract will work fine. For the moment we can reuse the same setup method until we decide future classes need more flexibility --- but they can also always subclass, so...
This commit is contained in:
parent
e49cca005a
commit
4e39be6748
4 changed files with 26 additions and 100 deletions
|
@ -1,7 +1,5 @@
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
from unittest import mock
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import requests
|
|
||||||
|
|
||||||
from data_pipeline.etl.sources.child_opportunity_index.etl import (
|
from data_pipeline.etl.sources.child_opportunity_index.etl import (
|
||||||
ChildOpportunityIndex,
|
ChildOpportunityIndex,
|
||||||
|
@ -36,33 +34,6 @@ class TestChildOpportunityIndexETL(TestETL):
|
||||||
"""
|
"""
|
||||||
super().setup_method(_method=_method, filename=filename)
|
super().setup_method(_method=_method, filename=filename)
|
||||||
|
|
||||||
# XXX: Refactor since I just straight copied it out of NRI's
|
|
||||||
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
|
|
||||||
with mock.patch("data_pipeline.utils.requests") as requests_mock:
|
|
||||||
zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / "coi.zip"
|
|
||||||
tmp_path = mock_paths[1]
|
|
||||||
|
|
||||||
# Create mock response.
|
|
||||||
with open(zip_file_fixture_src, mode="rb") as file:
|
|
||||||
file_contents = file.read()
|
|
||||||
response_mock = requests.Response()
|
|
||||||
response_mock.status_code = 200
|
|
||||||
# pylint: disable=protected-access
|
|
||||||
response_mock._content = file_contents
|
|
||||||
# Return text fixture:
|
|
||||||
requests_mock.get = mock.MagicMock(return_value=response_mock)
|
|
||||||
|
|
||||||
# Instantiate the ETL class.
|
|
||||||
etl = ChildOpportunityIndex()
|
|
||||||
|
|
||||||
# Monkey-patch the temporary directory to the one used in the test
|
|
||||||
etl.TMP_PATH = tmp_path
|
|
||||||
|
|
||||||
# Run the extract method.
|
|
||||||
etl.extract()
|
|
||||||
|
|
||||||
return etl
|
|
||||||
|
|
||||||
def test_init(self, mock_etl, mock_paths):
|
def test_init(self, mock_etl, mock_paths):
|
||||||
"""Tests that the ChildOpportunityIndexETL class was initialized
|
"""Tests that the ChildOpportunityIndexETL class was initialized
|
||||||
correctly.
|
correctly.
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
from unittest import mock
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import requests
|
|
||||||
|
|
||||||
from data_pipeline.etl.sources.doe_energy_burden.etl import (
|
from data_pipeline.etl.sources.doe_energy_burden.etl import (
|
||||||
DOEEnergyBurden,
|
DOEEnergyBurden,
|
||||||
|
@ -36,33 +34,6 @@ class TestDOEEnergyBurdenETL(TestETL):
|
||||||
"""
|
"""
|
||||||
super().setup_method(_method=_method, filename=filename)
|
super().setup_method(_method=_method, filename=filename)
|
||||||
|
|
||||||
# XXX: Refactor since I just straight copied it out of NRI's
|
|
||||||
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
|
|
||||||
with mock.patch("data_pipeline.utils.requests") as requests_mock:
|
|
||||||
zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
|
|
||||||
tmp_path = mock_paths[1]
|
|
||||||
|
|
||||||
# Create mock response.
|
|
||||||
with open(zip_file_fixture_src, mode="rb") as file:
|
|
||||||
file_contents = file.read()
|
|
||||||
response_mock = requests.Response()
|
|
||||||
response_mock.status_code = 200
|
|
||||||
# pylint: disable=protected-access
|
|
||||||
response_mock._content = file_contents
|
|
||||||
# Return text fixture:
|
|
||||||
requests_mock.get = mock.MagicMock(return_value=response_mock)
|
|
||||||
|
|
||||||
# Instantiate the ETL class.
|
|
||||||
etl = self._ETL_CLASS()
|
|
||||||
|
|
||||||
# Monkey-patch the temporary directory to the one used in the test
|
|
||||||
etl.TMP_PATH = tmp_path
|
|
||||||
|
|
||||||
# Run the extract method.
|
|
||||||
etl.extract()
|
|
||||||
|
|
||||||
return etl
|
|
||||||
|
|
||||||
def test_init(self, mock_etl, mock_paths):
|
def test_init(self, mock_etl, mock_paths):
|
||||||
"""Tests that the DOEEnergyBurden class was initialized
|
"""Tests that the DOEEnergyBurden class was initialized
|
||||||
correctly.
|
correctly.
|
||||||
|
@ -71,10 +42,7 @@ class TestDOEEnergyBurdenETL(TestETL):
|
||||||
etl = DOEEnergyBurden()
|
etl = DOEEnergyBurden()
|
||||||
data_path, _ = mock_paths
|
data_path, _ = mock_paths
|
||||||
assert etl.DATA_PATH == data_path
|
assert etl.DATA_PATH == data_path
|
||||||
assert etl.COLUMNS_TO_KEEP == [
|
assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"]
|
||||||
"GEOID10_TRACT",
|
|
||||||
"Energy burden"
|
|
||||||
]
|
|
||||||
assert etl.GEOID_FIELD_NAME == "GEOID10"
|
assert etl.GEOID_FIELD_NAME == "GEOID10"
|
||||||
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
|
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
|
||||||
assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"
|
assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"
|
||||||
|
|
|
@ -3,8 +3,10 @@ import copy
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Type
|
from typing import Type
|
||||||
|
from unittest import mock
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
import requests
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
@ -98,18 +100,32 @@ class TestETL:
|
||||||
In order to re-implement this method, usually it will involve a
|
In order to re-implement this method, usually it will involve a
|
||||||
decent amount of work to monkeypatch `requests` or another method that's
|
decent amount of work to monkeypatch `requests` or another method that's
|
||||||
used to retrieve data in order to force that method to retrieve the fixture
|
used to retrieve data in order to force that method to retrieve the fixture
|
||||||
data.
|
data. A basic version of that patching is included here for classes that can use it.
|
||||||
"""
|
"""
|
||||||
# When running this in child classes, make sure the child class re-implements
|
with mock.patch("data_pipeline.utils.requests") as requests_mock:
|
||||||
# this method.
|
zip_file_fixture_src = (
|
||||||
if self._ETL_CLASS is not ExampleETL:
|
self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
|
||||||
raise NotImplementedError(
|
|
||||||
"Prepare and run extract method not defined for this class."
|
|
||||||
)
|
)
|
||||||
|
tmp_path = mock_paths[1]
|
||||||
|
|
||||||
# The rest of this method applies for `ExampleETL` only.
|
# Create mock response.
|
||||||
etl = self._get_instance_of_etl_class()
|
with open(zip_file_fixture_src, mode="rb") as file:
|
||||||
etl.extract()
|
file_contents = file.read()
|
||||||
|
response_mock = requests.Response()
|
||||||
|
response_mock.status_code = 200
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
response_mock._content = file_contents
|
||||||
|
# Return text fixture:
|
||||||
|
requests_mock.get = mock.MagicMock(return_value=response_mock)
|
||||||
|
|
||||||
|
# Instantiate the ETL class.
|
||||||
|
etl = self._ETL_CLASS()
|
||||||
|
|
||||||
|
# Monkey-patch the temporary directory to the one used in the test
|
||||||
|
etl.TMP_PATH = tmp_path
|
||||||
|
|
||||||
|
# Run the extract method.
|
||||||
|
etl.extract()
|
||||||
|
|
||||||
return etl
|
return etl
|
||||||
|
|
||||||
|
|
|
@ -36,35 +36,6 @@ class TestNationalRiskIndexETL(TestETL):
|
||||||
"""
|
"""
|
||||||
super().setup_method(_method=_method, filename=filename)
|
super().setup_method(_method=_method, filename=filename)
|
||||||
|
|
||||||
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
|
|
||||||
with mock.patch("data_pipeline.utils.requests") as requests_mock:
|
|
||||||
zip_file_fixture_src = (
|
|
||||||
self._DATA_DIRECTORY_FOR_TEST / "NRI_Table_CensusTracts.zip"
|
|
||||||
)
|
|
||||||
tmp_path = mock_paths[1]
|
|
||||||
|
|
||||||
# Create mock response.
|
|
||||||
with open(zip_file_fixture_src, mode="rb") as file:
|
|
||||||
file_contents = file.read()
|
|
||||||
response_mock = requests.Response()
|
|
||||||
response_mock.status_code = 200
|
|
||||||
# pylint: disable=protected-access
|
|
||||||
response_mock._content = file_contents
|
|
||||||
|
|
||||||
# Return text fixture:
|
|
||||||
requests_mock.get = mock.MagicMock(return_value=response_mock)
|
|
||||||
|
|
||||||
# Instantiate the ETL class.
|
|
||||||
etl = NationalRiskIndexETL()
|
|
||||||
|
|
||||||
# Monkey-patch the temporary directory to the one used in the test
|
|
||||||
etl.TMP_PATH = tmp_path
|
|
||||||
|
|
||||||
# Run the extract method.
|
|
||||||
etl.extract()
|
|
||||||
|
|
||||||
return etl
|
|
||||||
|
|
||||||
def test_init(self, mock_etl, mock_paths):
|
def test_init(self, mock_etl, mock_paths):
|
||||||
"""Tests that the mock NationalRiskIndexETL class instance was
|
"""Tests that the mock NationalRiskIndexETL class instance was
|
||||||
initialized correctly.
|
initialized correctly.
|
||||||
|
|
Loading…
Add table
Reference in a new issue