From 12a6b2f10eb31d30a6139188cfb95d94d011543d Mon Sep 17 00:00:00 2001
From: Matthew Bowen <matthew.r.bowen@omb.eop.gov>
Date: Wed, 27 Jul 2022 17:17:34 -0400
Subject: [PATCH] Add tests for DOE energy burden (#1518)

---
 .../etl/sources/doe_energy_burden/etl.py      |  17 ++--
 .../sources/doe_energy_burden/__init__.py     |   0
 .../data/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip  | Bin 0 -> 376 bytes
 .../doe_energy_burden/data/extract.csv        |  16 ++++
 .../sources/doe_energy_burden/data/output.csv |  16 ++++
 .../doe_energy_burden/data/transform.csv      |  16 ++++
 .../sources/doe_energy_burden/test_etl.py     |  82 ++++++++++++++++++
 7 files changed, 140 insertions(+), 7 deletions(-)
 create mode 100644 data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/__init__.py
 create mode 100644 data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip
 create mode 100644 data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/extract.csv
 create mode 100644 data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/output.csv
 create mode 100644 data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/transform.csv
 create mode 100644 data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
diff --git a/data/data-pipeline/data_pipeline/etl/sources/doe_energy_burden/etl.py b/data/data-pipeline/data_pipeline/etl/sources/doe_energy_burden/etl.py
index 80407d39..92594f89 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/doe_energy_burden/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/doe_energy_burden/etl.py
@@ -2,18 +2,22 @@ from pathlib import Path
 import pandas as pd
 
 from data_pipeline.config import settings
-from data_pipeline.etl.base import ExtractTransformLoad
+from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
 from data_pipeline.utils import get_module_logger, unzip_file_from_url
 
 logger = get_module_logger(__name__)
 
 
 class DOEEnergyBurden(ExtractTransformLoad):
+    NAME = "doe_energy_burden"
+    SOURCE_URL: str = (
+        settings.AWS_JUSTICE40_DATASOURCES_URL
+        + "/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
+    )
+    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
+
     def __init__(self):
-        self.DOE_FILE_URL = (
-            settings.AWS_JUSTICE40_DATASOURCES_URL
-            + "/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
-        )
+        self.DOE_FILE_URL = self.SOURCE_URL
 
         self.OUTPUT_PATH: Path = (
             self.DATA_PATH / "dataset" / "doe_energy_burden"
@@ -38,12 +42,11 @@ class DOEEnergyBurden(ExtractTransformLoad):
         unzip_file_from_url(
             file_url=self.DOE_FILE_URL,
             download_path=self.get_tmp_path(),
-            unzipped_file_path=self.get_tmp_path() / "doe_energy_burden",
+            unzipped_file_path=self.get_tmp_path()
         )
 
         self.raw_df = pd.read_csv(
             filepath_or_buffer=self.get_tmp_path()
-            / "doe_energy_burden"
             / "DOE_LEAD_AMI_TRACT_2018_ALL.csv",
             # The following need to remain as strings for all of their digits, not get converted to numbers.
             dtype={
diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/__init__.py b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip
new file mode 100644
index 0000000000000000000000000000000000000000..44af808ec369897921b248863fb9a9c634734042
GIT binary patch
literal 376
zcmWIWW@Zs#U|`^2IMwhwr0_UX$Pys0n~{M*o<WAe#osmF$JNm#-qF`HJ|xJ|IV9f5
zz|bPz(Z@$GxwtGegp+}J_pFCWe`h{SDy`sVU}Sm0%)kI9VlQ~}9WvlyxnL_~E~$9C
zb8^~EC&8%!tQ|%F7f;wFpJEnyrrW0Q*1P7{9~{j0>iz9_ex>jI!+Y}l+Zorm_#FDm
zFjvEb<H>?kJQEdPDNH(}AgF1Xal*~B@e^O-veYRl8JnG#d2}|Fx^Vn=+!D^}QZ(Uu
zAB(QFNOt76fR;B2+Ttl?8{Rgq(Tl(SplMmz{OBv!Bgzd{e6L+2ru{|y_9?Mq&2Jmy
x1H2iT<d|`JUji5q3=F`KVA#?KVi61sR!Cr=1w?>1D;r2VBM_zn>AxTj0{~j%e(3-J

literal 0
HcmV?d00001

diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/extract.csv b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/extract.csv
new file mode 100644
index 00000000..1820c356
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/extract.csv
@@ -0,0 +1,16 @@
+ABV,FIP,BURDEN,QUANTILE
+HI,15001021010,0.0380000000,30
+HI,15001021101,0.0410000000,25
+HI,15001021402,0.0240000000,66
+HI,15001021800,0.0290000000,51
+HI,15003010201,0.0270000000,58
+HI,15007040603,0.0440000000,21
+HI,15007040604,0.0330000000,40
+HI,15007040700,0.0260000000,59
+HI,15009030100,0.0350000000,37
+HI,15009030201,0.0220000000,71
+HI,15009030402,0.0200000000,75
+HI,15009030800,0.0190000000,80
+CA,6007040300,0.2000000000,70
+CA,6007040500,0.5000000000,50
+CA,6001020100,0.1990000000,30
diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/output.csv b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/output.csv
new file mode 100644
index 00000000..218fde8e
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/output.csv
@@ -0,0 +1,16 @@
+GEOID10_TRACT,Energy burden
+15001021010,0.0380000000
+15001021101,0.0410000000
+15001021402,0.0240000000
+15001021800,0.0290000000
+15003010201,0.0270000000
+15007040603,0.0440000000
+15007040604,0.0330000000
+15007040700,0.0260000000
+15009030100,0.0350000000
+15009030201,0.0220000000
+15009030402,0.0200000000
+15009030800,0.0190000000
+06007040300,0.2000000000
+06007040500,0.5000000000
+06001020100,0.1990000000
diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/transform.csv b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/transform.csv
new file mode 100644
index 00000000..59808a83
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/data/transform.csv
@@ -0,0 +1,16 @@
+ABV,GEOID10_TRACT,Energy burden,QUANTILE
+HI,15001021010,0.0380000000,30
+HI,15001021101,0.0410000000,25
+HI,15001021402,0.0240000000,66
+HI,15001021800,0.0290000000,51
+HI,15003010201,0.0270000000,58
+HI,15007040603,0.0440000000,21
+HI,15007040604,0.0330000000,40
+HI,15007040700,0.0260000000,59
+HI,15009030100,0.0350000000,37
+HI,15009030201,0.0220000000,71
+HI,15009030402,0.0200000000,75
+HI,15009030800,0.0190000000,80
+CA,06007040300,0.2000000000,70
+CA,06007040500,0.5000000000,50
+CA,06001020100,0.1990000000,30
diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
new file mode 100644
index 00000000..aa5e2824
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py
@@ -0,0 +1,82 @@
+# pylint: disable=protected-access
+from unittest import mock
+import pathlib
+import requests
+
+from data_pipeline.etl.sources.doe_energy_burden.etl import (
+    DOEEnergyBurden,
+)
+from data_pipeline.tests.sources.example.test_etl import TestETL
+from data_pipeline.utils import get_module_logger
+
+logger = get_module_logger(__name__)
+
+
+class TestDOEEnergyBurdenETL(TestETL):
+    """Tests the DOE energy burden ETL.
+
+    This uses pytest-snapshot.
+    To update individual snapshots: $ poetry run pytest
+            data_pipeline/tests/sources/doe_energy_burden/test_etl.py::TestDOEEnergyBurdenETL::<testname>
+            --snapshot-update
+    """
+
+    _ETL_CLASS = DOEEnergyBurden
+
+    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
+    _SAMPLE_DATA_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv"
+    _SAMPLE_DATA_ZIP_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
+    _EXTRACT_TMP_FOLDER_NAME = "DOEEnergyBurden"
+    _EXTRACT_CSV_FILE_NAME = "extract.csv"
+
+    def setup_method(self, _method, filename=__file__):
+        """Invoke `setup_method` from Parent, but using the current file name.
+
+        This code can be copied identically between all child classes.
+        """
+        super().setup_method(_method=_method, filename=filename)
+
+    # XXX: Refactor this; it was copied verbatim from the NRI test.
+    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
+        with mock.patch("data_pipeline.utils.requests") as requests_mock:
+            zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
+            tmp_path = mock_paths[1]
+
+            # Create mock response.
+            with open(zip_file_fixture_src, mode="rb") as file:
+                file_contents = file.read()
+            response_mock = requests.Response()
+            response_mock.status_code = 200
+            # pylint: disable=protected-access
+            response_mock._content = file_contents
+            # Return text fixture:
+            requests_mock.get = mock.MagicMock(return_value=response_mock)
+
+            # Instantiate the ETL class.
+            etl = self._ETL_CLASS()
+
+            # Monkey-patch the temporary directory to the one used in the test
+            etl.TMP_PATH = tmp_path
+
+            # Run the extract method.
+            etl.extract()
+
+        return etl
+
+    def test_init(self, mock_etl, mock_paths):
+        """Tests that the DOEEnergyBurden class was initialized
+        correctly.
+        """
+
+        etl = DOEEnergyBurden()
+        data_path, _ = mock_paths
+        assert etl.DATA_PATH == data_path
+        assert etl.COLUMNS_TO_KEEP == [
+            "GEOID10_TRACT",
+            "Energy burden"
+        ]
+        assert etl.GEOID_FIELD_NAME == "GEOID10"
+        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
+        assert etl.TRACT_INPUT_COLUMN_NAME == "FIP"
+        assert etl.INPUT_ENERGY_BURDEN_FIELD_NAME == "BURDEN"
+        assert etl.REVISED_ENERGY_BURDEN_FIELD_NAME == "Energy burden"