mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Add tests for DOE energy burden (#1518)
This commit is contained in:
parent
e77e7aef2e
commit
12a6b2f10e
7 changed files with 140 additions and 7 deletions
|
@ -2,18 +2,22 @@ from pathlib import Path
|
|||
import pandas as pd
|
||||
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||
from data_pipeline.utils import get_module_logger, unzip_file_from_url
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class DOEEnergyBurden(ExtractTransformLoad):
|
||||
NAME = "doe_energy_burden"
|
||||
SOURCE_URL: str = (
|
||||
settings.AWS_JUSTICE40_DATASOURCES_URL
|
||||
+ "/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
|
||||
)
|
||||
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||
|
||||
def __init__(self):
|
||||
self.DOE_FILE_URL = (
|
||||
settings.AWS_JUSTICE40_DATASOURCES_URL
|
||||
+ "/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
|
||||
)
|
||||
self.DOE_FILE_URL = self.SOURCE_URL
|
||||
|
||||
self.OUTPUT_PATH: Path = (
|
||||
self.DATA_PATH / "dataset" / "doe_energy_burden"
|
||||
|
@ -38,12 +42,11 @@ class DOEEnergyBurden(ExtractTransformLoad):
|
|||
unzip_file_from_url(
|
||||
file_url=self.DOE_FILE_URL,
|
||||
download_path=self.get_tmp_path(),
|
||||
unzipped_file_path=self.get_tmp_path() / "doe_energy_burden",
|
||||
unzipped_file_path=self.get_tmp_path()
|
||||
)
|
||||
|
||||
self.raw_df = pd.read_csv(
|
||||
filepath_or_buffer=self.get_tmp_path()
|
||||
/ "doe_energy_burden"
|
||||
/ "DOE_LEAD_AMI_TRACT_2018_ALL.csv",
|
||||
# The following need to remain as strings for all of their digits, not get converted to numbers.
|
||||
dtype={
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,16 @@
|
|||
ABV,FIP,BURDEN,QUANTILE
|
||||
HI,15001021010,0.0380000000,30
|
||||
HI,15001021101,0.0410000000,25
|
||||
HI,15001021402,0.0240000000,66
|
||||
HI,15001021800,0.0290000000,51
|
||||
HI,15003010201,0.0270000000,58
|
||||
HI,15007040603,0.0440000000,21
|
||||
HI,15007040604,0.0330000000,40
|
||||
HI,15007040700,0.0260000000,59
|
||||
HI,15009030100,0.0350000000,37
|
||||
HI,15009030201,0.0220000000,71
|
||||
HI,15009030402,0.0200000000,75
|
||||
HI,15009030800,0.0190000000,80
|
||||
CA,6007040300,0.2000000000,70
|
||||
CA,6007040500,0.5000000000,50
|
||||
CA,6001020100,0.1990000000,30
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Energy burden
|
||||
15001021010,0.0380000000
|
||||
15001021101,0.0410000000
|
||||
15001021402,0.0240000000
|
||||
15001021800,0.0290000000
|
||||
15003010201,0.0270000000
|
||||
15007040603,0.0440000000
|
||||
15007040604,0.0330000000
|
||||
15007040700,0.0260000000
|
||||
15009030100,0.0350000000
|
||||
15009030201,0.0220000000
|
||||
15009030402,0.0200000000
|
||||
15009030800,0.0190000000
|
||||
06007040300,0.2000000000
|
||||
06007040500,0.5000000000
|
||||
06001020100,0.1990000000
|
|
|
@ -0,0 +1,16 @@
|
|||
ABV,GEOID10_TRACT,Energy burden,QUANTILE
|
||||
HI,15001021010,0.0380000000,30
|
||||
HI,15001021101,0.0410000000,25
|
||||
HI,15001021402,0.0240000000,66
|
||||
HI,15001021800,0.0290000000,51
|
||||
HI,15003010201,0.0270000000,58
|
||||
HI,15007040603,0.0440000000,21
|
||||
HI,15007040604,0.0330000000,40
|
||||
HI,15007040700,0.0260000000,59
|
||||
HI,15009030100,0.0350000000,37
|
||||
HI,15009030201,0.0220000000,71
|
||||
HI,15009030402,0.0200000000,75
|
||||
HI,15009030800,0.0190000000,80
|
||||
CA,06007040300,0.2000000000,70
|
||||
CA,06007040500,0.5000000000,50
|
||||
CA,06001020100,0.1990000000,30
|
|
|
@ -0,0 +1,82 @@
|
|||
# pylint: disable=protected-access
|
||||
from unittest import mock
|
||||
import pathlib
|
||||
import requests
|
||||
|
||||
from data_pipeline.etl.sources.doe_energy_burden.etl import (
|
||||
DOEEnergyBurden,
|
||||
)
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class TestDOEEnergyBurdenETL(TestETL):
    """Tests the DOE energy burden ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/doe_energy_burden/test_etl.py::TestDOEEnergyBurdenETL::<testname>
    --snapshot-update
    """

    # ETL class under test; the helpers below instantiate it via this
    # attribute.
    _ETL_CLASS = DOEEnergyBurden

    # Fixture locations and names. Only _SAMPLE_DATA_ZIP_FILE_NAME is read in
    # this class; the rest are presumably consumed by the TestETL parent --
    # verify against the parent class.
    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv"
    _SAMPLE_DATA_ZIP_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
    _EXTRACT_TMP_FOLDER_NAME = "DOEEnergyBurden"
    _EXTRACT_CSV_FILE_NAME = "extract.csv"

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.

        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    # XXX: Refactor since I just straight copied it out of NRI's
    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
        """Instantiate the ETL and run `extract` against the zipped fixture.

        `data_pipeline.utils.requests` is patched so that `requests.get`
        returns a 200 response whose body is the sample zip file; presumably
        this is the call the ETL's download helper makes, so no network
        access is needed (confirm against `data_pipeline.utils`).

        Args:
            mock_etl: pytest fixture; not referenced directly here.
            mock_paths: pytest fixture; its second element is used as the
                temporary directory for the ETL.

        Returns:
            The ETL instance after `extract()` has run.
        """
        with mock.patch("data_pipeline.utils.requests") as requests_mock:
            zip_file_fixture_src = (
                self._DATA_DIRECTORY_FOR_TEST
                / self._SAMPLE_DATA_ZIP_FILE_NAME
            )
            tmp_path = mock_paths[1]

            # Create mock response.
            with open(zip_file_fixture_src, mode="rb") as file:
                file_contents = file.read()
            response_mock = requests.Response()
            response_mock.status_code = 200
            # pylint: disable=protected-access
            response_mock._content = file_contents
            # Return the zipped fixture for every `requests.get` call.
            requests_mock.get = mock.MagicMock(return_value=response_mock)

            # Instantiate the ETL class.
            etl = self._ETL_CLASS()

            # Monkey-patch the temporary directory to the one used in the test
            etl.TMP_PATH = tmp_path

            # Run the extract method.
            etl.extract()

        return etl

    def test_init(self, mock_etl, mock_paths):
        """Tests that the DOEEnergyBurden class was initialized correctly."""
        # Go through _ETL_CLASS, matching the extract helper above.
        etl = self._ETL_CLASS()
        data_path, _ = mock_paths
        assert etl.DATA_PATH == data_path
        assert etl.COLUMNS_TO_KEEP == [
            "GEOID10_TRACT",
            "Energy burden",
        ]
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.TRACT_INPUT_COLUMN_NAME == "FIP"
        assert etl.INPUT_ENERGY_BURDEN_FIELD_NAME == "BURDEN"
        assert etl.REVISED_ENERGY_BURDEN_FIELD_NAME == "Energy burden"
|
Loading…
Add table
Reference in a new issue