mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Add tests for DOE energy burden (#1518)
This commit is contained in:
parent
e77e7aef2e
commit
12a6b2f10e
7 changed files with 140 additions and 7 deletions
|
@ -2,18 +2,22 @@ from pathlib import Path
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||||
from data_pipeline.utils import get_module_logger, unzip_file_from_url
|
from data_pipeline.utils import get_module_logger, unzip_file_from_url
|
||||||
|
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class DOEEnergyBurden(ExtractTransformLoad):
|
class DOEEnergyBurden(ExtractTransformLoad):
|
||||||
|
NAME = "doe_energy_burden"
|
||||||
|
SOURCE_URL: str = (
|
||||||
|
settings.AWS_JUSTICE40_DATASOURCES_URL
|
||||||
|
+ "/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
|
||||||
|
)
|
||||||
|
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.DOE_FILE_URL = (
|
self.DOE_FILE_URL = self.SOURCE_URL
|
||||||
settings.AWS_JUSTICE40_DATASOURCES_URL
|
|
||||||
+ "/DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.OUTPUT_PATH: Path = (
|
self.OUTPUT_PATH: Path = (
|
||||||
self.DATA_PATH / "dataset" / "doe_energy_burden"
|
self.DATA_PATH / "dataset" / "doe_energy_burden"
|
||||||
|
@ -38,12 +42,11 @@ class DOEEnergyBurden(ExtractTransformLoad):
|
||||||
unzip_file_from_url(
|
unzip_file_from_url(
|
||||||
file_url=self.DOE_FILE_URL,
|
file_url=self.DOE_FILE_URL,
|
||||||
download_path=self.get_tmp_path(),
|
download_path=self.get_tmp_path(),
|
||||||
unzipped_file_path=self.get_tmp_path() / "doe_energy_burden",
|
unzipped_file_path=self.get_tmp_path()
|
||||||
)
|
)
|
||||||
|
|
||||||
self.raw_df = pd.read_csv(
|
self.raw_df = pd.read_csv(
|
||||||
filepath_or_buffer=self.get_tmp_path()
|
filepath_or_buffer=self.get_tmp_path()
|
||||||
/ "doe_energy_burden"
|
|
||||||
/ "DOE_LEAD_AMI_TRACT_2018_ALL.csv",
|
/ "DOE_LEAD_AMI_TRACT_2018_ALL.csv",
|
||||||
# The following need to remain as strings for all of their digits, not get converted to numbers.
|
# The following need to remain as strings for all of their digits, not get converted to numbers.
|
||||||
dtype={
|
dtype={
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,16 @@
|
||||||
|
ABV,FIP,BURDEN,QUANTILE
|
||||||
|
HI,15001021010,0.0380000000,30
|
||||||
|
HI,15001021101,0.0410000000,25
|
||||||
|
HI,15001021402,0.0240000000,66
|
||||||
|
HI,15001021800,0.0290000000,51
|
||||||
|
HI,15003010201,0.0270000000,58
|
||||||
|
HI,15007040603,0.0440000000,21
|
||||||
|
HI,15007040604,0.0330000000,40
|
||||||
|
HI,15007040700,0.0260000000,59
|
||||||
|
HI,15009030100,0.0350000000,37
|
||||||
|
HI,15009030201,0.0220000000,71
|
||||||
|
HI,15009030402,0.0200000000,75
|
||||||
|
HI,15009030800,0.0190000000,80
|
||||||
|
CA,6007040300,0.2000000000,70
|
||||||
|
CA,6007040500,0.5000000000,50
|
||||||
|
CA,6001020100,0.1990000000,30
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
GEOID10_TRACT,Energy burden
|
||||||
|
15001021010,0.0380000000
|
||||||
|
15001021101,0.0410000000
|
||||||
|
15001021402,0.0240000000
|
||||||
|
15001021800,0.0290000000
|
||||||
|
15003010201,0.0270000000
|
||||||
|
15007040603,0.0440000000
|
||||||
|
15007040604,0.0330000000
|
||||||
|
15007040700,0.0260000000
|
||||||
|
15009030100,0.0350000000
|
||||||
|
15009030201,0.0220000000
|
||||||
|
15009030402,0.0200000000
|
||||||
|
15009030800,0.0190000000
|
||||||
|
06007040300,0.2000000000
|
||||||
|
06007040500,0.5000000000
|
||||||
|
06001020100,0.1990000000
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
ABV,GEOID10_TRACT,Energy burden,QUANTILE
|
||||||
|
HI,15001021010,0.0380000000,30
|
||||||
|
HI,15001021101,0.0410000000,25
|
||||||
|
HI,15001021402,0.0240000000,66
|
||||||
|
HI,15001021800,0.0290000000,51
|
||||||
|
HI,15003010201,0.0270000000,58
|
||||||
|
HI,15007040603,0.0440000000,21
|
||||||
|
HI,15007040604,0.0330000000,40
|
||||||
|
HI,15007040700,0.0260000000,59
|
||||||
|
HI,15009030100,0.0350000000,37
|
||||||
|
HI,15009030201,0.0220000000,71
|
||||||
|
HI,15009030402,0.0200000000,75
|
||||||
|
HI,15009030800,0.0190000000,80
|
||||||
|
CA,06007040300,0.2000000000,70
|
||||||
|
CA,06007040500,0.5000000000,50
|
||||||
|
CA,06001020100,0.1990000000,30
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
from unittest import mock
|
||||||
|
import pathlib
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from data_pipeline.etl.sources.doe_energy_burden.etl import (
|
||||||
|
DOEEnergyBurden,
|
||||||
|
)
|
||||||
|
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||||
|
from data_pipeline.utils import get_module_logger
|
||||||
|
|
||||||
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDOEEnergyBurdenETL(TestETL):
|
||||||
|
"""Tests the DOE energy burden ETL.
|
||||||
|
|
||||||
|
This uses pytest-snapshot.
|
||||||
|
To update individual snapshots: $ poetry run pytest
|
||||||
|
data_pipeline/tests/sources/doe_energy_burden/test_etl.py::TestDOEEnergyBurdenETL::<testname>
|
||||||
|
--snapshot-update
|
||||||
|
"""
|
||||||
|
|
||||||
|
_ETL_CLASS = DOEEnergyBurden
|
||||||
|
|
||||||
|
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
|
||||||
|
_SAMPLE_DATA_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv"
|
||||||
|
_SAMPLE_DATA_ZIP_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
|
||||||
|
_EXTRACT_TMP_FOLDER_NAME = "DOEEnergyBurden"
|
||||||
|
_EXTRACT_CSV_FILE_NAME = "extract.csv"
|
||||||
|
|
||||||
|
def setup_method(self, _method, filename=__file__):
|
||||||
|
"""Invoke `setup_method` from Parent, but using the current file name.
|
||||||
|
|
||||||
|
This code can be copied identically between all child classes.
|
||||||
|
"""
|
||||||
|
super().setup_method(_method=_method, filename=filename)
|
||||||
|
|
||||||
|
# XXX: Refactor since I just straight copied it out of NRI's
|
||||||
|
def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
|
||||||
|
with mock.patch("data_pipeline.utils.requests") as requests_mock:
|
||||||
|
zip_file_fixture_src = self._DATA_DIRECTORY_FOR_TEST / self._SAMPLE_DATA_ZIP_FILE_NAME
|
||||||
|
tmp_path = mock_paths[1]
|
||||||
|
|
||||||
|
# Create mock response.
|
||||||
|
with open(zip_file_fixture_src, mode="rb") as file:
|
||||||
|
file_contents = file.read()
|
||||||
|
response_mock = requests.Response()
|
||||||
|
response_mock.status_code = 200
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
response_mock._content = file_contents
|
||||||
|
# Return text fixture:
|
||||||
|
requests_mock.get = mock.MagicMock(return_value=response_mock)
|
||||||
|
|
||||||
|
# Instantiate the ETL class.
|
||||||
|
etl = self._ETL_CLASS()
|
||||||
|
|
||||||
|
# Monkey-patch the temporary directory to the one used in the test
|
||||||
|
etl.TMP_PATH = tmp_path
|
||||||
|
|
||||||
|
# Run the extract method.
|
||||||
|
etl.extract()
|
||||||
|
|
||||||
|
return etl
|
||||||
|
|
||||||
|
def test_init(self, mock_etl, mock_paths):
|
||||||
|
"""Tests that the DOEEnergyBurden class was initialized
|
||||||
|
correctly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
etl = DOEEnergyBurden()
|
||||||
|
data_path, _ = mock_paths
|
||||||
|
assert etl.DATA_PATH == data_path
|
||||||
|
assert etl.COLUMNS_TO_KEEP == [
|
||||||
|
"GEOID10_TRACT",
|
||||||
|
"Energy burden"
|
||||||
|
]
|
||||||
|
assert etl.GEOID_FIELD_NAME == "GEOID10"
|
||||||
|
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
|
||||||
|
assert etl.TRACT_INPUT_COLUMN_NAME == "FIP"
|
||||||
|
assert etl.INPUT_ENERGY_BURDEN_FIELD_NAME == "BURDEN"
|
||||||
|
assert etl.REVISED_ENERGY_BURDEN_FIELD_NAME == "Energy burden"
|
Loading…
Add table
Reference in a new issue