Mirror of https://github.com/DOI-DO/j40-cejst-2.git (synced 2025-08-14 10:41:39 -07:00)
Refactor DOE Energy Burden and COI to use YAML (#1796)
* added tribalId for Supplemental dataset (#1804)
* Setting zoom levels for tribal map (#1810)
* NRI dataset and initial score YAML configuration (#1534)
  * update be staging gha
  * NRI dataset and initial score YAML configuration
  * checkpoint
  * adding data checks for release branch
  * passing tests
  * adding INPUT_EXTRACTED_FILE_NAME to base class
  * lint
  * columns to keep and tests
  * update be staging gha
  * checkpoint
  * update be staging gha
  * NRI dataset and initial score YAML configuration
  * checkpoint
  * adding data checks for release branch
  * passing tests
  * adding INPUT_EXTRACTED_FILE_NAME to base class
  * lint
  * columns to keep and tests
  * checkpoint
  * PR Review
  * removing source url
  * tests
  * stop execution of ETL if there's a YAML schema issue
  * update be staging gha
  * adding source url as class var again
  * clean up
  * force cache bust
  * gha cache bust
  * dynamically set score vars from YAML
  * docstrings
  * removing last updated year - optional reverse percentile
  * passing tests
  * sort order
  * column ordering
  * PR review
  * class level vars
  * Updating DatasetsConfig
  * fix pylint errors
  * moving metadata hint back to code

  Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
* Correct copy typo (#1809)
* Add basic test suite for COI (#1518)
* Update COI to use new yaml (#1518)
* Add tests for DOE energy burden (#1518)
* Add dataset config for energy burden (#1518)
* Refactor ETL to use datasets.yml (#1518)
* Add fake GEOIDs to COI tests (#1518)
* Refactor _setup_etl_instance_and_run_extract to base (#1518)

  For the three classes we've done so far, a generic _setup_etl_instance_and_run_extract will work fine; for the moment we can reuse the same setup method until we decide future classes need more flexibility --- but they can also always subclass so...
* Add output-path tests (#1518)
* Update YAML to match constant (#1518)
* Don't blindly set float format (#1518)
* Add defaults for extract (#1518)
* Run YAML load on all subclasses (#1518)
* Update description fields (#1518)
* Update YAML per final format (#1518)
* Update fixture tract IDs (#1518)
* Update base class refactor (#1518)

  Now that NRI is final I needed to make a small number of updates to my refactored code.
* Remove old comment (#1518)
* Fix type signature and return (#1518)
* Update per code review (#1518)

Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
Co-authored-by: Vim <86254807+vim-usds@users.noreply.github.com>
Parent: ed9b7172f7
Commit: 9635ef5ee2
44 changed files with 698 additions and 3640 deletions
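The commit message above centers on moving per-dataset settings into datasets.yml and setting ETL class variables from that YAML ("Refactor ETL to use datasets.yml", "dynamically set score vars from YAML", "Run YAML load on all subclasses"). The YAML schema itself is not part of this diff, so the following is only a minimal sketch of the pattern; the config path, field names, and class shape are assumptions, not the repository's actual DatasetsConfig implementation.

# Minimal sketch of YAML-driven dataset configuration (assumed schema and path;
# not the actual datasets.yml format or DatasetsConfig class from this repository).
import pathlib

import yaml  # PyYAML


def load_dataset_config(yaml_path: pathlib.Path, dataset_name: str) -> dict:
    """Return the configuration block for a single dataset from a datasets.yml file."""
    with open(yaml_path, encoding="utf-8") as config_file:
        config = yaml.safe_load(config_file)
    for dataset in config.get("datasets", []):
        if dataset.get("module_name") == dataset_name:
            return dataset
    # Fail loudly when the YAML does not describe the requested dataset, in the
    # spirit of "stop execution of ETL if there's a YAML schema issue".
    raise ValueError(f"No dataset named {dataset_name!r} in {yaml_path}")


class YamlConfiguredETL:
    """Illustrative base-class pattern: a subclass names its dataset block and picks
    up column settings from YAML instead of hard-coding them."""

    DATASET_CONFIG_PATH = pathlib.Path("config") / "datasets.yml"  # assumed location
    NAME = "doe_energy_burden"

    def __init__(self) -> None:
        dataset = load_dataset_config(self.DATASET_CONFIG_PATH, self.NAME)
        # Dynamically set instance variables from the YAML block.
        self.input_geoid_tract_field_name = dataset["input_geoid_tract_field_name"]
        self.columns_to_keep = dataset["columns_to_keep"]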
Binary file not shown.
@@ -0,0 +1,16 @@
ABV,FIP,BURDEN,QUANTILE
HI,15001021010,0.0380000000,30
HI,15001021101,0.0410000000,25
HI,15001021402,0.0240000000,66
HI,15001021800,0.0290000000,51
HI,15003010201,0.0270000000,58
HI,15007040603,0.0440000000,21
HI,15007040604,0.0330000000,40
HI,15007040700,0.0260000000,59
HI,15009030100,0.0350000000,37
HI,15009030201,0.0220000000,71
HI,15009030402,0.0200000000,75
HI,15009030800,0.0190000000,80
CA,6069000802,0.2000000000,70
CA,6061021322,0.5000000000,50
CA,6027000800,0.1990000000,30
@@ -0,0 +1,16 @@
GEOID10_TRACT,Energy burden
15001021010,0.0380000000
15001021101,0.0410000000
15001021402,0.0240000000
15001021800,0.0290000000
15003010201,0.0270000000
15007040603,0.0440000000
15007040604,0.0330000000
15007040700,0.0260000000
15009030100,0.0350000000
15009030201,0.0220000000
15009030402,0.0200000000
15009030800,0.0190000000
06069000802,0.2000000000
06061021322,0.5000000000
06027000800,0.1990000000
@@ -0,0 +1,16 @@
ABV,GEOID10_TRACT,Energy burden,QUANTILE
HI,15001021010,0.0380000000,30
HI,15001021101,0.0410000000,25
HI,15001021402,0.0240000000,66
HI,15001021800,0.0290000000,51
HI,15003010201,0.0270000000,58
HI,15007040603,0.0440000000,21
HI,15007040604,0.0330000000,40
HI,15007040700,0.0260000000,59
HI,15009030100,0.0350000000,37
HI,15009030201,0.0220000000,71
HI,15009030402,0.0200000000,75
HI,15009030800,0.0190000000,80
CA,06069000802,0.2000000000,70
CA,06061021322,0.5000000000,50
CA,06027000800,0.1990000000,30
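Taken together, the three fixture files above trace the DOE energy burden data through the pipeline: the raw LEAD extract keyed by ABV/FIP/BURDEN/QUANTILE, the final output keyed by GEOID10_TRACT and Energy burden, and an intermediate that keeps both. The ETL's transform step is not part of this diff; the pandas sketch below only illustrates the column renames and tract-ID zero-padding implied by the fixtures and by the constants asserted in the test file that follows, and should be treated as an assumption rather than the actual implementation.

# Illustrative only: column names come from the fixtures and the test constants
# (INPUT_GEOID_TRACT_FIELD_NAME == "FIP", INPUT_ENERGY_BURDEN_FIELD_NAME == "BURDEN",
# REVISED_ENERGY_BURDEN_FIELD_NAME == "Energy burden"); the logic is an assumption.
import pandas as pd

INPUT_GEOID_TRACT_FIELD_NAME = "FIP"
INPUT_ENERGY_BURDEN_FIELD_NAME = "BURDEN"
GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
REVISED_ENERGY_BURDEN_FIELD_NAME = "Energy burden"
COLUMNS_TO_KEEP = [GEOID_TRACT_FIELD_NAME, REVISED_ENERGY_BURDEN_FIELD_NAME]


def transform(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Rename the LEAD columns and zero-pad tract IDs to 11 characters."""
    output_df = raw_df.rename(
        columns={
            INPUT_GEOID_TRACT_FIELD_NAME: GEOID_TRACT_FIELD_NAME,
            INPUT_ENERGY_BURDEN_FIELD_NAME: REVISED_ENERGY_BURDEN_FIELD_NAME,
        }
    )
    # The raw CA rows (e.g. 6069000802) are missing their leading zero; the output
    # fixture stores 11-character tract GEOIDs (06069000802).
    output_df[GEOID_TRACT_FIELD_NAME] = (
        output_df[GEOID_TRACT_FIELD_NAME].astype(str).str.zfill(11)
    )
    return output_df[COLUMNS_TO_KEEP]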
@@ -0,0 +1,61 @@
# pylint: disable=protected-access
import pathlib

from data_pipeline.etl.sources.doe_energy_burden.etl import (
    DOEEnergyBurden,
)
from data_pipeline.tests.sources.example.test_etl import TestETL
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class TestDOEEnergyBurdenETL(TestETL):
    """Tests the DOE energy burden ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
            data_pipeline/tests/sources/doe_energy_burden/test_etl.py::TestClassNameETL::<testname>
            --snapshot-update
    """

    _ETL_CLASS = DOEEnergyBurden

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv"
    _SAMPLE_DATA_ZIP_FILE_NAME = "DOE_LEAD_AMI_TRACT_2018_ALL.csv.zip"
    _EXTRACT_TMP_FOLDER_NAME = "DOEEnergyBurden"
    _EXTRACT_CSV_FILE_NAME = "extract.csv"

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from the parent class, but using the current file name.

        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def test_init(self, mock_etl, mock_paths):
        """Tests that the DOEEnergyBurden ETL class was initialized correctly."""
        etl = DOEEnergyBurden()
        data_path, _ = mock_paths
        assert etl.DATA_PATH == data_path
        assert etl.COLUMNS_TO_KEEP == ["GEOID10_TRACT", "Energy burden"]
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "FIP"
        assert etl.INPUT_ENERGY_BURDEN_FIELD_NAME == "BURDEN"
        assert etl.REVISED_ENERGY_BURDEN_FIELD_NAME == "Energy burden"

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        data_path, tmp_path = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / "doe_energy_burden" / "usa.csv"
        )
        assert output_file_path == expected_output_file_path
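The test class above inherits most of its behavior from the shared TestETL base, which, per the commit message, now owns a generic _setup_etl_instance_and_run_extract used by the classes refactored so far. That base class is not part of this hunk, so the sketch below is only a guess at its shape, driven by the class variables the child defines (_ETL_CLASS, _SAMPLE_DATA_PATH, _SAMPLE_DATA_ZIP_FILE_NAME); the mock target and method body are assumptions.

# Hypothetical sketch of the shared helper on the TestETL base class; the real
# implementation in data_pipeline/tests/sources/example/test_etl.py may differ.
import pathlib
from unittest import mock


class TestETLSketch:
    _ETL_CLASS = None  # child classes set this to their ETL class
    _SAMPLE_DATA_PATH: pathlib.Path = None
    _SAMPLE_DATA_ZIP_FILE_NAME: str = None

    def _setup_etl_instance_and_run_extract(self, mock_etl, mock_paths):
        """Instantiate the child's ETL class and run `extract` against the local
        sample zip rather than the remote source URL."""
        zip_fixture = self._SAMPLE_DATA_PATH / self._SAMPLE_DATA_ZIP_FILE_NAME
        # Assumption: downloads go through the `requests` import in data_pipeline.utils.
        with mock.patch("data_pipeline.utils.requests") as requests_mock:
            requests_mock.get.return_value = mock.MagicMock(
                status_code=200,
                content=zip_fixture.read_bytes(),
            )
            etl = self._ETL_CLASS()
            etl.extract()
        return etl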