mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-23 05:10:36 -07:00
NRI dataset and initial score YAML configuration (#1534)
* update be staging gha * NRI dataset and initial score YAML configuration * checkpoint * adding data checks for release branch * passing tests * adding INPUT_EXTRACTED_FILE_NAME to base class * lint * columns to keep and tests * update be staging gha * checkpoint * update be staging gha * NRI dataset and initial score YAML configuration * checkpoint * adding data checks for release branch * passing tests * adding INPUT_EXTRACTED_FILE_NAME to base class * lint * columns to keep and tests * checkpoint * PR Review * removing source url * tests * stop execution of ETL if there's a YAML schema issue * update be staging gha * adding source url as class var again * clean up * force cache bust * gha cache bust * dynamically set score vars from YAML * docstrings * removing last updated year - optional reverse percentile * passing tests * sort order * column ordering * PR review * class level vars * Updating DatasetsConfig * fix pylint errors * moving metadata hint back to code Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
1833e3e794
commit
1c448a77f9
15 changed files with 272 additions and 3485 deletions
|
@ -119,6 +119,7 @@ class TestETL:
|
|||
"""
|
||||
# Setup
|
||||
etl = self._get_instance_of_etl_class()
|
||||
etl.__init__()
|
||||
data_path, tmp_path = mock_paths
|
||||
|
||||
assert etl.DATA_PATH == data_path
|
||||
|
@ -126,8 +127,6 @@ class TestETL:
|
|||
|
||||
# Also make sure all parameters that need to be non-null are non-null
|
||||
assert etl.NAME is not None
|
||||
assert etl.LAST_UPDATED_YEAR is not None
|
||||
assert etl.SOURCE_URL is not None
|
||||
assert etl.GEO_LEVEL is not None
|
||||
assert etl.COLUMNS_TO_KEEP is not None
|
||||
assert len(etl.COLUMNS_TO_KEEP) > 0
|
||||
|
@ -148,14 +147,10 @@ class TestETL:
|
|||
etl = self._get_instance_of_etl_class()
|
||||
data_path, tmp_path = mock_paths
|
||||
|
||||
etl.__init__()
|
||||
actual_file_path = etl._get_output_file_path()
|
||||
|
||||
expected_file_path = (
|
||||
data_path
|
||||
/ "dataset"
|
||||
/ f"{etl.NAME}_{etl.LAST_UPDATED_YEAR}"
|
||||
/ "usa.csv"
|
||||
)
|
||||
expected_file_path = data_path / "dataset" / etl.NAME / "usa.csv"
|
||||
|
||||
logger.info(f"Expected: {expected_file_path}")
|
||||
|
||||
|
@ -255,6 +250,7 @@ class TestETL:
|
|||
etl = self._setup_etl_instance_and_run_extract(
|
||||
mock_etl=mock_etl, mock_paths=mock_paths
|
||||
)
|
||||
etl.__init__()
|
||||
etl.transform()
|
||||
|
||||
assert etl.output_df is not None
|
||||
|
@ -272,6 +268,7 @@ class TestETL:
|
|||
"""
|
||||
# setup - input variables
|
||||
etl = self._get_instance_of_etl_class()
|
||||
etl.__init__()
|
||||
|
||||
# setup - mock transform step
|
||||
df_transform = pd.read_csv(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue