mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
* Adds dev dependencies to requirements.txt and re-runs black on codebase * Adds test and code for national risk index etl, still in progress * Removes test_data from .gitignore * Adds test data to nation_risk_index tests * Creates tests and ETL class for NRI data * Adds tests for load() and transform() methods of NationalRiskIndexETL * Updates README.md with info about the NRI dataset * Adds to dos * Moves tests and test data into a tests/ dir in national_risk_index * Moves tmp_dir for tests into data/tmp/tests/ * Promotes fixtures to conftest and relocates national_risk_index tests: The relocation of national_risk_index tests is necessary because tests can only use fixtures specified in conftests within the same package * Fixes issue with df.equals() in test_transform() * Files reformatted by black * Commit changes to other files after re-running black * Fixes unused import that caused lint checks to fail * Moves tests/ directory to app root for data_pipeline * Adds new methods to ExtractTransformLoad base class: - __init__() Initializes class attributes - _get_census_fips_codes() Loads a dataframe with the fips codes for census block group and tract - validate_init() Checks that the class was initialized correctly - validate_output() Checks that the output was loaded correctly * Adds test for ExtractTransformLoad.__init__() and base.py * Fixes failing flake8 test * Changes geo_col to geoid_col and changes is_dataset to is_census in yaml * Adds test for validate_output() * Adds remaining tests * Removes is_dataset from init method * Makes CENSUS_CSV a class attribute instead of a class global: This ensures that CENSUS_CSV is only set when the ETL class is for a non-census dataset and removes the need to overwrite the value in mock_etl fixture * Re-formats files with black and fixes broken tox tests
60 lines
2 KiB
Python
60 lines
2 KiB
Python
import os
|
|
from pathlib import Path
|
|
from shutil import copyfile
|
|
|
|
import pytest
|
|
|
|
from data_pipeline.config import settings
|
|
from data_pipeline.etl.base import ExtractTransformLoad
|
|
|
|
TMP_DIR = settings.APP_ROOT / "data" / "tmp" / "tests"
|
|
|
|
|
|
def copy_data_files(src: Path, dst: Path) -> None:
    """Copy a test data file into place unless the destination already exists

    Args
        src: pathlib.Path instance. The file to copy from.
        dst: pathlib.Path instance. The file to copy to. Any missing parent
            directories are created before copying.

    Returns
        None. When dst already exists it is left untouched.
    """
    if dst.exists():
        # Destination is already present: keep it as is
        return
    target_dir = dst.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    copyfile(src, dst)
    # Sanity check that the copy produced the destination file
    assert dst.exists()
|
|
|
|
|
|
@pytest.fixture(scope="session")
def mock_paths(tmp_path_factory) -> tuple:
    """Builds a temporary directory structure that stands in for the data
    folder during testing

    Returns
        A (data_path, tmp_path) tuple, where tmp_path is the "tmp"
        subdirectory of data_path
    """
    # ask pytest to root its temporary directories at TMP_DIR
    # NOTE(review): presumably only effective if pytest has not already
    # created its base temp directory -- confirm
    os.environ["PYTEST_DEBUG_TEMPROOT"] = str(TMP_DIR)
    TMP_DIR.mkdir(parents=True, exist_ok=True)
    # numbered=False keeps the directory name exactly "data"
    mock_data_dir = tmp_path_factory.mktemp("data", numbered=False)
    mock_tmp_dir = mock_data_dir / "tmp"
    mock_tmp_dir.mkdir()
    return mock_data_dir, mock_tmp_dir
|
|
|
|
|
|
@pytest.fixture(scope="session")
def mock_census(mock_paths) -> Path:
    """Places the census test CSV inside the mocked data directory

    Returns
        Path to the copied file, located at census/csv/us.csv under the
        mocked data path
    """
    mock_data_dir, _ = mock_paths
    fixture_csv = settings.APP_ROOT / "tests" / "base" / "data" / "census.csv"
    mocked_csv = mock_data_dir.joinpath("census", "csv", "us.csv")
    copy_data_files(fixture_csv, mocked_csv)
    return mocked_csv
|
|
|
|
|
|
@pytest.fixture
def mock_etl(monkeypatch, mock_paths, mock_census) -> None:
    """Points the base ExtractTransformLoad class at the mocked directories by
    replacing its DATA_PATH and TMP_PATH class attributes with the local
    mock_paths
    """
    # mock_census is not used directly below; requesting it makes pytest run
    # that fixture before this one
    mock_data_dir, mock_tmp_dir = mock_paths
    overrides = {"DATA_PATH": mock_data_dir, "TMP_PATH": mock_tmp_dir}
    for attr_name, mock_value in overrides.items():
        monkeypatch.setattr(ExtractTransformLoad, attr_name, mock_value)
|