mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-06 07:04:19 -07:00
Add pytest to tox run in CI/CD (#713)
* Add pytest to tox run in CI/CD
* Try fixing tox dependencies for pytest
* update poetry to get ci/cd passing
* Run poetry export with --dev flag to include dev dependencies such as pytest
* WIP updating test fixtures to include PDF
* Remove dev dependencies from reqs and add pytest to envlist to make build faster
* passing score_post tests
* Add pytest tox (#729)
* Fix failing pytest
* Fixes failing tox tests and updates requirements.txt to include dev deps
* pickle protocol 4

Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Billy Daly <williamdaly422@gmail.com>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
This commit is contained in:
parent
7709836a12
commit
d3a18352fc
17 changed files with 112 additions and 64 deletions
|
@ -19,6 +19,7 @@ class ExtractTransformLoad:
|
|||
|
||||
DATA_PATH: Path = settings.APP_ROOT / "data"
|
||||
TMP_PATH: Path = DATA_PATH / "tmp"
|
||||
FILES_PATH: Path = settings.APP_ROOT / "files"
|
||||
GEOID_FIELD_NAME: str = "GEOID10"
|
||||
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
|
||||
# TODO: investigate. Census says there are only 217,740 CBGs in the US.
|
||||
|
|
|
@ -43,7 +43,8 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
|
|||
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
|
||||
SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
|
||||
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
|
||||
SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
|
||||
)
|
||||
|
|
|
@ -41,6 +41,7 @@ def etl(monkeypatch, root):
|
|||
etl = PostScoreETL()
|
||||
monkeypatch.setattr(etl, "DATA_PATH", root)
|
||||
monkeypatch.setattr(etl, "TMP_PATH", tmp_path)
|
||||
|
||||
return etl
|
||||
|
||||
|
||||
|
@ -65,6 +66,11 @@ def score_data_initial(sample_data_dir):
|
|||
return sample_data_dir / "score_data_initial.csv"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def score_pdf_initial(sample_data_dir):
|
||||
return sample_data_dir / "Draft_Communities_List.pdf"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def counties_transformed_expected():
|
||||
return pd.DataFrame.from_dict(
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -2,9 +2,10 @@
|
|||
## Above disables warning about access to underscore-prefixed methods
|
||||
|
||||
from importlib import reload
|
||||
|
||||
from pathlib import Path
|
||||
import pandas.api.types as ptypes
|
||||
import pandas.testing as pdt
|
||||
|
||||
from data_pipeline.etl.score import constants
|
||||
|
||||
# See conftest.py for all fixtures used in these tests
|
||||
|
@ -117,8 +118,17 @@ def test_load_tile_csv(etl, tile_data_expected):
|
|||
assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file()
|
||||
|
||||
|
||||
def test_load_downloadable_zip(etl, downloadable_data_expected):
|
||||
def test_load_downloadable_zip(etl, monkeypatch, downloadable_data_expected):
|
||||
reload(constants)
|
||||
STATIC_FILES_PATH = (
|
||||
Path.cwd() / "data_pipeline" / "files"
|
||||
) # need to monkeypatch to real dir
|
||||
monkeypatch.setattr(constants, "FILES_PATH", STATIC_FILES_PATH)
|
||||
monkeypatch.setattr(
|
||||
constants,
|
||||
"SCORE_DOWNLOADABLE_PDF_FILE_PATH",
|
||||
STATIC_FILES_PATH / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME,
|
||||
)
|
||||
etl._load_downloadable_zip(
|
||||
downloadable_data_expected, constants.SCORE_DOWNLOADABLE_DIR
|
||||
)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL
|
||||
40300,05007040300,10.492015,Very Low,15.3494
|
||||
20100,05001020100,14.705854,Relatively Low,36.725828
|
||||
40500,15007040500,10.234981,Very Low,13.997993
|
||||
21010,15001021010,21.537231,Relatively Moderate,59.488033
|
||||
21101,15001021101,19.434585,Relatively Low,53.392265
|
||||
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE
|
||||
40300,05007040300,10.492015,Very Low,15.3494,11.5
|
||||
20100,05001020100,14.705854,Relatively Low,36.725828,12.5
|
||||
40500,15007040500,10.234981,Very Low,13.997993,13.5
|
||||
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5
|
||||
21101,15001021101,19.434585,Relatively Low,53.392265,15.5
|
||||
|
|
|
|
@ -1,11 +1,11 @@
|
|||
GEOID10,GEOID10_TRACT,TRACT,RISK_SCORE,RISK_RATNG,RISK_NPCTL
|
||||
050070403001,05007040300,40300,10.492015,Very Low,15.3494
|
||||
050070403002,05007040300,40300,10.492015,Very Low,15.3494
|
||||
050010201001,05001020100,20100,14.705854,Relatively Low,36.725828
|
||||
050010201002,05001020100,20100,14.705854,Relatively Low,36.725828
|
||||
150070405001,15007040500,40500,10.234981,Very Low,13.997993
|
||||
150070405002,15007040500,40500,10.234981,Very Low,13.997993
|
||||
150010210101,15001021010,21010,21.537231,Relatively Moderate,59.488033
|
||||
150010210102,15001021010,21010,21.537231,Relatively Moderate,59.488033
|
||||
150010211011,15001021101,21101,19.434585,Relatively Low,53.392265
|
||||
150010211012,15001021101,21101,19.434585,Relatively Low,53.392265
|
||||
GEOID10,FEMA Risk Index Expected Annual Loss Score
|
||||
050070403001,11.5
|
||||
050070403002,11.5
|
||||
050010201001,12.5
|
||||
050010201002,12.5
|
||||
150070405001,13.5
|
||||
150070405002,13.5
|
||||
150010210101,14.5
|
||||
150010210102,14.5
|
||||
150010211011,15.5
|
||||
150010211012,15.5
|
||||
|
|
|
|
@ -0,0 +1,11 @@
|
|||
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score
|
||||
050070403001,05007040300,11.5
|
||||
050070403002,05007040300,11.5
|
||||
050010201001,05001020100,12.5
|
||||
050010201002,05001020100,12.5
|
||||
150070405001,15007040500,13.5
|
||||
150070405002,15007040500,13.5
|
||||
150010210101,15001021010,14.5
|
||||
150010210102,15001021010,14.5
|
||||
150010211011,15001021101,15.5
|
||||
150010211012,15001021101,15.5
|
|
|
@ -73,13 +73,13 @@ class TestNationalRiskIndexETL:
|
|||
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
|
||||
BLOCK_COL = etl.GEOID_FIELD_NAME
|
||||
expected = pd.read_csv(
|
||||
DATA_DIR / "output.csv",
|
||||
DATA_DIR / "transform.csv",
|
||||
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
|
||||
)
|
||||
# execution
|
||||
etl.transform()
|
||||
# validation
|
||||
assert etl.df.shape == (10, 6)
|
||||
assert etl.df.shape == (10, 3)
|
||||
assert etl.df.equals(expected)
|
||||
|
||||
def test_load(self, mock_etl):
|
||||
|
@ -90,21 +90,23 @@ class TestNationalRiskIndexETL:
|
|||
self.OUTPUT_DIR
|
||||
- The content of the file that's written matches the data in self.df
|
||||
"""
|
||||
# setup
|
||||
# setup - input variables
|
||||
etl = NationalRiskIndexETL()
|
||||
output_path = etl.OUTPUT_DIR / "usa.csv"
|
||||
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
|
||||
BLOCK_COL = etl.GEOID_FIELD_NAME
|
||||
expected = pd.read_csv(
|
||||
DATA_DIR / "output.csv",
|
||||
dtype={BLOCK_COL: str, TRACT_COL: str},
|
||||
output_path = etl.OUTPUT_DIR / "usa.csv"
|
||||
# setup - mock transform step
|
||||
df_transform = pd.read_csv(
|
||||
DATA_DIR / "transform.csv",
|
||||
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
|
||||
)
|
||||
etl.df = expected
|
||||
etl.df = df_transform
|
||||
# setup - load expected output
|
||||
expected = pd.read_csv(DATA_DIR / "output.csv", dtype={BLOCK_COL: str})
|
||||
# execution
|
||||
etl.load()
|
||||
output = pd.read_csv(
|
||||
output_path, dtype={BLOCK_COL: str, TRACT_COL: str}
|
||||
)
|
||||
output = pd.read_csv(output_path, dtype={BLOCK_COL: str})
|
||||
# validation
|
||||
assert output_path.exists()
|
||||
assert output.shape == (10, 2)
|
||||
assert output.equals(expected)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue