Add pytest to tox run in CI/CD (#713)

* Add pytest to tox run in CI/CD

* Try fixing tox dependencies for pytest

* update poetry to get ci/cd passing

* Run poetry export with --dev flag to include dev dependencies such as pytest

* WIP updating test fixtures to include PDF

* Remove dev dependencies from reqs and add pytest to envlist to make build faster

* passing score_post tests

* Add pytest tox (#729)

* Fix failing pytest

* Fixes failing tox tests and updates requirements.txt to include dev deps

* pickle protocol 4

Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Billy Daly <williamdaly422@gmail.com>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
This commit is contained in:
Shelby Switzer 2021-09-22 13:47:37 -04:00 committed by GitHub
commit d3a18352fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 112 additions and 64 deletions

View file

@@ -19,6 +19,7 @@ class ExtractTransformLoad:
DATA_PATH: Path = settings.APP_ROOT / "data"
TMP_PATH: Path = DATA_PATH / "tmp"
FILES_PATH: Path = settings.APP_ROOT / "files"
GEOID_FIELD_NAME: str = "GEOID10"
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
# TODO: investigate. Census says there are only 217,740 CBGs in the US.

View file

@@ -43,7 +43,8 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
)

View file

@@ -41,6 +41,7 @@ def etl(monkeypatch, root):
etl = PostScoreETL()
monkeypatch.setattr(etl, "DATA_PATH", root)
monkeypatch.setattr(etl, "TMP_PATH", tmp_path)
return etl
@@ -65,6 +66,11 @@ def score_data_initial(sample_data_dir):
return sample_data_dir / "score_data_initial.csv"
@pytest.fixture()
def score_pdf_initial(sample_data_dir):
return sample_data_dir / "Draft_Communities_List.pdf"
@pytest.fixture()
def counties_transformed_expected():
return pd.DataFrame.from_dict(

File diff suppressed because one or more lines are too long

View file

@@ -2,9 +2,10 @@
## Above disables warning about access to underscore-prefixed methods
from importlib import reload
from pathlib import Path
import pandas.api.types as ptypes
import pandas.testing as pdt
from data_pipeline.etl.score import constants
# See conftest.py for all fixtures used in these tests
@@ -117,8 +118,17 @@ def test_load_tile_csv(etl, tile_data_expected):
assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file()
def test_load_downloadable_zip(etl, downloadable_data_expected):
def test_load_downloadable_zip(etl, monkeypatch, downloadable_data_expected):
reload(constants)
STATIC_FILES_PATH = (
Path.cwd() / "data_pipeline" / "files"
) # need to monkeypatch to real dir
monkeypatch.setattr(constants, "FILES_PATH", STATIC_FILES_PATH)
monkeypatch.setattr(
constants,
"SCORE_DOWNLOADABLE_PDF_FILE_PATH",
STATIC_FILES_PATH / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME,
)
etl._load_downloadable_zip(
downloadable_data_expected, constants.SCORE_DOWNLOADABLE_DIR
)

View file

@@ -1,6 +1,6 @@
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL
40300,05007040300,10.492015,Very Low,15.3494
20100,05001020100,14.705854,Relatively Low,36.725828
40500,15007040500,10.234981,Very Low,13.997993
21010,15001021010,21.537231,Relatively Moderate,59.488033
21101,15001021101,19.434585,Relatively Low,53.392265
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE
40300,05007040300,10.492015,Very Low,15.3494,11.5
20100,05001020100,14.705854,Relatively Low,36.725828,12.5
40500,15007040500,10.234981,Very Low,13.997993,13.5
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5
21101,15001021101,19.434585,Relatively Low,53.392265,15.5

1 TRACT TRACTFIPS RISK_SCORE RISK_RATNG RISK_NPCTL EAL_SCORE
2 40300 05007040300 10.492015 Very Low 15.3494 11.5
3 20100 05001020100 14.705854 Relatively Low 36.725828 12.5
4 40500 15007040500 10.234981 Very Low 13.997993 13.5
5 21010 15001021010 21.537231 Relatively Moderate 59.488033 14.5
6 21101 15001021101 19.434585 Relatively Low 53.392265 15.5

View file

@@ -1,11 +1,11 @@
GEOID10,GEOID10_TRACT,TRACT,RISK_SCORE,RISK_RATNG,RISK_NPCTL
050070403001,05007040300,40300,10.492015,Very Low,15.3494
050070403002,05007040300,40300,10.492015,Very Low,15.3494
050010201001,05001020100,20100,14.705854,Relatively Low,36.725828
050010201002,05001020100,20100,14.705854,Relatively Low,36.725828
150070405001,15007040500,40500,10.234981,Very Low,13.997993
150070405002,15007040500,40500,10.234981,Very Low,13.997993
150010210101,15001021010,21010,21.537231,Relatively Moderate,59.488033
150010210102,15001021010,21010,21.537231,Relatively Moderate,59.488033
150010211011,15001021101,21101,19.434585,Relatively Low,53.392265
150010211012,15001021101,21101,19.434585,Relatively Low,53.392265
GEOID10,FEMA Risk Index Expected Annual Loss Score
050070403001,11.5
050070403002,11.5
050010201001,12.5
050010201002,12.5
150070405001,13.5
150070405002,13.5
150010210101,14.5
150010210102,14.5
150010211011,15.5
150010211012,15.5

1 GEOID10 GEOID10_TRACT FEMA Risk Index Expected Annual Loss Score TRACT RISK_SCORE RISK_RATNG RISK_NPCTL
2 050070403001 05007040300 11.5 40300 10.492015 Very Low 15.3494
3 050070403002 05007040300 11.5 40300 10.492015 Very Low 15.3494
4 050010201001 05001020100 12.5 20100 14.705854 Relatively Low 36.725828
5 050010201002 05001020100 12.5 20100 14.705854 Relatively Low 36.725828
6 150070405001 15007040500 13.5 40500 10.234981 Very Low 13.997993
7 150070405002 15007040500 13.5 40500 10.234981 Very Low 13.997993
8 150010210101 15001021010 14.5 21010 21.537231 Relatively Moderate 59.488033
9 150010210102 15001021010 14.5 21010 21.537231 Relatively Moderate 59.488033
10 150010211011 15001021101 15.5 21101 19.434585 Relatively Low 53.392265
11 150010211012 15001021101 15.5 21101 19.434585 Relatively Low 53.392265

View file

@@ -0,0 +1,11 @@
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score
050070403001,05007040300,11.5
050070403002,05007040300,11.5
050010201001,05001020100,12.5
050010201002,05001020100,12.5
150070405001,15007040500,13.5
150070405002,15007040500,13.5
150010210101,15001021010,14.5
150010210102,15001021010,14.5
150010211011,15001021101,15.5
150010211012,15001021101,15.5
1 GEOID10 GEOID10_TRACT FEMA Risk Index Expected Annual Loss Score
2 050070403001 05007040300 11.5
3 050070403002 05007040300 11.5
4 050010201001 05001020100 12.5
5 050010201002 05001020100 12.5
6 150070405001 15007040500 13.5
7 150070405002 15007040500 13.5
8 150010210101 15001021010 14.5
9 150010210102 15001021010 14.5
10 150010211011 15001021101 15.5
11 150010211012 15001021101 15.5

View file

@@ -73,13 +73,13 @@ class TestNationalRiskIndexETL:
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
BLOCK_COL = etl.GEOID_FIELD_NAME
expected = pd.read_csv(
DATA_DIR / "output.csv",
DATA_DIR / "transform.csv",
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
)
# execution
etl.transform()
# validation
assert etl.df.shape == (10, 6)
assert etl.df.shape == (10, 3)
assert etl.df.equals(expected)
def test_load(self, mock_etl):
@@ -90,21 +90,23 @@ class TestNationalRiskIndexETL:
self.OUTPUT_DIR
- The content of the file that's written matches the data in self.df
"""
# setup
# setup - input variables
etl = NationalRiskIndexETL()
output_path = etl.OUTPUT_DIR / "usa.csv"
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
BLOCK_COL = etl.GEOID_FIELD_NAME
expected = pd.read_csv(
DATA_DIR / "output.csv",
dtype={BLOCK_COL: str, TRACT_COL: str},
output_path = etl.OUTPUT_DIR / "usa.csv"
# setup - mock transform step
df_transform = pd.read_csv(
DATA_DIR / "transform.csv",
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
)
etl.df = expected
etl.df = df_transform
# setup - load expected output
expected = pd.read_csv(DATA_DIR / "output.csv", dtype={BLOCK_COL: str})
# execution
etl.load()
output = pd.read_csv(
output_path, dtype={BLOCK_COL: str, TRACT_COL: str}
)
output = pd.read_csv(output_path, dtype={BLOCK_COL: str})
# validation
assert output_path.exists()
assert output.shape == (10, 2)
assert output.equals(expected)