Add pytest to tox run in CI/CD (#713)

* Add pytest to tox run in CI/CD

* Try fixing tox dependencies for pytest

* update poetry to get ci/cd passing

* Run poetry export with --dev flag to include dev dependencies such as pytest

* WIP updating test fixtures to include PDF

* Remove dev dependencies from reqs and add pytest to envlist to make build faster

* passing score_post tests

* Add pytest tox (#729)

* Fix failing pytest

* Fixes failing tox tests and updates requirements.txt to include dev deps

* pickle protocol 4

Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Billy Daly <williamdaly422@gmail.com>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
This commit is contained in:
Shelby Switzer 2021-09-22 13:47:37 -04:00 committed by GitHub
commit d3a18352fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 112 additions and 64 deletions

View file

@@ -19,6 +19,7 @@ class ExtractTransformLoad:
DATA_PATH: Path = settings.APP_ROOT / "data"
TMP_PATH: Path = DATA_PATH / "tmp"
FILES_PATH: Path = settings.APP_ROOT / "files"
GEOID_FIELD_NAME: str = "GEOID10"
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
# TODO: investigate. Census says there are only 217,740 CBGs in the US.

View file

@@ -43,7 +43,8 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
)

View file

@@ -41,6 +41,7 @@ def etl(monkeypatch, root):
etl = PostScoreETL()
monkeypatch.setattr(etl, "DATA_PATH", root)
monkeypatch.setattr(etl, "TMP_PATH", tmp_path)
return etl
@@ -65,6 +66,11 @@ def score_data_initial(sample_data_dir):
return sample_data_dir / "score_data_initial.csv"
@pytest.fixture()
def score_pdf_initial(sample_data_dir):
return sample_data_dir / "Draft_Communities_List.pdf"
@pytest.fixture()
def counties_transformed_expected():
return pd.DataFrame.from_dict(

File diff suppressed because one or more lines are too long

View file

@@ -2,9 +2,10 @@
## Above disables warning about access to underscore-prefixed methods
from importlib import reload
from pathlib import Path
import pandas.api.types as ptypes
import pandas.testing as pdt
from data_pipeline.etl.score import constants
# See conftest.py for all fixtures used in these tests
@@ -117,8 +118,17 @@ def test_load_tile_csv(etl, tile_data_expected):
assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file()
def test_load_downloadable_zip(etl, downloadable_data_expected):
def test_load_downloadable_zip(etl, monkeypatch, downloadable_data_expected):
reload(constants)
STATIC_FILES_PATH = (
Path.cwd() / "data_pipeline" / "files"
) # need to monkeypatch to real dir
monkeypatch.setattr(constants, "FILES_PATH", STATIC_FILES_PATH)
monkeypatch.setattr(
constants,
"SCORE_DOWNLOADABLE_PDF_FILE_PATH",
STATIC_FILES_PATH / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME,
)
etl._load_downloadable_zip(
downloadable_data_expected, constants.SCORE_DOWNLOADABLE_DIR
)

View file

@@ -1,6 +1,6 @@
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL
40300,05007040300,10.492015,Very Low,15.3494
20100,05001020100,14.705854,Relatively Low,36.725828
40500,15007040500,10.234981,Very Low,13.997993
21010,15001021010,21.537231,Relatively Moderate,59.488033
21101,15001021101,19.434585,Relatively Low,53.392265
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE
40300,05007040300,10.492015,Very Low,15.3494,11.5
20100,05001020100,14.705854,Relatively Low,36.725828,12.5
40500,15007040500,10.234981,Very Low,13.997993,13.5
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5
21101,15001021101,19.434585,Relatively Low,53.392265,15.5

1 TRACT TRACTFIPS RISK_SCORE RISK_RATNG RISK_NPCTL EAL_SCORE
2 40300 05007040300 10.492015 Very Low 15.3494 11.5
3 20100 05001020100 14.705854 Relatively Low 36.725828 12.5
4 40500 15007040500 10.234981 Very Low 13.997993 13.5
5 21010 15001021010 21.537231 Relatively Moderate 59.488033 14.5
6 21101 15001021101 19.434585 Relatively Low 53.392265 15.5

View file

@@ -1,11 +1,11 @@
GEOID10,GEOID10_TRACT,TRACT,RISK_SCORE,RISK_RATNG,RISK_NPCTL
050070403001,05007040300,40300,10.492015,Very Low,15.3494
050070403002,05007040300,40300,10.492015,Very Low,15.3494
050010201001,05001020100,20100,14.705854,Relatively Low,36.725828
050010201002,05001020100,20100,14.705854,Relatively Low,36.725828
150070405001,15007040500,40500,10.234981,Very Low,13.997993
150070405002,15007040500,40500,10.234981,Very Low,13.997993
150010210101,15001021010,21010,21.537231,Relatively Moderate,59.488033
150010210102,15001021010,21010,21.537231,Relatively Moderate,59.488033
150010211011,15001021101,21101,19.434585,Relatively Low,53.392265
150010211012,15001021101,21101,19.434585,Relatively Low,53.392265
GEOID10,FEMA Risk Index Expected Annual Loss Score
050070403001,11.5
050070403002,11.5
050010201001,12.5
050010201002,12.5
150070405001,13.5
150070405002,13.5
150010210101,14.5
150010210102,14.5
150010211011,15.5
150010211012,15.5

1 GEOID10 GEOID10_TRACT FEMA Risk Index Expected Annual Loss Score TRACT RISK_SCORE RISK_RATNG RISK_NPCTL
2 050070403001 05007040300 11.5 40300 10.492015 Very Low 15.3494
3 050070403002 05007040300 11.5 40300 10.492015 Very Low 15.3494
4 050010201001 05001020100 12.5 20100 14.705854 Relatively Low 36.725828
5 050010201002 05001020100 12.5 20100 14.705854 Relatively Low 36.725828
6 150070405001 15007040500 13.5 40500 10.234981 Very Low 13.997993
7 150070405002 15007040500 13.5 40500 10.234981 Very Low 13.997993
8 150010210101 15001021010 14.5 21010 21.537231 Relatively Moderate 59.488033
9 150010210102 15001021010 14.5 21010 21.537231 Relatively Moderate 59.488033
10 150010211011 15001021101 15.5 21101 19.434585 Relatively Low 53.392265
11 150010211012 15001021101 15.5 21101 19.434585 Relatively Low 53.392265

View file

@@ -0,0 +1,11 @@
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score
050070403001,05007040300,11.5
050070403002,05007040300,11.5
050010201001,05001020100,12.5
050010201002,05001020100,12.5
150070405001,15007040500,13.5
150070405002,15007040500,13.5
150010210101,15001021010,14.5
150010210102,15001021010,14.5
150010211011,15001021101,15.5
150010211012,15001021101,15.5
1 GEOID10 GEOID10_TRACT FEMA Risk Index Expected Annual Loss Score
2 050070403001 05007040300 11.5
3 050070403002 05007040300 11.5
4 050010201001 05001020100 12.5
5 050010201002 05001020100 12.5
6 150070405001 15007040500 13.5
7 150070405002 15007040500 13.5
8 150010210101 15001021010 14.5
9 150010210102 15001021010 14.5
10 150010211011 15001021101 15.5
11 150010211012 15001021101 15.5

View file

@@ -73,13 +73,13 @@ class TestNationalRiskIndexETL:
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
BLOCK_COL = etl.GEOID_FIELD_NAME
expected = pd.read_csv(
DATA_DIR / "output.csv",
DATA_DIR / "transform.csv",
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
)
# execution
etl.transform()
# validation
assert etl.df.shape == (10, 6)
assert etl.df.shape == (10, 3)
assert etl.df.equals(expected)
def test_load(self, mock_etl):
@@ -90,21 +90,23 @@ class TestNationalRiskIndexETL:
self.OUTPUT_DIR
- The content of the file that's written matches the data in self.df
"""
# setup
# setup - input variables
etl = NationalRiskIndexETL()
output_path = etl.OUTPUT_DIR / "usa.csv"
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME
BLOCK_COL = etl.GEOID_FIELD_NAME
expected = pd.read_csv(
DATA_DIR / "output.csv",
dtype={BLOCK_COL: str, TRACT_COL: str},
output_path = etl.OUTPUT_DIR / "usa.csv"
# setup - mock transform step
df_transform = pd.read_csv(
DATA_DIR / "transform.csv",
dtype={BLOCK_COL: "string", TRACT_COL: "string"},
)
etl.df = expected
etl.df = df_transform
# setup - load expected output
expected = pd.read_csv(DATA_DIR / "output.csv", dtype={BLOCK_COL: str})
# execution
etl.load()
output = pd.read_csv(
output_path, dtype={BLOCK_COL: str, TRACT_COL: str}
)
output = pd.read_csv(output_path, dtype={BLOCK_COL: str})
# validation
assert output_path.exists()
assert output.shape == (10, 2)
assert output.equals(expected)