Pipeline PR workflow now runs code quality checks

This commit is contained in:
Carlos Felix 2024-12-05 15:34:10 -05:00 committed by Carlos Felix
commit 95246c9df0
6 changed files with 51 additions and 76 deletions

View file

@@ -1,44 +0,0 @@
# This runs tox in the two directories under data
name: Data Checks
on:
  pull_request:
    paths:
      - "data/**"
jobs:
  data-pipeline:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: data/data-pipeline
    strategy:
      matrix:
        # checks all of the versions allowed in pyproject.toml
        # (quoted so YAML never type-coerces the version string)
        python-version: ["3.10.15"]
    steps:
      # installs Python
      # one execution of the tests per version listed above
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade wheel
        run: pip install -U wheel
      - name: Print variables to help debug
        uses: hmarr/debug-action@v2
      - name: Load cached Poetry installation
        id: cached-poetry-dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry/virtualenvs
          # cache key also hashes this workflow file so edits here bust the cache
          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
        run: poetry show -v
      - name: Install dependencies
        run: poetry install
        # TODO: investigate why caching layer started failing.
        # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
      - name: Run tox
        run: poetry run tox

View file

@@ -8,6 +8,43 @@ env:
J40_VERSION_LABEL_STRING: ${{ vars.SCORE_VERSION }}
jobs:
code-quality-checks:
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
python-version: ['3.10']
environment: PR
steps:
- name: Checkout source
uses: actions/checkout@v4
- name: Print variables to help debug
uses: hmarr/debug-action@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Load cached Poetry installation
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
- name: Install poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: poetry install
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
- name: Check code is formatted
run: poetry run black --check data_pipeline/
- name: Check code style consistency
run: poetry run flake8 -v data_pipeline/
- name: Run static code analysis
run: poetry run pylint data_pipeline/ --ignore-paths data_pipeline/comparator.py
- name: Check library safety
run: poetry run safety check --ignore 51457 --ignore 44715 --ignore 70612
generate-score-tiles:
runs-on: ubuntu-latest
defaults:
@@ -44,6 +81,9 @@ jobs:
sudo apt-get update
sudo apt-get -y install gdal-bin
ogrinfo --version
- name: Run unit tests
run: |
poetry run pytest data_pipeline/
- name: Cleanup Data
run: |
poetry run python3 -m data_pipeline.application data-cleanup

View file

@@ -2,7 +2,7 @@ name: Pull Request Frontend
on:
pull_request:
paths:
- "client/**/*"
- "client/**"
jobs:
build:
runs-on: ubuntu-latest

View file

@@ -132,7 +132,9 @@ def tile_data_expected():
@pytest.fixture()
def create_tile_score_data_input():
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl")
return pd.read_pickle(
pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl"
)
@pytest.fixture()

View file

@@ -83,7 +83,9 @@ def test_create_score_data(
)
def test_create_tile_data(etl, create_tile_score_data_input, create_tile_data_expected):
def test_create_tile_data(
etl, create_tile_score_data_input, create_tile_data_expected
):
output_tiles_df_actual = etl._create_tile_data(create_tile_score_data_input)
pdt.assert_frame_equal(
output_tiles_df_actual,
@@ -159,7 +161,9 @@ def test_create_tract_search_data(census_geojson_sample_data: gpd.GeoDataFrame):
# Sanity check
assert len(census_geojson_sample_data) > 0
result = PostScoreETL()._create_tract_search_data(census_geojson_sample_data)
result = PostScoreETL()._create_tract_search_data(
census_geojson_sample_data
)
assert isinstance(result, pd.DataFrame)
assert not result.columns.empty
columns = ["GEOID10", "INTPTLAT10", "INTPTLON10"]

View file

@@ -1,27 +0,0 @@
[tox]
# required because we use pyproject.toml
isolated_build = true
envlist = py310, lint, checkdeps, pytest
# only checks python versions installed locally
skip_missing_interpreters = true

[testenv:lint]
deps = pytest
# lints python code in src and tests
commands = black data_pipeline
    flake8 data_pipeline
    pylint data_pipeline

[testenv:checkdeps]
# checks the dependencies for security vulnerabilities and open source licenses
# known issue: https://github.com/pyupio/safety/issues/364
# jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
allowlist_externals = bash
commands = pip install -U wheel
    safety check --ignore 51457 --ignore 44715 --ignore 70612
    bash scripts/run-liccheck.sh

[testenv:pytest]
# Run tests
deps = pytest
commands = pytest --full-trace