Pipeline PR workflow now runs code quality checks

This commit is contained in:
Carlos Felix 2024-12-05 15:34:10 -05:00 committed by Carlos Felix
commit 95246c9df0
6 changed files with 51 additions and 76 deletions

View file

@@ -1,44 +0,0 @@
# This runs tox in the two directories under data
name: Data Checks
on:
  pull_request:
    paths:
      - "data/**"
jobs:
  data-pipeline:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: data/data-pipeline
    strategy:
      matrix:
        # checks all of the versions allowed in pyproject.toml
        # (quoted so YAML never type-coerces the version string)
        python-version: ["3.10.15"]
    steps:
      # installs Python
      # one execution of the tests per version listed above
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade wheel
        run: pip install -U wheel
      - name: Print variables to help debug
        uses: hmarr/debug-action@v2
      - name: Load cached Poetry installation
        id: cached-poetry-dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry/virtualenvs
          # cache key also hashes this workflow file so edits here bust the cache
          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
        run: poetry show -v
      - name: Install dependencies
        run: poetry install
        # TODO: investigate why caching layer started failing.
        # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
      - name: Run tox
        run: poetry run tox

View file

@@ -8,6 +8,43 @@ env:
J40_VERSION_LABEL_STRING: ${{ vars.SCORE_VERSION }}
jobs:
code-quality-checks:
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
python-version: ['3.10']
environment: PR
steps:
- name: Checkout source
uses: actions/checkout@v4
- name: Print variables to help debug
uses: hmarr/debug-action@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Load cached Poetry installation
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
- name: Install poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: poetry install
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
- name: Check code is formatted
run: poetry run black --check data_pipeline/
- name: Check code style consistency
run: poetry run flake8 -v data_pipeline/
- name: Run static code analysis
run: poetry run pylint data_pipeline/ --ignore-paths data_pipeline/comparator.py
- name: Check library safety
run: poetry run safety check --ignore 51457 --ignore 44715 --ignore 70612
generate-score-tiles:
runs-on: ubuntu-latest
defaults:
@@ -44,6 +81,9 @@ jobs:
sudo apt-get update
sudo apt-get -y install gdal-bin
ogrinfo --version
- name: Run unit tests
run: |
poetry run pytest data_pipeline/
- name: Cleanup Data
run: |
poetry run python3 -m data_pipeline.application data-cleanup

View file

@@ -2,7 +2,7 @@ name: Pull Request Frontend
on:
pull_request:
paths:
- "client/**/*"
- "client/**"
jobs:
build:
runs-on: ubuntu-latest

View file

@@ -132,7 +132,9 @@ def tile_data_expected():
@pytest.fixture()
def create_tile_score_data_input():
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl")
return pd.read_pickle(
pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl"
)
@pytest.fixture()

View file

@@ -83,7 +83,9 @@ def test_create_score_data(
)
def test_create_tile_data(etl, create_tile_score_data_input, create_tile_data_expected):
def test_create_tile_data(
etl, create_tile_score_data_input, create_tile_data_expected
):
output_tiles_df_actual = etl._create_tile_data(create_tile_score_data_input)
pdt.assert_frame_equal(
output_tiles_df_actual,
@@ -159,7 +161,9 @@ def test_create_tract_search_data(census_geojson_sample_data: gpd.GeoDataFrame):
# Sanity check
assert len(census_geojson_sample_data) > 0
result = PostScoreETL()._create_tract_search_data(census_geojson_sample_data)
result = PostScoreETL()._create_tract_search_data(
census_geojson_sample_data
)
assert isinstance(result, pd.DataFrame)
assert not result.columns.empty
columns = ["GEOID10", "INTPTLAT10", "INTPTLON10"]

View file

@@ -1,27 +0,0 @@
[tox]
# required because we use pyproject.toml
isolated_build = true
envlist = py310, lint, checkdeps, pytest
# only checks python versions installed locally
skip_missing_interpreters = true

[testenv:lint]
deps = pytest
# lints python code in src and tests
commands = black data_pipeline
    flake8 data_pipeline
    pylint data_pipeline

[testenv:checkdeps]
# checks the dependencies for security vulnerabilities and open source licenses
# known issue: https://github.com/pyupio/safety/issues/364
# jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
allowlist_externals = bash
commands = pip install -U wheel
    safety check --ignore 51457 --ignore 44715 --ignore 70612
    bash scripts/run-liccheck.sh

[testenv:pytest]
# Run tests
deps = pytest
commands = pytest --full-trace