From 5f6ed5d90fad530f7719ae9dc06cd25230d150b3 Mon Sep 17 00:00:00 2001
From: Chris Alfano
Date: Fri, 10 Jan 2025 11:39:13 -0500
Subject: [PATCH] Cache ETL data in GitHub Backend PR Action

---
Reviewer notes (this section is not part of the commit message):

* deploy_backend_main.yml: the trigger changes from `workflow_dispatch` to
  pushes on `main` that touch `data/**` or the workflow file itself.
* deploy_backend_main.yml: "Cleanup Data" now runs before the new
  "Cache Census Data" step; the GDAL install and the census download only
  run when that cache step reports no hit.
* NOTE(review): the census cache key is the constant `data-census`, so
  nothing in the key varies with the census inputs. Confirm this is intended.
* NOTE(review): GDAL is skipped on a cache hit in the deploy workflow.
  Steps after "Run ETL" are outside this patch; confirm none need ogr2ogr.
* pr_backend.yml: CENSUS_API_KEY is dropped from the workflow env, both
  Poetry cache keys gain a `cejst-poetry-env-` prefix, and safety ID 74439
  is added to the ignore list.
* pr_backend.yml: "Cleanup Data" is replaced by a cache of the census and
  dataset directories, keyed on runner OS, Python version, the files under
  `etl/` and `utils.py`; census download and ETL run only on a cache miss.

 .github/workflows/deploy_backend_main.yml | 21 +++++++++++++++++----
 .github/workflows/pr_backend.yml          | 20 +++++++++++++-------
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/deploy_backend_main.yml b/.github/workflows/deploy_backend_main.yml
index eed89e39..73f761d9 100644
--- a/.github/workflows/deploy_backend_main.yml
+++ b/.github/workflows/deploy_backend_main.yml
@@ -1,5 +1,10 @@
 name: Deploy Backend Main
-on: workflow_dispatch
+on:
+  push:
+    branches: [main]
+    paths:
+      - "data/**"
+      - ".github/workflows/deploy_backend_main.yml"
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
@@ -45,15 +50,23 @@ jobs:
           aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
           aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
           aws-region: us-east-1
+      - name: Cleanup Data
+        run: |
+          poetry run python3 -m data_pipeline.application data-cleanup
+      - name: Cache Census Data
+        id: cache-census
+        uses: actions/cache@v4
+        with:
+          path: data/data-pipeline/data_pipeline/data/census
+          key: data-census
       - name: Install GDAL/ogr2ogr
+        if: steps.cache-census.outputs.cache-hit != 'true'
         run: |
           sudo apt-get update
           sudo apt-get -y install gdal-bin
           ogrinfo --version
-      - name: Cleanup Data
-        run: |
-          poetry run python3 -m data_pipeline.application data-cleanup
       - name: Get Census Data
+        if: steps.cache-census.outputs.cache-hit != 'true'
         run: |
           poetry run python3 -m data_pipeline.application census-data-download
       - name: Run ETL
diff --git a/.github/workflows/pr_backend.yml b/.github/workflows/pr_backend.yml
index 8cf26fdc..3514f37b 100644
--- a/.github/workflows/pr_backend.yml
+++ b/.github/workflows/pr_backend.yml
@@ -6,7 +6,6 @@ concurrency:
   cancel-in-progress: true
 env:
   python-version: '3.10'
-  CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
   J40_VERSION_LABEL_STRING: ${{ vars.SCORE_VERSION }}
 jobs:
   # JOB to run change detection
@@ -51,7 +50,7 @@ jobs:
         uses: actions/cache@v4
         with:
           path: ~/.cache/pypoetry/virtualenvs
-          key: env-${{ runner.os }}-${{ env.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
+          key: cejst-poetry-env-${{ runner.os }}-${{ env.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
       - name: Install poetry
         uses: snok/install-poetry@v1
       - name: Install dependencies
@@ -64,7 +63,7 @@ jobs:
       - name: Run static code analysis
         run: poetry run pylint data_pipeline/
       - name: Check library safety
-        run: poetry run safety check --ignore 51457 --ignore 44715 --ignore 70612
+        run: poetry run safety check --ignore 51457 --ignore 44715 --ignore 70612 --ignore 74439
       - name: Run unit tests
         run: |
           poetry run pytest data_pipeline/
@@ -91,7 +90,7 @@ jobs:
         uses: actions/cache@v4
         with:
           path: ~/.cache/pypoetry/virtualenvs
-          key: env-${{ runner.os }}-${{ env.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
+          key: cejst-poetry-env-${{ runner.os }}-${{ env.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
       - name: Install poetry
         uses: snok/install-poetry@v1
       - name: Print Poetry settings
@@ -104,13 +103,20 @@ jobs:
           sudo apt-get update
           sudo apt-get -y install gdal-bin
           ogrinfo --version
-      - name: Cleanup Data
-        run: |
-          poetry run python3 -m data_pipeline.application data-cleanup
+      - name: Load cached ETL data
+        id: cached-etl-data
+        uses: actions/cache@v4
+        with:
+          path: |
+            data/data-pipeline/data_pipeline/data/census
+            data/data-pipeline/data_pipeline/data/dataset
+          key: cejst-dataset-env-${{ runner.os }}-${{ env.python-version }}-${{ hashFiles('data/data-pipeline/data_pipeline/etl/**/*') }}-${{ hashFiles('data/data-pipeline/data_pipeline/utils.py') }}
       - name: Get Census Data
+        if: steps.cached-etl-data.outputs.cache-hit != 'true'
         run: |
           poetry run python3 -m data_pipeline.application census-data-download
       - name: Run ETL
+        if: steps.cached-etl-data.outputs.cache-hit != 'true'
         run: |
           poetry run python3 -m data_pipeline.application etl-run
           poetry run python3 -m data_pipeline.application etl-run --dataset tribal