Data Pipeline performance improvements for Census GeoJSON and Score file

This commit is contained in:
Carlos Felix 2025-01-13 09:28:14 -05:00 committed by Carlos Felix
commit c32bd1f363
37 changed files with 1305 additions and 1413 deletions

View file

@@ -59,12 +59,6 @@ jobs:
with:
path: data/data-pipeline/data_pipeline/data/census
key: data-census
- name: Install GDAL/ogr2ogr
if: steps.cache-census.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get -y install gdal-bin
ogrinfo --version
- name: Get Census Data
if: steps.cache-census.outputs.cache-hit != 'true'
run: |
@@ -72,7 +66,6 @@ jobs:
- name: Run ETL
run: |
poetry run python3 -m data_pipeline.application etl-run
poetry run python3 -m data_pipeline.application etl-run --dataset tribal
- name: Generate Score
run: |
poetry run python3 -m data_pipeline.application score-run

View file

@@ -98,11 +98,6 @@ jobs:
- name: Install dependencies
run: poetry add s4cmd && poetry install
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
- name: Install GDAL/ogr2ogr
run: |
sudo apt-get update
sudo apt-get -y install gdal-bin
ogrinfo --version
- name: Load cached ETL data
id: cached-etl-data
uses: actions/cache@v4
@@ -119,7 +114,6 @@ jobs:
if: steps.cached-etl-data.outputs.cache-hit != 'true'
run: |
poetry run python3 -m data_pipeline.application etl-run
poetry run python3 -m data_pipeline.application etl-run --dataset tribal
- name: Generate Score
run: |
poetry run python3 -m data_pipeline.application score-run