Mirror of https://github.com/DOI-DO/j40-cejst-2.git (last synced 2025-10-20 13:13:52 -07:00)
S3 Parallel Upload and Deletions (#1410)
* installation step
* trigger action
* installing to home dir
* dry-run
* pyenv
* py 2.8
* trying s4cmd
* removing pyenv
* poetry s4cmd
* num-threads
* public read
* poetry cache
* s4cmd all around
* poetry cache
* poetry cache
* install poetry packages
* poetry echo
* let's do this
* s4cmd install on run
* s4cmd
* add aws back
* add aws back
* testing census api key and poetry caching
* census api key
* census api
* census api key #3
* 250
* poetry update
* poetry change
* check census api key
* force flag
* update score gen and tilefy; remove cached fips
* small gdal update
* invalidation
* missing cache ids
This commit is contained in:
parent
e31a4f3b94
commit
7b05ee9c76
8 changed files with 307 additions and 197 deletions
37
.github/workflows/combine-tilefy.yml
vendored
37
.github/workflows/combine-tilefy.yml
vendored
|
@ -6,7 +6,8 @@ on:
|
|||
description: This will rebuild the data sources and regenerate the score, are you sure you want to proceed? (Y/n)
|
||||
default: n
|
||||
required: true
|
||||
|
||||
env:
|
||||
BE_CDN_ID: E1324VDMNCO97N
|
||||
jobs:
|
||||
deploy_data:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -25,15 +26,23 @@ jobs:
|
|||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Setup Poetry
|
||||
uses: Gr1N/setup-poetry@v7
|
||||
- name: Print poetry version
|
||||
run: poetry --version
|
||||
- name: Load cached Poetry installation
|
||||
uses: actions/cache@v2
|
||||
id: cached-poetry-dependencies
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/combine-tilefy.yml') }}
|
||||
- name: Install poetry
|
||||
uses: snok/install-poetry@v1
|
||||
- name: Print Poetry settings
|
||||
run: poetry show -v
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
run: poetry add s4cmd && poetry install
|
||||
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
- name: Install GDAL/ogr2ogr
|
||||
run: |
|
||||
sudo add-apt-repository ppa:ubuntugis/ppa
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install gdal-bin
|
||||
ogrinfo --version
|
||||
- name: Set timezone for tippecanoe
|
||||
|
@ -65,7 +74,15 @@ jobs:
|
|||
aws-region: us-east-1
|
||||
- name: Deploy to Geoplatform AWS
|
||||
run: |
|
||||
aws s3 rm s3://justice40-data/data-pipeline/data/score/tiles --recursive
|
||||
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline/data/score/tiles --recursive --acl public-read
|
||||
aws s3 sync ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline/data/score/geojson --acl public-read --delete
|
||||
aws s3 sync ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline/data/score/shapefile --acl public-read --delete
|
||||
poetry run s4cmd del s3://justice40-data/data-pipeline/data/score/tiles --recursive --num-threads=250
|
||||
poetry run s4cmd put ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --force --API-ACL=public-read --num-threads=250
|
||||
poetry run s4cmd put ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --force --API-ACL=public-read
|
||||
poetry run s4cmd put ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --force --API-ACL=public-read --num-threads=250
|
||||
- name: Invalidate cache on AWS CDNs
|
||||
uses: chetan/invalidate-cloudfront-action@master
|
||||
env:
|
||||
DISTRIBUTION: ${{env.BE_CDN_ID}}
|
||||
PATHS: "/*"
|
||||
AWS_REGION: "us-east-1"
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
|
|
31
.github/workflows/deploy_be_staging.yml
vendored
31
.github/workflows/deploy_be_staging.yml
vendored
|
@ -7,6 +7,7 @@ on:
|
|||
env:
|
||||
PR_NUMBER: ${{github.event.pull_request.number}}
|
||||
SHA_NUMBER: ${{github.event.pull_request.head.sha}}
|
||||
CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
|
||||
jobs:
|
||||
generate-score-tiles:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -25,12 +26,19 @@ jobs:
|
|||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Setup Poetry
|
||||
uses: Gr1N/setup-poetry@v7
|
||||
- name: Print poetry version
|
||||
run: poetry --version
|
||||
- name: Load cached Poetry installation
|
||||
id: cached-poetry-dependencies
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_be_staging.yml') }}
|
||||
- name: Install poetry
|
||||
uses: snok/install-poetry@v1
|
||||
- name: Print Poetry settings
|
||||
run: poetry show -v
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
run: poetry add s4cmd && poetry install
|
||||
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
- name: Configure AWS Credentials
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
with:
|
||||
|
@ -45,8 +53,8 @@ jobs:
|
|||
poetry run python3 data_pipeline/application.py generate-score-post -s aws
|
||||
- name: Deploy Score to Geoplatform AWS
|
||||
run: |
|
||||
aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read
|
||||
poetry run s4cmd put ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --force --API-ACL=public-read
|
||||
poetry run s4cmd put ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --force --API-ACL=public-read
|
||||
- name: Update PR with deployed Score URLs
|
||||
uses: mshick/add-pr-comment@v1
|
||||
with:
|
||||
|
@ -62,6 +70,7 @@ jobs:
|
|||
- name: Install GDAL/ogr2ogr
|
||||
run: |
|
||||
sudo add-apt-repository ppa:ubuntugis/ppa
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install gdal-bin
|
||||
ogrinfo --version
|
||||
- name: Set timezone for tippecanoe
|
||||
|
@ -89,11 +98,9 @@ jobs:
|
|||
poetry run python3 data_pipeline/application.py generate-map-tiles
|
||||
- name: Deploy Map to Geoplatform AWS
|
||||
run: |
|
||||
aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --acl public-read
|
||||
poetry run s4cmd put ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --force --API-ACL=public-read --num-threads=250
|
||||
poetry run s4cmd put ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --force --API-ACL=public-read
|
||||
poetry run s4cmd put ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --force --API-ACL=public-read --num-threads=250
|
||||
- name: Update PR with deployed Map URL
|
||||
uses: mshick/add-pr-comment@v1
|
||||
with:
|
||||
|
|
23
.github/workflows/generate-score.yml
vendored
23
.github/workflows/generate-score.yml
vendored
|
@ -1,5 +1,5 @@
|
|||
name: Generate Score
|
||||
on:
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
confirm-action:
|
||||
|
@ -25,12 +25,19 @@ jobs:
|
|||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Setup Poetry
|
||||
uses: Gr1N/setup-poetry@v7
|
||||
- name: Print poetry version
|
||||
run: poetry --version
|
||||
- name: Load cached Poetry installation
|
||||
id: cached-poetry-dependencies
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/generate-score.yml') }}
|
||||
- name: Install poetry
|
||||
uses: snok/install-poetry@v1
|
||||
- name: Print Poetry settings
|
||||
run: poetry show -v
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
- name: Configure AWS Credentials
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
with:
|
||||
|
@ -39,14 +46,14 @@ jobs:
|
|||
aws-region: us-east-1
|
||||
- name: Generate Score
|
||||
run: |
|
||||
poetry run python3 data_pipeline/application.py score-full-run
|
||||
poetry run python3 data_pipeline/application.py score-full-run
|
||||
- name: Upload Score to AWS
|
||||
run: |
|
||||
aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
|
||||
- name: Generate Score Post
|
||||
run: |
|
||||
poetry run python3 data_pipeline/application.py generate-score-post -s aws
|
||||
poetry run python3 data_pipeline/application.py generate-score-post -s aws
|
||||
- name: Upload Score Post to AWS
|
||||
run: |
|
||||
aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
|
||||
aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
|
||||
aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue