mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-10-20 14:13:52 -07:00
Data sources from S3 (#769)
* Started 535 * Data sources from S3 * lint * remove breakpoints * PR comments * lint * census data completed * lint * renaming data source
This commit is contained in:
parent
d1273b63c5
commit
3b04356fb3
10 changed files with 317 additions and 67 deletions
62
.github/workflows/combine-tilefy.yml
vendored
Normal file
62
.github/workflows/combine-tilefy.yml
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
# Manually triggered workflow: runs the data-pipeline scripts and syncs the
# generated dataset and score outputs to the justice40-data S3 bucket.
name: Combine and Tilefy
on:
  workflow_dispatch:
    inputs:
      confirm-action:
        description: This will rebuild the data sources and regenerate the score, are you sure you want to proceed? (Y/n)
        # Quoted: a bare `n` is a boolean under YAML 1.1 implicit typing.
        default: 'n'
        required: true

jobs:
  deploy_data:
    # Honor the confirmation prompt; without this guard the job ran even when
    # the default "n" was left in place. GitHub expression string comparison
    # ignores case, so both "Y" and "y" are accepted.
    if: ${{ github.event.inputs.confirm-action == 'Y' }}
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: data/data-pipeline
    strategy:
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # parsed as the float 3.1).
        python-version: ['3.9']
    steps:
      - name: Checkout source
        uses: actions/checkout@v2
      - name: Print variables to help debug
        uses: hmarr/debug-action@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Setup Poetry
        uses: Gr1N/setup-poetry@v7
      - name: Print poetry version
        run: poetry --version
      - name: Install dependencies
        run: poetry install
      - name: Install GDAL/ogr2ogr
        # -y keeps apt from stopping at a confirmation prompt on the
        # non-interactive runner.
        # NOTE(review): pip installs GDAL outside `poetry run`; confirm the
        # Poetry environment actually sees this package.
        run: |
          sudo apt-add-repository -y ppa:ubuntugis/ubuntugis-unstable
          sudo apt-get update
          sudo apt-get install -y gdal-bin libgdal-dev
          pip install GDAL==3.2.3
      - name: Run Scripts
        run: |
          poetry run download_census
          poetry run etl_and_score
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Deploy to Geoplatform AWS
        # --delete removes remote objects that no longer exist locally.
        run: |
          aws s3 sync ./data_pipeline/data/dataset/ s3://justice40-data/data-pipeline/data/dataset --acl public-read --delete
          aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
          aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
      # NOTE(review): this workflow only runs on workflow_dispatch, which has
      # no pull-request context; confirm this step can find a PR to comment on.
      - name: Update PR with Comment about deployment
        uses: mshick/add-pr-comment@v1
        with:
          message: |
            Data Synced! Find it here: s3://justice40-data/data-pipeline/data/
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          repo-token-user-login: "github-actions[bot]" # The user.login for temporary GitHub tokens
          allow-repeats: false # This is the default
|
59
.github/workflows/generate-census.yml
vendored
Normal file
59
.github/workflows/generate-census.yml
vendored
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Manually triggered workflow: runs the census download step of the data
# pipeline and uploads the resulting census.zip to the justice40-data bucket.
name: Generate Census
on:
  workflow_dispatch:
    inputs:
      confirm-action:
        description: This will rebuild the census data and upload it to S3, are you sure you want to proceed? (Y/n)
        # Quoted: a bare `n` is a boolean under YAML 1.1 implicit typing.
        default: 'n'
        required: true

jobs:
  deploy_data:
    # Honor the confirmation prompt; without this guard the job ran even when
    # the default "n" was left in place. GitHub expression string comparison
    # ignores case, so both "Y" and "y" are accepted.
    if: ${{ github.event.inputs.confirm-action == 'Y' }}
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: data/data-pipeline
    strategy:
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # parsed as the float 3.1).
        python-version: ['3.9']
    steps:
      - name: Checkout source
        uses: actions/checkout@v2
      - name: Print variables to help debug
        uses: hmarr/debug-action@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Setup Poetry
        uses: Gr1N/setup-poetry@v7
      - name: Print poetry version
        run: poetry --version
      - name: Install dependencies
        run: poetry install
      - name: Install GDAL/ogr2ogr
        # -y keeps apt from stopping at a confirmation prompt on the
        # non-interactive runner.
        # NOTE(review): pip installs GDAL outside `poetry run`; confirm the
        # Poetry environment actually sees this package.
        run: |
          sudo apt-add-repository -y ppa:ubuntugis/ubuntugis-unstable
          sudo apt-get update
          sudo apt-get install -y gdal-bin libgdal-dev
          pip install GDAL==3.2.3
      - name: Run Census Script
        run: |
          poetry run python3 data_pipeline/application.py census-data-download -zc
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Upload Census Zip to Geoplatform AWS
        # `aws s3 sync` operates on directories/prefixes, not single files, so
        # a single archive is uploaded with `aws s3 cp` (which has no --delete).
        run: |
          aws s3 cp ./data_pipeline/data/tmp/census.zip s3://justice40-data/data-sources/census.zip --acl public-read
      # NOTE(review): this workflow only runs on workflow_dispatch, which has
      # no pull-request context; confirm this step can find a PR to comment on.
      - name: Update PR with Comment about deployment
        uses: mshick/add-pr-comment@v1
        with:
          # Points at the object the upload step actually writes.
          message: |
            Data Synced! Find it here: s3://justice40-data/data-sources/census.zip
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          repo-token-user-login: 'github-actions[bot]' # The user.login for temporary GitHub tokens
          allow-repeats: false # This is the default
|
12
.github/workflows/generate-score.yml
vendored
12
.github/workflows/generate-score.yml
vendored
|
@ -31,16 +31,9 @@ jobs:
|
|||
run: poetry --version
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
- name: Install GDAL/ogr2ogr
|
||||
run: |
|
||||
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
|
||||
sudo apt-get update
|
||||
sudo apt-get install gdal-bin libgdal-dev
|
||||
pip install GDAL==3.2.3
|
||||
- name: Run Scripts
|
||||
run: |
|
||||
poetry run download_census
|
||||
poetry run etl_and_score
|
||||
poetry run python3 data_pipeline/application.py score_full_run
|
||||
- name: Configure AWS Credentials
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
with:
|
||||
|
@ -49,14 +42,13 @@ jobs:
|
|||
aws-region: us-east-1
|
||||
- name: Deploy to Geoplatform AWS
|
||||
run: |
|
||||
aws s3 sync ./data_pipeline/data/dataset/ s3://justice40-data/data-pipeline/data/dataset --acl public-read --delete
|
||||
aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
|
||||
aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
|
||||
- name: Update PR with Comment about deployment
|
||||
uses: mshick/add-pr-comment@v1
|
||||
with:
|
||||
message: |
|
||||
Data Synced! Find it here: s3://justice40-data/data-pipeline/data/
|
||||
Data Synced! Find it here: s3://justice40-data/data-pipeline/data/score
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
repo-token-user-login: 'github-actions[bot]' # The user.login for temporary GitHub tokens
|
||||
allow-repeats: false # This is the default
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue