Data sources from S3 (#769)

* Started 535

* Data sources from S3

* lint

* remove breakpoints

* PR comments

* lint

* census data completed

* lint

* renaming data source
Commit 3b04356fb3 by Jorge Escobar, 2021-10-13 16:00:33 -04:00, committed by GitHub
10 changed files with 317 additions and 67 deletions

.github/workflows/combine-tilefy.yml (new file, 62 lines added)

@@ -0,0 +1,62 @@
name: Combine and Tilefy
on:
workflow_dispatch:
inputs:
confirm-action:
description: This will rebuild the data sources and regenerate the score, are you sure you want to proceed? (Y/n)
default: n
required: true
jobs:
deploy_data:
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
python-version: [3.9]
steps:
- name: Checkout source
uses: actions/checkout@v2
- name: Print variables to help debug
uses: hmarr/debug-action@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Setup Poetry
uses: Gr1N/setup-poetry@v7
- name: Print poetry version
run: poetry --version
- name: Install dependencies
run: poetry install
- name: Install GDAL/ogr2ogr
run: |
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-get update
sudo apt-get install gdal-bin libgdal-dev
pip install GDAL==3.2.3
- name: Run Scripts
run: |
poetry run download_census
poetry run etl_and_score
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Deploy to Geoplatform AWS
run: |
aws s3 sync ./data_pipeline/data/dataset/ s3://justice40-data/data-pipeline/data/dataset --acl public-read --delete
aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
- name: Update PR with Comment about deployment
uses: mshick/add-pr-comment@v1
with:
message: |
Data Synced! Find it here: s3://justice40-data/data-pipeline/data/
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: "github-actions[bot]" # The user.login for temporary GitHub tokens
allow-repeats: false # This is the default
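
The combine-tilefy workflow above is manual-only: it is triggered through workflow_dispatch and asks for a confirm-action input before rebuilding the data and syncing it to S3. Purely as an illustration of how such a run could be started programmatically, the Python sketch below calls the GitHub REST API's workflow-dispatch endpoint; the repository slug, branch name, and token variable are assumptions and are not part of this diff.

import os
import requests

# Sketch: trigger the "Combine and Tilefy" workflow through the GitHub REST API.
# The repository slug and branch are assumed; the workflow file name comes from this diff.
OWNER_REPO = "usds/justice40-tool"   # assumption
WORKFLOW_FILE = "combine-tilefy.yml"

response = requests.post(
    f"https://api.github.com/repos/{OWNER_REPO}/actions/workflows/{WORKFLOW_FILE}/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"token {os.environ['GITHUB_TOKEN']}",
    },
    json={
        "ref": "main",                       # assumed branch
        "inputs": {"confirm-action": "Y"},   # matches the input defined above
    },
)
response.raise_for_status()  # the API returns 204 No Content on success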

.github/workflows/generate-census.yml (new file, 59 lines added)

@@ -0,0 +1,59 @@
name: Generate Census
on:
workflow_dispatch:
inputs:
confirm-action:
description: This will rebuild the census data and upload it to S3, are you sure you want to proceed? (Y/n)
default: n
required: true
jobs:
deploy_data:
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
python-version: [3.9]
steps:
- name: Checkout source
uses: actions/checkout@v2
- name: Print variables to help debug
uses: hmarr/debug-action@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Setup Poetry
uses: Gr1N/setup-poetry@v7
- name: Print poetry version
run: poetry --version
- name: Install dependencies
run: poetry install
- name: Install GDAL/ogr2ogr
run: |
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-get update
sudo apt-get install gdal-bin libgdal-dev
pip install GDAL==3.2.3
- name: Run Census Script
run: |
poetry run python3 data_pipeline/application.py census-data-download -zc
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Upload Census Zip to Geoplatform AWS
run: |
aws s3 sync ./data_pipeline/data/tmp/census.zip s3://justice40-data/data-sources/census.zip --acl public-read --delete
- name: Update PR with Comment about deployment
uses: mshick/add-pr-comment@v1
with:
message: |
Data Synced! Find it here: s3://justice40-data/data-pipeline/data/
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: 'github-actions[bot]' # The user.login for temporary GitHub tokens
allow-repeats: false # This is the default
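
In the generate-census workflow above, the census-data-download -zc step produces data_pipeline/data/tmp/census.zip, which the final step copies to the justice40-data bucket with a public-read ACL. The workflow itself uses the aws CLI for that upload; as a sketch of the equivalent call in Python, a boto3 version might look like the following. The helper function is hypothetical, and credentials are assumed to come from the environment, as the configure-aws-credentials step would provide them.

import boto3

def upload_census_zip(
    local_path: str = "data_pipeline/data/tmp/census.zip",
    bucket: str = "justice40-data",
    key: str = "data-sources/census.zip",
) -> None:
    """Upload the zipped census data to S3 (hypothetical helper mirroring the workflow step)."""
    s3 = boto3.client("s3")
    # The ACL mirrors the --acl public-read flag used in the workflow
    s3.upload_file(local_path, bucket, key, ExtraArgs={"ACL": "public-read"})

if __name__ == "__main__":
    upload_census_zip()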

(modified workflow file, name not shown in this view)

@@ -31,16 +31,9 @@ jobs:
run: poetry --version
- name: Install dependencies
run: poetry install
- name: Install GDAL/ogr2ogr
run: |
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-get update
sudo apt-get install gdal-bin libgdal-dev
pip install GDAL==3.2.3
- name: Run Scripts
run: |
poetry run download_census
poetry run etl_and_score
poetry run python3 data_pipeline/application.py score_full_run
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
@@ -49,14 +42,13 @@ jobs:
aws-region: us-east-1
- name: Deploy to Geoplatform AWS
run: |
aws s3 sync ./data_pipeline/data/dataset/ s3://justice40-data/data-pipeline/data/dataset --acl public-read --delete
aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
- name: Update PR with Comment about deployment
uses: mshick/add-pr-comment@v1
with:
message: |
Data Synced! Find it here: s3://justice40-data/data-pipeline/data/
Data Synced! Find it here: s3://justice40-data/data-pipeline/data/score
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: 'github-actions[bot]' # The user.login for temporary GitHub tokens
allow-repeats: false # This is the default
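
Taken together, this last diff removes the GDAL install and the census download/ETL steps from the existing score workflow and replaces them with a single score_full_run invocation, so the score job is expected to pull already-prepared source data (such as the census.zip published by generate-census.yml) from S3 instead of rebuilding it. A minimal sketch of that fetch-and-extract step is below; the public HTTPS URL form, the destination directory, and the helper name are assumptions rather than the project's actual code.

import io
import zipfile
from pathlib import Path

import requests

# Standard public-S3 URL form for the bucket/key uploaded by generate-census.yml (assumption)
CENSUS_ZIP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/census.zip"

def fetch_census_data(destination: str = "data_pipeline/data/census") -> None:
    """Download the prebuilt census.zip from S3 and unpack it (hypothetical helper)."""
    response = requests.get(CENSUS_ZIP_URL, timeout=120)
    response.raise_for_status()
    Path(destination).mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall(destination)

if __name__ == "__main__":
    fetch_census_data()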