ArcGIS zipping (#1391)

* ArcGIS zipping

* lint

* shapefile zip

* removing space in GMT

* adding shapefile to be staging gha
Jorge Escobar 2022-03-09 18:00:20 -05:00 committed by GitHub
commit 7f91e2b06b
4 changed files with 17 additions and 5 deletions

@@ -58,7 +58,7 @@ jobs:
             - Download Zip Packet: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable/Screening_Tool_Data.zip
           repo-token: ${{ secrets.GITHUB_TOKEN }}
           repo-token-user-login: "github-actions[bot]"
           allow-repeats: false
       - name: Install GDAL/ogr2ogr
         run: |
           sudo add-apt-repository ppa:ubuntugis/ppa
@@ -83,7 +83,7 @@ jobs:
           tippecanoe -v
       - name: Generate Score Geo
         run: |
           poetry run python3 data_pipeline/application.py geo-score
       - name: Generate Tiles
         run: |
           poetry run python3 data_pipeline/application.py generate-map-tiles
@@ -92,6 +92,7 @@ jobs:
           aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read
           aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read
           aws s3 cp ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --acl public-read
+          aws s3 cp ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --acl public-read
           aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --acl public-read
       - name: Update PR with deployed Map URL
         uses: mshick/add-pr-comment@v1

@@ -83,7 +83,7 @@ Once we have all the data from the previous stages, we convert it to tiles to ma
 #### 5. Shapefiles
-If you want to use the shapefiles in mapping applications, you can access them here [shp](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shp) and [shx](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shx).
+If you want to use the shapefiles in mapping applications, you can access them [here](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.zip).

 ### Score generation and comparison workflow
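
The README change points consumers at a single zipped bundle instead of separate .shp/.shx links. As a rough sketch of how that bundle can be consumed, assuming a recent geopandas/fiona build where GDAL's virtual filesystems handle remote zips:

```python
import geopandas as gpd

# fiona/GDAL can open a zipped shapefile straight from a URL via the
# zip+https:// scheme, so no manual download-and-extract step is needed
url = "zip+https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.zip"
gdf = gpd.read_file(url)
print(gdf.head())
```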

@@ -45,7 +45,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
 # Downloadable paths
 current_dt = datetime.datetime.now()
-timestamp_str = current_dt.strftime("%Y-%m-%d-%H%M")
+timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT")
 SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
 SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
 SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
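
Because strftime copies literal characters through unchanged, the new pattern simply appends GMT to the compact timestamp (the "removing space in GMT" commit dropped the space before it). A small illustration; the UTC-aware datetime is an assumption for the example, since the constant above uses the naive datetime.datetime.now():

```python
import datetime

# "%H%M" is 24-hour time with no separator; the trailing "GMT" is literal text
now = datetime.datetime.now(datetime.timezone.utc)
print(now.strftime("%Y-%m-%d-%H%MGMT"))  # e.g. 2022-03-09-2300GMT
```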

@@ -1,5 +1,6 @@
 import concurrent.futures
 import math
+import os
 import numpy as np
 import pandas as pd
 import geopandas as gpd
@@ -11,7 +12,7 @@ from data_pipeline.etl.sources.census.etl_utils import (
 )
 from data_pipeline.etl.score.etl_utils import check_score_data_source
 from data_pipeline.score import field_names
-from data_pipeline.utils import get_module_logger
+from data_pipeline.utils import get_module_logger, zip_files

 logger = get_module_logger(__name__)
@@ -298,11 +299,21 @@ class GeoScoreETL(ExtractTransformLoad):
             # kept as strings because no downstream impacts
             columns={0: "column", "index": "meaning"}
         ).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
         self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
             self.SCORE_SHP_FILE
         )
         logger.info("Completed writing shapefile")
+
+        arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip"
+        arcgis_files = []
+        for file in os.listdir(self.SCORE_SHP_PATH):
+            # don't remove __init__ files as they conserve dir structure
+            if file != "__init__.py":
+                arcgis_files.append(self.SCORE_SHP_PATH / file)
+        zip_files(arcgis_zip_file_path, arcgis_files)
+        logger.info("Completed zipping shapefiles")
+
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = {
                 executor.submit(task)
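
The zip_files helper comes from data_pipeline.utils, whose implementation is not part of this diff. A minimal sketch of what such a helper plausibly looks like, with the signature inferred from the call site above (treat the details as assumptions):

```python
import zipfile
from pathlib import Path
from typing import List

def zip_files(zip_file_path: Path, files_to_compress: List[Path]) -> None:
    """Hypothetical stand-in for data_pipeline.utils.zip_files."""
    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for path in files_to_compress:
            # store each file under its bare name so the archive unpacks flat,
            # the layout ArcGIS expects for a shapefile bundle
            zipf.write(path, arcname=path.name)
```

Note that the loop in the diff lists SCORE_SHP_PATH before the archive is created, so the freshly written usa.zip cannot end up inside itself.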