ArcGIS zipping (#1391)

* ArcGIS zipping

* lint

* shapefile zip

* removing space in GMT

* adding shapefile to staging GHA workflow
This commit is contained in:
Jorge Escobar 2022-03-09 18:00:20 -05:00 committed by GitHub
commit 7f91e2b06b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 5 deletions

View file

@ -58,7 +58,7 @@ jobs:
- Download Zip Packet: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable/Screening_Tool_Data.zip
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: "github-actions[bot]"
allow-repeats: false
allow-repeats: false
- name: Install GDAL/ogr2ogr
run: |
sudo add-apt-repository ppa:ubuntugis/ppa
@ -83,7 +83,7 @@ jobs:
tippecanoe -v
- name: Generate Score Geo
run: |
poetry run python3 data_pipeline/application.py geo-score
poetry run python3 data_pipeline/application.py geo-score
- name: Generate Tiles
run: |
poetry run python3 data_pipeline/application.py generate-map-tiles
@ -92,6 +92,7 @@ jobs:
aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --acl public-read
- name: Update PR with deployed Map URL
uses: mshick/add-pr-comment@v1

View file

@ -83,7 +83,7 @@ Once we have all the data from the previous stages, we convert it to tiles to ma
#### 5. Shapefiles
If you want to use the shapefiles in mapping applications, you can access them here [shp](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shp) and [shx](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shx).
If you want to use the shapefiles in mapping applications, you can access them [here](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.zip).
### Score generation and comparison workflow

View file

@ -45,7 +45,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
# Downloadable paths
current_dt = datetime.datetime.now()
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%M")
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT")
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME

View file

@ -1,5 +1,6 @@
import concurrent.futures
import math
import os
import numpy as np
import pandas as pd
import geopandas as gpd
@ -11,7 +12,7 @@ from data_pipeline.etl.sources.census.etl_utils import (
)
from data_pipeline.etl.score.etl_utils import check_score_data_source
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import get_module_logger, zip_files
logger = get_module_logger(__name__)
@ -298,11 +299,21 @@ class GeoScoreETL(ExtractTransformLoad):
# kept as strings because no downstream impacts
columns={0: "column", "index": "meaning"}
).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
self.SCORE_SHP_FILE
)
logger.info("Completed writing shapefile")
arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip"
arcgis_files = []
for file in os.listdir(self.SCORE_SHP_PATH):
        # exclude __init__ files from the zip; they exist only to preserve dir structure
if file != "__init__.py":
arcgis_files.append(self.SCORE_SHP_PATH / file)
zip_files(arcgis_zip_file_path, arcgis_files)
logger.info("Completed zipping shapefiles")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(task)