Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-07-28 14:11:17 -07:00.
ArcGIS zipping (#1391)
* ArcGIS zipping
* lint
* shapefile zip
* removing space in GMT
* adding shapefile to BE staging GHA
This commit is contained in:
parent
1730572aa6
commit
7f91e2b06b
4 changed files with 17 additions and 5 deletions
5
.github/workflows/deploy_be_staging.yml
vendored
5
.github/workflows/deploy_be_staging.yml
vendored
|
@ -58,7 +58,7 @@ jobs:
|
|||
- Download Zip Packet: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable/Screening_Tool_Data.zip
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
repo-token-user-login: "github-actions[bot]"
|
||||
allow-repeats: false
|
||||
allow-repeats: false
|
||||
- name: Install GDAL/ogr2ogr
|
||||
run: |
|
||||
sudo add-apt-repository ppa:ubuntugis/ppa
|
||||
|
@ -83,7 +83,7 @@ jobs:
|
|||
tippecanoe -v
|
||||
- name: Generate Score Geo
|
||||
run: |
|
||||
poetry run python3 data_pipeline/application.py geo-score
|
||||
poetry run python3 data_pipeline/application.py geo-score
|
||||
- name: Generate Tiles
|
||||
run: |
|
||||
poetry run python3 data_pipeline/application.py generate-map-tiles
|
||||
|
@ -92,6 +92,7 @@ jobs:
|
|||
aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --acl public-read
|
||||
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --acl public-read
|
||||
- name: Update PR with deployed Map URL
|
||||
uses: mshick/add-pr-comment@v1
|
||||
|
|
|
@ -83,7 +83,7 @@ Once we have all the data from the previous stages, we convert it to tiles to ma
|
|||
|
||||
#### 5. Shapefiles
|
||||
|
||||
If you want to use the shapefiles in mapping applications, you can access them here [shp](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shp) and [shx](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shx).
|
||||
If you want to use the shapefiles in mapping applications, you can access them [here](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.zip).
|
||||
|
||||
|
||||
### Score generation and comparison workflow
|
||||
|
|
|
@ -45,7 +45,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
|
|||
|
||||
# Downloadable paths
|
||||
current_dt = datetime.datetime.now()
|
||||
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%M")
|
||||
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT")
|
||||
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import concurrent.futures
|
||||
import math
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
|
@ -11,7 +12,7 @@ from data_pipeline.etl.sources.census.etl_utils import (
|
|||
)
|
||||
from data_pipeline.etl.score.etl_utils import check_score_data_source
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.utils import get_module_logger
|
||||
from data_pipeline.utils import get_module_logger, zip_files
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
@ -298,11 +299,21 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
# kept as strings because no downstream impacts
|
||||
columns={0: "column", "index": "meaning"}
|
||||
).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
|
||||
|
||||
self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
|
||||
self.SCORE_SHP_FILE
|
||||
)
|
||||
logger.info("Completed writing shapefile")
|
||||
|
||||
arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip"
|
||||
arcgis_files = []
|
||||
for file in os.listdir(self.SCORE_SHP_PATH):
|
||||
# don't remove __init__ files as they conserve dir structure
|
||||
if file != "__init__.py":
|
||||
arcgis_files.append(self.SCORE_SHP_PATH / file)
|
||||
zip_files(arcgis_zip_file_path, arcgis_files)
|
||||
logger.info("Completed zipping shapefiles")
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
executor.submit(task)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue