ArcGIS zipping (#1391)

* ArcGIS zipping

* lint

* shapefile zip

* removing space in GMT

* adding shapefile to staging GHA workflow
This commit is contained in:
Jorge Escobar 2022-03-09 18:00:20 -05:00 committed by GitHub
commit 7f91e2b06b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 5 deletions

View file

@ -58,7 +58,7 @@ jobs:
- Download Zip Packet: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable/Screening_Tool_Data.zip
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: "github-actions[bot]"
allow-repeats: false
allow-repeats: false
- name: Install GDAL/ogr2ogr
run: |
sudo add-apt-repository ppa:ubuntugis/ppa
@ -83,7 +83,7 @@ jobs:
tippecanoe -v
- name: Generate Score Geo
run: |
poetry run python3 data_pipeline/application.py geo-score
poetry run python3 data_pipeline/application.py geo-score
- name: Generate Tiles
run: |
poetry run python3 data_pipeline/application.py generate-map-tiles
@ -92,6 +92,7 @@ jobs:
aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --acl public-read
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --acl public-read
- name: Update PR with deployed Map URL
uses: mshick/add-pr-comment@v1

View file

@ -83,7 +83,7 @@ Once we have all the data from the previous stages, we convert it to tiles to ma
#### 5. Shapefiles
If you want to use the shapefiles in mapping applications, you can access them here [shp](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shp) and [shx](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shx).
If you want to use the shapefiles in mapping applications, you can access them [here](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.zip).
### Score generation and comparison workflow

View file

@ -45,7 +45,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
# Downloadable paths
current_dt = datetime.datetime.now()
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%M")
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT")
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME

View file

@ -1,5 +1,6 @@
import concurrent.futures
import math
import os
import numpy as np
import pandas as pd
import geopandas as gpd
@ -11,7 +12,7 @@ from data_pipeline.etl.sources.census.etl_utils import (
)
from data_pipeline.etl.score.etl_utils import check_score_data_source
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import get_module_logger, zip_files
logger = get_module_logger(__name__)
@ -298,11 +299,21 @@ class GeoScoreETL(ExtractTransformLoad):
# kept as strings because no downstream impacts
columns={0: "column", "index": "meaning"}
).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
self.SCORE_SHP_FILE
)
logger.info("Completed writing shapefile")
arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip"
arcgis_files = []
for file in os.listdir(self.SCORE_SHP_PATH):
        # exclude __init__ files from the zip; they exist only to preserve dir structure
if file != "__init__.py":
arcgis_files.append(self.SCORE_SHP_PATH / file)
zip_files(arcgis_zip_file_path, arcgis_files)
logger.info("Completed zipping shapefiles")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(task)