diff --git a/.github/workflows/deploy_be_staging.yml b/.github/workflows/deploy_be_staging.yml index f84e1fd2..ca739fe0 100644 --- a/.github/workflows/deploy_be_staging.yml +++ b/.github/workflows/deploy_be_staging.yml @@ -58,7 +58,7 @@ jobs: - Download Zip Packet: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable/Screening_Tool_Data.zip repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token-user-login: "github-actions[bot]" - allow-repeats: false + allow-repeats: false - name: Install GDAL/ogr2ogr run: | sudo add-apt-repository ppa:ubuntugis/ppa @@ -83,7 +83,7 @@ jobs: tippecanoe -v - name: Generate Score Geo run: | - poetry run python3 data_pipeline/application.py geo-score + poetry run python3 data_pipeline/application.py geo-score - name: Generate Tiles run: | poetry run python3 data_pipeline/application.py generate-map-tiles @@ -92,6 +92,7 @@ jobs: aws s3 cp ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --acl public-read aws s3 cp ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --acl public-read aws s3 cp ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/geojson --recursive --acl public-read + aws s3 cp ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/shapefile --recursive --acl public-read aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles --recursive --acl public-read - name: Update PR with deployed Map URL uses: mshick/add-pr-comment@v1 diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index a4e60e5d..66051aa4 100644 
--- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -83,7 +83,7 @@ Once we have all the data from the previous stages, we convert it to tiles to ma #### 5. Shapefiles -If you want to use the shapefiles in mapping applications, you can access them here [shp](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shp) and [shx](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.shx). +If you want to use the shapefiles in mapping applications, you can access them [here](https://justice40-data.s3.amazonaws.com/data-pipeline/data/score/shapefile/usa.zip). ### Score generation and comparison workflow diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 9a46dbe0..a0df3c82 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -45,7 +45,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles" # Downloadable paths current_dt = datetime.datetime.now() -timestamp_str = current_dt.strftime("%Y-%m-%d-%H%M") +timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT") SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable" SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf" SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py index b6e0529d..d085f58f 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py @@ -1,5 +1,6 @@ import concurrent.futures import math +import os import numpy as np import pandas as pd import geopandas as gpd @@ -11,7 +12,7 @@ from data_pipeline.etl.sources.census.etl_utils import ( ) from data_pipeline.etl.score.etl_utils import check_score_data_source from data_pipeline.score 
import field_names -from data_pipeline.utils import get_module_logger +from data_pipeline.utils import get_module_logger, zip_files logger = get_module_logger(__name__) @@ -298,11 +299,21 @@ class GeoScoreETL(ExtractTransformLoad): # kept as strings because no downstream impacts columns={0: "column", "index": "meaning"} ).to_csv(self.SCORE_SHP_CODE_CSV, index=False) + self.geojson_score_usa_high.rename(columns=renaming_map).to_file( self.SCORE_SHP_FILE ) logger.info("Completed writing shapefile") + arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip" + arcgis_files = [] + for file in os.listdir(self.SCORE_SHP_PATH): + # exclude __init__.py from the zip; it exists only to preserve dir structure + if file != "__init__.py": + arcgis_files.append(self.SCORE_SHP_PATH / file) + zip_files(arcgis_zip_file_path, arcgis_files) + logger.info("Completed zipping shapefiles") + with concurrent.futures.ThreadPoolExecutor() as executor: futures = { executor.submit(task)