From 9ba4e790a7e916ea44fb502fddc64cb6c04d4964 Mon Sep 17 00:00:00 2001 From: Travis Newby <83976412+travis-newby@users.noreply.github.com> Date: Mon, 6 Feb 2023 12:43:12 -0600 Subject: [PATCH] Add pre-cleaning to a couple of zip files during geo-score (#2151) Two zip files were not getting cleared prior to running geo score. This was resulting in them growing between runs until the application ground to a halt. This fix clears those two zip files before geo score and before the full run. --- .../data-pipeline/data_pipeline/application.py | 3 +++ data/data-pipeline/data_pipeline/utils.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/data/data-pipeline/data_pipeline/application.py b/data/data-pipeline/data_pipeline/application.py index 680f72a1..576b3d6d 100644 --- a/data/data-pipeline/data_pipeline/application.py +++ b/data/data-pipeline/data_pipeline/application.py @@ -22,6 +22,7 @@ from data_pipeline.utils import downloadable_cleanup from data_pipeline.utils import get_module_logger from data_pipeline.utils import score_folder_cleanup from data_pipeline.utils import temp_folder_cleanup +from data_pipeline.utils import geo_score_folder_cleanup logger = get_module_logger(__name__) @@ -58,6 +59,7 @@ def data_cleanup(): tribal_reset(data_path) score_folder_cleanup() temp_folder_cleanup() + geo_score_folder_cleanup() logger.info("Cleaned up all data folders") sys.exit() @@ -179,6 +181,7 @@ def geo_score(data_source: str): None """ + geo_score_folder_cleanup() score_geo(data_source=data_source) sys.exit() diff --git a/data/data-pipeline/data_pipeline/utils.py b/data/data-pipeline/data_pipeline/utils.py index 55289475..a5e08c4a 100644 --- a/data/data-pipeline/data_pipeline/utils.py +++ b/data/data-pipeline/data_pipeline/utils.py @@ -17,6 +17,7 @@ from data_pipeline.config import settings from data_pipeline.content.schemas.download_schemas import CodebookConfig from data_pipeline.content.schemas.download_schemas import CSVConfig from data_pipeline.content.schemas.download_schemas import ExcelConfig +from data_pipeline.etl.score.constants import SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH from marshmallow import ValidationError from marshmallow_dataclass import class_schema @@ -218,9 +219,26 @@ def score_folder_cleanup() -> None: remove_all_from_dir(data_path / "score" / "csv") remove_all_from_dir(data_path / "score" / "geojson") remove_all_from_dir(data_path / "score" / "tiles") + remove_all_from_dir(data_path / "score" / "shapefile") downloadable_cleanup() +def geo_score_folder_cleanup() -> None: + """Removes the necessary files to run geo-score. This works out to be + zip files, since if we don't remove them python's zip utils continuously + add to them instead of overwriting the contents.""" + + data_path = settings.APP_ROOT / "data" + + logger.info("Removing zip files") + remove_files_from_dir(data_path / "score" / "shapefile", ".zip") + + shapefile_and_codebook_zipped = SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH + + if os.path.isfile(shapefile_and_codebook_zipped): + os.remove(shapefile_and_codebook_zipped) + + def downloadable_cleanup() -> None: """Remove all files from downloadable directory in the local data/score path"""