def geo_score_folder_cleanup() -> None:
    """Delete the zip archives left over from a previous geo-score run.

    Two things are removed:

    * every ``.zip`` file under ``<APP_ROOT>/data/score/shapefile``
    * the archive at ``SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH``,
      if it exists as a regular file

    The archives are cleared up front because, if they are left in place,
    python's zip utils keep adding to them instead of overwriting their
    contents.
    """
    score_dir = settings.APP_ROOT / "data" / "score"

    logger.info("Removing zip files")
    remove_files_from_dir(score_dir / "shapefile", ".zip")

    # Combined shapefile + codebook archive; nothing to do when it is absent.
    combined_archive = SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH
    if not os.path.isfile(combined_archive):
        return
    os.remove(combined_archive)