From d8c73e6a02ebaf1fa3a47add82e85c55adfc27bf Mon Sep 17 00:00:00 2001
From: Shelby Switzer
Date: Fri, 1 Oct 2021 15:04:37 -0400
Subject: [PATCH] Change downloadable file names (#708)

* Change downloadable file names
* Remove constants because we're dynamically creating these
* Update to "communities" for the descriptor word based on team convo
* Add timestamp in 2020-09-20-0930 format because I personally think this is the best ^.^
* Add a CLI command to run ETL Score Post so that we don't have to run the score generation just to get new downloadable files.
* Also make sure the old downloadable files are cleaned up on the run of this command.
* Remove unused library, thanks pylint!

Co-authored-by: Shelby Switzer
---
 data/data-pipeline/data_pipeline/application.py | 14 +++++++++++++-
 data/data-pipeline/data_pipeline/etl/runner.py  | 12 ++++++++++++
 .../data_pipeline/etl/score/constants.py        |  7 +++++--
 data/data-pipeline/data_pipeline/utils.py       |  7 ++++++-
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/application.py b/data/data-pipeline/data_pipeline/application.py
index 8105120e..4cd6e8da 100644
--- a/data/data-pipeline/data_pipeline/application.py
+++ b/data/data-pipeline/data_pipeline/application.py
@@ -3,7 +3,7 @@ import sys
 import click
 
 from data_pipeline.config import settings
-from data_pipeline.etl.runner import etl_runner, score_generate, score_geo
+from data_pipeline.etl.runner import etl_runner, score_generate, score_geo, score_post
 from data_pipeline.etl.sources.census.etl_utils import (
     reset_data_directories as census_reset,
 )
@@ -12,6 +12,7 @@ from data_pipeline.utils import (
     data_folder_cleanup,
     get_module_logger,
     score_folder_cleanup,
+    downloadable_cleanup,
     temp_folder_cleanup,
     check_first_run,
 )
@@ -136,6 +137,17 @@ def generate_map_tiles():
     sys.exit()
 
 
+@cli.command(
+    help="Run etl_score_post to create score csv, tile csv, and downloadable zip",
+)
+def generate_score_post():
+    """CLI command to generate score, tile, and downloadable files"""
+
+    downloadable_cleanup()
+    score_post()
+    sys.exit()
+
+
 @cli.command(
     help="Data Full Run (Census download, ETLs, score, combine and tile generation)",
 )
diff --git a/data/data-pipeline/data_pipeline/etl/runner.py b/data/data-pipeline/data_pipeline/etl/runner.py
index 71e2b5a9..5ed7ca81 100644
--- a/data/data-pipeline/data_pipeline/etl/runner.py
+++ b/data/data-pipeline/data_pipeline/etl/runner.py
@@ -83,6 +83,18 @@ def score_generate() -> None:
     score_gen.transform()
     score_gen.load()
 
+    # Post Score Processing
+    score_post()
+
+def score_post() -> None:
+    """Posts the score files to the local directory
+
+    Args:
+        None
+
+    Returns:
+        None
+    """
     # Post Score Processing
     score_post = PostScoreETL()
     score_post.extract()
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
index 7f5485d3..f05513d0 100644
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import datetime
 
 import pandas as pd
 from data_pipeline.config import settings
@@ -40,11 +41,13 @@ DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.csv"
 DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
 
 # Downloadable paths
+current_dt = datetime.datetime.now()
+timestamp_str = current_dt.strftime("%Y-%m-%d-%H%M")
 SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
-SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
-SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
 SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
 SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
+SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.csv"
+SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.xlsx"
 SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
     SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
 )
diff --git a/data/data-pipeline/data_pipeline/utils.py b/data/data-pipeline/data_pipeline/utils.py
index d6a61ead..3d1174d8 100644
--- a/data/data-pipeline/data_pipeline/utils.py
+++ b/data/data-pipeline/data_pipeline/utils.py
@@ -186,9 +186,14 @@ def score_folder_cleanup() -> None:
     logger.info("Initializing all score data")
     remove_all_from_dir(data_path / "score" / "csv")
     remove_all_from_dir(data_path / "score" / "geojson")
-    remove_all_from_dir(data_path / "score" / "downloadable")
     remove_all_from_dir(data_path / "score" / "tiles")
+    downloadable_cleanup()
 
+def downloadable_cleanup() -> None:
+    """Remove all files from downloadable directory in the local data/score path"""
+
+    data_path = settings.APP_ROOT / "data"
+    remove_all_from_dir(data_path / "score" / "downloadable")
 
 def temp_folder_cleanup() -> None:
     """Remove all files and directories from the local data/tmp temporary path"""