From d7274888b698587993f2dec20e8297823876c5a8 Mon Sep 17 00:00:00 2001 From: Shelby Switzer Date: Fri, 10 Sep 2021 16:06:47 -0400 Subject: [PATCH] Update downloadable zip file (#659) * Update downloadable zip file * Don't use spaces in the name, as per #620 * Add the score D columns, as per #596 * fix paths and directories in etl_score_post while the tests seemed to be passing, I encountered an error when running poetry run score, which was caused by us creating a directory called .csv, instead of creating the parent directory. Co-authored-by: Shelby Switzer --- .../data_pipeline/etl/score/constants.py | 4 +++- .../data_pipeline/etl/score/etl_score_post.py | 10 ++++------ .../snapshots/downloadable_data_expected.pkl | Bin 4039 -> 4220 bytes 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 9ba1f05c..f63e16b4 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -39,7 +39,7 @@ DATA_SCORE_TILES_FILE_PATH = DATA_SCORE_TILES_DIR / "usa.csv" SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable" SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv" SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx" -SCORE_DOWNLOADABLE_ZIP_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "Screening Tool Data.zip" +SCORE_DOWNLOADABLE_ZIP_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip" # Column subsets CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"] @@ -104,5 +104,7 @@ DOWNLOADABLE_SCORE_COLUMNS = [ "GEOID10", "County Name", "State Name", + "Score D (percentile)", + "Score D (top 25th percentile)", *DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 80b49e13..e5e77f0b 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -219,9 +219,7 @@ class PostScoreETL(ExtractTransformLoad): self, score_tiles_df: pd.DataFrame, tile_score_path: Path ) -> None: logger.info("Saving Tile Score CSV") - # TODO: check which are the columns we'll use - # Related to: https://github.com/usds/justice40-tool/issues/302 - tile_score_path.mkdir(parents=True, exist_ok=True) + tile_score_path.parent.mkdir(parents=True, exist_ok=True) score_tiles_df.to_csv(tile_score_path, index=False) def _load_downloadable_zip( @@ -230,9 +228,9 @@ class PostScoreETL(ExtractTransformLoad): logger.info("Saving Downloadable CSV") downloadable_info_path.mkdir(parents=True, exist_ok=True) - csv_path = downloadable_info_path / "usa.csv" - excel_path = downloadable_info_path / "usa.xlsx" - zip_path = downloadable_info_path / "Screening Tool Data.zip" + csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH + excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH + zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH logger.info("Writing downloadable csv") downloadable_df.to_csv(csv_path, index=False) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 44e878faddb181ebc93e7931b29e60ac41562bd5..ba72a170310dab1107006a3a1a6525cc610e8233 100644 GIT binary patch delta 264 zcmX>u|3`tffpu!3z(&@KOcDi53}BED%jR@1*!ZoH@FhZ0wZ|-0fXOvWj@EA0pv=)@so~+NW%ckBsB_qCM@`Bb>{g6@liS(V z7y-C&?rbPN>0ry$;?UBoYEr;7cR*!P%ttr$xuL&-+YUmmz{CK V0P>@+o#J#=6O@9BPbOlXW=sHc#Z> YW@l`eJeS{)v1RgkejmoIlVt=F0MLRVKL7v#