diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 9ba1f05c..f63e16b4 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -39,7 +39,7 @@ DATA_SCORE_TILES_FILE_PATH = DATA_SCORE_TILES_DIR / "usa.csv" SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable" SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv" SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx" -SCORE_DOWNLOADABLE_ZIP_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "Screening Tool Data.zip" +SCORE_DOWNLOADABLE_ZIP_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip" # Column subsets CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"] @@ -104,5 +104,7 @@ DOWNLOADABLE_SCORE_COLUMNS = [ "GEOID10", "County Name", "State Name", + "Score D (percentile)", + "Score D (top 25th percentile)", *DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 80b49e13..e5e77f0b 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -219,9 +219,7 @@ class PostScoreETL(ExtractTransformLoad): self, score_tiles_df: pd.DataFrame, tile_score_path: Path ) -> None: logger.info("Saving Tile Score CSV") - # TODO: check which are the columns we'll use - # Related to: https://github.com/usds/justice40-tool/issues/302 - tile_score_path.mkdir(parents=True, exist_ok=True) + tile_score_path.parent.mkdir(parents=True, exist_ok=True) score_tiles_df.to_csv(tile_score_path, index=False) def _load_downloadable_zip( @@ -230,9 +228,9 @@ class PostScoreETL(ExtractTransformLoad): logger.info("Saving Downloadable CSV") downloadable_info_path.mkdir(parents=True, exist_ok=True) - csv_path = downloadable_info_path / "usa.csv" - excel_path = downloadable_info_path / "usa.xlsx" - zip_path = downloadable_info_path / "Screening Tool Data.zip" + csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH + excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH + zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH logger.info("Writing downloadable csv") downloadable_df.to_csv(csv_path, index=False) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 44e878fa..ba72a170 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ