Rezip CSV and Excel with Codebook (#1971)

* Rezip CSV and Excel files with Codebook

* codebook version

* packages fix

* pydantic

* lint

* Remove markdown link from markdown checker (#1936)

Co-authored-by: Vim <86254807+vim-usds@users.noreply.github.com>
This commit is contained in:
Jorge Escobar 2022-10-04 15:45:09 -04:00 committed by GitHub
commit e8e951fe9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 727 additions and 1231 deletions

View file

@ -1,3 +1,4 @@
import os
from pathlib import Path
import datetime
@ -44,22 +45,36 @@ DATA_SCORE_JSON_INDEX_FILE_PATH = (
DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
# Downloadable paths
current_dt = datetime.datetime.now()
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT")
if not os.environ.get("J40_VERSION_DATE_STRING"):
current_dt = datetime.datetime.now()
timestamp_str = current_dt.strftime("%Y-%m-%d-%H%MGMT")
else:
timestamp_str = os.environ.get("J40_VERSION_DATE_STRING")
if not os.environ.get("J40_VERSION_LABEL_STRING"):
version_str = "beta"
else:
version_str = os.environ.get("J40_VERSION_LABEL_STRING")
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "draft_communities_list.pdf"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
SCORE_DOWNLOADABLE_CSV_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.csv"
SCORE_DOWNLOADABLE_DIR / f"{version_str}-communities-{timestamp_str}.csv"
)
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.xlsx"
SCORE_DOWNLOADABLE_DIR / f"{version_str}-communities-{timestamp_str}.xlsx"
)
SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / f"codebook-{timestamp_str}.csv"
SCORE_DOWNLOADABLE_DIR / f"{version_str}-codebook-{timestamp_str}.csv"
)
SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
SCORE_DOWNLOADABLE_CSV_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR
/ f"{version_str}-communities-csv-{timestamp_str}.zip"
)
SCORE_DOWNLOADABLE_XLS_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR
/ f"{version_str}-communities-xls-{timestamp_str}.zip"
)
# For the codebook

View file

@ -527,8 +527,8 @@ class PostScoreETL(ExtractTransformLoad):
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
codebook_path = constants.SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH
zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH
pdf_path = constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
csv_zip_path = constants.SCORE_DOWNLOADABLE_CSV_ZIP_FILE_PATH
xls_zip_path = constants.SCORE_DOWNLOADABLE_XLS_ZIP_FILE_PATH
logger.info("Writing downloadable excel")
excel_config = self._load_excel_from_df(
@ -577,14 +577,19 @@ class PostScoreETL(ExtractTransformLoad):
# load codebook to disk
codebook_df.to_csv(codebook_path, index=False)
logger.info("Compressing files")
logger.info("Compressing csv files")
files_to_compress = [
csv_path,
codebook_path,
]
zip_files(csv_zip_path, files_to_compress)
logger.info("Compressing xls files")
files_to_compress = [
excel_path,
codebook_path,
pdf_path,
]
zip_files(zip_path, files_to_compress)
zip_files(xls_zip_path, files_to_compress)
def load(self) -> None:
self._load_score_csv_full(

View file

@ -73,7 +73,7 @@ def score_data_initial(sample_data_dir):
@pytest.fixture()
def score_pdf_initial(sample_data_dir):
return sample_data_dir / "Draft_Communities_List.pdf"
return sample_data_dir / "draft_communities_list.pdf"
@pytest.fixture()

View file

@ -143,4 +143,5 @@ def test_load_downloadable_zip(etl, monkeypatch, score_data_expected):
assert constants.SCORE_DOWNLOADABLE_DIR.is_dir()
assert constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH.is_file()
assert constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH.is_file()
assert constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH.is_file()
assert constants.SCORE_DOWNLOADABLE_CSV_ZIP_FILE_PATH.is_file()
assert constants.SCORE_DOWNLOADABLE_XLS_ZIP_FILE_PATH.is_file()

View file

@ -1,7 +1,8 @@
# Michigan EJSCREEN
<!-- markdown-link-check-disable -->
The Michigan EJSCREEN description and publication can be found [here](https://deepblue.lib.umich.edu/bitstream/handle/2027.42/149105/AssessingtheStateofEnvironmentalJusticeinMichigan_344.pdf).
<!-- markdown-link-check-enable -->
#### Some notes about the input source data column fields: