mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 08:41:17 -07:00
Score versioning (#1986)
* Score versioning * adding new zip assets * tests passing * debugging * debugging 2 * get codebook from downloadable path * upload version file for codebook and shapefile
This commit is contained in:
parent
e8e951fe9a
commit
dc234de551
11 changed files with 187 additions and 11 deletions
|
@ -57,8 +57,10 @@ else:
|
|||
version_str = os.environ.get("J40_VERSION_LABEL_STRING")
|
||||
|
||||
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "draft_communities_list.pdf"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_NAME = "draft-communities-list.pdf"
|
||||
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME
|
||||
SCORE_DOWNLOADABLE_TSD_FILE_NAME = "cejst-technical-support-document.pdf"
|
||||
SCORE_DOWNLOADABLE_TSD_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_TSD_FILE_NAME
|
||||
SCORE_DOWNLOADABLE_CSV_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR / f"{version_str}-communities-{timestamp_str}.csv"
|
||||
)
|
||||
|
@ -76,6 +78,18 @@ SCORE_DOWNLOADABLE_XLS_ZIP_FILE_PATH = (
|
|||
SCORE_DOWNLOADABLE_DIR
|
||||
/ f"{version_str}-communities-xls-{timestamp_str}.zip"
|
||||
)
|
||||
SCORE_VERSIONING_DATA_DOCUMENTATION_ZIP_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR
|
||||
/ f"{version_str}-data-documentation-{timestamp_str}.zip"
|
||||
)
|
||||
SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR
|
||||
/ f"{version_str}-shapefile-codebook-{timestamp_str}.zip"
|
||||
)
|
||||
SCORE_VERSIONING_README_FILE_NAME = f"README-version-{version_str}.md"
|
||||
SCORE_VERSIONING_README_FILE_PATH = (
|
||||
FILES_PATH / SCORE_VERSIONING_README_FILE_NAME
|
||||
)
|
||||
|
||||
# For the codebook
|
||||
CEJST_SCORE_COLUMN_NAME = "score_name"
|
||||
|
|
|
@ -368,6 +368,37 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
zip_files(arcgis_zip_file_path, arcgis_files)
|
||||
logger.info("Completed zipping shapefiles")
|
||||
|
||||
# Per #1557:
|
||||
# Zip file that contains the shapefiles, codebook and checksum file.
|
||||
# Normally we get the codebook file path using this constant:
|
||||
# - codebook_path = constants.SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH
|
||||
# However, since we generate it in a separate script (etl_score_post),
|
||||
# the timestamp may be regenerated, and thus the file is not found at that path.
|
||||
# So we grab it from the downloadable dir and if we don't find it, it
|
||||
# means we haven't run etl_score_post, and continue
|
||||
|
||||
logger.info("Getting codebook from downloadable dir")
|
||||
codebook_path = None
|
||||
for file in os.listdir(constants.SCORE_DOWNLOADABLE_DIR):
|
||||
if "codebook" in file:
|
||||
codebook_path = constants.SCORE_DOWNLOADABLE_DIR / file
|
||||
|
||||
if codebook_path:
|
||||
version_shapefile_codebook_zip_path = (
|
||||
constants.SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH
|
||||
)
|
||||
readme_path = constants.SCORE_VERSIONING_README_FILE_PATH
|
||||
|
||||
logger.info("Compressing shapefile and codebook files")
|
||||
files_to_compress = [
|
||||
arcgis_zip_file_path,
|
||||
codebook_path,
|
||||
readme_path,
|
||||
]
|
||||
zip_files(
|
||||
version_shapefile_codebook_zip_path, files_to_compress
|
||||
)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
executor.submit(task)
|
||||
|
|
|
@ -527,8 +527,18 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
|
||||
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
|
||||
codebook_path = constants.SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH
|
||||
readme_path = constants.SCORE_VERSIONING_README_FILE_PATH
|
||||
csv_zip_path = constants.SCORE_DOWNLOADABLE_CSV_ZIP_FILE_PATH
|
||||
xls_zip_path = constants.SCORE_DOWNLOADABLE_XLS_ZIP_FILE_PATH
|
||||
score_downloadable_pdf_file_path = (
|
||||
constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
|
||||
)
|
||||
score_downloadable_tsd_file_path = (
|
||||
constants.SCORE_DOWNLOADABLE_TSD_FILE_PATH
|
||||
)
|
||||
version_data_documentation_zip_path = (
|
||||
constants.SCORE_VERSIONING_DATA_DOCUMENTATION_ZIP_FILE_PATH
|
||||
)
|
||||
|
||||
logger.info("Writing downloadable excel")
|
||||
excel_config = self._load_excel_from_df(
|
||||
|
@ -577,19 +587,26 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
# load codebook to disk
|
||||
codebook_df.to_csv(codebook_path, index=False)
|
||||
|
||||
# zip assets
|
||||
logger.info("Compressing csv files")
|
||||
files_to_compress = [
|
||||
csv_path,
|
||||
codebook_path,
|
||||
]
|
||||
files_to_compress = [csv_path, codebook_path, readme_path]
|
||||
zip_files(csv_zip_path, files_to_compress)
|
||||
|
||||
logger.info("Compressing xls files")
|
||||
files_to_compress = [excel_path, codebook_path, readme_path]
|
||||
zip_files(xls_zip_path, files_to_compress)
|
||||
|
||||
# Per #1557
|
||||
# Zip file that contains the .xls, .csv, .pdf, technical support document, and checksum file.
|
||||
logger.info("Compressing data and documentation files")
|
||||
files_to_compress = [
|
||||
excel_path,
|
||||
codebook_path,
|
||||
csv_path,
|
||||
score_downloadable_pdf_file_path,
|
||||
score_downloadable_tsd_file_path,
|
||||
readme_path,
|
||||
]
|
||||
zip_files(xls_zip_path, files_to_compress)
|
||||
zip_files(version_data_documentation_zip_path, files_to_compress)
|
||||
|
||||
def load(self) -> None:
|
||||
self._load_score_csv_full(
|
||||
|
|
|
@ -73,7 +73,7 @@ def score_data_initial(sample_data_dir):
|
|||
|
||||
@pytest.fixture()
|
||||
def score_pdf_initial(sample_data_dir):
|
||||
return sample_data_dir / "draft_communities_list.pdf"
|
||||
return sample_data_dir / "draft-communities-list.pdf"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
|
|
@ -138,6 +138,16 @@ def test_load_downloadable_zip(etl, monkeypatch, score_data_expected):
|
|||
"SCORE_DOWNLOADABLE_PDF_FILE_PATH",
|
||||
static_files_path / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
constants,
|
||||
"SCORE_VERSIONING_README_FILE_PATH",
|
||||
static_files_path / constants.SCORE_VERSIONING_README_FILE_NAME,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
constants,
|
||||
"SCORE_DOWNLOADABLE_TSD_FILE_PATH",
|
||||
static_files_path / constants.SCORE_DOWNLOADABLE_TSD_FILE_NAME,
|
||||
)
|
||||
etl.output_score_county_state_merged_df = score_data_expected
|
||||
etl._load_downloadable_zip(constants.SCORE_DOWNLOADABLE_DIR)
|
||||
assert constants.SCORE_DOWNLOADABLE_DIR.is_dir()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue