mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Add assertions around codebook (#2014)
* Add assertion around codebook (#1505)
* Assert csv and excel have same cols (#1505)
This commit is contained in:
parent
8b611edae6
commit
743d3ce37c
1 changed files with 16 additions and 0 deletions
|
@ -532,6 +532,22 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
"fields"
|
"fields"
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
assert codebook_df["csv_label"].equals(codebook_df["excel_label"]), (
|
||||||
|
"CSV and Excel differ. If that's intentional, "
|
||||||
|
"remove this assertion. Otherwise, fix it."
|
||||||
|
)
|
||||||
|
# Check the codebook to make sure it matches the download files
|
||||||
|
assert not set(codebook_df["csv_label"].dropna()).difference(
|
||||||
|
downloadable_df.columns
|
||||||
|
), "Codebook is missing columns from downloadable files"
|
||||||
|
assert (
|
||||||
|
len(
|
||||||
|
downloadable_df.columns.difference(
|
||||||
|
set(codebook_df["csv_label"])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
== 0
|
||||||
|
), "Codebook has columns the downloadable files do not"
|
||||||
|
|
||||||
# load codebook to disk
|
# load codebook to disk
|
||||||
codebook_df.to_csv(codebook_path, index=False)
|
codebook_df.to_csv(codebook_path, index=False)
|
||||||
|
|
Loading…
Add table
Reference in a new issue