Add assertions around codebook (#2014)

* Add assertion around codebook (#1505)

* Assert csv and excel have same cols (#1505)
This commit is contained in:
Matt Bowen 2022-10-13 16:20:04 -04:00 committed by GitHub
parent 8b611edae6
commit 743d3ce37c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -532,6 +532,22 @@ class PostScoreETL(ExtractTransformLoad):
"fields" "fields"
], ],
) )
# Sanity-check the codebook before it is written to disk.
# The CSV and Excel label columns must match element-for-element; the
# message below tells the maintainer what to do if they diverge on purpose.
assert codebook_df["csv_label"].equals(codebook_df["excel_label"]), (
    "CSV and Excel differ. If that's intentional, "
    "remove this assertion. Otherwise, fix it."
)
# Check the codebook to make sure it matches the download files
# Direction 1: labels present in the codebook (blank labels ignored) that
# are NOT columns of the downloadable frame, i.e. the codebook has extras.
assert not set(codebook_df["csv_label"].dropna()).difference(
    downloadable_df.columns
), "Codebook has columns the downloadable files do not"
# Direction 2: columns of the downloadable frame that have no matching
# codebook label, i.e. the codebook is incomplete.
assert (
    len(
        downloadable_df.columns.difference(
            set(codebook_df["csv_label"])
        )
    )
    == 0
), "Codebook is missing columns from downloadable files"
# load codebook to disk # load codebook to disk
codebook_df.to_csv(codebook_path, index=False) codebook_df.to_csv(codebook_path, index=False)