updated to show T/F/null vs T/F for AML and FUDS (#1866)

This commit is contained in:
Emma Nechamkin 2022-08-24 20:22:59 -04:00 committed by GitHub
commit 637b8c305c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 40 additions and 10 deletions

View file

@ -309,8 +309,10 @@ TILES_SCORE_COLUMNS = {
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
field_names.AML_BOOLEAN: "AML_ET",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET",
field_names.AML_BOOLEAN: "AML_RAW",
field_names.AML_BOOLEAN_FILLED_IN: "AML_ET",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_RAW",
field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME: "FUDS_ET",
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG",
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
## FPL_200 (there is no higher ed in narwhal)

View file

@ -322,6 +322,8 @@ class ScoreETL(ExtractTransformLoad):
# which are now deprecated.
if not drop_tracts:
# Create the "basic" percentile.
## NOTE: I believe this is less performant than building all of these PFS columns first
## and then concatenating them in a single step. Revisit during the refactor.
df[
f"{output_column_name_root}"
f"{field_names.PERCENTILE_FIELD_SUFFIX}"
@ -538,9 +540,12 @@ class ScoreETL(ExtractTransformLoad):
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
# coerce all booleans to bools
# coerce all boolean columns to bools while preserving missing (NaN) values
# since these columns hold booleans, missing values must be written as `None`
for col in boolean_columns:
df_copy[col] = df_copy[col].astype(bool)
tmp = df_copy[col].copy()
df_copy[col] = np.where(tmp.notna(), tmp.astype(bool), None)
logger.info(f"{col} contains {df_copy[col].isna().sum()} nulls.")
# Convert all columns to numeric and do math
# Note that we have a few special conditions here and we handle them explicitly.

File diff suppressed because one or more lines are too long