updated to show T/F/null vs T/F for AML and FUDS (#1866)

This commit is contained in:
Emma Nechamkin 2022-08-24 20:22:59 -04:00 committed by GitHub
parent 6418335219
commit 637b8c305c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 40 additions and 10 deletions

View file

@ -359,6 +359,12 @@ fields:
- score_name: Is there at least one abandoned mine in this census tract? - score_name: Is there at least one abandoned mine in this census tract?
label: Is there at least one abandoned mine in this census tract? label: Is there at least one abandoned mine in this census tract?
format: bool format: bool
- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
label: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
format: bool
- score_name: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
label: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
format: bool
- score_name: There is at least one abandoned mine in this census tract and the tract is low income. - score_name: There is at least one abandoned mine in this census tract and the tract is low income.
label: There is at least one abandoned mine in this census tract and the tract is low income. label: There is at least one abandoned mine in this census tract and the tract is low income.
format: bool format: bool

View file

@ -369,6 +369,12 @@ sheets:
- score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
format: bool format: bool
- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
label: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
format: bool
- score_name: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
label: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
format: bool
- score_name: Tract-level redlining score meets or exceeds 3.25 and is low income - score_name: Tract-level redlining score meets or exceeds 3.25 and is low income
label: Tract experienced historic underinvestment and remains low income label: Tract experienced historic underinvestment and remains low income
format: bool format: bool

View file

@ -309,8 +309,10 @@ TILES_SCORE_COLUMNS = {
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS", + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET", field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
field_names.AML_BOOLEAN: "AML_ET", field_names.AML_BOOLEAN: "AML_RAW",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET", field_names.AML_BOOLEAN_FILLED_IN: "AML_ET",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_RAW",
field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME: "FUDS_ET",
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG", field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG",
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
## FPL_200 (there is no higher ed in narwhal) ## FPL_200 (there is no higher ed in narwhal)

View file

@ -322,6 +322,8 @@ class ScoreETL(ExtractTransformLoad):
# which are now deprecated. # which are now deprecated.
if not drop_tracts: if not drop_tracts:
# Create the "basic" percentile. # Create the "basic" percentile.
## NOTE: assigning these PFS columns one at a time is probably less performant than
## building all of them first and concatenating them in a single step. Revisit in the refactor.
df[ df[
f"{output_column_name_root}" f"{output_column_name_root}"
f"{field_names.PERCENTILE_FIELD_SUFFIX}" f"{field_names.PERCENTILE_FIELD_SUFFIX}"
@ -538,9 +540,12 @@ class ScoreETL(ExtractTransformLoad):
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric) df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
# coerce all booleans to bools # coerce all booleans to bools preserving nan character
# since these columns hold booleans, missing values are written as `None`
for col in boolean_columns: for col in boolean_columns:
df_copy[col] = df_copy[col].astype(bool) tmp = df_copy[col].copy()
df_copy[col] = np.where(tmp.notna(), tmp.astype(bool), None)
logger.info(f"{col} contains {df_copy[col].isna().sum()} nulls.")
# Convert all columns to numeric and do math # Convert all columns to numeric and do math
# Note that we have a few special conditions here and we handle them explicitly. # Note that we have a few special conditions here and we handle them explicitly.

File diff suppressed because one or more lines are too long

View file

@ -355,9 +355,12 @@ TRANSPORTATION_COSTS = "Transportation Costs"
# eAMLIS and FUDS variables # eAMLIS and FUDS variables
AML_BOOLEAN = "Is there at least one abandoned mine in this census tract?" AML_BOOLEAN = "Is there at least one abandoned mine in this census tract?"
AML_BOOLEAN_FILLED_IN = "Is there at least one abandoned mine in this census tract, where missing data is treated as False?"
ELIGIBLE_FUDS_BINARY_FIELD_NAME = ( ELIGIBLE_FUDS_BINARY_FIELD_NAME = (
"Is there at least one Formerly Used Defense Site (FUDS) in the tract?" "Is there at least one Formerly Used Defense Site (FUDS) in the tract?"
) )
ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME = "Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?"
##### #####
# Names for individual factors being exceeded # Names for individual factors being exceeded

View file

@ -488,13 +488,21 @@ class ScoreNarwhal(Score):
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
) )
self.df[field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME] = self.df[
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME
].fillna(False)
self.df[field_names.AML_BOOLEAN_FILLED_IN] = self.df[
field_names.AML_BOOLEAN
].fillna(False)
self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = self.df[ self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = self.df[
[ [
field_names.RMP_PCTILE_THRESHOLD, field_names.RMP_PCTILE_THRESHOLD,
field_names.NPL_PCTILE_THRESHOLD, field_names.NPL_PCTILE_THRESHOLD,
field_names.TSDF_PCTILE_THRESHOLD, field_names.TSDF_PCTILE_THRESHOLD,
field_names.AML_BOOLEAN, field_names.AML_BOOLEAN_FILLED_IN,
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME,
] ]
].any(axis="columns") ].any(axis="columns")
@ -513,7 +521,7 @@ class ScoreNarwhal(Score):
) )
self.df[field_names.AML_LOW_INCOME_FIELD] = ( self.df[field_names.AML_LOW_INCOME_FIELD] = (
self.df[field_names.AML_BOOLEAN] self.df[field_names.AML_BOOLEAN_FILLED_IN]
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
) )