updated to show T/F/null vs T/F for AML and FUDS (#1866)

This commit is contained in:
Emma Nechamkin 2022-08-24 20:22:59 -04:00 committed by GitHub
parent 6418335219
commit 637b8c305c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 40 additions and 10 deletions

View file

@ -359,6 +359,12 @@ fields:
- score_name: Is there at least one abandoned mine in this census tract?
label: Is there at least one abandoned mine in this census tract?
format: bool
- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
label: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
format: bool
- score_name: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
label: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
format: bool
- score_name: There is at least one abandoned mine in this census tract and the tract is low income.
label: There is at least one abandoned mine in this census tract and the tract is low income.
format: bool

View file

@ -369,6 +369,12 @@ sheets:
- score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
format: bool
- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
label: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?
format: bool
- score_name: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
label: Is there at least one abandoned mine in this census tract, where missing data is treated as False?
format: bool
- score_name: Tract-level redlining score meets or exceeds 3.25 and is low income
label: Tract experienced historic underinvestment and remains low income
format: bool

View file

@ -309,8 +309,10 @@ TILES_SCORE_COLUMNS = {
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
field_names.AML_BOOLEAN: "AML_ET",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET",
field_names.AML_BOOLEAN: "AML_RAW",
field_names.AML_BOOLEAN_FILLED_IN: "AML_ET",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_RAW",
field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME: "FUDS_ET",
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG",
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
## FPL_200 (there is no higher ed in narwhal)

View file

@ -322,6 +322,8 @@ class ScoreETL(ExtractTransformLoad):
# which are now deprecated.
if not drop_tracts:
# Create the "basic" percentile.
## NOTE: assigning these PFS columns one at a time is probably less performant than
## building them all first and concatenating the list once. Revisit in the refactor.
df[
f"{output_column_name_root}"
f"{field_names.PERCENTILE_FIELD_SUFFIX}"
@ -538,9 +540,12 @@ class ScoreETL(ExtractTransformLoad):
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
# coerce all booleans to bools
# coerce all boolean columns to bools while preserving missing values
# since these are boolean columns, missing entries are written as `None`
for col in boolean_columns:
df_copy[col] = df_copy[col].astype(bool)
tmp = df_copy[col].copy()
df_copy[col] = np.where(tmp.notna(), tmp.astype(bool), None)
logger.info(f"{col} contains {df_copy[col].isna().sum()} nulls.")
# Convert all columns to numeric and do math
# Note that we have a few special conditions here and we handle them explicitly.

File diff suppressed because one or more lines are too long

View file

@ -355,9 +355,12 @@ TRANSPORTATION_COSTS = "Transportation Costs"
# eAMLIS and FUDS variables
# Each flag has two column names: the plain question, and a "filled in"
# variant whose name states that missing data is treated as False.
AML_BOOLEAN = "Is there at least one abandoned mine in this census tract?"
AML_BOOLEAN_FILLED_IN = (
    "Is there at least one abandoned mine in this census tract, "
    "where missing data is treated as False?"
)
ELIGIBLE_FUDS_BINARY_FIELD_NAME = (
    "Is there at least one Formerly Used Defense Site (FUDS) in the tract?"
)
ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME = (
    "Is there at least one Formerly Used Defense Site (FUDS) in the tract, "
    "where missing data is treated as False?"
)
#####
# Names for individual factors being exceeded

View file

@ -488,13 +488,21 @@ class ScoreNarwhal(Score):
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
self.df[field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME] = self.df[
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME
].fillna(False)
self.df[field_names.AML_BOOLEAN_FILLED_IN] = self.df[
field_names.AML_BOOLEAN
].fillna(False)
self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = self.df[
[
field_names.RMP_PCTILE_THRESHOLD,
field_names.NPL_PCTILE_THRESHOLD,
field_names.TSDF_PCTILE_THRESHOLD,
field_names.AML_BOOLEAN,
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.AML_BOOLEAN_FILLED_IN,
field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME,
]
].any(axis="columns")
@ -513,7 +521,7 @@ class ScoreNarwhal(Score):
)
self.df[field_names.AML_LOW_INCOME_FIELD] = (
self.df[field_names.AML_BOOLEAN]
self.df[field_names.AML_BOOLEAN_FILLED_IN]
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
)