diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index d43cc325..389c1af7 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -366,6 +366,7 @@ class ScoreETL(ExtractTransformLoad): numeric_columns = [ field_names.HOUSING_BURDEN_FIELD, + field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD, field_names.TOTAL_POP_FIELD, field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD, field_names.ASTHMA_FIELD, @@ -416,7 +417,6 @@ class ScoreETL(ExtractTransformLoad): field_names.HEALTHY_FOOD_FIELD, field_names.IMPENETRABLE_SURFACES_FIELD, field_names.UST_FIELD, - # We have to pass this boolean here in order to include it in ag value loss percentiles. field_names.AGRICULTURAL_VALUE_BOOL_FIELD, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, ] @@ -472,9 +472,7 @@ class ScoreETL(ExtractTransformLoad): set(numeric_columns) ), "You have a double-entered column in the numeric columns list" - df_copy[numeric_columns] = df_copy[numeric_columns].apply( - pd.to_numeric - ) + df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric) # Convert all columns to numeric and do math # Note that we have a few special conditions here, that we handle explicitly. 
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 895c52b1..4fa0a75e 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -176,6 +176,10 @@ LOW_INCOME_THRESHOLD = "Exceeds FPL200 threshold" # Housing HOUSING_BURDEN_FIELD = "Housing burden (percent)" +NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD = ( + "Share of homes with no kitchen or indoor plumbing (percent)" +) + HT_INDEX_FIELD = ( "Housing + Transportation Costs % Income for the Regional Typical Household" ) @@ -419,6 +423,10 @@ LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = ( f"percentile and is low income?" ) HOUSING_BURDEN_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for housing burden and is low income?" +NO_KITCHEN_OR_INDOOR_PLUMBING_LOW_INCOME_FIELD = ( + f"Greater than or equal to the {PERCENTILE}th percentile for " + + "share of homes with no kitchen or indoor plumbing and is low income?" +) # Remediation and Reduction of Legacy Pollution RMP_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to RMP sites and is low income?" 
@@ -624,6 +632,11 @@ LEAD_PAINT_PROXY_PCTILE_THRESHOLD = ( HOUSING_BURDEN_PCTILE_THRESHOLD = ( f"Greater than or equal to the {PERCENTILE}th percentile for housing burden" ) +NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD = ( + f"Greater than or equal to the {PERCENTILE}th percentile for share " + "of homes without indoor plumbing or a kitchen" +) + RMP_PCTILE_THRESHOLD = ( f"Greater than or equal to the {PERCENTILE}th percentile for RMP proximity" ) diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index 0480d93a..0de2dcd0 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -315,9 +315,10 @@ class ScoreNarwhal(Score): field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD, field_names.HOUSING_BURDEN_LOW_INCOME_FIELD, field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD, + field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_LOW_INCOME_FIELD, ] - # # design question -- should read in scalar with threshold here instead? 
+ # Historic disinvestment self.df[ field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD ] = ( @@ -325,6 +326,22 @@ class ScoreNarwhal(Score): & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] ) + # Kitchen / plumbing + # Compare the percentile column, not the raw share, against the threshold + self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD] = ( + self.df[ + field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + + self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_LOW_INCOME_FIELD] = ( + self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD] + & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] + ) + + # Lead paint self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] = ( self.df[ field_names.LEAD_PAINT_FIELD @@ -339,6 +356,12 @@ class ScoreNarwhal(Score): <= self.MEDIAN_HOUSE_VALUE_THRESHOLD ) + self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = ( + self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] + & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] + ) + + # Housing burden self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD] = ( self.df[ field_names.HOUSING_BURDEN_FIELD @@ -346,29 +369,22 @@ class ScoreNarwhal(Score): ] >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) - - self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = ( - self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] - | self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD] - ) - - # series by series indicators - self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = ( - self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] - & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] - ) - self.df[field_names.HOUSING_BURDEN_LOW_INCOME_FIELD] = ( self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD] & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] ) + # any of the burdens + self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = self.df[ + housing_eligibility_columns + ].any(axis="columns") + self._increment_total_eligibility_exceeded( 
housing_eligibility_columns, skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS, ) - return self.df[housing_eligibility_columns].any(axis="columns") + return self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] def _pollution_factor(self) -> bool: # Proximity to Risk Management Plan sites is > X