diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 9eac120f..d12da2fb 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -64,6 +64,20 @@ TILES_SCORE_COLUMNS = [ "Linguistic isolation (percent) (percentile)", "Unemployed civilians (percent) (percentile)", "Housing burden (percent) (percentile)", + "Diagnosed diabetes among adults aged >=18 years (percentile)", + "Current asthma among adults aged >=18 years (percentile)", + "Coronary heart disease among adults aged >=18 years (percentile)", + "Life expectancy (years) (percentile)", + "Traffic proximity and volume (percentile)", + "FEMA Risk Index Expected Annual Loss Score (percentile)", + "Energy burden (percentile)", + "Wastewater discharge (percentile)", + "Percent pre-1960s housing (lead paint indicator) (percentile)", + "Diesel particulate matter (percentile)", + "Particulate matter (PM2.5) (percentile)", + "Median household income (% of AMI) (percentile)", + "Percent of individuals < 200% Federal Poverty Line (percentile)", + "Percent individuals age 25 or over with less than high school degree (percentile)", ] # columns to round floats to 2 decimals @@ -77,6 +91,24 @@ TILES_SCORE_FLOAT_COLUMNS = [ "Linguistic isolation (percent)", "Unemployed civilians (percent)", "Housing burden (percent)", + "Poverty (Less than 200% of federal poverty line) (percentile)", + "Percent individuals age 25 or over with less than high school degree (percentile)", + "Linguistic isolation (percent) (percentile)", + "Unemployed civilians (percent) (percentile)", + "Housing burden (percent) (percentile)", + "Diagnosed diabetes among adults aged >=18 years (percentile)", + "Current asthma among adults aged >=18 years (percentile)", + "Coronary heart disease among adults aged >=18 years (percentile)", + "Life expectancy (years) (percentile)", + "Traffic proximity and volume (percentile)", + "FEMA Risk Index Expected Annual Loss Score (percentile)", + "Energy burden (percentile)", + "Wastewater discharge (percentile)", + "Percent pre-1960s housing (lead paint indicator) (percentile)", + "Diesel particulate matter (percentile)", + "Particulate matter (PM2.5) (percentile)", + "Median household income (% of AMI) (percentile)", + "Percent of individuals < 200% Federal Poverty Line (percentile)", ] TILES_ROUND_NUM_DECIMALS = 2 diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 80b20fc3..a5b004a2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -608,15 +608,26 @@ class ScoreETL(ExtractTransformLoad): high_school_cutoff_threshold = 0.05 high_school_cutoff_threshold_2 = 0.06 + # Score G is now modified NMTC df["Score G (communities)"] = ( + (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8) + & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold) + ) | ( + (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20) + & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold) + ) + df["Score G"] = df["Score G (communities)"].astype(int) + df["Score G (percentile)"] = df["Score G"] + + df["Score I (communities)"] = ( (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.7) & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold) ) | ( (df[self.POVERTY_LESS_THAN_200_FPL_FIELD_NAME] > 0.50) & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold) ) - df["Score G"] = df["Score G (communities)"].astype(int) - df["Score G (percentile)"] = df["Score G"] + df["Score I"] = df["Score I (communities)"].astype(int) + df["Score I (percentile)"] = df["Score I"] df["Score H (communities)"] = ( (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8) @@ -631,12 +642,13 @@ class ScoreETL(ExtractTransformLoad): (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8) ) | (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20) - df["NMTC modified (communities)"] = ( + + df["Score K (communities)"] = ( (df[self.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD_NAME] < 0.8) - & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold) + & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2) ) | ( (df[self.POVERTY_LESS_THAN_100_FPL_FIELD_NAME] > 0.20) - & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold) + & (df[self.HIGH_SCHOOL_FIELD_NAME] > high_school_cutoff_threshold_2) ) return df diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 48fbe3e8..86a4cfcb 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -183,6 +183,7 @@ class PostScoreETL(ExtractTransformLoad): def _create_tile_data( self, score_county_state_merged_df: pd.DataFrame ) -> pd.DataFrame: + logger.info("Rounding Decimals") score_tiles = score_county_state_merged_df[ constants.TILES_SCORE_COLUMNS ] @@ -191,6 +192,7 @@ class PostScoreETL(ExtractTransformLoad): * len(constants.TILES_SCORE_FLOAT_COLUMNS), index=constants.TILES_SCORE_FLOAT_COLUMNS, ) + return score_tiles.round(decimals) def _create_downloadable_data(