diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index 36487964..59dafe2b 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -49,6 +49,11 @@ DATASET_LIST = [ "module_dir": "geocorr", "class_name": "GeoCorrETL", }, + { + "name": "child_opportunity_index", + "module_dir": "child_opportunity_index", + "class_name": "ChildOpportunityIndex", + }, { "name": "mapping_inequality", "module_dir": "mapping_inequality", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index f49681d8..0ce0e052 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -1,4 +1,6 @@ import functools +from collections import namedtuple + import pandas as pd from data_pipeline.etl.base import ExtractTransformLoad @@ -29,6 +31,7 @@ class ScoreETL(ExtractTransformLoad): self.persistent_poverty_df: pd.DataFrame self.census_decennial_df: pd.DataFrame self.census_2010_df: pd.DataFrame + self.child_opportunity_index_df: pd.DataFrame def extract(self) -> None: logger.info("Loading data sets from disk.") @@ -162,6 +165,19 @@ class ScoreETL(ExtractTransformLoad): low_memory=False, ) + # Load COI data + child_opportunity_index_csv = ( + constants.DATA_PATH + / "dataset" + / "child_opportunity_index" + / "usa.csv" + ) + self.child_opportunity_index_df = pd.read_csv( + child_opportunity_index_csv, + dtype={self.GEOID_TRACT_FIELD_NAME: "string"}, + low_memory=False, + ) + def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame: logger.info("Joining Census Tract dataframes") @@ -255,6 +271,7 @@ class ScoreETL(ExtractTransformLoad): self.census_acs_median_incomes_df, self.census_decennial_df, self.census_2010_df, + self.child_opportunity_index_df, ] # Sanity check each data frame before merging. 
@@ -323,6 +340,7 @@ class ScoreETL(ExtractTransformLoad): field_names.HIGH_SCHOOL_ED_FIELD, field_names.UNEMPLOYMENT_FIELD, field_names.MEDIAN_HOUSE_VALUE_FIELD, + field_names.COLLEGE_ATTENDANCE_FIELD, field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD, field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD, field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD, @@ -333,6 +351,9 @@ class ScoreETL(ExtractTransformLoad): field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010, field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009, field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009, + field_names.EXTREME_HEAT_FIELD, + field_names.HEALTHY_FOOD_FIELD, + field_names.IMPENETRABLE_SURFACES_FIELD, ] non_numeric_columns = [ @@ -340,7 +361,32 @@ class ScoreETL(ExtractTransformLoad): field_names.PERSISTENT_POVERTY_FIELD, ] - columns_to_keep = non_numeric_columns + numeric_columns + # For some columns, high values are "good", so we want to reverse the percentile + # so that high values are "bad" and any scoring logic can still check if it's + # >= some threshold. + # TODO: Add more fields here. + # https://github.com/usds/justice40-tool/issues/970 + ReversePercentile = namedtuple( + typename="ReversePercentile", + field_names=["field_name", "low_field_name"], + ) + reverse_percentiles = [ + # This list of namedtuples follows the format: + # <field_name>: <low_field_name>, + # for instance, 3rd grade reading level : Low 3rd grade reading level. + # This low field will not exist yet, it is only calculated for the + # percentile. 
+ ReversePercentile( + field_name=field_names.READING_FIELD, + low_field_name=field_names.LOW_READING_FIELD, + ) + ] + + columns_to_keep = ( + non_numeric_columns + + numeric_columns + + [rp.field_name for rp in reverse_percentiles] + ) df_copy = df[columns_to_keep].copy() @@ -375,6 +421,19 @@ class ScoreETL(ExtractTransformLoad): df_copy[col] - min_value ) / (max_value - min_value) + # Create reversed percentiles for these fields + for reverse_percentile in reverse_percentiles: + # Calculate reverse percentiles + # For instance, for 3rd grade reading level (score from 0-500), + # calculate reversed percentiles and give the result the name + # `Low 3rd grade reading level (percentile)`. + df_copy[ + f"{reverse_percentile.low_field_name}" + f"{field_names.PERCENTILE_FIELD_SUFFIX}" + ] = df_copy[reverse_percentile.field_name].rank( + pct=True, ascending=False + ) + # Special logic: create a combined population field. # We sometimes run analytics on "population", and this makes a single field # that is either the island area's population in 2009 or the state's diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 6a4310a9..51097cbe 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -114,6 +114,27 @@ class CensusACSETL(ExtractTransformLoad): ) self.HIGH_SCHOOL_ED_FIELD = "Percent individuals age 25 or over with less than high school degree" + # College attendance fields + self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED = ( + "B14004_001E" # Estimate!!Total + ) + self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school + self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school + self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = 
"B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school + self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school + + self.COLLEGE_ATTENDANCE_FIELDS = [ + self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED, + self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC, + self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE, + self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC, + self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE, + ] + + self.COLLEGE_ATTENDANCE_FIELD = ( + "Percent enrollment in college or graduate school" + ) + self.RE_FIELDS = [ "B02001_001E", "B02001_002E", @@ -156,15 +177,30 @@ class CensusACSETL(ExtractTransformLoad): self.STATE_GEOID_FIELD_NAME = "GEOID2" + self.COLUMNS_TO_KEEP = ( + [ + self.GEOID_TRACT_FIELD_NAME, + self.UNEMPLOYED_FIELD_NAME, + self.LINGUISTIC_ISOLATION_FIELD_NAME, + self.MEDIAN_INCOME_FIELD_NAME, + self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME, + self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME, + self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME, + self.MEDIAN_HOUSE_VALUE_FIELD_NAME, + self.HIGH_SCHOOL_ED_FIELD, + self.COLLEGE_ATTENDANCE_FIELD, + ] + + self.RE_OUTPUT_FIELDS + + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] + ) + self.df: pd.DataFrame def extract(self) -> None: # Define the variables to retrieve variables = ( [ - # Income field self.MEDIAN_INCOME_FIELD, - # House value self.MEDIAN_HOUSE_VALUE_FIELD, ] + self.EMPLOYMENT_FIELDS @@ -172,6 +208,7 @@ class CensusACSETL(ExtractTransformLoad): + self.POVERTY_FIELDS + self.EDUCATIONAL_FIELDS + self.RE_FIELDS + + self.COLLEGE_ATTENDANCE_FIELDS ) self.df = retrieve_census_acs_data( @@ -308,6 +345,14 @@ class CensusACSETL(ExtractTransformLoad): df["B03003_003E"] / df["B03003_001E"] ) + # Calculate college attendance: + df[self.COLLEGE_ATTENDANCE_FIELD] = ( + df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC] + + df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE] + + 
df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC] + + df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE] + ) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED] + # Save results to self. self.df = df @@ -317,23 +362,7 @@ class CensusACSETL(ExtractTransformLoad): # mkdir census self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True) - columns_to_include = ( - [ - self.GEOID_TRACT_FIELD_NAME, - self.UNEMPLOYED_FIELD_NAME, - self.LINGUISTIC_ISOLATION_FIELD_NAME, - self.MEDIAN_INCOME_FIELD_NAME, - self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME, - self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME, - self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME, - self.MEDIAN_HOUSE_VALUE_FIELD_NAME, - self.HIGH_SCHOOL_ED_FIELD, - ] - + self.RE_OUTPUT_FIELDS - + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] - ) - - self.df[columns_to_include].to_csv( + self.df[self.COLUMNS_TO_KEEP].to_csv( path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False ) diff --git a/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/README.md b/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/README.md new file mode 100644 index 00000000..e69de29b diff --git a/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/__init__.py b/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/etl.py b/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/etl.py new file mode 100644 index 00000000..ba26b165 --- /dev/null +++ b/data/data-pipeline/data_pipeline/etl/sources/child_opportunity_index/etl.py @@ -0,0 +1,120 @@ +from pathlib import Path +import pandas as pd + +from data_pipeline.etl.base import ExtractTransformLoad +from data_pipeline.score import field_names +from data_pipeline.utils import get_module_logger, unzip_file_from_url + +logger = get_module_logger(__name__) + + 
+class ChildOpportunityIndex(ExtractTransformLoad): + """ETL Child Opportunity Index data. + + COI compiles a number of useful data sets. In the future, we could pull these + data sets in directly from their original creators. + + Data dictionary available when you download zip from `self.COI_FILE_URL`. + + Data source overview: https://data.diversitydatakids.org/dataset/coi20-child-opportunity-index-2-0-database. + + Full technical documents: https://www.diversitydatakids.org/sites/default/files/2020-02/ddk_coi2.0_technical_documentation_20200212.pdf. + + Github repo: https://github.com/diversitydatakids/COI/ + + """ + + def __init__(self): + self.COI_FILE_URL = ( + "https://data.diversitydatakids.org/datastore/zip/f16fff12-b1e5-4f60-85d3-" + "3a0ededa30a0?format=csv" + ) + + self.OUTPUT_PATH: Path = ( + self.DATA_PATH / "dataset" / "child_opportunity_index" + ) + + self.TRACT_INPUT_COLUMN_NAME = "geoid" + self.EXTREME_HEAT_INPUT_FIELD = "HE_HEAT" + self.HEALTHY_FOOD_INPUT_FIELD = "HE_FOOD" + self.IMPENETRABLE_SURFACES_INPUT_FIELD = "HE_GREEN" + self.READING_INPUT_FIELD = "ED_READING" + + # Constants for output + self.COLUMNS_TO_KEEP = [ + self.GEOID_TRACT_FIELD_NAME, + field_names.EXTREME_HEAT_FIELD, + field_names.HEALTHY_FOOD_FIELD, + field_names.IMPENETRABLE_SURFACES_FIELD, + field_names.READING_FIELD, + ] + + self.raw_df: pd.DataFrame + self.output_df: pd.DataFrame + + def extract(self) -> None: + logger.info("Starting 51MB data download.") + + unzip_file_from_url( + file_url=self.COI_FILE_URL, + download_path=self.TMP_PATH, + unzipped_file_path=self.TMP_PATH / "child_opportunity_index", + ) + + self.raw_df = pd.read_csv( + filepath_or_buffer=self.TMP_PATH + / "child_opportunity_index" + / "raw.csv", + # The following need to remain as strings for all of their digits, not get + # converted to numbers. 
+ dtype={ + self.TRACT_INPUT_COLUMN_NAME: "string", + }, + low_memory=False, + ) + + def transform(self) -> None: + logger.info("Starting transforms.") + + output_df = self.raw_df.rename( + columns={ + self.TRACT_INPUT_COLUMN_NAME: self.GEOID_TRACT_FIELD_NAME, + self.EXTREME_HEAT_INPUT_FIELD: field_names.EXTREME_HEAT_FIELD, + self.HEALTHY_FOOD_INPUT_FIELD: field_names.HEALTHY_FOOD_FIELD, + self.IMPENETRABLE_SURFACES_INPUT_FIELD: field_names.IMPENETRABLE_SURFACES_FIELD, + self.READING_INPUT_FIELD: field_names.READING_FIELD, + } + ) + + # Sanity check the tract field. + if len(output_df[self.GEOID_TRACT_FIELD_NAME].str.len().unique()) != 1: + raise ValueError("Wrong tract length.") + + # COI has two rows per tract: one for 2010 and one for 2015. + output_df = output_df[output_df["year"] == 2015] + + # Convert percents from 0-100 to 0-1 to standardize with our other fields. + percent_fields_to_convert = [ + field_names.HEALTHY_FOOD_FIELD, + field_names.IMPENETRABLE_SURFACES_FIELD, + ] + + for percent_field_to_convert in percent_fields_to_convert: + output_df[percent_field_to_convert] = ( + output_df[percent_field_to_convert] / 100 + ) + + self.output_df = output_df + + def validate(self) -> None: + logger.info("Validating data.") + + pass + + def load(self) -> None: + logger.info("Saving CSV") + + self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True) + self.output_df[self.COLUMNS_TO_KEEP].to_csv( + path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False + ) diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 1bbcb37b..4c47a555 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -63,6 +63,8 @@ MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)" PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract" AMI_FIELD = "Area Median Income (State or metropolitan)" +COLLEGE_ATTENDANCE_FIELD = "Percent 
enrollment in college or graduate school" + # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" EXPECTED_BUILDING_LOSS_RATE_FIELD = ( @@ -206,30 +208,63 @@ HOLC_GRADE_D_TRACT_50_PERCENT_FIELD: str = "Tract is >50% HOLC Grade D" HOLC_GRADE_D_TRACT_75_PERCENT_FIELD: str = "Tract is >75% HOLC Grade D" +# Child Opportunity Index data +# Summer days with maximum temperature above 90F. +EXTREME_HEAT_FIELD = "Summer days above 90F" + +# Percentage households without a car located further than a half-mile from the +# nearest supermarket. +HEALTHY_FOOD_FIELD = "Percent low access to healthy food" + +# Percentage impenetrable surface areas such as rooftops, roads or parking lots. +IMPENETRABLE_SURFACES_FIELD = "Percent impenetrable surface areas" + +# Percentage third graders scoring proficient on standardized reading tests, +# converted to NAEP scale score points. +READING_FIELD = "Third grade reading proficiency" +LOW_READING_FIELD = "Low third grade reading proficiency" + +# Names for individual factors being exceeded # Climate Change EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected population loss rate and is low income" EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected agriculture loss rate and is low income" EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected building loss rate and is low income" +EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = ( + f"At or above the {PERCENTILE}th percentile for summer days above 90F and " + f"the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th " + f"percentile and is low income" +) # Clean energy and efficiency PM25_EXPOSURE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for PM2.5 exposure and is low income" ENERGY_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for energy burden and is low 
income" + # Clean transportation DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for diesel particulate matter and is low income" TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for traffic proximity and is low income" + # Affordable and Sustainable Housing -LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD = ( +LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = ( f"At or above the {PERCENTILE}th percentile for lead paint and" - " the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th percentile and is low income" + f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th " + f"percentile and is low income" ) HOUSING_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for housing burden and is low income" + +IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = ( + f"At or above the {PERCENTILE}th percentile for impenetrable surfaces and is low " + f"income" +) + # Remediation and Reduction of Legacy Pollution RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income" SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income" HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income" + # Critical Clean Water and Waste Infrastructure WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income" -# Health Burden + +# Health Burdens DIABETES_LOW_INCOME_FIELD = ( f"At or above the {PERCENTILE}th percentile for diabetes and is low income" ) @@ -240,25 +275,35 @@ HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income" +HEALTHY_FOOD_LOW_INCOME_FIELD = ( + f"At or 
above the {PERCENTILE}th percentile for low " + f"access to healthy food and is low income" +) + # Workforce UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = ( f"At or above the {PERCENTILE}th percentile for unemployment" - " and low HS education" + " and has low HS education" ) LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = ( f"At or above the {PERCENTILE}th percentile for households in linguistic isolation" - " and low HS education" + " and has low HS education" ) POVERTY_LOW_HS_EDUCATION_FIELD = ( f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level" - " and low HS education" + " and has low HS education" +) + +LOW_READING_LOW_HS_EDUCATION_FIELD = ( + f"At or above the {PERCENTILE}th percentile for low 3rd grade reading proficiency" + " and has low HS education" ) MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = ( f"At or below the {PERCENTILE}th percentile for median income" - " and low HS education" + " and has low HS education" ) THRESHOLD_COUNT = "Total threshold criteria exceeded" diff --git a/data/data-pipeline/data_pipeline/score/score_l.py b/data/data-pipeline/data_pipeline/score/score_l.py index 35c56a65..7653ce46 100644 --- a/data/data-pipeline/data_pipeline/score/score_l.py +++ b/data/data-pipeline/data_pipeline/score/score_l.py @@ -177,6 +177,8 @@ class ScoreL(Score): field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD, field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD, field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD, + field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD, + field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD, ] expected_population_loss_threshold = ( @@ -203,6 +205,28 @@ class ScoreL(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) + extreme_heat_median_home_value_threshold = ( + self.df[ + field_names.EXTREME_HEAT_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) & ( + self.df[ + field_names.MEDIAN_HOUSE_VALUE_FIELD + + 
field_names.PERCENTILE_FIELD_SUFFIX + ] + <= self.MEDIAN_HOUSE_VALUE_THRESHOLD + ) + + impenetrable_surfaces_threshold = ( + self.df[ + field_names.IMPENETRABLE_SURFACES_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = ( expected_population_loss_threshold & self.df[field_names.FPL_200_SERIES] @@ -218,6 +242,18 @@ class ScoreL(Score): & self.df[field_names.FPL_200_SERIES] ) + self.df[ + field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD + ] = ( + extreme_heat_median_home_value_threshold + & self.df[field_names.FPL_200_SERIES] + ) + + self.df[field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD] = ( + impenetrable_surfaces_threshold + & self.df[field_names.FPL_200_SERIES] + ) + self._increment_total_eligibility_exceeded(climate_eligibility_columns) return self.df[climate_eligibility_columns].any(axis="columns") @@ -320,11 +356,11 @@ class ScoreL(Score): # poverty level. 
Source: Census's American Community Survey] housing_eligibility_columns = [ - field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD, + field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD, field_names.HOUSING_BURDEN_LOW_INCOME_FIELD, ] - lead_paint_median_house_hold_threshold = ( + lead_paint_median_home_value_threshold = ( self.df[ field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX @@ -347,8 +383,8 @@ class ScoreL(Score): ) # series by series indicators - self.df[field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD] = ( - lead_paint_median_house_hold_threshold + self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = ( + lead_paint_median_home_value_threshold & self.df[field_names.FPL_200_SERIES] ) @@ -449,6 +485,7 @@ class ScoreL(Score): field_names.DIABETES_LOW_INCOME_FIELD, field_names.ASTHMA_LOW_INCOME_FIELD, field_names.HEART_DISEASE_LOW_INCOME_FIELD, + field_names.HEALTHY_FOOD_LOW_INCOME_FIELD, field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD, ] @@ -474,6 +511,14 @@ class ScoreL(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) + healthy_food_threshold = ( + self.df[ + field_names.HEALTHY_FOOD_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + life_expectancy_threshold = ( self.df[ field_names.LIFE_EXPECTANCY_FIELD @@ -496,6 +541,9 @@ class ScoreL(Score): self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = ( life_expectancy_threshold & self.df[field_names.FPL_200_SERIES] ) + self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = ( + healthy_food_threshold & self.df[field_names.FPL_200_SERIES] + ) self._increment_total_eligibility_exceeded(health_eligibility_columns) @@ -513,6 +561,15 @@ class ScoreL(Score): # Where the high school degree achievement rates for adults 25 years and older is less than 95% # (necessary to screen out university block groups) + # Workforce criteria for states fields. 
+ workforce_eligibility_columns = [ + field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD, + field_names.POVERTY_LOW_HS_EDUCATION_FIELD, + field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD, + field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, + field_names.LOW_READING_LOW_HS_EDUCATION_FIELD, + ] + high_scool_achievement_rate_threshold = ( self.df[field_names.HIGH_SCHOOL_ED_FIELD] >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD @@ -552,6 +609,14 @@ class ScoreL(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) + low_reading_threshold = ( + self.df[ + field_names.LOW_READING_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = ( linguistic_isolation_threshold & high_scool_achievement_rate_threshold @@ -569,15 +634,9 @@ class ScoreL(Score): unemployment_threshold & high_scool_achievement_rate_threshold ) - # Workforce criteria for states fields that create indicator columns - # for each tract in order to indicate whether they met any of the four - # criteria. We will used this create individual indicator columns. - workforce_eligibility_columns = [ - field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD, - field_names.POVERTY_LOW_HS_EDUCATION_FIELD, - field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD, - field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, - ] + self.df[field_names.LOW_READING_LOW_HS_EDUCATION_FIELD] = ( + low_reading_threshold & high_scool_achievement_rate_threshold + ) workforce_combined_criteria_for_states = self.df[ workforce_eligibility_columns