mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
Issue 954: Add various data sources from Child Opportunity Index (#986)
* Adds four fields: * Summer days above 90F * Percent low access to healthy food * Percent impenetrable surface areas * Low third grade reading proficiency * Each of these four gets added into Definition L in various factors. * Additionally, I add college attendance fields to the ETL for Census ACS. * This PR also introduces the notion of "reverse percentiles", relevant to ticket #970.
This commit is contained in:
parent
df564658a5
commit
5a6d6d8557
8 changed files with 357 additions and 40 deletions
|
@ -49,6 +49,11 @@ DATASET_LIST = [
|
||||||
"module_dir": "geocorr",
|
"module_dir": "geocorr",
|
||||||
"class_name": "GeoCorrETL",
|
"class_name": "GeoCorrETL",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "child_opportunity_index",
|
||||||
|
"module_dir": "child_opportunity_index",
|
||||||
|
"class_name": "ChildOpportunityIndex",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "mapping_inequality",
|
"name": "mapping_inequality",
|
||||||
"module_dir": "mapping_inequality",
|
"module_dir": "mapping_inequality",
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
import functools
|
import functools
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
|
@ -29,6 +31,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
self.persistent_poverty_df: pd.DataFrame
|
self.persistent_poverty_df: pd.DataFrame
|
||||||
self.census_decennial_df: pd.DataFrame
|
self.census_decennial_df: pd.DataFrame
|
||||||
self.census_2010_df: pd.DataFrame
|
self.census_2010_df: pd.DataFrame
|
||||||
|
self.child_opportunity_index_df: pd.DataFrame
|
||||||
|
|
||||||
def extract(self) -> None:
|
def extract(self) -> None:
|
||||||
logger.info("Loading data sets from disk.")
|
logger.info("Loading data sets from disk.")
|
||||||
|
@ -162,6 +165,19 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
low_memory=False,
|
low_memory=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Load COI data
|
||||||
|
child_opportunity_index_csv = (
|
||||||
|
constants.DATA_PATH
|
||||||
|
/ "dataset"
|
||||||
|
/ "child_opportunity_index"
|
||||||
|
/ "usa.csv"
|
||||||
|
)
|
||||||
|
self.child_opportunity_index_df = pd.read_csv(
|
||||||
|
child_opportunity_index_csv,
|
||||||
|
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
||||||
|
low_memory=False,
|
||||||
|
)
|
||||||
|
|
||||||
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
|
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
|
||||||
logger.info("Joining Census Tract dataframes")
|
logger.info("Joining Census Tract dataframes")
|
||||||
|
|
||||||
|
@ -255,6 +271,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
self.census_acs_median_incomes_df,
|
self.census_acs_median_incomes_df,
|
||||||
self.census_decennial_df,
|
self.census_decennial_df,
|
||||||
self.census_2010_df,
|
self.census_2010_df,
|
||||||
|
self.child_opportunity_index_df,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Sanity check each data frame before merging.
|
# Sanity check each data frame before merging.
|
||||||
|
@ -323,6 +340,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||||
field_names.UNEMPLOYMENT_FIELD,
|
field_names.UNEMPLOYMENT_FIELD,
|
||||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||||
|
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
||||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
|
||||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
|
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
|
||||||
|
@ -333,6 +351,9 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||||
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
|
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
|
||||||
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
||||||
|
field_names.EXTREME_HEAT_FIELD,
|
||||||
|
field_names.HEALTHY_FOOD_FIELD,
|
||||||
|
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
non_numeric_columns = [
|
non_numeric_columns = [
|
||||||
|
@ -340,7 +361,32 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.PERSISTENT_POVERTY_FIELD,
|
field_names.PERSISTENT_POVERTY_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
columns_to_keep = non_numeric_columns + numeric_columns
|
# For some columns, high values are "good", so we want to reverse the percentile
|
||||||
|
# so that high values are "bad" and any scoring logic can still check if it's
|
||||||
|
# >= some threshold.
|
||||||
|
# TODO: Add more fields here.
|
||||||
|
# https://github.com/usds/justice40-tool/issues/970
|
||||||
|
ReversePercentile = namedtuple(
|
||||||
|
typename="ReversePercentile",
|
||||||
|
field_names=["field_name", "low_field_name"],
|
||||||
|
)
|
||||||
|
reverse_percentiles = [
|
||||||
|
# Each `ReversePercentile` entry in this list follows the format:
|
||||||
|
# <field name> : <field name for low values>
|
||||||
|
# for instance, 3rd grade reading level : Low 3rd grade reading level.
|
||||||
|
# This low field will not exist yet, it is only calculated for the
|
||||||
|
# percentile.
|
||||||
|
ReversePercentile(
|
||||||
|
field_name=field_names.READING_FIELD,
|
||||||
|
low_field_name=field_names.LOW_READING_FIELD,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
columns_to_keep = (
|
||||||
|
non_numeric_columns
|
||||||
|
+ numeric_columns
|
||||||
|
+ [rp.field_name for rp in reverse_percentiles]
|
||||||
|
)
|
||||||
|
|
||||||
df_copy = df[columns_to_keep].copy()
|
df_copy = df[columns_to_keep].copy()
|
||||||
|
|
||||||
|
@ -375,6 +421,19 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
df_copy[col] - min_value
|
df_copy[col] - min_value
|
||||||
) / (max_value - min_value)
|
) / (max_value - min_value)
|
||||||
|
|
||||||
|
# Create reversed percentiles for these fields
|
||||||
|
for reverse_percentile in reverse_percentiles:
|
||||||
|
# Calculate reverse percentiles
|
||||||
|
# For instance, for 3rd grade reading level (score from 0-500),
|
||||||
|
# calculate reversed percentiles and give the result the name
|
||||||
|
# `Low 3rd grade reading level (percentile)`.
|
||||||
|
df_copy[
|
||||||
|
f"{reverse_percentile.low_field_name}"
|
||||||
|
f"{field_names.PERCENTILE_FIELD_SUFFIX}"
|
||||||
|
] = df_copy[reverse_percentile.field_name].rank(
|
||||||
|
pct=True, ascending=False
|
||||||
|
)
|
||||||
|
|
||||||
# Special logic: create a combined population field.
|
# Special logic: create a combined population field.
|
||||||
# We sometimes run analytics on "population", and this makes a single field
|
# We sometimes run analytics on "population", and this makes a single field
|
||||||
# that is either the island area's population in 2009 or the state's
|
# that is either the island area's population in 2009 or the state's
|
||||||
|
|
|
@ -114,6 +114,27 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
self.HIGH_SCHOOL_ED_FIELD = "Percent individuals age 25 or over with less than high school degree"
|
self.HIGH_SCHOOL_ED_FIELD = "Percent individuals age 25 or over with less than high school degree"
|
||||||
|
|
||||||
|
# College attendance fields
|
||||||
|
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED = (
|
||||||
|
"B14004_001E" # Estimate!!Total
|
||||||
|
)
|
||||||
|
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school
|
||||||
|
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school
|
||||||
|
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = "B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school
|
||||||
|
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school
|
||||||
|
|
||||||
|
self.COLLEGE_ATTENDANCE_FIELDS = [
|
||||||
|
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED,
|
||||||
|
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC,
|
||||||
|
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE,
|
||||||
|
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC,
|
||||||
|
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE,
|
||||||
|
]
|
||||||
|
|
||||||
|
self.COLLEGE_ATTENDANCE_FIELD = (
|
||||||
|
"Percent enrollment in college or graduate school"
|
||||||
|
)
|
||||||
|
|
||||||
self.RE_FIELDS = [
|
self.RE_FIELDS = [
|
||||||
"B02001_001E",
|
"B02001_001E",
|
||||||
"B02001_002E",
|
"B02001_002E",
|
||||||
|
@ -156,15 +177,30 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
|
|
||||||
self.STATE_GEOID_FIELD_NAME = "GEOID2"
|
self.STATE_GEOID_FIELD_NAME = "GEOID2"
|
||||||
|
|
||||||
|
self.COLUMNS_TO_KEEP = (
|
||||||
|
[
|
||||||
|
self.GEOID_TRACT_FIELD_NAME,
|
||||||
|
self.UNEMPLOYED_FIELD_NAME,
|
||||||
|
self.LINGUISTIC_ISOLATION_FIELD_NAME,
|
||||||
|
self.MEDIAN_INCOME_FIELD_NAME,
|
||||||
|
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
|
||||||
|
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
|
||||||
|
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
||||||
|
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
||||||
|
self.HIGH_SCHOOL_ED_FIELD,
|
||||||
|
self.COLLEGE_ATTENDANCE_FIELD,
|
||||||
|
]
|
||||||
|
+ self.RE_OUTPUT_FIELDS
|
||||||
|
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
|
||||||
|
)
|
||||||
|
|
||||||
self.df: pd.DataFrame
|
self.df: pd.DataFrame
|
||||||
|
|
||||||
def extract(self) -> None:
|
def extract(self) -> None:
|
||||||
# Define the variables to retrieve
|
# Define the variables to retrieve
|
||||||
variables = (
|
variables = (
|
||||||
[
|
[
|
||||||
# Income field
|
|
||||||
self.MEDIAN_INCOME_FIELD,
|
self.MEDIAN_INCOME_FIELD,
|
||||||
# House value
|
|
||||||
self.MEDIAN_HOUSE_VALUE_FIELD,
|
self.MEDIAN_HOUSE_VALUE_FIELD,
|
||||||
]
|
]
|
||||||
+ self.EMPLOYMENT_FIELDS
|
+ self.EMPLOYMENT_FIELDS
|
||||||
|
@ -172,6 +208,7 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
+ self.POVERTY_FIELDS
|
+ self.POVERTY_FIELDS
|
||||||
+ self.EDUCATIONAL_FIELDS
|
+ self.EDUCATIONAL_FIELDS
|
||||||
+ self.RE_FIELDS
|
+ self.RE_FIELDS
|
||||||
|
+ self.COLLEGE_ATTENDANCE_FIELDS
|
||||||
)
|
)
|
||||||
|
|
||||||
self.df = retrieve_census_acs_data(
|
self.df = retrieve_census_acs_data(
|
||||||
|
@ -308,6 +345,14 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
df["B03003_003E"] / df["B03003_001E"]
|
df["B03003_003E"] / df["B03003_001E"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Calculate college attendance:
|
||||||
|
df[self.COLLEGE_ATTENDANCE_FIELD] = (
|
||||||
|
df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC]
|
||||||
|
+ df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE]
|
||||||
|
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC]
|
||||||
|
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
|
||||||
|
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
|
||||||
|
|
||||||
# Save results to self.
|
# Save results to self.
|
||||||
self.df = df
|
self.df = df
|
||||||
|
|
||||||
|
@ -317,23 +362,7 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
# mkdir census
|
# mkdir census
|
||||||
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
|
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
columns_to_include = (
|
self.df[self.COLUMNS_TO_KEEP].to_csv(
|
||||||
[
|
|
||||||
self.GEOID_TRACT_FIELD_NAME,
|
|
||||||
self.UNEMPLOYED_FIELD_NAME,
|
|
||||||
self.LINGUISTIC_ISOLATION_FIELD_NAME,
|
|
||||||
self.MEDIAN_INCOME_FIELD_NAME,
|
|
||||||
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
|
|
||||||
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
|
|
||||||
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
|
||||||
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
|
||||||
self.HIGH_SCHOOL_ED_FIELD,
|
|
||||||
]
|
|
||||||
+ self.RE_OUTPUT_FIELDS
|
|
||||||
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
|
|
||||||
)
|
|
||||||
|
|
||||||
self.df[columns_to_include].to_csv(
|
|
||||||
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
|
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,120 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
|
from data_pipeline.score import field_names
|
||||||
|
from data_pipeline.utils import get_module_logger, unzip_file_from_url
|
||||||
|
|
||||||
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ChildOpportunityIndex(ExtractTransformLoad):
    """ETL Child Opportunity Index data.

    COI compiles a number of useful data sets. In the future, we could pull these
    data sets in directly from their original creators.

    Data dictionary available when you download zip from `self.COI_FILE_URL`.

    Data source overview: https://data.diversitydatakids.org/dataset/coi20-child-opportunity-index-2-0-database.

    Full technical documents: https://www.diversitydatakids.org/sites/default/files/2020-02/ddk_coi2.0_technical_documentation_20200212.pdf.

    Github repo: https://github.com/diversitydatakids/COI/
    """

    def __init__(self):
        """Set the download URL, output location, and column-name constants."""
        self.COI_FILE_URL = (
            "https://data.diversitydatakids.org/datastore/zip/f16fff12-b1e5-4f60-85d3-"
            "3a0ededa30a0?format=csv"
        )

        # NOTE(review): `DATA_PATH`, `TMP_PATH` and `GEOID_TRACT_FIELD_NAME` are
        # not defined in this class; presumably they come from
        # `ExtractTransformLoad` -- confirm against the base class.
        self.OUTPUT_PATH: Path = (
            self.DATA_PATH / "dataset" / "child_opportunity_index"
        )

        # Column names as they appear in the raw COI CSV.
        self.TRACT_INPUT_COLUMN_NAME = "geoid"
        self.EXTREME_HEAT_INPUT_FIELD = "HE_HEAT"
        self.HEALTHY_FOOD_INPUT_FIELD = "HE_FOOD"
        self.IMPENETRABLE_SURFACES_INPUT_FIELD = "HE_GREEN"
        self.READING_INPUT_FIELD = "ED_READING"

        # Constants for output
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            field_names.EXTREME_HEAT_FIELD,
            field_names.HEALTHY_FOOD_FIELD,
            field_names.IMPENETRABLE_SURFACES_FIELD,
            field_names.READING_FIELD,
        ]

        # Populated by `extract` and `transform` respectively.
        self.raw_df: pd.DataFrame
        self.output_df: pd.DataFrame

    def extract(self) -> None:
        """Download and unzip the COI archive, then read it into `self.raw_df`."""
        logger.info("Starting 51MB data download.")

        unzip_file_from_url(
            file_url=self.COI_FILE_URL,
            download_path=self.TMP_PATH,
            unzipped_file_path=self.TMP_PATH / "child_opportunity_index",
        )

        self.raw_df = pd.read_csv(
            filepath_or_buffer=self.TMP_PATH
            / "child_opportunity_index"
            / "raw.csv",
            # The following need to remain as strings for all of their digits, not get
            # converted to numbers.
            dtype={
                self.TRACT_INPUT_COLUMN_NAME: "string",
            },
            low_memory=False,
        )

    def transform(self) -> None:
        """Rename columns, keep only 2015 rows, and rescale percent fields.

        The result is stored in `self.output_df`.

        Raises:
            ValueError: If the tract IDs do not all share a single string length.
        """
        logger.info("Starting transforms.")

        output_df = self.raw_df.rename(
            columns={
                self.TRACT_INPUT_COLUMN_NAME: self.GEOID_TRACT_FIELD_NAME,
                self.EXTREME_HEAT_INPUT_FIELD: field_names.EXTREME_HEAT_FIELD,
                self.HEALTHY_FOOD_INPUT_FIELD: field_names.HEALTHY_FOOD_FIELD,
                self.IMPENETRABLE_SURFACES_INPUT_FIELD: field_names.IMPENETRABLE_SURFACES_FIELD,
                self.READING_INPUT_FIELD: field_names.READING_FIELD,
            }
        )

        # Sanity check the tract field: every tract ID must have the same length.
        if len(output_df[self.GEOID_TRACT_FIELD_NAME].str.len().unique()) != 1:
            raise ValueError("Wrong tract length.")

        # COI has two rows per tract: one for 2010 and one for 2015.
        # Take an explicit copy so the column assignments below operate on an
        # independent frame rather than on a filtered slice of the renamed
        # frame (which triggers pandas' SettingWithCopyWarning).
        # NOTE(review): assumes the raw "year" column parses as an integer -- confirm.
        output_df = output_df[output_df["year"] == 2015].copy()

        # Convert percents from 0-100 to 0-1 to standardize with our other fields.
        percent_fields_to_convert = [
            field_names.HEALTHY_FOOD_FIELD,
            field_names.IMPENETRABLE_SURFACES_FIELD,
        ]

        for percent_field_to_convert in percent_fields_to_convert:
            output_df[percent_field_to_convert] = (
                output_df[percent_field_to_convert] / 100
            )

        self.output_df = output_df

    def validate(self) -> None:
        """Log that validation ran; no checks are implemented yet."""
        logger.info("Validating data.")

    def load(self) -> None:
        """Write the kept columns of `self.output_df` to `usa.csv` in `OUTPUT_PATH`."""
        logger.info("Saving CSV")

        self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
        self.output_df[self.COLUMNS_TO_KEEP].to_csv(
            path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
        )
|
|
@ -63,6 +63,8 @@ MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)"
|
||||||
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
||||||
AMI_FIELD = "Area Median Income (State or metropolitan)"
|
AMI_FIELD = "Area Median Income (State or metropolitan)"
|
||||||
|
|
||||||
|
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
|
||||||
|
|
||||||
# Climate
|
# Climate
|
||||||
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
||||||
EXPECTED_BUILDING_LOSS_RATE_FIELD = (
|
EXPECTED_BUILDING_LOSS_RATE_FIELD = (
|
||||||
|
@ -206,30 +208,63 @@ HOLC_GRADE_D_TRACT_50_PERCENT_FIELD: str = "Tract is >50% HOLC Grade D"
|
||||||
HOLC_GRADE_D_TRACT_75_PERCENT_FIELD: str = "Tract is >75% HOLC Grade D"
|
HOLC_GRADE_D_TRACT_75_PERCENT_FIELD: str = "Tract is >75% HOLC Grade D"
|
||||||
|
|
||||||
|
|
||||||
|
# Child Opportunity Index data
|
||||||
|
# Summer days with maximum temperature above 90F.
|
||||||
|
EXTREME_HEAT_FIELD = "Summer days above 90F"
|
||||||
|
|
||||||
|
# Percentage households without a car located further than a half-mile from the
|
||||||
|
# nearest supermarket.
|
||||||
|
HEALTHY_FOOD_FIELD = "Percent low access to healthy food"
|
||||||
|
|
||||||
|
# Percentage impenetrable surface areas such as rooftops, roads or parking lots.
|
||||||
|
IMPENETRABLE_SURFACES_FIELD = "Percent impenetrable surface areas"
|
||||||
|
|
||||||
|
# Percentage third graders scoring proficient on standardized reading tests,
|
||||||
|
# converted to NAEP scale score points.
|
||||||
|
READING_FIELD = "Third grade reading proficiency"
|
||||||
|
LOW_READING_FIELD = "Low third grade reading proficiency"
|
||||||
|
|
||||||
|
# Names for individual factors being exceeded
|
||||||
# Climate Change
|
# Climate Change
|
||||||
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected population loss rate and is low income"
|
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected population loss rate and is low income"
|
||||||
EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected agriculture loss rate and is low income"
|
EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected agriculture loss rate and is low income"
|
||||||
EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected building loss rate and is low income"
|
EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for expected building loss rate and is low income"
|
||||||
|
EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for summer days above 90F and "
|
||||||
|
f"the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
|
||||||
|
f"percentile and is low income"
|
||||||
|
)
|
||||||
|
|
||||||
# Clean energy and efficiency
|
# Clean energy and efficiency
|
||||||
PM25_EXPOSURE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for PM2.5 exposure and is low income"
|
PM25_EXPOSURE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for PM2.5 exposure and is low income"
|
||||||
ENERGY_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for energy burden and is low income"
|
ENERGY_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for energy burden and is low income"
|
||||||
|
|
||||||
# Clean transportation
|
# Clean transportation
|
||||||
DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for diesel particulate matter and is low income"
|
DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for diesel particulate matter and is low income"
|
||||||
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for traffic proximity and is low income"
|
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for traffic proximity and is low income"
|
||||||
|
|
||||||
# Affordable and Sustainable Housing
|
# Affordable and Sustainable Housing
|
||||||
LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD = (
|
LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for lead paint and"
|
f"At or above the {PERCENTILE}th percentile for lead paint and"
|
||||||
" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th percentile and is low income"
|
f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
|
||||||
|
f"percentile and is low income"
|
||||||
)
|
)
|
||||||
HOUSING_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for housing burden and is low income"
|
HOUSING_BURDEN_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for housing burden and is low income"
|
||||||
|
|
||||||
|
IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for impenetrable surfaces and is low "
|
||||||
|
f"income"
|
||||||
|
)
|
||||||
|
|
||||||
# Remediation and Reduction of Legacy Pollution
|
# Remediation and Reduction of Legacy Pollution
|
||||||
RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income"
|
RMP_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to RMP sites and is low income"
|
||||||
SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income"
|
SUPERFUND_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to superfund sites and is low income"
|
||||||
HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income"
|
HAZARDOUS_WASTE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for proximity to hazardous waste facilities and is low income"
|
||||||
|
|
||||||
# Critical Clean Water and Waste Infrastructure
|
# Critical Clean Water and Waste Infrastructure
|
||||||
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income"
|
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for wastewater discharge and is low income"
|
||||||
# Health Burden
|
|
||||||
|
# Health Burdens
|
||||||
DIABETES_LOW_INCOME_FIELD = (
|
DIABETES_LOW_INCOME_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for diabetes and is low income"
|
f"At or above the {PERCENTILE}th percentile for diabetes and is low income"
|
||||||
)
|
)
|
||||||
|
@ -240,25 +275,35 @@ HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for
|
||||||
|
|
||||||
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
|
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
|
||||||
|
|
||||||
|
HEALTHY_FOOD_LOW_INCOME_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for low "
|
||||||
|
f"access to healthy food and is low income"
|
||||||
|
)
|
||||||
|
|
||||||
# Workforce
|
# Workforce
|
||||||
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for unemployment"
|
f"At or above the {PERCENTILE}th percentile for unemployment"
|
||||||
" and low HS education"
|
" and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = (
|
LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for households in linguistic isolation"
|
f"At or above the {PERCENTILE}th percentile for households in linguistic isolation"
|
||||||
" and low HS education"
|
" and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
POVERTY_LOW_HS_EDUCATION_FIELD = (
|
POVERTY_LOW_HS_EDUCATION_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
|
f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
|
||||||
" and low HS education"
|
" and has low HS education"
|
||||||
|
)
|
||||||
|
|
||||||
|
LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for low 3rd grade reading proficiency"
|
||||||
|
" and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||||
f"At or below the {PERCENTILE}th percentile for median income"
|
f"At or below the {PERCENTILE}th percentile for median income"
|
||||||
" and low HS education"
|
" and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
THRESHOLD_COUNT = "Total threshold criteria exceeded"
|
THRESHOLD_COUNT = "Total threshold criteria exceeded"
|
||||||
|
|
|
@ -177,6 +177,8 @@ class ScoreL(Score):
|
||||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||||
|
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||||
|
field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
expected_population_loss_threshold = (
|
expected_population_loss_threshold = (
|
||||||
|
@ -203,6 +205,28 @@ class ScoreL(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
|
extreme_heat_median_home_value_threshold = (
|
||||||
|
self.df[
|
||||||
|
field_names.EXTREME_HEAT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
|
) & (
|
||||||
|
self.df[
|
||||||
|
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
|
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
|
||||||
|
)
|
||||||
|
|
||||||
|
impenetrable_surfaces_threshold = (
|
||||||
|
self.df[
|
||||||
|
field_names.IMPENETRABLE_SURFACES_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
|
)
|
||||||
|
|
||||||
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
|
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
|
||||||
expected_population_loss_threshold
|
expected_population_loss_threshold
|
||||||
& self.df[field_names.FPL_200_SERIES]
|
& self.df[field_names.FPL_200_SERIES]
|
||||||
|
@ -218,6 +242,18 @@ class ScoreL(Score):
|
||||||
& self.df[field_names.FPL_200_SERIES]
|
& self.df[field_names.FPL_200_SERIES]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.df[
|
||||||
|
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD
|
||||||
|
] = (
|
||||||
|
extreme_heat_median_home_value_threshold
|
||||||
|
& self.df[field_names.FPL_200_SERIES]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.df[field_names.IMPENETRABLE_SURFACES_LOW_INCOME_FIELD] = (
|
||||||
|
impenetrable_surfaces_threshold
|
||||||
|
& self.df[field_names.FPL_200_SERIES]
|
||||||
|
)
|
||||||
|
|
||||||
self._increment_total_eligibility_exceeded(climate_eligibility_columns)
|
self._increment_total_eligibility_exceeded(climate_eligibility_columns)
|
||||||
|
|
||||||
return self.df[climate_eligibility_columns].any(axis="columns")
|
return self.df[climate_eligibility_columns].any(axis="columns")
|
||||||
|
@ -320,11 +356,11 @@ class ScoreL(Score):
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
housing_eligibility_columns = [
|
housing_eligibility_columns = [
|
||||||
field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD,
|
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
lead_paint_median_house_hold_threshold = (
|
lead_paint_median_home_value_threshold = (
|
||||||
self.df[
|
self.df[
|
||||||
field_names.LEAD_PAINT_FIELD
|
field_names.LEAD_PAINT_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
@ -347,8 +383,8 @@ class ScoreL(Score):
|
||||||
)
|
)
|
||||||
|
|
||||||
# series by series indicators
|
# series by series indicators
|
||||||
self.df[field_names.LEAD_PAINT_MEDIAN_HOME_VALUE_LOW_INCOME_FIELD] = (
|
self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
|
||||||
lead_paint_median_house_hold_threshold
|
lead_paint_median_home_value_threshold
|
||||||
& self.df[field_names.FPL_200_SERIES]
|
& self.df[field_names.FPL_200_SERIES]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -449,6 +485,7 @@ class ScoreL(Score):
|
||||||
field_names.DIABETES_LOW_INCOME_FIELD,
|
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||||
|
field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
|
||||||
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -474,6 +511,14 @@ class ScoreL(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
|
healthy_food_threshold = (
|
||||||
|
self.df[
|
||||||
|
field_names.HEALTHY_FOOD_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
|
)
|
||||||
|
|
||||||
life_expectancy_threshold = (
|
life_expectancy_threshold = (
|
||||||
self.df[
|
self.df[
|
||||||
field_names.LIFE_EXPECTANCY_FIELD
|
field_names.LIFE_EXPECTANCY_FIELD
|
||||||
|
@ -496,6 +541,9 @@ class ScoreL(Score):
|
||||||
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
||||||
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
||||||
)
|
)
|
||||||
|
self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
|
||||||
|
healthy_food_threshold & self.df[field_names.FPL_200_SERIES]
|
||||||
|
)
|
||||||
|
|
||||||
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
||||||
|
|
||||||
|
@ -513,6 +561,15 @@ class ScoreL(Score):
|
||||||
# Where the high school degree achievement rates for adults 25 years and older is less than 95%
|
# Where the high school degree achievement rates for adults 25 years and older is less than 95%
|
||||||
# (necessary to screen out university block groups)
|
# (necessary to screen out university block groups)
|
||||||
|
|
||||||
|
# Workforce criteria for states fields.
|
||||||
|
workforce_eligibility_columns = [
|
||||||
|
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.LOW_READING_LOW_HS_EDUCATION_FIELD,
|
||||||
|
]
|
||||||
|
|
||||||
high_scool_achievement_rate_threshold = (
|
high_scool_achievement_rate_threshold = (
|
||||||
self.df[field_names.HIGH_SCHOOL_ED_FIELD]
|
self.df[field_names.HIGH_SCHOOL_ED_FIELD]
|
||||||
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||||
|
@ -552,6 +609,14 @@ class ScoreL(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
|
low_reading_threshold = (
|
||||||
|
self.df[
|
||||||
|
field_names.LOW_READING_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
|
)
|
||||||
|
|
||||||
self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
|
self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
|
||||||
linguistic_isolation_threshold
|
linguistic_isolation_threshold
|
||||||
& high_scool_achievement_rate_threshold
|
& high_scool_achievement_rate_threshold
|
||||||
|
@ -569,15 +634,9 @@ class ScoreL(Score):
|
||||||
unemployment_threshold & high_scool_achievement_rate_threshold
|
unemployment_threshold & high_scool_achievement_rate_threshold
|
||||||
)
|
)
|
||||||
|
|
||||||
# Workforce criteria for states fields that create indicator columns
|
self.df[field_names.LOW_READING_LOW_HS_EDUCATION_FIELD] = (
|
||||||
# for each tract in order to indicate whether they met any of the four
|
low_reading_threshold & high_scool_achievement_rate_threshold
|
||||||
# criteria. We will used this create individual indicator columns.
|
)
|
||||||
workforce_eligibility_columns = [
|
|
||||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
|
||||||
]
|
|
||||||
|
|
||||||
workforce_combined_criteria_for_states = self.df[
|
workforce_combined_criteria_for_states = self.df[
|
||||||
workforce_eligibility_columns
|
workforce_eligibility_columns
|
||||||
|
|
Loading…
Add table
Reference in a new issue