diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml
index 591c2b72..0036686a 100644
--- a/data/data-pipeline/data_pipeline/content/config/csv.yml
+++ b/data/data-pipeline/data_pipeline/content/config/csv.yml
@@ -5,372 +5,372 @@ global_config:
float: 2
loss_rate_percentage: 4
fields:
- - score_name: GEOID10_TRACT
- label: Census tract ID
- format: string
- - score_name: County Name
- label: County Name
- format: string
- - score_name: State/Territory
- label: State/Territory
- format: string
- - score_name: Percent Black or African American
- label: Percent Black or African American alone
- format: float
- - score_name: Percent American Indian / Alaska Native
- label: Percent American Indian / Alaska Native
- format: float
- - score_name: Percent Asian
- label: Percent Asian
- format: float
- - score_name: Percent Native Hawaiian or Pacific
- label: Percent Native Hawaiian or Pacific
- format: float
- - score_name: Percent two or more races
- label: Percent two or more races
- format: float
- - score_name: Percent White
- label: Percent White
- format: float
- - score_name: Percent Hispanic or Latino
- label: Percent Hispanic or Latino
- format: float
- - score_name: Percent other races
- label: Percent other races
- format: float
- - score_name: Percent age under 10
- label: Percent age under 10
- format: float
- - score_name: Percent age 10 to 64
- label: Percent age 10 to 64
- format: float
- - score_name: Percent age over 64
- label: Percent age over 64
- format: float
- - score_name: Total threshold criteria exceeded
- label: Total threshold criteria exceeded
- format: int64
- - score_name: Total categories exceeded
- label: Total categories exceeded
- format: int64
- - score_name: Definition N (communities)
- label: Identified as disadvantaged without considering neighbors
- format: bool
- - score_name: Definition N (communities) (based on adjacency index and low income alone)
- label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
- format: bool
- - score_name: Definition M community, including adjacency index tracts
- label: Identified as disadvantaged
- format: bool
- - score_name: Definition N (communities) (average of neighbors)
- label: Share of neighbors that are identified as disadvantaged
- format: percentage
- - score_name: Total population
- label: Total population
- format: float
- - score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted
- label: Adjusted percent of individuals below 200% Federal Poverty Line
- format: float
- - score_name: Is low income and has a low percent of higher ed students?
- label: Is low income and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
- label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
- format: percentage
- - score_name: Expected agricultural loss rate (Natural Hazards Risk Index)
- label: Expected agricultural loss rate (Natural Hazards Risk Index)
- format: loss_rate_percentage
- - score_name: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)
- label: Expected building loss rate (Natural Hazards Risk Index) (percentile)
- format: percentage
- - score_name: Expected building loss rate (Natural Hazards Risk Index)
- label: Expected building loss rate (Natural Hazards Risk Index)
- format: loss_rate_percentage
- - score_name: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)
- label: Expected population loss rate (Natural Hazards Risk Index) (percentile)
- format: percentage
- - score_name: Expected population loss rate (Natural Hazards Risk Index)
- label: Expected population loss rate (Natural Hazards Risk Index)
- format: loss_rate_percentage
- - score_name: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for energy burden, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Energy burden (percentile)
- label: Energy burden (percentile)
- format: percentage
- - score_name: Energy burden
- label: Energy burden
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: PM2.5 in the air (percentile)
- label: PM2.5 in the air (percentile)
- format: percentage
- - score_name: PM2.5 in the air
- label: PM2.5 in the air
- format: float
- - score_name: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Diesel particulate matter exposure (percentile)
- label: Diesel particulate matter exposure (percentile)
- format: percentage
- - score_name: Diesel particulate matter exposure
- label: Diesel particulate matter exposure
- format: float
- - score_name: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for traffic proximity, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Traffic proximity and volume (percentile)
- label: Traffic proximity and volume (percentile)
- format: percentage
- - score_name: Traffic proximity and volume
- label: Traffic proximity and volume
- format: float
- - score_name: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for housing burden, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Housing burden (percent) (percentile)
- label: Housing burden (percent) (percentile)
- format: percentage
- - score_name: Housing burden (percent)
- label: Housing burden (percent)
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Percent pre-1960s housing (lead paint indicator) (percentile)
- label: Percent pre-1960s housing (lead paint indicator) (percentile)
- format: percentage
- - score_name: Percent pre-1960s housing (lead paint indicator)
- label: Percent pre-1960s housing (lead paint indicator)
- format: percentage
- - score_name: Median value ($) of owner-occupied housing units (percentile)
- label: Median value ($) of owner-occupied housing units (percentile)
- format: percentage
- - score_name: Median value ($) of owner-occupied housing units
- label: Median value ($) of owner-occupied housing units
- format: float
- - score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Proximity to hazardous waste sites (percentile)
- label: Proximity to hazardous waste sites (percentile)
- format: percentage
- - score_name: Proximity to hazardous waste sites
- label: Proximity to hazardous waste sites
- format: float
- - score_name: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Proximity to NPL sites (percentile)
- label: Proximity to NPL (Superfund) sites (percentile)
- format: percentage
- - score_name: Proximity to NPL sites
- label: Proximity to NPL (Superfund) sites
- format: float
- - score_name: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Proximity to Risk Management Plan (RMP) facilities (percentile)
- label: Proximity to Risk Management Plan (RMP) facilities (percentile)
- format: percentage
- - score_name: Proximity to Risk Management Plan (RMP) facilities
- label: Proximity to Risk Management Plan (RMP) facilities
- format: float
- - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Wastewater discharge (percentile)
- label: Wastewater discharge (percentile)
- format: percentage
- - score_name: Wastewater discharge
- label: Wastewater discharge
- format: float
- - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for asthma, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Current asthma among adults aged greater than or equal to 18 years (percentile)
- label: Current asthma among adults aged greater than or equal to 18 years (percentile)
- format: percentage
- - score_name: Current asthma among adults aged greater than or equal to 18 years
- label: Current asthma among adults aged greater than or equal to 18 years
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for diabetes, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
- label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
- format: percentage
- - score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years
- label: Diagnosed diabetes among adults aged greater than or equal to 18 years
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for heart disease, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
- label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
- format: percentage
- - score_name: Coronary heart disease among adults aged greater than or equal to 18 years
- label: Coronary heart disease among adults aged greater than or equal to 18 years
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for low life expectancy, is low income, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Low life expectancy (percentile)
- label: Low life expectancy (percentile)
- format: percentage
- - score_name: Life expectancy (years)
- label: Life expectancy (years)
- format: float
- - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Low median household income as a percent of area median income (percentile)
- label: Low median household income as a percent of area median income (percentile)
- format: percentage
- - score_name: Median household income as a percent of area median income
- label: Median household income as a percent of area median income
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Linguistic isolation (percent) (percentile)
- label: Linguistic isolation (percent) (percentile)
- format: percentage
- - score_name: Linguistic isolation (percent)
- label: Linguistic isolation (percent)
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Unemployment (percent) (percentile)
- label: Unemployment (percent) (percentile)
- format: percentage
- - score_name: Unemployment (percent)
- label: Unemployment (percent)
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and high percent of residents that are not higher ed students?
- format: bool
- - score_name: Percent of individuals below 200% Federal Poverty Line (percentile)
- label: Percent of individuals below 200% Federal Poverty Line (percentile)
- format: percentage
- - score_name: Percent of individuals below 200% Federal Poverty Line
- label: Percent of individuals below 200% Federal Poverty Line
- format: percentage
- - score_name: Percent of individuals < 100% Federal Poverty Line (percentile)
- label: Percent of individuals < 100% Federal Poverty Line (percentile)
- format: percentage
- - score_name: Percent of individuals < 100% Federal Poverty Line
- label: Percent of individuals < 100% Federal Poverty Line
- format: percentage
- - score_name: Percent individuals age 25 or over with less than high school degree (percentile)
- label: Percent individuals age 25 or over with less than high school degree (percentile)
- format: percentage
- - score_name: Percent individuals age 25 or over with less than high school degree
- label: Percent individuals age 25 or over with less than high school degree
- format: percentage
- - score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
- label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
- format: percentage
- - score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
- label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
- label: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
- label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
- label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
- format: bool
- - score_name: Percent of population not currently enrolled in college or graduate school
- label: Percent of residents who are not currently enrolled in higher ed
- format: percentage
- - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
- label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
- label: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
- format: bool
- - score_name: DOT Travel Barriers Score (percentile)
- label: DOT Travel Barriers Score (percentile)
- format: percentage
- - score_name: Leaky underground storage tanks (percentile)
- label: Leaky underground storage tanks (percentile)
- format: percentage
- - score_name: Leaky underground storage tanks
- label: Leaky underground storage tanks
- format: float
- - score_name: Share of properties at risk of flood in 30 years
- label: Share of properties at risk of flood in 30 years
- format: float
- - score_name: Share of properties at risk of fire in 30 years
- label: Share of properties at risk of fire in 30 years
- format: float
- - score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
- label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
- label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
- label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
- format: bool
- - score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
- label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
- format: bool
- - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?
- label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?
- format: bool
- - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent
- label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent
- format: bool
- - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent
- label: Share of the tract's land area that is covered by impervious surface or cropland as a percent
- format: percentage
- - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile)
- label: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile)
- format: percentage
- - score_name: Share of properties at risk of flood in 30 years (percentile)
- label: Share of properties at risk of flood in 30 years (percentile)
- format: percentage
- - score_name: Share of properties at risk of fire in 30 years (percentile)
- label: Share of properties at risk of fire in 30 years (percentile)
- format: percentage
- - score_name: Does the tract have at least 35 acres in it?
- label: Does the tract have at least 35 acres in it?
- format: bool
- - score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
- label: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
- format: bool
- - score_name: Is there at least one abandoned mine in this census tract?
- label: Is there at least one abandoned mine in this census tract?
- format: bool
- - score_name: There is at least one abandoned mine in this census tract and the tract is low income.
- label: There is at least one abandoned mine in this census tract and the tract is low income.
- format: bool
- - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
- label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
- format: bool
- - score_name: Tract-level redlining score meets or exceeds 3.25 and is low income
- label: Tract experienced historic underinvestment and remains low income
- format: bool
- - score_name: Tract-level redlining score meets or exceeds 3.25
- label: Tract experienced historic underinvestment
- format: bool
- - score_name: Income data has been estimated based on neighbor income
- label: Income data has been estimated based on geographic neighbor income
- format: bool
+- score_name: GEOID10_TRACT
+ label: Census tract ID
+ format: string
+- score_name: County Name
+ label: County Name
+ format: string
+- score_name: State/Territory
+ label: State/Territory
+ format: string
+- score_name: Percent Black or African American
+ label: Percent Black or African American alone
+ format: float
+- score_name: Percent American Indian / Alaska Native
+ label: Percent American Indian / Alaska Native
+ format: float
+- score_name: Percent Asian
+ label: Percent Asian
+ format: float
+- score_name: Percent Native Hawaiian or Pacific
+ label: Percent Native Hawaiian or Pacific
+ format: float
+- score_name: Percent two or more races
+ label: Percent two or more races
+ format: float
+- score_name: Percent White
+ label: Percent White
+ format: float
+- score_name: Percent Hispanic or Latino
+ label: Percent Hispanic or Latino
+ format: float
+- score_name: Percent other races
+ label: Percent other races
+ format: float
+- score_name: Percent age under 10
+ label: Percent age under 10
+ format: float
+- score_name: Percent age 10 to 64
+ label: Percent age 10 to 64
+ format: float
+- score_name: Percent age over 64
+ label: Percent age over 64
+ format: float
+- score_name: Total threshold criteria exceeded
+ label: Total threshold criteria exceeded
+ format: int64
+- score_name: Total categories exceeded
+ label: Total categories exceeded
+ format: int64
+- score_name: Definition N (communities)
+ label: Identified as disadvantaged without considering neighbors
+ format: bool
+- score_name: Definition N (communities) (based on adjacency index and low income alone)
+ label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
+ format: bool
+- score_name: Definition N community, including adjacency index tracts
+ label: Identified as disadvantaged
+ format: bool
+- score_name: Definition N (communities) (average of neighbors)
+ label: Share of neighbors that are identified as disadvantaged
+ format: percentage
+- score_name: Total population
+ label: Total population
+ format: float
+- score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted
+ label: Adjusted percent of individuals below 200% Federal Poverty Line
+ format: float
+- score_name: Is low income (imputed and adjusted)?
+ label: Is low income?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?
+ label: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?
+ format: bool
+- score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
+ label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
+ format: percentage
+- score_name: Expected agricultural loss rate (Natural Hazards Risk Index)
+ label: Expected agricultural loss rate (Natural Hazards Risk Index)
+ format: loss_rate_percentage
+- score_name: Greater than or equal to the 90th percentile for expected building loss rate and is low income?
+ label: Greater than or equal to the 90th percentile for expected building loss rate and is low income?
+ format: bool
+- score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)
+ label: Expected building loss rate (Natural Hazards Risk Index) (percentile)
+ format: percentage
+- score_name: Expected building loss rate (Natural Hazards Risk Index)
+ label: Expected building loss rate (Natural Hazards Risk Index)
+ format: loss_rate_percentage
+- score_name: Greater than or equal to the 90th percentile for expected population loss rate and is low income?
+ label: Greater than or equal to the 90th percentile for expected population loss rate and is low income?
+ format: bool
+- score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)
+ label: Expected population loss rate (Natural Hazards Risk Index) (percentile)
+ format: percentage
+- score_name: Expected population loss rate (Natural Hazards Risk Index)
+ label: Expected population loss rate (Natural Hazards Risk Index)
+ format: loss_rate_percentage
+- score_name: Greater than or equal to the 90th percentile for energy burden and is low income?
+ label: Greater than or equal to the 90th percentile for energy burden and is low income?
+ format: bool
+- score_name: Energy burden (percentile)
+ label: Energy burden (percentile)
+ format: percentage
+- score_name: Energy burden
+ label: Energy burden
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?
+ label: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?
+ format: bool
+- score_name: PM2.5 in the air (percentile)
+ label: PM2.5 in the air (percentile)
+ format: percentage
+- score_name: PM2.5 in the air
+ label: PM2.5 in the air
+ format: float
+- score_name: Greater than or equal to the 90th percentile for diesel particulate matter and is low income?
+ label: Greater than or equal to the 90th percentile for diesel particulate matter and is low income?
+ format: bool
+- score_name: Diesel particulate matter exposure (percentile)
+ label: Diesel particulate matter exposure (percentile)
+ format: percentage
+- score_name: Diesel particulate matter exposure
+ label: Diesel particulate matter exposure
+ format: float
+- score_name: Greater than or equal to the 90th percentile for traffic proximity and is low income?
+ label: Greater than or equal to the 90th percentile for traffic proximity and is low income?
+ format: bool
+- score_name: Traffic proximity and volume (percentile)
+ label: Traffic proximity and volume (percentile)
+ format: percentage
+- score_name: Traffic proximity and volume
+ label: Traffic proximity and volume
+ format: float
+- score_name: Greater than or equal to the 90th percentile for housing burden and is low income?
+ label: Greater than or equal to the 90th percentile for housing burden and is low income?
+ format: bool
+- score_name: Housing burden (percent) (percentile)
+ label: Housing burden (percent) (percentile)
+ format: percentage
+- score_name: Housing burden (percent)
+ label: Housing burden (percent)
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?
+ label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile and is low income?
+ format: bool
+- score_name: Percent pre-1960s housing (lead paint indicator) (percentile)
+ label: Percent pre-1960s housing (lead paint indicator) (percentile)
+ format: percentage
+- score_name: Percent pre-1960s housing (lead paint indicator)
+ label: Percent pre-1960s housing (lead paint indicator)
+ format: percentage
+- score_name: Median value ($) of owner-occupied housing units (percentile)
+ label: Median value ($) of owner-occupied housing units (percentile)
+ format: percentage
+- score_name: Median value ($) of owner-occupied housing units
+ label: Median value ($) of owner-occupied housing units
+ format: float
+- score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?
+ label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?
+ format: bool
+- score_name: Proximity to hazardous waste sites (percentile)
+ label: Proximity to hazardous waste sites (percentile)
+ format: percentage
+- score_name: Proximity to hazardous waste sites
+ label: Proximity to hazardous waste sites
+ format: float
+- score_name: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?
+ label: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?
+ format: bool
+- score_name: Proximity to NPL sites (percentile)
+ label: Proximity to NPL (Superfund) sites (percentile)
+ format: percentage
+- score_name: Proximity to NPL sites
+ label: Proximity to NPL (Superfund) sites
+ format: float
+- score_name: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?
+ label: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?
+ format: bool
+- score_name: Proximity to Risk Management Plan (RMP) facilities (percentile)
+ label: Proximity to Risk Management Plan (RMP) facilities (percentile)
+ format: percentage
+- score_name: Proximity to Risk Management Plan (RMP) facilities
+ label: Proximity to Risk Management Plan (RMP) facilities
+ format: float
+- score_name: Greater than or equal to the 90th percentile for wastewater discharge and is low income?
+ label: Greater than or equal to the 90th percentile for wastewater discharge and is low income?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
+ label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
+ format: bool
+- score_name: Wastewater discharge (percentile)
+ label: Wastewater discharge (percentile)
+ format: percentage
+- score_name: Leaky underground storage tanks (percentile)
+ label: Leaky underground storage tanks (percentile)
+ format: percentage
+- score_name: Wastewater discharge
+ label: Wastewater discharge
+ format: float
+- score_name: Leaky underground storage tanks
+ label: Leaky underground storage tanks
+ format: float
+- score_name: Greater than or equal to the 90th percentile for asthma and is low income?
+ label: Greater than or equal to the 90th percentile for asthma and is low income?
+ format: bool
+- score_name: Current asthma among adults aged greater than or equal to 18 years (percentile)
+ label: Current asthma among adults aged greater than or equal to 18 years (percentile)
+ format: percentage
+- score_name: Current asthma among adults aged greater than or equal to 18 years
+ label: Current asthma among adults aged greater than or equal to 18 years
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for diabetes and is low income?
+ label: Greater than or equal to the 90th percentile for diabetes and is low income?
+ format: bool
+- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
+ label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
+ format: percentage
+- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years
+ label: Diagnosed diabetes among adults aged greater than or equal to 18 years
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for heart disease and is low income?
+ label: Greater than or equal to the 90th percentile for heart disease and is low income?
+ format: bool
+- score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
+ label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
+ format: percentage
+- score_name: Coronary heart disease among adults aged greater than or equal to 18 years
+ label: Coronary heart disease among adults aged greater than or equal to 18 years
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for low life expectancy and is low income?
+ label: Greater than or equal to the 90th percentile for low life expectancy and is low income?
+ format: bool
+- score_name: Low life expectancy (percentile)
+ label: Low life expectancy (percentile)
+ format: percentage
+- score_name: Life expectancy (years)
+ label: Life expectancy (years)
+ format: float
+- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?
+ format: bool
+- score_name: Low median household income as a percent of area median income (percentile)
+ label: Low median household income as a percent of area median income (percentile)
+ format: percentage
+- score_name: Median household income as a percent of area median income
+ label: Median household income as a percent of area median income
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?
+ format: bool
+- score_name: Linguistic isolation (percent) (percentile)
+ label: Linguistic isolation (percent) (percentile)
+ format: percentage
+- score_name: Linguistic isolation (percent)
+ label: Linguistic isolation (percent)
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for unemployment and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for unemployment and has low HS attainment?
+ format: bool
+- score_name: Unemployment (percent) (percentile)
+ label: Unemployment (percent) (percentile)
+ format: percentage
+- score_name: Unemployment (percent)
+ label: Unemployment (percent)
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?
+ format: bool
+- score_name: Percent of individuals below 200% Federal Poverty Line (percentile)
+ label: Percent of individuals below 200% Federal Poverty Line (percentile)
+ format: percentage
+- score_name: Percent of individuals below 200% Federal Poverty Line
+ label: Percent of individuals below 200% Federal Poverty Line
+ format: percentage
+- score_name: Percent of individuals < 100% Federal Poverty Line (percentile)
+ label: Percent of individuals < 100% Federal Poverty Line (percentile)
+ format: percentage
+- score_name: Percent of individuals < 100% Federal Poverty Line
+ label: Percent of individuals < 100% Federal Poverty Line
+ format: percentage
+- score_name: Percent individuals age 25 or over with less than high school degree (percentile)
+ label: Percent individuals age 25 or over with less than high school degree (percentile)
+ format: percentage
+- score_name: Percent individuals age 25 or over with less than high school degree
+ label: Percent individuals age 25 or over with less than high school degree
+ format: percentage
+- score_name: Percent of population not currently enrolled in college or graduate school
+ label: Percent of residents who are not currently enrolled in higher ed
+ format: percentage
+- score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
+ label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
+ label: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
+ format: bool
+- score_name: DOT Travel Barriers Score (percentile)
+ label: DOT Travel Barriers Score (percentile)
+ format: percentage
+- score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
+ label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
+ label: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
+ label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
+ label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
+ format: bool
+- score_name: Share of properties at risk of flood in 30 years
+ label: Share of properties at risk of flood in 30 years
+ format: percentage
+- score_name: Share of properties at risk of fire in 30 years
+ label: Share of properties at risk of fire in 30 years
+ format: percentage
+- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
+ label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
+ label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
+ label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
+ label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?
+ label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?
+ format: bool
+- score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent
+ label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent
+ format: bool
+- score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent
+ label: Share of the tract's land area that is covered by impervious surface or cropland as a percent
+ format: percentage
+- score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile)
+ label: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile)
+ format: percentage
+- score_name: Share of properties at risk of flood in 30 years (percentile)
+ label: Share of properties at risk of flood in 30 years (percentile)
+ format: percentage
+- score_name: Share of properties at risk of fire in 30 years (percentile)
+ label: Share of properties at risk of fire in 30 years (percentile)
+ format: percentage
+- score_name: Does the tract have at least 35 acres in it?
+ label: Does the tract have at least 35 acres in it?
+ format: bool
+- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
+ label: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
+ format: bool
+- score_name: Is there at least one abandoned mine in this census tract?
+ label: Is there at least one abandoned mine in this census tract?
+ format: bool
+- score_name: There is at least one abandoned mine in this census tract and the tract is low income.
+ label: There is at least one abandoned mine in this census tract and the tract is low income.
+ format: bool
+- score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
+ label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
+ format: bool
+- score_name: Tract-level redlining score meets or exceeds 3.25 and is low income
+ label: Tract experienced historic underinvestment and remains low income
+ format: bool
+- score_name: Tract-level redlining score meets or exceeds 3.25
+ label: Tract experienced historic underinvestment
+ format: bool
+- score_name: Income data has been estimated based on neighbor income
+ label: Income data has been estimated based on geographic neighbor income
+ format: bool
diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml
index 03fd55a6..62e657b1 100644
--- a/data/data-pipeline/data_pipeline/content/config/excel.yml
+++ b/data/data-pipeline/data_pipeline/content/config/excel.yml
@@ -63,7 +63,7 @@ sheets:
- score_name: Definition N (communities) (based on adjacency index and low income alone)
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
format: bool
- - score_name: Definition M community, including adjacency index tracts
+ - score_name: Definition N community, including adjacency index tracts
label: Identified as disadvantaged
format: bool
- score_name: Definition N (communities) (average of neighbors)
@@ -75,11 +75,11 @@ sheets:
- score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted
label: Adjusted percent of individuals below 200% Federal Poverty Line
format: float
- - score_name: Is low income and has a low percent of higher ed students?
- label: Is low income and high percent of residents that are not higher ed students?
+ - score_name: Is low income (imputed and adjusted)?
+ label: Is low income?
format: bool
- - score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?
+ label: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?
format: bool
- score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
@@ -87,8 +87,8 @@ sheets:
- score_name: Expected agricultural loss rate (Natural Hazards Risk Index)
label: Expected agricultural loss rate (Natural Hazards Risk Index)
format: loss_rate_percentage
- - score_name: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for expected building loss rate and is low income?
+ label: Greater than or equal to the 90th percentile for expected building loss rate and is low income?
format: bool
- score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)
label: Expected building loss rate (Natural Hazards Risk Index) (percentile)
@@ -96,8 +96,8 @@ sheets:
- score_name: Expected building loss rate (Natural Hazards Risk Index)
label: Expected building loss rate (Natural Hazards Risk Index)
format: loss_rate_percentage
- - score_name: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for expected population loss rate and is low income?
+ label: Greater than or equal to the 90th percentile for expected population loss rate and is low income?
format: bool
- score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)
label: Expected population loss rate (Natural Hazards Risk Index) (percentile)
@@ -105,8 +105,8 @@ sheets:
- score_name: Expected population loss rate (Natural Hazards Risk Index)
label: Expected population loss rate (Natural Hazards Risk Index)
format: loss_rate_percentage
- - score_name: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for energy burden, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for energy burden and is low income?
+ label: Greater than or equal to the 90th percentile for energy burden and is low income?
format: bool
- score_name: Energy burden (percentile)
label: Energy burden (percentile)
@@ -114,8 +114,8 @@ sheets:
- score_name: Energy burden
label: Energy burden
format: percentage
- - score_name: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?
+ label: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?
format: bool
- score_name: PM2.5 in the air (percentile)
label: PM2.5 in the air (percentile)
@@ -123,8 +123,8 @@ sheets:
- score_name: PM2.5 in the air
label: PM2.5 in the air
format: float
- - score_name: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for diesel particulate matter and is low income?
+ label: Greater than or equal to the 90th percentile for diesel particulate matter and is low income?
format: bool
- score_name: Diesel particulate matter exposure (percentile)
label: Diesel particulate matter exposure (percentile)
@@ -132,8 +132,8 @@ sheets:
- score_name: Diesel particulate matter exposure
label: Diesel particulate matter exposure
format: float
- - score_name: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for traffic proximity, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for traffic proximity and is low income?
+ label: Greater than or equal to the 90th percentile for traffic proximity and is low income?
format: bool
- score_name: Traffic proximity and volume (percentile)
label: Traffic proximity and volume (percentile)
@@ -141,8 +141,8 @@ sheets:
- score_name: Traffic proximity and volume
label: Traffic proximity and volume
format: float
- - score_name: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for housing burden, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for housing burden and is low income?
+ label: Greater than or equal to the 90th percentile for housing burden and is low income?
format: bool
- score_name: Housing burden (percent) (percentile)
label: Housing burden (percent) (percentile)
@@ -150,8 +150,8 @@ sheets:
- score_name: Housing burden (percent)
label: Housing burden (percent)
format: percentage
- - score_name: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?
+ label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile and is low income?
format: bool
- score_name: Percent pre-1960s housing (lead paint indicator) (percentile)
label: Percent pre-1960s housing (lead paint indicator) (percentile)
@@ -165,8 +165,8 @@ sheets:
- score_name: Median value ($) of owner-occupied housing units
label: Median value ($) of owner-occupied housing units
format: float
- - score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?
+ label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?
format: bool
- score_name: Proximity to hazardous waste sites (percentile)
label: Proximity to hazardous waste sites (percentile)
@@ -174,8 +174,8 @@ sheets:
- score_name: Proximity to hazardous waste sites
label: Proximity to hazardous waste sites
format: float
- - score_name: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?
+ label: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?
format: bool
- score_name: Proximity to NPL sites (percentile)
label: Proximity to NPL (Superfund) sites (percentile)
@@ -183,8 +183,8 @@ sheets:
- score_name: Proximity to NPL sites
label: Proximity to NPL (Superfund) sites
format: float
- - score_name: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?
+ label: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?
format: bool
- score_name: Proximity to Risk Management Plan (RMP) facilities (percentile)
label: Proximity to Risk Management Plan (RMP) facilities (percentile)
@@ -192,8 +192,8 @@ sheets:
- score_name: Proximity to Risk Management Plan (RMP) facilities
label: Proximity to Risk Management Plan (RMP) facilities
format: float
- - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for wastewater discharge and is low income?
+ label: Greater than or equal to the 90th percentile for wastewater discharge and is low income?
format: bool
- score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
@@ -210,8 +210,8 @@ sheets:
- score_name: Leaky underground storage tanks
label: Leaky underground storage tanks
format: float
- - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for asthma, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for asthma and is low income?
+ label: Greater than or equal to the 90th percentile for asthma and is low income?
format: bool
- score_name: Current asthma among adults aged greater than or equal to 18 years (percentile)
label: Current asthma among adults aged greater than or equal to 18 years (percentile)
@@ -219,8 +219,8 @@ sheets:
- score_name: Current asthma among adults aged greater than or equal to 18 years
label: Current asthma among adults aged greater than or equal to 18 years
format: percentage
- - score_name: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for diabetes, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for diabetes and is low income?
+ label: Greater than or equal to the 90th percentile for diabetes and is low income?
format: bool
- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
@@ -228,8 +228,8 @@ sheets:
- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years
label: Diagnosed diabetes among adults aged greater than or equal to 18 years
format: percentage
- - score_name: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for heart disease, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for heart disease and is low income?
+ label: Greater than or equal to the 90th percentile for heart disease and is low income?
format: bool
- score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
@@ -237,8 +237,8 @@ sheets:
- score_name: Coronary heart disease among adults aged greater than or equal to 18 years
label: Coronary heart disease among adults aged greater than or equal to 18 years
format: percentage
- - score_name: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for low life expectancy, is low income, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for low life expectancy and is low income?
+ label: Greater than or equal to the 90th percentile for low life expectancy and is low income?
format: bool
- score_name: Low life expectancy (percentile)
label: Low life expectancy (percentile)
@@ -246,8 +246,8 @@ sheets:
- score_name: Life expectancy (years)
label: Life expectancy (years)
format: float
- - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?
format: bool
- score_name: Low median household income as a percent of area median income (percentile)
label: Low median household income as a percent of area median income (percentile)
@@ -255,8 +255,8 @@ sheets:
- score_name: Median household income as a percent of area median income
label: Median household income as a percent of area median income
format: percentage
- - score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?
format: bool
- score_name: Linguistic isolation (percent) (percentile)
label: Linguistic isolation (percent) (percentile)
@@ -264,8 +264,8 @@ sheets:
- score_name: Linguistic isolation (percent)
label: Linguistic isolation (percent)
format: percentage
- - score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for unemployment and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for unemployment and has low HS attainment?
format: bool
- score_name: Unemployment (percent) (percentile)
label: Unemployment (percent) (percentile)
@@ -273,8 +273,8 @@ sheets:
- score_name: Unemployment (percent)
label: Unemployment (percent)
format: percentage
- - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?
- label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and high percent of residents that are not higher ed students?
+ - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?
+ label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?
format: bool
- score_name: Percent of individuals below 200% Federal Poverty Line (percentile)
label: Percent of individuals below 200% Federal Poverty Line (percentile)
diff --git a/data/data-pipeline/data_pipeline/content/config/scratch.ipynb b/data/data-pipeline/data_pipeline/content/config/scratch.ipynb
new file mode 100644
index 00000000..e2535b7e
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/content/config/scratch.ipynb
@@ -0,0 +1,798 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "cf8f39b0-7735-4f7c-9178-61bbf2257951",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "%load_ext lab_black"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "66639c20-be5e-4bf6-9b58-98338874f7cc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Median value ($) of owner-occupied housing units (percentile)'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "check = pd.read_csv(\n",
+ " \"/Users/emmausds/j40/data_pipeline/data/score/downloadable/codebook.csv\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "5e525e4e-6764-4d4d-9119-b4d400ba022f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " score_name | \n",
+ " csv_field_type | \n",
+ " csv_label | \n",
+ " excel_label | \n",
+ " calculation_notes | \n",
+ " threshold_category | \n",
+ " notes | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " GEOID10_TRACT | \n",
+ " string | \n",
+ " Census tract ID | \n",
+ " Census tract ID | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " County Name | \n",
+ " string | \n",
+ " County Name | \n",
+ " County Name | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " State/Territory | \n",
+ " string | \n",
+ " State/Territory | \n",
+ " State/Territory | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Total threshold criteria exceeded | \n",
+ " int64 | \n",
+ " Total threshold criteria exceeded | \n",
+ " Total threshold criteria exceeded | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Lists out the total number of criteria (where ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Definition M (communities) | \n",
+ " bool | \n",
+ " Identified as disadvantaged | \n",
+ " Identified as disadvantaged | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " True / False variable for whether a tract is a... | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " Percentage households below 100% of federal po... | \n",
+ " percentage | \n",
+ " Percentage households below 100% of federal po... | \n",
+ " Percentage households below 100% of federal po... | \n",
+ " Because not all data is available for the Nati... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " bool | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " Because not all data is available for the Nati... | \n",
+ " training and workforce development | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " bool | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " Because not all data is available for the Nati... | \n",
+ " training and workforce development | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " bool | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " Because not all data is available for the Nati... | \n",
+ " training and workforce development | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 81 | \n",
+ " Percent of population not currently enrolled i... | \n",
+ " percentage | \n",
+ " Percent of residents who are not currently enr... | \n",
+ " Percent of residents who are not currently enr... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
82 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " score_name csv_field_type \\\n",
+ "0 GEOID10_TRACT string \n",
+ "1 County Name string \n",
+ "2 State/Territory string \n",
+ "3 Total threshold criteria exceeded int64 \n",
+ "4 Definition M (communities) bool \n",
+ ".. ... ... \n",
+ "77 Percentage households below 100% of federal po... percentage \n",
+ "78 Greater than or equal to the 90th percentile f... bool \n",
+ "79 Greater than or equal to the 90th percentile f... bool \n",
+ "80 Greater than or equal to the 90th percentile f... bool \n",
+ "81 Percent of population not currently enrolled i... percentage \n",
+ "\n",
+ " csv_label \\\n",
+ "0 Census tract ID \n",
+ "1 County Name \n",
+ "2 State/Territory \n",
+ "3 Total threshold criteria exceeded \n",
+ "4 Identified as disadvantaged \n",
+ ".. ... \n",
+ "77 Percentage households below 100% of federal po... \n",
+ "78 Greater than or equal to the 90th percentile f... \n",
+ "79 Greater than or equal to the 90th percentile f... \n",
+ "80 Greater than or equal to the 90th percentile f... \n",
+ "81 Percent of residents who are not currently enr... \n",
+ "\n",
+ " excel_label \\\n",
+ "0 Census tract ID \n",
+ "1 County Name \n",
+ "2 State/Territory \n",
+ "3 Total threshold criteria exceeded \n",
+ "4 Identified as disadvantaged \n",
+ ".. ... \n",
+ "77 Percentage households below 100% of federal po... \n",
+ "78 Greater than or equal to the 90th percentile f... \n",
+ "79 Greater than or equal to the 90th percentile f... \n",
+ "80 Greater than or equal to the 90th percentile f... \n",
+ "81 Percent of residents who are not currently enr... \n",
+ "\n",
+ " calculation_notes \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ ".. ... \n",
+ "77 Because not all data is available for the Nati... \n",
+ "78 Because not all data is available for the Nati... \n",
+ "79 Because not all data is available for the Nati... \n",
+ "80 Because not all data is available for the Nati... \n",
+ "81 NaN \n",
+ "\n",
+ " threshold_category \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ ".. ... \n",
+ "77 NaN \n",
+ "78 training and workforce development \n",
+ "79 training and workforce development \n",
+ "80 training and workforce development \n",
+ "81 NaN \n",
+ "\n",
+ " notes \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 Lists out the total number of criteria (where ... \n",
+ "4 True / False variable for whether a tract is a... \n",
+ ".. ... \n",
+ "77 NaN \n",
+ "78 NaN \n",
+ "79 NaN \n",
+ "80 NaN \n",
+ "81 NaN \n",
+ "\n",
+ "[82 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "check"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "d86c867a-1a55-4ec0-82a6-040841406236",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "codebook = pd.DataFrame(to_frame_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "6215deaf-b004-4da0-a70b-bc54f636601a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "details_to_merge = pd.DataFrame(mapping_dictionary)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "ac4e65c2-09e6-4978-9440-37b3be057f65",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shapefile_codes = pd.read_csv(\n",
+ " \"/Users/emmausds/j40/data_pipeline/data/score/shapefile/columns.csv\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 153,
+ "id": "31cfd9ec-5f5f-4642-a51f-6875c2c279a4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)',\n",
+ " 'Expected building loss rate (Natural Hazards Risk Index) (percentile)',\n",
+ " 'Expected population loss rate (Natural Hazards Risk Index) (percentile)',\n",
+ " 'Energy burden (percentile)',\n",
+ " 'PM2.5 in the air (percentile)',\n",
+ " 'Diesel particulate matter exposure (percentile)',\n",
+ " 'Traffic proximity and volume (percentile)',\n",
+ " 'Housing burden (percent) (percentile)',\n",
+ " 'Percent pre-1960s housing (lead paint indicator) (percentile)',\n",
+ " 'Median value ($) of owner-occupied housing units (percentile)',\n",
+ " 'Proximity to hazardous waste sites (percentile)',\n",
+ " 'Proximity to NPL sites (percentile)',\n",
+ " 'Proximity to Risk Management Plan (RMP) facilities (percentile)',\n",
+ " 'Wastewater discharge (percentile)',\n",
+ " 'Current asthma among adults aged greater than or equal to 18 years (percentile)',\n",
+ " 'Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)',\n",
+ " 'Coronary heart disease among adults aged greater than or equal to 18 years (percentile)',\n",
+ " 'Low life expectancy (percentile)',\n",
+ " 'Low median household income as a percent of area median income (percentile)',\n",
+ " 'Linguistic isolation (percent) (percentile)',\n",
+ " 'Unemployment (percent) (percentile)',\n",
+ " 'Percent of individuals below 200% Federal Poverty Line (percentile)',\n",
+ " 'Percent of individuals < 100% Federal Poverty Line (percentile)',\n",
+ " 'Percent individuals age 25 or over with less than high school degree (percentile)',\n",
+ " 'Definition M (percentile)',\n",
+ " 'Low median household income as a percent of territory median income in 2009 (percentile)',\n",
+ " 'Percentage households below 100% of federal poverty line in 2009 for island areas (percentile)',\n",
+ " 'Unemployment (percent) in 2009 for island areas (percentile)']"
+ ]
+ },
+ "execution_count": 153,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 154,
+ "id": "66dde4fc-48e6-4bdf-b3a6-16c766e94d8a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " - column_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Energy burden (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: PM2.5 in the air (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Diesel particulate matter exposure (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Traffic proximity and volume (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Housing burden (percent) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Percent pre-1960s housing (lead paint indicator) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Median value ($) of owner-occupied housing units (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Proximity to hazardous waste sites (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Proximity to NPL sites (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Proximity to Risk Management Plan (RMP) facilities (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Wastewater discharge (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Current asthma among adults aged greater than or equal to 18 years (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Low life expectancy (percentile)\n",
+ " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Low median household income as a percent of area median income (percentile)\n",
+ " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Linguistic isolation (percent) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Unemployment (percent) (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Percent of individuals below 200% Federal Poverty Line (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Percent of individuals < 100% Federal Poverty Line (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Percent individuals age 25 or over with less than high school degree (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Definition M (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Low median household income as a percent of territory median income in 2009 (percentile)\n",
+ " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Percentage households below 100% of federal poverty line in 2009 for island areas (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n",
+ " - column_name: Unemployment (percent) in 2009 for island areas (percentile)\n",
+ " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n"
+ ]
+ }
+ ],
+ "source": [
+ "for col in [col for col in download_codebook.index.to_list() if \"(percentile)\" in col]:\n",
+ " print(f\" - column_name: {col}\")\n",
+ " if \"Low\" not in col:\n",
+ " print(\n",
+ " f\" notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\"\n",
+ " )\n",
+ " else:\n",
+ " print(\n",
+ " f\" notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\"\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 143,
+ "id": "5c08708e-4ebf-4cfe-8efb-7ee6c7930427",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " excel_label | \n",
+ " format | \n",
+ " shapefile_column | \n",
+ " notes | \n",
+ " category | \n",
+ "
\n",
+ " \n",
+ " score_name | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " GEOID10_TRACT | \n",
+ " Census tract ID | \n",
+ " string | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " County Name | \n",
+ " County Name | \n",
+ " string | \n",
+ " CF | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " State/Territory | \n",
+ " State/Territory | \n",
+ " string | \n",
+ " SF | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " Total threshold criteria exceeded | \n",
+ " Total threshold criteria exceeded | \n",
+ " int64 | \n",
+ " TC | \n",
+ " Lists out the total number of criteria (where ... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " Definition M (communities) | \n",
+ " Identified as disadvantaged | \n",
+ " bool | \n",
+ " SM_C | \n",
+ " True / False variable for whether a tract is a... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " Unemployment (percent) in 2009 (island areas) and 2010 (states and PR) | \n",
+ " Unemployment (percent) in 2009 (island areas) ... | \n",
+ " percentage | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR) | \n",
+ " Percentage households below 100% of federal po... | \n",
+ " percentage | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)? | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " bool | \n",
+ " IAULHSE | \n",
+ " island area information comes from the dicenni... | \n",
+ " training and workforce development | \n",
+ "
\n",
+ " \n",
+ " Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)? | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " bool | \n",
+ " IAPLHSE | \n",
+ " island area information comes from the dicenni... | \n",
+ " training and workforce development | \n",
+ "
\n",
+ " \n",
+ " Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)? | \n",
+ " Greater than or equal to the 90th percentile f... | \n",
+ " bool | \n",
+ " IALMILHSE | \n",
+ " island area information comes from the dicenni... | \n",
+ " training and workforce development | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
82 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " excel_label \\\n",
+ "score_name \n",
+ "GEOID10_TRACT Census tract ID \n",
+ "County Name County Name \n",
+ "State/Territory State/Territory \n",
+ "Total threshold criteria exceeded Total threshold criteria exceeded \n",
+ "Definition M (communities) Identified as disadvantaged \n",
+ "... ... \n",
+ "Unemployment (percent) in 2009 (island areas) a... Unemployment (percent) in 2009 (island areas) ... \n",
+ "Percentage households below 100% of federal pov... Percentage households below 100% of federal po... \n",
+ "Greater than or equal to the 90th percentile fo... Greater than or equal to the 90th percentile f... \n",
+ "Greater than or equal to the 90th percentile fo... Greater than or equal to the 90th percentile f... \n",
+ "Greater than or equal to the 90th percentile fo... Greater than or equal to the 90th percentile f... \n",
+ "\n",
+ " format \\\n",
+ "score_name \n",
+ "GEOID10_TRACT string \n",
+ "County Name string \n",
+ "State/Territory string \n",
+ "Total threshold criteria exceeded int64 \n",
+ "Definition M (communities) bool \n",
+ "... ... \n",
+ "Unemployment (percent) in 2009 (island areas) a... percentage \n",
+ "Percentage households below 100% of federal pov... percentage \n",
+ "Greater than or equal to the 90th percentile fo... bool \n",
+ "Greater than or equal to the 90th percentile fo... bool \n",
+ "Greater than or equal to the 90th percentile fo... bool \n",
+ "\n",
+ " shapefile_column \\\n",
+ "score_name \n",
+ "GEOID10_TRACT NaN \n",
+ "County Name CF \n",
+ "State/Territory SF \n",
+ "Total threshold criteria exceeded TC \n",
+ "Definition M (communities) SM_C \n",
+ "... ... \n",
+ "Unemployment (percent) in 2009 (island areas) a... NaN \n",
+ "Percentage households below 100% of federal pov... NaN \n",
+ "Greater than or equal to the 90th percentile fo... IAULHSE \n",
+ "Greater than or equal to the 90th percentile fo... IAPLHSE \n",
+ "Greater than or equal to the 90th percentile fo... IALMILHSE \n",
+ "\n",
+ " notes \\\n",
+ "score_name \n",
+ "GEOID10_TRACT NaN \n",
+ "County Name NaN \n",
+ "State/Territory NaN \n",
+ "Total threshold criteria exceeded Lists out the total number of criteria (where ... \n",
+ "Definition M (communities) True / False variable for whether a tract is a... \n",
+ "... ... \n",
+ "Unemployment (percent) in 2009 (island areas) a... NaN \n",
+ "Percentage households below 100% of federal pov... NaN \n",
+ "Greater than or equal to the 90th percentile fo... island area information comes from the dicenni... \n",
+ "Greater than or equal to the 90th percentile fo... island area information comes from the dicenni... \n",
+ "Greater than or equal to the 90th percentile fo... island area information comes from the dicenni... \n",
+ "\n",
+ " category \n",
+ "score_name \n",
+ "GEOID10_TRACT NaN \n",
+ "County Name NaN \n",
+ "State/Territory NaN \n",
+ "Total threshold criteria exceeded NaN \n",
+ "Definition M (communities) NaN \n",
+ "... ... \n",
+ "Unemployment (percent) in 2009 (island areas) a... NaN \n",
+ "Percentage households below 100% of federal pov... NaN \n",
+ "Greater than or equal to the 90th percentile fo... training and workforce development \n",
+ "Greater than or equal to the 90th percentile fo... training and workforce development \n",
+ "Greater than or equal to the 90th percentile fo... training and workforce development \n",
+ "\n",
+ "[82 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 143,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "download_codebook.dropna(subset=[\"format\"]).reset_index()[\"score_name\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "id": "7139ce5d-db5e-49dd-8bb3-122c7b73b395",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " excel_label | \n",
+ " format | \n",
+ " shapefile_column | \n",
+ " notes | \n",
+ " category | \n",
+ "
\n",
+ " \n",
+ " score_name | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [excel_label, format, shapefile_column, notes, category]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "download_codebook.loc[\n",
+ " sum([download_codebook[col] == \"percentile\" for col in [\"format\"]]) > 0\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "id": "e31ef01c-b225-48f0-bdf5-1efb8d4ed95c",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "Cannot index with multidimensional key",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Input \u001b[0;32mIn [134]\u001b[0m, in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdownload_codebook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdownload_codebook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlike\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mformat\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpercentile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n",
+ "File \u001b[0;32m/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py:931\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 928\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 930\u001b[0m maybe_callable \u001b[38;5;241m=\u001b[39m com\u001b[38;5;241m.\u001b[39mapply_if_callable(key, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj)\n\u001b[0;32m--> 931\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmaybe_callable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py:1151\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(labels, MultiIndex)):\n\u001b[1;32m 1150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mndim\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m key\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m-> 1151\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot index with multidimensional key\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1153\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_iterable(key, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 1155\u001b[0m \u001b[38;5;66;03m# nested tuple slicing\u001b[39;00m\n",
+ "\u001b[0;31mValueError\u001b[0m: Cannot index with multidimensional key"
+ ]
+ }
+ ],
+ "source": [
+ "download_codebook.loc[download_codebook.filter(like=\"format\") == \"percentile\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 131,
+ "id": "73268de4-3378-4ac7-bf85-f483a78c3966",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "download_codebook = pd.concat(\n",
+ " [\n",
+ " codebook.set_index(\"score_name\"),\n",
+ " shapefile_codes.rename(\n",
+ " columns={\"meaning\": \"shapefile_column\", \"column\": \"score_name\"}\n",
+ " ).set_index(\"score_name\"),\n",
+ " details_to_merge.set_index(\"score_name\"),\n",
+ " ],\n",
+ " axis=1,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6321ed42-aee6-40fc-8bf8-2a4ce4276eca",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
index abf387b2..45415a57 100644
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -198,42 +198,42 @@ TILES_SCORE_COLUMNS = {
field_names.WASTEWATER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
field_names.UST_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "UST_PFS",
- field_names.M_WATER: "M_WTR",
- field_names.M_WORKFORCE: "M_WKFC",
- field_names.M_CLIMATE: "M_CLT",
- field_names.M_ENERGY: "M_ENY",
- field_names.M_TRANSPORTATION: "M_TRN",
- field_names.M_HOUSING: "M_HSG",
- field_names.M_POLLUTION: "M_PLN",
- field_names.M_HEALTH: "M_HLTH",
+ field_names.N_WATER: "N_WTR",
+ field_names.N_WORKFORCE: "N_WKFC",
+ field_names.N_CLIMATE: "N_CLT",
+ field_names.N_ENERGY: "N_ENY",
+ field_names.N_TRANSPORTATION: "N_TRN",
+ field_names.N_HOUSING: "N_HSG",
+ field_names.N_POLLUTION: "N_PLN",
+ field_names.N_HEALTH: "N_HLTH",
# temporarily update this so that it's the Narwhal score that gets visualized on the map
# The NEW final score value INCLUDES the adjacency index.
- field_names.FINAL_SCORE_N_BOOLEAN: "SM_C",
+ field_names.FINAL_SCORE_N_BOOLEAN: "SN_C",
field_names.SCORE_N_COMMUNITIES
- + field_names.ADJACENT_MEAN_SUFFIX: "SM_DON",
- field_names.SCORE_N_COMMUNITIES: "SM_NO_DON",
- field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
- field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
- field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
- field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "PM25LI",
- field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLI",
- field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DPMLI",
- field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD: "TPLI",
- field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "LPMHVLI",
- field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HBLI",
- field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD: "RMPLI",
- field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD: "SFLI",
- field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HWLI",
- field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "WDLI",
+ + field_names.ADJACENT_MEAN_SUFFIX: "SN_DON",
+ field_names.SCORE_N_COMMUNITIES: "SN_NO_DON",
+ field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
+ field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
+ field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
+ field_names.PM25_EXPOSURE_LOW_INCOME_FIELD: "PM25LI",
+ field_names.ENERGY_BURDEN_LOW_INCOME_FIELD: "EBLI",
+ field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD: "DPMLI",
+ field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD: "TPLI",
+ field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD: "LPMHVLI",
+ field_names.HOUSING_BURDEN_LOW_INCOME_FIELD: "HBLI",
+ field_names.RMP_LOW_INCOME_FIELD: "RMPLI",
+ field_names.SUPERFUND_LOW_INCOME_FIELD: "SFLI",
+ field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: "HWLI",
+ field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD: "WDLI",
field_names.UST_LOW_INCOME_FIELD: "USTLI",
- field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DLI",
- field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD: "ALI",
- field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HDLI",
- field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD: "LLELI",
- field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD: "LILHSE",
- field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD: "PLHSE",
- field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD: "LMILHSE",
- field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD: "ULHSE",
+ field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
+ field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
+ field_names.HEART_DISEASE_LOW_INCOME_FIELD: "HDLI",
+ field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: "LLELI",
+ field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD: "LILHSE",
+ field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
+ field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
+ field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
# new booleans only for the environmental factors
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD: "EPL_ET",
field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD: "EAL_ET",
@@ -276,28 +276,24 @@ TILES_SCORE_COLUMNS = {
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS",
- field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD: "LHE",
+ field_names.LOW_HS_EDUCATION_FIELD: "LHE",
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
# Percentage of HS Degree completion for Islands
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
- field_names.COLLEGE_ATTENDANCE_FIELD: "CA",
- field_names.COLLEGE_NON_ATTENDANCE_FIELD: "NCA",
- # This is logically equivalent to "non-college greater than 80%"
- field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD: "CA_LT20",
# Booleans for the front end about the types of thresholds exceeded
- field_names.CLIMATE_THRESHOLD_EXCEEDED: "M_CLT_EOMI",
- field_names.ENERGY_THRESHOLD_EXCEEDED: "M_ENY_EOMI",
- field_names.TRAFFIC_THRESHOLD_EXCEEDED: "M_TRN_EOMI",
- field_names.HOUSING_THREHSOLD_EXCEEDED: "M_HSG_EOMI",
- field_names.POLLUTION_THRESHOLD_EXCEEDED: "M_PLN_EOMI",
- field_names.WATER_THRESHOLD_EXCEEDED: "M_WTR_EOMI",
- field_names.HEALTH_THRESHOLD_EXCEEDED: "M_HLTH_EOMI",
- field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI",
+ field_names.CLIMATE_THRESHOLD_EXCEEDED: "N_CLT_EOMI",
+ field_names.ENERGY_THRESHOLD_EXCEEDED: "N_ENY_EOMI",
+ field_names.TRAFFIC_THRESHOLD_EXCEEDED: "N_TRN_EOMI",
+ field_names.HOUSING_THREHSOLD_EXCEEDED: "N_HSG_EOMI",
+ field_names.POLLUTION_THRESHOLD_EXCEEDED: "N_PLN_EOMI",
+ field_names.WATER_THRESHOLD_EXCEEDED: "N_WTR_EOMI",
+ field_names.HEALTH_THRESHOLD_EXCEEDED: "N_HLTH_EOMI",
+ field_names.WORKFORCE_THRESHOLD_EXCEEDED: "N_WKFC_EOMI",
# These are the booleans for socioeconomic indicators
## this measures low income boolean
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S",
## Low high school for t&wd
- field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
+ field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "N_WKFC_EBSI",
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
field_names.DOT_TRAVEL_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS",
@@ -377,8 +373,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
# Island areas HS degree attainment rate
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
- field_names.COLLEGE_NON_ATTENDANCE_FIELD,
- field_names.COLLEGE_ATTENDANCE_FIELD,
field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.FUTURE_WILDFIRE_RISK_FIELD
diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index 64804322..e8631258 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -403,6 +403,7 @@ class ScoreETL(ExtractTransformLoad):
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
)
+ # Donut columns get added later
numeric_columns = [
field_names.HOUSING_BURDEN_FIELD,
field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD,
@@ -477,12 +478,15 @@ class ScoreETL(ExtractTransformLoad):
non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
field_names.PERSISTENT_POVERTY_FIELD,
- field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
- field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
+ ]
+
+ boolean_columns = [
field_names.AML_BOOLEAN,
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
+ field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
+ field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
]
# For some columns, high values are "good", so we want to reverse the percentile
@@ -523,6 +527,7 @@ class ScoreETL(ExtractTransformLoad):
non_numeric_columns
+ numeric_columns
+ [rp.field_name for rp in reverse_percentiles]
+ + boolean_columns
)
df_copy = df[columns_to_keep].copy()
@@ -533,6 +538,10 @@ class ScoreETL(ExtractTransformLoad):
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
+ # Cast each flag column to bool dtype. NOTE(review): astype(bool) maps NaN to True - confirm these columns have no missing values
+ for col in boolean_columns:
+ df_copy[col] = df_copy[col].astype(bool)
+
# Convert all columns to numeric and do math
# Note that we have a few special conditions here and we handle them explicitly.
# For *Linguistic Isolation*, we do NOT want to include Puerto Rico in the percentile
diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
index 24348305..14f72ad2 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
@@ -53,7 +53,7 @@ class GeoScoreETL(ExtractTransformLoad):
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
field_names.SCORE_N
]
- self.TARGET_SCORE_RENAME_TO = "M_SCORE"
+ self.TARGET_SCORE_RENAME_TO = "SCORE"
# Import the shortened name for tract ("GTF") that's used on the tiles.
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
index a4043668..af29b11f 100644
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
@@ -1,3 +1,3 @@
-GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate 
school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed 
diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households 
below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th 
percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 
90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th 
percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M 
(communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share 
of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N 
(communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition M community, including adjacency index tracts"
-01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.026128314658
8784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,True,0.8571428571428571,False,False,False
-01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615
533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1,True,1.0,True,True,True
+GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate 
school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed 
diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households 
below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of 
fire in 30 years,At least one climate threshold exceeded,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for lead paint and 
the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,At least one housing threshold exceeded,Housing Factor (Definition N),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for wastewater discharge,Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,At least one water threshold exceeded,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th 
percentile for low life expectancy,At least one health threshold exceeded,Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?,Greater than or equal to the 90th percentile for unemployment and has low HS attainment?,At least one workforce threshold exceeded,Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for 
unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition N),Total categories exceeded,Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition N community, including adjacency index tracts"
+01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.026128314658
8784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,0.0,False,0,True,0.8571428571428571,False,False,False
+01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615
533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,False,False,True,False,False,True,False,False,True,False,False,True,True,False,True,False,True,True,True,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,6.0,True,1,True,1.0,True,True,True
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
index 0eb41e97..a91ff8ca 100644
Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
index 8d726f01..5ed5203d 100644
Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
index 8119822a..1d34250f 100644
Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
index 49f9fe03..4baa3447 100644
Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ
diff --git a/data/data-pipeline/data_pipeline/ipython/TractArea.ipynb b/data/data-pipeline/data_pipeline/ipython/TractArea.ipynb
deleted file mode 100644
index d4b678c1..00000000
--- a/data/data-pipeline/data_pipeline/ipython/TractArea.ipynb
+++ /dev/null
@@ -1,418 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 54,
- "id": "df048f08",
- "metadata": {},
- "outputs": [],
- "source": [
- "import geopandas as gpd\n",
- "import pathlib"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "id": "62366f7d",
- "metadata": {},
- "outputs": [],
- "source": [
- "lowJson = pathlib.Path() / 'usa-low.json'\n",
- "assert lowJson.exists()\n",
- "highJson = pathlib.Path() / 'usa-high.json'\n",
- "assert highJson.exists()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "id": "4077ed78",
- "metadata": {},
- "outputs": [],
- "source": [
- "gdf = gpd.read_file(highJson)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "id": "d4abfc64",
- "metadata": {},
- "outputs": [],
- "source": [
- "gdf['area'] = gdf.apply(lambda row : gpd.GeoSeries(row['geometry']).area, axis = 1)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5077d9ef",
- "metadata": {},
- "source": [
- "Add `zlfc` = *zoom level full containment*, This field will indicate the maximum zoom level the user can go up to while still keeping the entire tract in view. Below, we sample a few tracts to get an idea of the relationship between zoom level and area"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 89,
- "id": "a1234574",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " GEOID10 | \n",
- " SF | \n",
- " CF | \n",
- " area | \n",
- " zlfc | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 9846 | \n",
- " 02185000200 | \n",
- " Alaska | \n",
- " North Slope Borough | \n",
- " 53.323702 | \n",
- " 4.45 | \n",
- "
\n",
- " \n",
- " 9937 | \n",
- " 02290000100 | \n",
- " Alaska | \n",
- " Yukon-Koyukuk Census Area | \n",
- " 21.653154 | \n",
- " 5.50 | \n",
- "
\n",
- " \n",
- " 9857 | \n",
- " 02188000100 | \n",
- " Alaska | \n",
- " Northwest Arctic Borough | \n",
- " 21.188159 | \n",
- " 5.50 | \n",
- "
\n",
- " \n",
- " 9935 | \n",
- " 02290000200 | \n",
- " Alaska | \n",
- " Yukon-Koyukuk Census Area | \n",
- " 20.744770 | \n",
- " 5.38 | \n",
- "
\n",
- " \n",
- " 9934 | \n",
- " 02290000300 | \n",
- " Alaska | \n",
- " Yukon-Koyukuk Census Area | \n",
- " 17.140826 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9936 | \n",
- " 02290000400 | \n",
- " Alaska | \n",
- " Yukon-Koyukuk Census Area | \n",
- " 14.687448 | \n",
- " 5.77 | \n",
- "
\n",
- " \n",
- " 9893 | \n",
- " 02180000100 | \n",
- " Alaska | \n",
- " Nome Census Area | \n",
- " 13.377817 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9847 | \n",
- " 02164000100 | \n",
- " Alaska | \n",
- " Lake and Peninsula Borough | \n",
- " 13.061644 | \n",
- " 5.33 | \n",
- "
\n",
- " \n",
- " 9918 | \n",
- " 02261000100 | \n",
- " Alaska | \n",
- " Valdez-Cordova Census Area | \n",
- " 11.118835 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9945 | \n",
- " 02050000100 | \n",
- " Alaska | \n",
- " Bethel Census Area | \n",
- " 10.951888 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9841 | \n",
- " 02270000100 | \n",
- " Alaska | \n",
- " Wade Hampton Census Area | \n",
- " 8.771806 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9839 | \n",
- " 02240000100 | \n",
- " Alaska | \n",
- " Southeast Fairbanks Census Area | \n",
- " 8.613690 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9843 | \n",
- " 02070000100 | \n",
- " Alaska | \n",
- " Dillingham Census Area | \n",
- " 8.575307 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9947 | \n",
- " 02050000300 | \n",
- " Alaska | \n",
- " Bethel Census Area | \n",
- " 8.408040 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9899 | \n",
- " 02170000101 | \n",
- " Alaska | \n",
- " Matanuska-Susitna Borough | \n",
- " 6.480444 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9944 | \n",
- " 02068000100 | \n",
- " Alaska | \n",
- " Denali Borough | \n",
- " 5.997236 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9836 | \n",
- " 02013000100 | \n",
- " Alaska | \n",
- " Aleutians East Borough | \n",
- " 5.487726 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9921 | \n",
- " 02122000100 | \n",
- " Alaska | \n",
- " Kenai Peninsula Borough | \n",
- " 4.831831 | \n",
- " 6.10 | \n",
- "
\n",
- " \n",
- " 9851 | \n",
- " 02150000100 | \n",
- " Alaska | \n",
- " Kodiak Island Borough | \n",
- " 4.664009 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9850 | \n",
- " 02105000300 | \n",
- " Alaska | \n",
- " Hoonah-Angoon Census Area | \n",
- " 4.305716 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9838 | \n",
- " 02016000100 | \n",
- " Alaska | \n",
- " Aleutians West Census Area | \n",
- " 4.053520 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9917 | \n",
- " 02282000100 | \n",
- " Alaska | \n",
- " Yakutat City and Borough | \n",
- " 3.926182 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9920 | \n",
- " 02261000300 | \n",
- " Alaska | \n",
- " Valdez-Cordova Census Area | \n",
- " 3.285482 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9840 | \n",
- " 02240000400 | \n",
- " Alaska | \n",
- " Southeast Fairbanks Census Area | \n",
- " 3.233961 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9919 | \n",
- " 02261000200 | \n",
- " Alaska | \n",
- " Valdez-Cordova Census Area | \n",
- " 3.156317 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 10354 | \n",
- " 41045970900 | \n",
- " Oregon | \n",
- " Malheur County | \n",
- " 2.731719 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9888 | \n",
- " 02198000100 | \n",
- " Alaska | \n",
- " Prince of Wales-Hyder Census Area | \n",
- " 2.606286 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 10212 | \n",
- " 41025960200 | \n",
- " Oregon | \n",
- " Harney County | \n",
- " 2.568943 | \n",
- " 7.08 | \n",
- "
\n",
- " \n",
- " 9844 | \n",
- " 02185000300 | \n",
- " Alaska | \n",
- " North Slope Borough | \n",
- " 2.463165 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- " 9858 | \n",
- " 02130000100 | \n",
- " Alaska | \n",
- " Ketchikan Gateway Borough | \n",
- " 2.440051 | \n",
- " 0.00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " GEOID10 SF CF area zlfc\n",
- "9846 02185000200 Alaska North Slope Borough 53.323702 4.45\n",
- "9937 02290000100 Alaska Yukon-Koyukuk Census Area 21.653154 5.50\n",
- "9857 02188000100 Alaska Northwest Arctic Borough 21.188159 5.50\n",
- "9935 02290000200 Alaska Yukon-Koyukuk Census Area 20.744770 5.38\n",
- "9934 02290000300 Alaska Yukon-Koyukuk Census Area 17.140826 0.00\n",
- "9936 02290000400 Alaska Yukon-Koyukuk Census Area 14.687448 5.77\n",
- "9893 02180000100 Alaska Nome Census Area 13.377817 0.00\n",
- "9847 02164000100 Alaska Lake and Peninsula Borough 13.061644 5.33\n",
- "9918 02261000100 Alaska Valdez-Cordova Census Area 11.118835 0.00\n",
- "9945 02050000100 Alaska Bethel Census Area 10.951888 0.00\n",
- "9841 02270000100 Alaska Wade Hampton Census Area 8.771806 0.00\n",
- "9839 02240000100 Alaska Southeast Fairbanks Census Area 8.613690 0.00\n",
- "9843 02070000100 Alaska Dillingham Census Area 8.575307 0.00\n",
- "9947 02050000300 Alaska Bethel Census Area 8.408040 0.00\n",
- "9899 02170000101 Alaska Matanuska-Susitna Borough 6.480444 0.00\n",
- "9944 02068000100 Alaska Denali Borough 5.997236 0.00\n",
- "9836 02013000100 Alaska Aleutians East Borough 5.487726 0.00\n",
- "9921 02122000100 Alaska Kenai Peninsula Borough 4.831831 6.10\n",
- "9851 02150000100 Alaska Kodiak Island Borough 4.664009 0.00\n",
- "9850 02105000300 Alaska Hoonah-Angoon Census Area 4.305716 0.00\n",
- "9838 02016000100 Alaska Aleutians West Census Area 4.053520 0.00\n",
- "9917 02282000100 Alaska Yakutat City and Borough 3.926182 0.00\n",
- "9920 02261000300 Alaska Valdez-Cordova Census Area 3.285482 0.00\n",
- "9840 02240000400 Alaska Southeast Fairbanks Census Area 3.233961 0.00\n",
- "9919 02261000200 Alaska Valdez-Cordova Census Area 3.156317 0.00\n",
- "10354 41045970900 Oregon Malheur County 2.731719 0.00\n",
- "9888 02198000100 Alaska Prince of Wales-Hyder Census Area 2.606286 0.00\n",
- "10212 41025960200 Oregon Harney County 2.568943 7.08\n",
- "9844 02185000300 Alaska North Slope Borough 2.463165 0.00\n",
- "9858 02130000100 Alaska Ketchikan Gateway Borough 2.440051 0.00"
- ]
- },
- "execution_count": 89,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "gdf['zlfc'] = 0\n",
- "gdf.at[9846, 'zlfc'] = 4.45\n",
- "gdf.at[10212, 'zlfc'] = 7.08\n",
- "gdf.at[9937, 'zlfc'] = 5.5\n",
- "gdf.at[9857, 'zlfc'] = 5.5\n",
- "gdf.at[9935, 'zlfc'] = 5.38\n",
- "gdf.at[9936, 'zlfc'] = 5.77\n",
- "gdf.at[9921, 'zlfc'] = 6.1\n",
- "gdf.at[9847, 'zlfc'] = 5.33\n",
- "gdf_short = gdf[[\"GEOID10\", \"SF\", \"CF\", \"area\", \"zlfc\"]]\n",
- "gdf_short_sorted = gdf_short.sort_values(by='area', ascending=False);\n",
- "gdf_short_sorted.head(30)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "5930de0e",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.13"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb
index f3619af6..6c464972 100644
--- a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb
@@ -150,7 +150,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.6"
+ "version": "3.9.10"
}
},
"nbformat": 4,
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py
index a07816ff..2029fba2 100644
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -9,23 +9,6 @@ GEOID_TRACT_FIELD = "GEOID10_TRACT"
STATE_FIELD = "State/Territory"
COUNTY_FIELD = "County Name"
-# Score file field names
-# Definition M fields
-SCORE_M = "Definition M"
-FINAL_SCORE_N_BOOLEAN = (
- "Definition M community, including adjacency index tracts"
-)
-SCORE_M_COMMUNITIES = "Definition M (communities)"
-M_CLIMATE = "Climate Factor (Definition M)"
-M_ENERGY = "Energy Factor (Definition M)"
-M_TRANSPORTATION = "Transportation Factor (Definition M)"
-M_HOUSING = "Housing Factor (Definition M)"
-M_POLLUTION = "Pollution Factor (Definition M)"
-M_WATER = "Water Factor (Definition M)"
-M_HEALTH = "Health Factor (Definition M)"
-M_WORKFORCE = "Workforce Factor (Definition M)"
-M_NON_WORKFORCE = "Any Non-Workforce Factor (Definition M)"
-
# Definition Narwhal fields
SCORE_N = "Definition N (communities)"
SCORE_N_COMMUNITIES = "Definition N (communities)"
@@ -38,6 +21,9 @@ N_WATER = "Water Factor (Definition N)"
N_HEALTH = "Health Factor (Definition N)"
N_WORKFORCE = "Workforce Factor (Definition N)"
N_NON_WORKFORCE = "Any Non-Workforce Factor (Definition N)"
+FINAL_SCORE_N_BOOLEAN = (
+ "Definition N community, including adjacency index tracts"
+)
PERCENTILE = 90
MEDIAN_HOUSE_VALUE_PERCENTILE = 90
@@ -545,22 +531,22 @@ LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD = (
# Workforce
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for unemployment"
- " and has low HS education?"
+ " and has low HS attainment?"
)
LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for households in linguistic isolation"
- " and has low HS education?"
+ " and has low HS attainment?"
)
POVERTY_LOW_HS_EDUCATION_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
- " and has low HS education?"
+ " and has low HS attainment?"
)
LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for low median household income as a "
- f"percent of area median income and has low HS education?"
+ f"percent of area median income and has low HS attainment?"
)
# Score M Workforce Variables
diff --git a/data/data-pipeline/data_pipeline/score/score_runner.py b/data/data-pipeline/data_pipeline/score/score_runner.py
index 54b5c878..81343201 100644
--- a/data/data-pipeline/data_pipeline/score/score_runner.py
+++ b/data/data-pipeline/data_pipeline/score/score_runner.py
@@ -1,5 +1,4 @@
import pandas as pd
-from data_pipeline.score.score_m import ScoreM
from data_pipeline.score.score_narwhal import ScoreNarwhal
from data_pipeline.utils import get_module_logger
@@ -13,8 +12,6 @@ class ScoreRunner:
self.df = df
def calculate_scores(self) -> pd.DataFrame:
- # Index scores
- self.df = ScoreM(df=self.df).add_columns()
self.df = ScoreNarwhal(df=self.df).add_columns()
return self.df