From 6418335219d9f0f17296cff77e23c31e2ea504f6 Mon Sep 17 00:00:00 2001 From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com> Date: Tue, 23 Aug 2022 16:19:00 -0400 Subject: [PATCH] Updates backend constants to N (#1854) --- .../data_pipeline/content/config/csv.yml | 738 ++++++++-------- .../data_pipeline/content/config/excel.yml | 90 +- .../content/config/scratch.ipynb | 798 ++++++++++++++++++ .../data_pipeline/etl/score/constants.py | 90 +- .../data_pipeline/etl/score/etl_score.py | 13 +- .../data_pipeline/etl/score/etl_score_geo.py | 2 +- .../tests/sample_data/score_data_initial.csv | 6 +- .../snapshots/downloadable_data_expected.pkl | Bin 17649 -> 16517 bytes .../tests/snapshots/score_data_expected.pkl | Bin 25454 -> 21554 bytes .../snapshots/score_transformed_expected.pkl | Bin 25135 -> 21208 bytes .../tests/snapshots/tile_data_expected.pkl | Bin 4789 -> 4706 bytes .../data_pipeline/ipython/TractArea.ipynb | 418 --------- ...pare_two_score_files_for_differences.ipynb | 2 +- .../data_pipeline/score/field_names.py | 28 +- .../data_pipeline/score/score_runner.py | 3 - 15 files changed, 1277 insertions(+), 911 deletions(-) create mode 100644 data/data-pipeline/data_pipeline/content/config/scratch.ipynb delete mode 100644 data/data-pipeline/data_pipeline/ipython/TractArea.ipynb diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 591c2b72..0036686a 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -5,372 +5,372 @@ global_config: float: 2 loss_rate_percentage: 4 fields: - - score_name: GEOID10_TRACT - label: Census tract ID - format: string - - score_name: County Name - label: County Name - format: string - - score_name: State/Territory - label: State/Territory - format: string - - score_name: Percent Black or African American - label: Percent Black or African American alone - format: float - - score_name: Percent American Indian / Alaska Native - label: Percent American Indian / Alaska Native - format: float - - score_name: Percent Asian - label: Percent Asian - format: float - - score_name: Percent Native Hawaiian or Pacific - label: Percent Native Hawaiian or Pacific - format: float - - score_name: Percent two or more races - label: Percent two or more races - format: float - - score_name: Percent White - label: Percent White - format: float - - score_name: Percent Hispanic or Latino - label: Percent Hispanic or Latino - format: float - - score_name: Percent other races - label: Percent other races - format: float - - score_name: Percent age under 10 - label: Percent age under 10 - format: float - - score_name: Percent age 10 to 64 - label: Percent age 10 to 64 - format: float - - score_name: Percent age over 64 - label: Percent age over 64 - format: float - - score_name: Total threshold criteria exceeded - label: Total threshold criteria exceeded - format: int64 - - score_name: Total categories exceeded - label: Total categories exceeded - format: int64 - - score_name: Definition N (communities) - label: Identified as disadvantaged without considering neighbors - format: bool - - score_name: Definition N (communities) (based on adjacency index and low income alone) - label: Identified as disadvantaged based on neighbors and relaxed low income threshold only - format: bool - - score_name: Definition M community, including adjacency index tracts - label: Identified as disadvantaged - format: bool - - score_name: Definition N (communities) (average of neighbors) - label: Share of neighbors that are identified as disadvantaged - format: percentage - - score_name: Total population - label: Total population - format: float - - score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted - label: Adjusted percent of individuals below 200% Federal Poverty Line - format: float - - score_name: Is low income and has a low percent of higher ed students? - label: Is low income and high percent of residents that are not higher ed students? - format: bool - - score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile) - label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile) - format: percentage - - score_name: Expected agricultural loss rate (Natural Hazards Risk Index) - label: Expected agricultural loss rate (Natural Hazards Risk Index) - format: loss_rate_percentage - - score_name: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile) - label: Expected building loss rate (Natural Hazards Risk Index) (percentile) - format: percentage - - score_name: Expected building loss rate (Natural Hazards Risk Index) - label: Expected building loss rate (Natural Hazards Risk Index) - format: loss_rate_percentage - - score_name: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile) - label: Expected population loss rate (Natural Hazards Risk Index) (percentile) - format: percentage - - score_name: Expected population loss rate (Natural Hazards Risk Index) - label: Expected population loss rate (Natural Hazards Risk Index) - format: loss_rate_percentage - - score_name: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for energy burden, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Energy burden (percentile) - label: Energy burden (percentile) - format: percentage - - score_name: Energy burden - label: Energy burden - format: percentage - - score_name: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: PM2.5 in the air (percentile) - label: PM2.5 in the air (percentile) - format: percentage - - score_name: PM2.5 in the air - label: PM2.5 in the air - format: float - - score_name: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Diesel particulate matter exposure (percentile) - label: Diesel particulate matter exposure (percentile) - format: percentage - - score_name: Diesel particulate matter exposure - label: Diesel particulate matter exposure - format: float - - score_name: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for traffic proximity, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Traffic proximity and volume (percentile) - label: Traffic proximity and volume (percentile) - format: percentage - - score_name: Traffic proximity and volume - label: Traffic proximity and volume - format: float - - score_name: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for housing burden, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Housing burden (percent) (percentile) - label: Housing burden (percent) (percentile) - format: percentage - - score_name: Housing burden (percent) - label: Housing burden (percent) - format: percentage - - score_name: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Percent pre-1960s housing (lead paint indicator) (percentile) - label: Percent pre-1960s housing (lead paint indicator) (percentile) - format: percentage - - score_name: Percent pre-1960s housing (lead paint indicator) - label: Percent pre-1960s housing (lead paint indicator) - format: percentage - - score_name: Median value ($) of owner-occupied housing units (percentile) - label: Median value ($) of owner-occupied housing units (percentile) - format: percentage - - score_name: Median value ($) of owner-occupied housing units - label: Median value ($) of owner-occupied housing units - format: float - - score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Proximity to hazardous waste sites (percentile) - label: Proximity to hazardous waste sites (percentile) - format: percentage - - score_name: Proximity to hazardous waste sites - label: Proximity to hazardous waste sites - format: float - - score_name: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Proximity to NPL sites (percentile) - label: Proximity to NPL (Superfund) sites (percentile) - format: percentage - - score_name: Proximity to NPL sites - label: Proximity to NPL (Superfund) sites - format: float - - score_name: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Proximity to Risk Management Plan (RMP) facilities (percentile) - label: Proximity to Risk Management Plan (RMP) facilities (percentile) - format: percentage - - score_name: Proximity to Risk Management Plan (RMP) facilities - label: Proximity to Risk Management Plan (RMP) facilities - format: float - - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Wastewater discharge (percentile) - label: Wastewater discharge (percentile) - format: percentage - - score_name: Wastewater discharge - label: Wastewater discharge - format: float - - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for asthma, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Current asthma among adults aged greater than or equal to 18 years (percentile) - label: Current asthma among adults aged greater than or equal to 18 years (percentile) - format: percentage - - score_name: Current asthma among adults aged greater than or equal to 18 years - label: Current asthma among adults aged greater than or equal to 18 years - format: percentage - - score_name: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for diabetes, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) - label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) - format: percentage - - score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years - label: Diagnosed diabetes among adults aged greater than or equal to 18 years - format: percentage - - score_name: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for heart disease, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile) - label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile) - format: percentage - - score_name: Coronary heart disease among adults aged greater than or equal to 18 years - label: Coronary heart disease among adults aged greater than or equal to 18 years - format: percentage - - score_name: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for low life expectancy, is low income, and high percent of residents that are not higher ed students? - format: bool - - score_name: Low life expectancy (percentile) - label: Low life expectancy (percentile) - format: percentage - - score_name: Life expectancy (years) - label: Life expectancy (years) - format: float - - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and high percent of residents that are not higher ed students? - format: bool - - score_name: Low median household income as a percent of area median income (percentile) - label: Low median household income as a percent of area median income (percentile) - format: percentage - - score_name: Median household income as a percent of area median income - label: Median household income as a percent of area median income - format: percentage - - score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and high percent of residents that are not higher ed students? - format: bool - - score_name: Linguistic isolation (percent) (percentile) - label: Linguistic isolation (percent) (percentile) - format: percentage - - score_name: Linguistic isolation (percent) - label: Linguistic isolation (percent) - format: percentage - - score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and high percent of residents that are not higher ed students? - format: bool - - score_name: Unemployment (percent) (percentile) - label: Unemployment (percent) (percentile) - format: percentage - - score_name: Unemployment (percent) - label: Unemployment (percent) - format: percentage - - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and high percent of residents that are not higher ed students? - format: bool - - score_name: Percent of individuals below 200% Federal Poverty Line (percentile) - label: Percent of individuals below 200% Federal Poverty Line (percentile) - format: percentage - - score_name: Percent of individuals below 200% Federal Poverty Line - label: Percent of individuals below 200% Federal Poverty Line - format: percentage - - score_name: Percent of individuals < 100% Federal Poverty Line (percentile) - label: Percent of individuals < 100% Federal Poverty Line (percentile) - format: percentage - - score_name: Percent of individuals < 100% Federal Poverty Line - label: Percent of individuals < 100% Federal Poverty Line - format: percentage - - score_name: Percent individuals age 25 or over with less than high school degree (percentile) - label: Percent individuals age 25 or over with less than high school degree (percentile) - format: percentage - - score_name: Percent individuals age 25 or over with less than high school degree - label: Percent individuals age 25 or over with less than high school degree - format: percentage - - score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR) - label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR) - format: percentage - - score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR) - label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR) - format: percentage - - score_name: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)? - label: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)? - format: bool - - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)? - label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)? - format: bool - - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)? - label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)? - format: bool - - score_name: Percent of population not currently enrolled in college or graduate school - label: Percent of residents who are not currently enrolled in higher ed - format: percentage - - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? - label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? - format: bool - - score_name: Greater than or equal to the 90th percentile for DOT transit barriers and is low income? - label: Greater than or equal to the 90th percentile for DOT transit barriers and is low income? - format: bool - - score_name: DOT Travel Barriers Score (percentile) - label: DOT Travel Barriers Score (percentile) - format: percentage - - score_name: Leaky underground storage tanks (percentile) - label: Leaky underground storage tanks (percentile) - format: percentage - - score_name: Leaky underground storage tanks - label: Leaky underground storage tanks - format: float - - score_name: Share of properties at risk of flood in 30 years - label: Share of properties at risk of flood in 30 years - format: float - - score_name: Share of properties at risk of fire in 30 years - label: Share of properties at risk of fire in 30 years - format: float - - score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income? - label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income? - format: bool - - score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income? - label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income? - format: bool - - score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years - label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years - format: bool - - score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years - label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years - format: bool - - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? - label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? - format: bool - - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent - label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent - format: bool - - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent - label: Share of the tract's land area that is covered by impervious surface or cropland as a percent - format: percentage - - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) - label: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) - format: percentage - - score_name: Share of properties at risk of flood in 30 years (percentile) - label: Share of properties at risk of flood in 30 years (percentile) - format: percentage - - score_name: Share of properties at risk of fire in 30 years (percentile) - label: Share of properties at risk of fire in 30 years (percentile) - format: percentage - - score_name: Does the tract have at least 35 acres in it? - label: Does the tract have at least 35 acres in it? - format: bool - - score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract? - label: Is there at least one Formerly Used Defense Site (FUDS) in the tract? - format: bool - - score_name: Is there at least one abandoned mine in this census tract? - label: Is there at least one abandoned mine in this census tract? - format: bool - - score_name: There is at least one abandoned mine in this census tract and the tract is low income. - label: There is at least one abandoned mine in this census tract and the tract is low income. - format: bool - - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. - label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. - format: bool - - score_name: Tract-level redlining score meets or exceeds 3.25 and is low income - label: Tract experienced historic underinvestment and remains low income - format: bool - - score_name: Tract-level redlining score meets or exceeds 3.25 - label: Tract experienced historic underinvestment - format: bool - - score_name: Income data has been estimated based on neighbor income - label: Income data has been estimated based on geographic neighbor income - format: bool +- score_name: GEOID10_TRACT + label: Census tract ID + format: string +- score_name: County Name + label: County Name + format: string +- score_name: State/Territory + label: State/Territory + format: string +- score_name: Percent Black or African American + label: Percent Black or African American alone + format: float +- score_name: Percent American Indian / Alaska Native + label: Percent American Indian / Alaska Native + format: float +- score_name: Percent Asian + label: Percent Asian + format: float +- score_name: Percent Native Hawaiian or Pacific + label: Percent Native Hawaiian or Pacific + format: float +- score_name: Percent two or more races + label: Percent two or more races + format: float +- score_name: Percent White + label: Percent White + format: float +- score_name: Percent Hispanic or Latino + label: Percent Hispanic or Latino + format: float +- score_name: Percent other races + label: Percent other races + format: float +- score_name: Percent age under 10 + label: Percent age under 10 + format: float +- score_name: Percent age 10 to 64 + label: Percent age 10 to 64 + format: float +- score_name: Percent age over 64 + label: Percent age over 64 + format: float +- score_name: Total threshold criteria exceeded + label: Total threshold criteria exceeded + format: int64 +- score_name: Total categories exceeded + label: Total categories exceeded + format: int64 +- score_name: Definition N (communities) + label: Identified as disadvantaged without considering neighbors + format: bool +- score_name: Definition N (communities) (based on adjacency index and low income alone) + label: Identified as disadvantaged based on neighbors and relaxed low income threshold only + format: bool +- score_name: Definition N community, including adjacency index tracts + label: Identified as disadvantaged + format: bool +- score_name: Definition N (communities) (average of neighbors) + label: Share of neighbors that are identified as disadvantaged + format: percentage +- score_name: Total population + label: Total population + format: float +- score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted + label: Adjusted percent of individuals below 200% Federal Poverty Line + format: float +- score_name: Is low income (imputed and adjusted)? + label: Is low income? + format: bool +- score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income? + label: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income? + format: bool +- score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile) + label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile) + format: percentage +- score_name: Expected agricultural loss rate (Natural Hazards Risk Index) + label: Expected agricultural loss rate (Natural Hazards Risk Index) + format: loss_rate_percentage +- score_name: Greater than or equal to the 90th percentile for expected building loss rate and is low income? + label: Greater than or equal to the 90th percentile for expected building loss rate and is low income? + format: bool +- score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile) + label: Expected building loss rate (Natural Hazards Risk Index) (percentile) + format: percentage +- score_name: Expected building loss rate (Natural Hazards Risk Index) + label: Expected building loss rate (Natural Hazards Risk Index) + format: loss_rate_percentage +- score_name: Greater than or equal to the 90th percentile for expected population loss rate and is low income? + label: Greater than or equal to the 90th percentile for expected population loss rate and is low income? + format: bool +- score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile) + label: Expected population loss rate (Natural Hazards Risk Index) (percentile) + format: percentage +- score_name: Expected population loss rate (Natural Hazards Risk Index) + label: Expected population loss rate (Natural Hazards Risk Index) + format: loss_rate_percentage +- score_name: Greater than or equal to the 90th percentile for energy burden and is low income? + label: Greater than or equal to the 90th percentile for energy burden and is low income? + format: bool +- score_name: Energy burden (percentile) + label: Energy burden (percentile) + format: percentage +- score_name: Energy burden + label: Energy burden + format: percentage +- score_name: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income? + label: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income? + format: bool +- score_name: PM2.5 in the air (percentile) + label: PM2.5 in the air (percentile) + format: percentage +- score_name: PM2.5 in the air + label: PM2.5 in the air + format: float +- score_name: Greater than or equal to the 90th percentile for diesel particulate matter and is low income? + label: Greater than or equal to the 90th percentile for diesel particulate matter and is low income? + format: bool +- score_name: Diesel particulate matter exposure (percentile) + label: Diesel particulate matter exposure (percentile) + format: percentage +- score_name: Diesel particulate matter exposure + label: Diesel particulate matter exposure + format: float +- score_name: Greater than or equal to the 90th percentile for traffic proximity and is low income? + label: Greater than or equal to the 90th percentile for traffic proximity and is low income? + format: bool +- score_name: Traffic proximity and volume (percentile) + label: Traffic proximity and volume (percentile) + format: percentage +- score_name: Traffic proximity and volume + label: Traffic proximity and volume + format: float +- score_name: Greater than or equal to the 90th percentile for housing burden and is low income? + label: Greater than or equal to the 90th percentile for housing burden and is low income? + format: bool +- score_name: Housing burden (percent) (percentile) + label: Housing burden (percent) (percentile) + format: percentage +- score_name: Housing burden (percent) + label: Housing burden (percent) + format: percentage +- score_name: Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income? + label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile and is low income? + format: bool +- score_name: Percent pre-1960s housing (lead paint indicator) (percentile) + label: Percent pre-1960s housing (lead paint indicator) (percentile) + format: percentage +- score_name: Percent pre-1960s housing (lead paint indicator) + label: Percent pre-1960s housing (lead paint indicator) + format: percentage +- score_name: Median value ($) of owner-occupied housing units (percentile) + label: Median value ($) of owner-occupied housing units (percentile) + format: percentage +- score_name: Median value ($) of owner-occupied housing units + label: Median value ($) of owner-occupied housing units + format: float +- score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income? + label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income? + format: bool +- score_name: Proximity to hazardous waste sites (percentile) + label: Proximity to hazardous waste sites (percentile) + format: percentage +- score_name: Proximity to hazardous waste sites + label: Proximity to hazardous waste sites + format: float +- score_name: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income? + label: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income? + format: bool +- score_name: Proximity to NPL sites (percentile) + label: Proximity to NPL (Superfund) sites (percentile) + format: percentage +- score_name: Proximity to NPL sites + label: Proximity to NPL (Superfund) sites + format: float +- score_name: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income? + label: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income? + format: bool +- score_name: Proximity to Risk Management Plan (RMP) facilities (percentile) + label: Proximity to Risk Management Plan (RMP) facilities (percentile) + format: percentage +- score_name: Proximity to Risk Management Plan (RMP) facilities + label: Proximity to Risk Management Plan (RMP) facilities + format: float +- score_name: Greater than or equal to the 90th percentile for wastewater discharge and is low income? + label: Greater than or equal to the 90th percentile for wastewater discharge and is low income? + format: bool +- score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? + format: bool +- score_name: Wastewater discharge (percentile) + label: Wastewater discharge (percentile) + format: percentage +- score_name: Leaky underground storage tanks (percentile) + label: Leaky underground storage tanks (percentile) + format: percentage +- score_name: Wastewater discharge + label: Wastewater discharge + format: float +- score_name: Leaky underground storage tanks + label: Leaky underground storage tanks + format: float +- score_name: Greater than or equal to the 90th percentile for asthma and is low income? + label: Greater than or equal to the 90th percentile for asthma and is low income? + format: bool +- score_name: Current asthma among adults aged greater than or equal to 18 years (percentile) + label: Current asthma among adults aged greater than or equal to 18 years (percentile) + format: percentage +- score_name: Current asthma among adults aged greater than or equal to 18 years + label: Current asthma among adults aged greater than or equal to 18 years + format: percentage +- score_name: Greater than or equal to the 90th percentile for diabetes and is low income? + label: Greater than or equal to the 90th percentile for diabetes and is low income? + format: bool +- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) + label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) + format: percentage +- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years + label: Diagnosed diabetes among adults aged greater than or equal to 18 years + format: percentage +- score_name: Greater than or equal to the 90th percentile for heart disease and is low income? + label: Greater than or equal to the 90th percentile for heart disease and is low income? + format: bool +- score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile) + label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile) + format: percentage +- score_name: Coronary heart disease among adults aged greater than or equal to 18 years + label: Coronary heart disease among adults aged greater than or equal to 18 years + format: percentage +- score_name: Greater than or equal to the 90th percentile for low life expectancy and is low income? + label: Greater than or equal to the 90th percentile for low life expectancy and is low income? + format: bool +- score_name: Low life expectancy (percentile) + label: Low life expectancy (percentile) + format: percentage +- score_name: Life expectancy (years) + label: Life expectancy (years) + format: float +- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment? + label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment? + format: bool +- score_name: Low median household income as a percent of area median income (percentile) + label: Low median household income as a percent of area median income (percentile) + format: percentage +- score_name: Median household income as a percent of area median income + label: Median household income as a percent of area median income + format: percentage +- score_name: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment? + label: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment? + format: bool +- score_name: Linguistic isolation (percent) (percentile) + label: Linguistic isolation (percent) (percentile) + format: percentage +- score_name: Linguistic isolation (percent) + label: Linguistic isolation (percent) + format: percentage +- score_name: Greater than or equal to the 90th percentile for unemployment and has low HS attainment? + label: Greater than or equal to the 90th percentile for unemployment and has low HS attainment? + format: bool +- score_name: Unemployment (percent) (percentile) + label: Unemployment (percent) (percentile) + format: percentage +- score_name: Unemployment (percent) + label: Unemployment (percent) + format: percentage +- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment? + label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment? + format: bool +- score_name: Percent of individuals below 200% Federal Poverty Line (percentile) + label: Percent of individuals below 200% Federal Poverty Line (percentile) + format: percentage +- score_name: Percent of individuals below 200% Federal Poverty Line + label: Percent of individuals below 200% Federal Poverty Line + format: percentage +- score_name: Percent of individuals < 100% Federal Poverty Line (percentile) + label: Percent of individuals < 100% Federal Poverty Line (percentile) + format: percentage +- score_name: Percent of individuals < 100% Federal Poverty Line + label: Percent of individuals < 100% Federal Poverty Line + format: percentage +- score_name: Percent individuals age 25 or over with less than high school degree (percentile) + label: Percent individuals age 25 or over with less than high school degree (percentile) + format: percentage +- score_name: Percent individuals age 25 or over with less than high school degree + label: Percent individuals age 25 or over with less than high school degree + format: percentage +- score_name: Percent of population not currently enrolled in college or graduate school + label: Percent of residents who are not currently enrolled in higher ed + format: percentage +- score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR) + label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR) + format: percentage +- score_name: Greater than or equal to the 90th percentile for DOT transit barriers and is low income? + label: Greater than or equal to the 90th percentile for DOT transit barriers and is low income? + format: bool +- score_name: DOT Travel Barriers Score (percentile) + label: DOT Travel Barriers Score (percentile) + format: percentage +- score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR) + label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR) + format: percentage +- score_name: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)? + label: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)? + format: bool +- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)? + label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)? + format: bool +- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)? + label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)? + format: bool +- score_name: Share of properties at risk of flood in 30 years + label: Share of properties at risk of flood in 30 years + format: percentage +- score_name: Share of properties at risk of fire in 30 years + label: Share of properties at risk of fire in 30 years + format: percentage +- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income? + label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income? + format: bool +- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income? + label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income? + format: bool +- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years + label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years + format: bool +- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years + label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years + format: bool +- score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? + label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? + format: bool +- score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent + label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent + format: bool +- score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent + label: Share of the tract's land area that is covered by impervious surface or cropland as a percent + format: percentage +- score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) + label: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) + format: percentage +- score_name: Share of properties at risk of flood in 30 years (percentile) + label: Share of properties at risk of flood in 30 years (percentile) + format: percentage +- score_name: Share of properties at risk of fire in 30 years (percentile) + label: Share of properties at risk of fire in 30 years (percentile) + format: percentage +- score_name: Does the tract have at least 35 acres in it? + label: Does the tract have at least 35 acres in it? + format: bool +- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract? + label: Is there at least one Formerly Used Defense Site (FUDS) in the tract? + format: bool +- score_name: Is there at least one abandoned mine in this census tract? + label: Is there at least one abandoned mine in this census tract? + format: bool +- score_name: There is at least one abandoned mine in this census tract and the tract is low income. + label: There is at least one abandoned mine in this census tract and the tract is low income. + format: bool +- score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. + label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. + format: bool +- score_name: Tract-level redlining score meets or exceeds 3.25 and is low income + label: Tract experienced historic underinvestment and remains low income + format: bool +- score_name: Tract-level redlining score meets or exceeds 3.25 + label: Tract experienced historic underinvestment + format: bool +- score_name: Income data has been estimated based on neighbor income + label: Income data has been estimated based on geographic neighbor income + format: bool diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index 03fd55a6..62e657b1 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -63,7 +63,7 @@ sheets: - score_name: Definition N (communities) (based on adjacency index and low income alone) label: Identified as disadvantaged based on neighbors and relaxed low income threshold only format: bool - - score_name: Definition M community, including adjacency index tracts + - score_name: Definition N community, including adjacency index tracts label: Identified as disadvantaged format: bool - score_name: Definition N (communities) (average of neighbors) @@ -75,11 +75,11 @@ sheets: - score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted label: Adjusted percent of individuals below 200% Federal Poverty Line format: float - - score_name: Is low income and has a low percent of higher ed students? - label: Is low income and high percent of residents that are not higher ed students? + - score_name: Is low income (imputed and adjusted)? + label: Is low income? format: bool - - score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income? + label: Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income? format: bool - score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile) label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile) @@ -87,8 +87,8 @@ sheets: - score_name: Expected agricultural loss rate (Natural Hazards Risk Index) label: Expected agricultural loss rate (Natural Hazards Risk Index) format: loss_rate_percentage - - score_name: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for expected building loss rate and is low income? + label: Greater than or equal to the 90th percentile for expected building loss rate and is low income? format: bool - score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile) label: Expected building loss rate (Natural Hazards Risk Index) (percentile) @@ -96,8 +96,8 @@ sheets: - score_name: Expected building loss rate (Natural Hazards Risk Index) label: Expected building loss rate (Natural Hazards Risk Index) format: loss_rate_percentage - - score_name: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for expected population loss rate and is low income? + label: Greater than or equal to the 90th percentile for expected population loss rate and is low income? format: bool - score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile) label: Expected population loss rate (Natural Hazards Risk Index) (percentile) @@ -105,8 +105,8 @@ sheets: - score_name: Expected population loss rate (Natural Hazards Risk Index) label: Expected population loss rate (Natural Hazards Risk Index) format: loss_rate_percentage - - score_name: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for energy burden, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for energy burden and is low income? + label: Greater than or equal to the 90th percentile for energy burden and is low income? format: bool - score_name: Energy burden (percentile) label: Energy burden (percentile) @@ -114,8 +114,8 @@ sheets: - score_name: Energy burden label: Energy burden format: percentage - - score_name: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income? + label: Greater than or equal to the 90th percentile for PM2.5 exposure and is low income? format: bool - score_name: PM2.5 in the air (percentile) label: PM2.5 in the air (percentile) @@ -123,8 +123,8 @@ sheets: - score_name: PM2.5 in the air label: PM2.5 in the air format: float - - score_name: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for diesel particulate matter and is low income? + label: Greater than or equal to the 90th percentile for diesel particulate matter and is low income? format: bool - score_name: Diesel particulate matter exposure (percentile) label: Diesel particulate matter exposure (percentile) @@ -132,8 +132,8 @@ sheets: - score_name: Diesel particulate matter exposure label: Diesel particulate matter exposure format: float - - score_name: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for traffic proximity, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for traffic proximity and is low income? + label: Greater than or equal to the 90th percentile for traffic proximity and is low income? format: bool - score_name: Traffic proximity and volume (percentile) label: Traffic proximity and volume (percentile) @@ -141,8 +141,8 @@ sheets: - score_name: Traffic proximity and volume label: Traffic proximity and volume format: float - - score_name: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for housing burden, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for housing burden and is low income? + label: Greater than or equal to the 90th percentile for housing burden and is low income? format: bool - score_name: Housing burden (percent) (percentile) label: Housing burden (percent) (percentile) @@ -150,8 +150,8 @@ sheets: - score_name: Housing burden (percent) label: Housing burden (percent) format: percentage - - score_name: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income? + label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile and is low income? format: bool - score_name: Percent pre-1960s housing (lead paint indicator) (percentile) label: Percent pre-1960s housing (lead paint indicator) (percentile) @@ -165,8 +165,8 @@ sheets: - score_name: Median value ($) of owner-occupied housing units label: Median value ($) of owner-occupied housing units format: float - - score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income? + label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income? format: bool - score_name: Proximity to hazardous waste sites (percentile) label: Proximity to hazardous waste sites (percentile) @@ -174,8 +174,8 @@ sheets: - score_name: Proximity to hazardous waste sites label: Proximity to hazardous waste sites format: float - - score_name: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income? + label: Greater than or equal to the 90th percentile for proximity to superfund sites and is low income? format: bool - score_name: Proximity to NPL sites (percentile) label: Proximity to NPL (Superfund) sites (percentile) @@ -183,8 +183,8 @@ sheets: - score_name: Proximity to NPL sites label: Proximity to NPL (Superfund) sites format: float - - score_name: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income? + label: Greater than or equal to the 90th percentile for proximity to RMP sites and is low income? format: bool - score_name: Proximity to Risk Management Plan (RMP) facilities (percentile) label: Proximity to Risk Management Plan (RMP) facilities (percentile) @@ -192,8 +192,8 @@ sheets: - score_name: Proximity to Risk Management Plan (RMP) facilities label: Proximity to Risk Management Plan (RMP) facilities format: float - - score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for wastewater discharge and is low income? + label: Greater than or equal to the 90th percentile for wastewater discharge and is low income? format: bool - score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income? @@ -210,8 +210,8 @@ sheets: - score_name: Leaky underground storage tanks label: Leaky underground storage tanks format: float - - score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for asthma, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for asthma and is low income? + label: Greater than or equal to the 90th percentile for asthma and is low income? format: bool - score_name: Current asthma among adults aged greater than or equal to 18 years (percentile) label: Current asthma among adults aged greater than or equal to 18 years (percentile) @@ -219,8 +219,8 @@ sheets: - score_name: Current asthma among adults aged greater than or equal to 18 years label: Current asthma among adults aged greater than or equal to 18 years format: percentage - - score_name: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for diabetes, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for diabetes and is low income? + label: Greater than or equal to the 90th percentile for diabetes and is low income? format: bool - score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile) @@ -228,8 +228,8 @@ sheets: - score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years label: Diagnosed diabetes among adults aged greater than or equal to 18 years format: percentage - - score_name: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for heart disease, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for heart disease and is low income? + label: Greater than or equal to the 90th percentile for heart disease and is low income? format: bool - score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile) label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile) @@ -237,8 +237,8 @@ sheets: - score_name: Coronary heart disease among adults aged greater than or equal to 18 years label: Coronary heart disease among adults aged greater than or equal to 18 years format: percentage - - score_name: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for low life expectancy, is low income, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for low life expectancy and is low income? + label: Greater than or equal to the 90th percentile for low life expectancy and is low income? format: bool - score_name: Low life expectancy (percentile) label: Low life expectancy (percentile) @@ -246,8 +246,8 @@ sheets: - score_name: Life expectancy (years) label: Life expectancy (years) format: float - - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment? + label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment? format: bool - score_name: Low median household income as a percent of area median income (percentile) label: Low median household income as a percent of area median income (percentile) @@ -255,8 +255,8 @@ sheets: - score_name: Median household income as a percent of area median income label: Median household income as a percent of area median income format: percentage - - score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment? + label: Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment? format: bool - score_name: Linguistic isolation (percent) (percentile) label: Linguistic isolation (percent) (percentile) @@ -264,8 +264,8 @@ sheets: - score_name: Linguistic isolation (percent) label: Linguistic isolation (percent) format: percentage - - score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for unemployment and has low HS attainment? + label: Greater than or equal to the 90th percentile for unemployment and has low HS attainment? format: bool - score_name: Unemployment (percent) (percentile) label: Unemployment (percent) (percentile) @@ -273,8 +273,8 @@ sheets: - score_name: Unemployment (percent) label: Unemployment (percent) format: percentage - - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students? - label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and high percent of residents that are not higher ed students? + - score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment? + label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment? format: bool - score_name: Percent of individuals below 200% Federal Poverty Line (percentile) label: Percent of individuals below 200% Federal Poverty Line (percentile) diff --git a/data/data-pipeline/data_pipeline/content/config/scratch.ipynb b/data/data-pipeline/data_pipeline/content/config/scratch.ipynb new file mode 100644 index 00000000..e2535b7e --- /dev/null +++ b/data/data-pipeline/data_pipeline/content/config/scratch.ipynb @@ -0,0 +1,798 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "cf8f39b0-7735-4f7c-9178-61bbf2257951", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "%load_ext lab_black" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "66639c20-be5e-4bf6-9b58-98338874f7cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Median value ($) of owner-occupied housing units (percentile)'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check = pd.read_csv(\n", + " \"/Users/emmausds/j40/data_pipeline/data/score/downloadable/codebook.csv\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5e525e4e-6764-4d4d-9119-b4d400ba022f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score_namecsv_field_typecsv_labelexcel_labelcalculation_notesthreshold_categorynotes
0GEOID10_TRACTstringCensus tract IDCensus tract IDNaNNaNNaN
1County NamestringCounty NameCounty NameNaNNaNNaN
2State/TerritorystringState/TerritoryState/TerritoryNaNNaNNaN
3Total threshold criteria exceededint64Total threshold criteria exceededTotal threshold criteria exceededNaNNaNLists out the total number of criteria (where ...
4Definition M (communities)boolIdentified as disadvantagedIdentified as disadvantagedNaNNaNTrue / False variable for whether a tract is a...
........................
77Percentage households below 100% of federal po...percentagePercentage households below 100% of federal po...Percentage households below 100% of federal po...Because not all data is available for the Nati...NaNNaN
78Greater than or equal to the 90th percentile f...boolGreater than or equal to the 90th percentile f...Greater than or equal to the 90th percentile f...Because not all data is available for the Nati...training and workforce developmentNaN
79Greater than or equal to the 90th percentile f...boolGreater than or equal to the 90th percentile f...Greater than or equal to the 90th percentile f...Because not all data is available for the Nati...training and workforce developmentNaN
80Greater than or equal to the 90th percentile f...boolGreater than or equal to the 90th percentile f...Greater than or equal to the 90th percentile f...Because not all data is available for the Nati...training and workforce developmentNaN
81Percent of population not currently enrolled i...percentagePercent of residents who are not currently enr...Percent of residents who are not currently enr...NaNNaNNaN
\n", + "

82 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " score_name csv_field_type \\\n", + "0 GEOID10_TRACT string \n", + "1 County Name string \n", + "2 State/Territory string \n", + "3 Total threshold criteria exceeded int64 \n", + "4 Definition M (communities) bool \n", + ".. ... ... \n", + "77 Percentage households below 100% of federal po... percentage \n", + "78 Greater than or equal to the 90th percentile f... bool \n", + "79 Greater than or equal to the 90th percentile f... bool \n", + "80 Greater than or equal to the 90th percentile f... bool \n", + "81 Percent of population not currently enrolled i... percentage \n", + "\n", + " csv_label \\\n", + "0 Census tract ID \n", + "1 County Name \n", + "2 State/Territory \n", + "3 Total threshold criteria exceeded \n", + "4 Identified as disadvantaged \n", + ".. ... \n", + "77 Percentage households below 100% of federal po... \n", + "78 Greater than or equal to the 90th percentile f... \n", + "79 Greater than or equal to the 90th percentile f... \n", + "80 Greater than or equal to the 90th percentile f... \n", + "81 Percent of residents who are not currently enr... \n", + "\n", + " excel_label \\\n", + "0 Census tract ID \n", + "1 County Name \n", + "2 State/Territory \n", + "3 Total threshold criteria exceeded \n", + "4 Identified as disadvantaged \n", + ".. ... \n", + "77 Percentage households below 100% of federal po... \n", + "78 Greater than or equal to the 90th percentile f... \n", + "79 Greater than or equal to the 90th percentile f... \n", + "80 Greater than or equal to the 90th percentile f... \n", + "81 Percent of residents who are not currently enr... \n", + "\n", + " calculation_notes \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + ".. ... \n", + "77 Because not all data is available for the Nati... \n", + "78 Because not all data is available for the Nati... \n", + "79 Because not all data is available for the Nati... \n", + "80 Because not all data is available for the Nati... \n", + "81 NaN \n", + "\n", + " threshold_category \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + ".. ... \n", + "77 NaN \n", + "78 training and workforce development \n", + "79 training and workforce development \n", + "80 training and workforce development \n", + "81 NaN \n", + "\n", + " notes \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 Lists out the total number of criteria (where ... \n", + "4 True / False variable for whether a tract is a... \n", + ".. ... \n", + "77 NaN \n", + "78 NaN \n", + "79 NaN \n", + "80 NaN \n", + "81 NaN \n", + "\n", + "[82 rows x 7 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "d86c867a-1a55-4ec0-82a6-040841406236", + "metadata": {}, + "outputs": [], + "source": [ + "codebook = pd.DataFrame(to_frame_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "6215deaf-b004-4da0-a70b-bc54f636601a", + "metadata": {}, + "outputs": [], + "source": [ + "details_to_merge = pd.DataFrame(mapping_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "ac4e65c2-09e6-4978-9440-37b3be057f65", + "metadata": {}, + "outputs": [], + "source": [ + "shapefile_codes = pd.read_csv(\n", + " \"/Users/emmausds/j40/data_pipeline/data/score/shapefile/columns.csv\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "31cfd9ec-5f5f-4642-a51f-6875c2c279a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)',\n", + " 'Expected building loss rate (Natural Hazards Risk Index) (percentile)',\n", + " 'Expected population loss rate (Natural Hazards Risk Index) (percentile)',\n", + " 'Energy burden (percentile)',\n", + " 'PM2.5 in the air (percentile)',\n", + " 'Diesel particulate matter exposure (percentile)',\n", + " 'Traffic proximity and volume (percentile)',\n", + " 'Housing burden (percent) (percentile)',\n", + " 'Percent pre-1960s housing (lead paint indicator) (percentile)',\n", + " 'Median value ($) of owner-occupied housing units (percentile)',\n", + " 'Proximity to hazardous waste sites (percentile)',\n", + " 'Proximity to NPL sites (percentile)',\n", + " 'Proximity to Risk Management Plan (RMP) facilities (percentile)',\n", + " 'Wastewater discharge (percentile)',\n", + " 'Current asthma among adults aged greater than or equal to 18 years (percentile)',\n", + " 'Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)',\n", + " 'Coronary heart disease among adults aged greater than or equal to 18 years (percentile)',\n", + " 'Low life expectancy (percentile)',\n", + " 'Low median household income as a percent of area median income (percentile)',\n", + " 'Linguistic isolation (percent) (percentile)',\n", + " 'Unemployment (percent) (percentile)',\n", + " 'Percent of individuals below 200% Federal Poverty Line (percentile)',\n", + " 'Percent of individuals < 100% Federal Poverty Line (percentile)',\n", + " 'Percent individuals age 25 or over with less than high school degree (percentile)',\n", + " 'Definition M (percentile)',\n", + " 'Low median household income as a percent of territory median income in 2009 (percentile)',\n", + " 'Percentage households below 100% of federal poverty line in 2009 for island areas (percentile)',\n", + " 'Unemployment (percent) in 2009 for island areas (percentile)']" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "66dde4fc-48e6-4bdf-b3a6-16c766e94d8a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - column_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Energy burden (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: PM2.5 in the air (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Diesel particulate matter exposure (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Traffic proximity and volume (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Housing burden (percent) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Percent pre-1960s housing (lead paint indicator) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Median value ($) of owner-occupied housing units (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Proximity to hazardous waste sites (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Proximity to NPL sites (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Proximity to Risk Management Plan (RMP) facilities (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Wastewater discharge (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Current asthma among adults aged greater than or equal to 18 years (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Low life expectancy (percentile)\n", + " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Low median household income as a percent of area median income (percentile)\n", + " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Linguistic isolation (percent) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Unemployment (percent) (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Percent of individuals below 200% Federal Poverty Line (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Percent of individuals < 100% Federal Poverty Line (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Percent individuals age 25 or over with less than high school degree (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Definition M (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Low median household income as a percent of territory median income in 2009 (percentile)\n", + " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Percentage households below 100% of federal poverty line in 2009 for island areas (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n", + " - column_name: Unemployment (percent) in 2009 for island areas (percentile)\n", + " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\n" + ] + } + ], + "source": [ + "for col in [col for col in download_codebook.index.to_list() if \"(percentile)\" in col]:\n", + " print(f\" - column_name: {col}\")\n", + " if \"Low\" not in col:\n", + " print(\n", + " f\" notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\"\n", + " )\n", + " else:\n", + " print(\n", + " f\" notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field.\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "5c08708e-4ebf-4cfe-8efb-7ee6c7930427", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
excel_labelformatshapefile_columnnotescategory
score_name
GEOID10_TRACTCensus tract IDstringNaNNaNNaN
County NameCounty NamestringCFNaNNaN
State/TerritoryState/TerritorystringSFNaNNaN
Total threshold criteria exceededTotal threshold criteria exceededint64TCLists out the total number of criteria (where ...NaN
Definition M (communities)Identified as disadvantagedboolSM_CTrue / False variable for whether a tract is a...NaN
..................
Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)Unemployment (percent) in 2009 (island areas) ...percentageNaNNaNNaN
Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)Percentage households below 100% of federal po...percentageNaNNaNNaN
Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?Greater than or equal to the 90th percentile f...boolIAULHSEisland area information comes from the dicenni...training and workforce development
Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?Greater than or equal to the 90th percentile f...boolIAPLHSEisland area information comes from the dicenni...training and workforce development
Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?Greater than or equal to the 90th percentile f...boolIALMILHSEisland area information comes from the dicenni...training and workforce development
\n", + "

82 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " excel_label \\\n", + "score_name \n", + "GEOID10_TRACT Census tract ID \n", + "County Name County Name \n", + "State/Territory State/Territory \n", + "Total threshold criteria exceeded Total threshold criteria exceeded \n", + "Definition M (communities) Identified as disadvantaged \n", + "... ... \n", + "Unemployment (percent) in 2009 (island areas) a... Unemployment (percent) in 2009 (island areas) ... \n", + "Percentage households below 100% of federal pov... Percentage households below 100% of federal po... \n", + "Greater than or equal to the 90th percentile fo... Greater than or equal to the 90th percentile f... \n", + "Greater than or equal to the 90th percentile fo... Greater than or equal to the 90th percentile f... \n", + "Greater than or equal to the 90th percentile fo... Greater than or equal to the 90th percentile f... \n", + "\n", + " format \\\n", + "score_name \n", + "GEOID10_TRACT string \n", + "County Name string \n", + "State/Territory string \n", + "Total threshold criteria exceeded int64 \n", + "Definition M (communities) bool \n", + "... ... \n", + "Unemployment (percent) in 2009 (island areas) a... percentage \n", + "Percentage households below 100% of federal pov... percentage \n", + "Greater than or equal to the 90th percentile fo... bool \n", + "Greater than or equal to the 90th percentile fo... bool \n", + "Greater than or equal to the 90th percentile fo... bool \n", + "\n", + " shapefile_column \\\n", + "score_name \n", + "GEOID10_TRACT NaN \n", + "County Name CF \n", + "State/Territory SF \n", + "Total threshold criteria exceeded TC \n", + "Definition M (communities) SM_C \n", + "... ... \n", + "Unemployment (percent) in 2009 (island areas) a... NaN \n", + "Percentage households below 100% of federal pov... NaN \n", + "Greater than or equal to the 90th percentile fo... IAULHSE \n", + "Greater than or equal to the 90th percentile fo... IAPLHSE \n", + "Greater than or equal to the 90th percentile fo... IALMILHSE \n", + "\n", + " notes \\\n", + "score_name \n", + "GEOID10_TRACT NaN \n", + "County Name NaN \n", + "State/Territory NaN \n", + "Total threshold criteria exceeded Lists out the total number of criteria (where ... \n", + "Definition M (communities) True / False variable for whether a tract is a... \n", + "... ... \n", + "Unemployment (percent) in 2009 (island areas) a... NaN \n", + "Percentage households below 100% of federal pov... NaN \n", + "Greater than or equal to the 90th percentile fo... island area information comes from the dicenni... \n", + "Greater than or equal to the 90th percentile fo... island area information comes from the dicenni... \n", + "Greater than or equal to the 90th percentile fo... island area information comes from the dicenni... \n", + "\n", + " category \n", + "score_name \n", + "GEOID10_TRACT NaN \n", + "County Name NaN \n", + "State/Territory NaN \n", + "Total threshold criteria exceeded NaN \n", + "Definition M (communities) NaN \n", + "... ... \n", + "Unemployment (percent) in 2009 (island areas) a... NaN \n", + "Percentage households below 100% of federal pov... NaN \n", + "Greater than or equal to the 90th percentile fo... training and workforce development \n", + "Greater than or equal to the 90th percentile fo... training and workforce development \n", + "Greater than or equal to the 90th percentile fo... training and workforce development \n", + "\n", + "[82 rows x 5 columns]" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "download_codebook.dropna(subset=[\"format\"]).reset_index()[\"score_name\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "7139ce5d-db5e-49dd-8bb3-122c7b73b395", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
excel_labelformatshapefile_columnnotescategory
score_name
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [excel_label, format, shapefile_column, notes, category]\n", + "Index: []" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "download_codebook.loc[\n", + " sum([download_codebook[col] == \"percentile\" for col in [\"format\"]]) > 0\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "e31ef01c-b225-48f0-bdf5-1efb8d4ed95c", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Cannot index with multidimensional key", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [134]\u001b[0m, in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdownload_codebook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdownload_codebook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlike\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mformat\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpercentile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py:931\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 928\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 930\u001b[0m maybe_callable \u001b[38;5;241m=\u001b[39m com\u001b[38;5;241m.\u001b[39mapply_if_callable(key, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj)\n\u001b[0;32m--> 931\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmaybe_callable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py:1151\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(labels, MultiIndex)):\n\u001b[1;32m 1150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mndim\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m key\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m-> 1151\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot index with multidimensional key\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1153\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_iterable(key, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 1155\u001b[0m \u001b[38;5;66;03m# nested tuple slicing\u001b[39;00m\n", + "\u001b[0;31mValueError\u001b[0m: Cannot index with multidimensional key" + ] + } + ], + "source": [ + "download_codebook.loc[download_codebook.filter(like=\"format\") == \"percentile\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "73268de4-3378-4ac7-bf85-f483a78c3966", + "metadata": {}, + "outputs": [], + "source": [ + "download_codebook = pd.concat(\n", + " [\n", + " codebook.set_index(\"score_name\"),\n", + " shapefile_codes.rename(\n", + " columns={\"meaning\": \"shapefile_column\", \"column\": \"score_name\"}\n", + " ).set_index(\"score_name\"),\n", + " details_to_merge.set_index(\"score_name\"),\n", + " ],\n", + " axis=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6321ed42-aee6-40fc-8bf8-2a4ce4276eca", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index abf387b2..45415a57 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -198,42 +198,42 @@ TILES_SCORE_COLUMNS = { field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS", field_names.UST_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "UST_PFS", - field_names.M_WATER: "M_WTR", - field_names.M_WORKFORCE: "M_WKFC", - field_names.M_CLIMATE: "M_CLT", - field_names.M_ENERGY: "M_ENY", - field_names.M_TRANSPORTATION: "M_TRN", - field_names.M_HOUSING: "M_HSG", - field_names.M_POLLUTION: "M_PLN", - field_names.M_HEALTH: "M_HLTH", + field_names.N_WATER: "N_WTR", + field_names.N_WORKFORCE: "N_WKFC", + field_names.N_CLIMATE: "N_CLT", + field_names.N_ENERGY: "N_ENY", + field_names.N_TRANSPORTATION: "N_TRN", + field_names.N_HOUSING: "N_HSG", + field_names.N_POLLUTION: "N_PLN", + field_names.N_HEALTH: "N_HLTH", # temporarily update this so that it's the Narwhal score that gets visualized on the map # The NEW final score value INCLUDES the adjacency index. - field_names.FINAL_SCORE_N_BOOLEAN: "SM_C", + field_names.FINAL_SCORE_N_BOOLEAN: "SN_C", field_names.SCORE_N_COMMUNITIES - + field_names.ADJACENT_MEAN_SUFFIX: "SM_DON", - field_names.SCORE_N_COMMUNITIES: "SM_NO_DON", - field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI", - field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI", - field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI", - field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "PM25LI", - field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLI", - field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DPMLI", - field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD: "TPLI", - field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "LPMHVLI", - field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HBLI", - field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD: "RMPLI", - field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD: "SFLI", - field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HWLI", - field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "WDLI", + + field_names.ADJACENT_MEAN_SUFFIX: "SN_DON", + field_names.SCORE_N_COMMUNITIES: "SN_NO_DON", + field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI", + field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI", + field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI", + field_names.PM25_EXPOSURE_LOW_INCOME_FIELD: "PM25LI", + field_names.ENERGY_BURDEN_LOW_INCOME_FIELD: "EBLI", + field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD: "DPMLI", + field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD: "TPLI", + field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD: "LPMHVLI", + field_names.HOUSING_BURDEN_LOW_INCOME_FIELD: "HBLI", + field_names.RMP_LOW_INCOME_FIELD: "RMPLI", + field_names.SUPERFUND_LOW_INCOME_FIELD: "SFLI", + field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: "HWLI", + field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD: "WDLI", field_names.UST_LOW_INCOME_FIELD: "USTLI", - field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD: "DLI", - field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD: "ALI", - field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "HDLI", - field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD: "LLELI", - field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD: "LILHSE", - field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD: "PLHSE", - field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD: "LMILHSE", - field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD: "ULHSE", + field_names.DIABETES_LOW_INCOME_FIELD: "DLI", + field_names.ASTHMA_LOW_INCOME_FIELD: "ALI", + field_names.HEART_DISEASE_LOW_INCOME_FIELD: "HDLI", + field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: "LLELI", + field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD: "LILHSE", + field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE", + field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE", + field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE", # new booleans only for the environmental factors field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD: "EPL_ET", field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD: "EAL_ET", @@ -276,28 +276,24 @@ TILES_SCORE_COLUMNS = { field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS", - field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD: "LHE", + field_names.LOW_HS_EDUCATION_FIELD: "LHE", field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE", # Percentage of HS Degree completion for Islands field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF", - field_names.COLLEGE_ATTENDANCE_FIELD: "CA", - field_names.COLLEGE_NON_ATTENDANCE_FIELD: "NCA", - # This is logically equivalent to "non-college greater than 80%" - field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD: "CA_LT20", # Booleans for the front end about the types of thresholds exceeded - field_names.CLIMATE_THRESHOLD_EXCEEDED: "M_CLT_EOMI", - field_names.ENERGY_THRESHOLD_EXCEEDED: "M_ENY_EOMI", - field_names.TRAFFIC_THRESHOLD_EXCEEDED: "M_TRN_EOMI", - field_names.HOUSING_THREHSOLD_EXCEEDED: "M_HSG_EOMI", - field_names.POLLUTION_THRESHOLD_EXCEEDED: "M_PLN_EOMI", - field_names.WATER_THRESHOLD_EXCEEDED: "M_WTR_EOMI", - field_names.HEALTH_THRESHOLD_EXCEEDED: "M_HLTH_EOMI", - field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI", + field_names.CLIMATE_THRESHOLD_EXCEEDED: "N_CLT_EOMI", + field_names.ENERGY_THRESHOLD_EXCEEDED: "N_ENY_EOMI", + field_names.TRAFFIC_THRESHOLD_EXCEEDED: "N_TRN_EOMI", + field_names.HOUSING_THREHSOLD_EXCEEDED: "N_HSG_EOMI", + field_names.POLLUTION_THRESHOLD_EXCEEDED: "N_PLN_EOMI", + field_names.WATER_THRESHOLD_EXCEEDED: "N_WTR_EOMI", + field_names.HEALTH_THRESHOLD_EXCEEDED: "N_HLTH_EOMI", + field_names.WORKFORCE_THRESHOLD_EXCEEDED: "N_WKFC_EOMI", # These are the booleans for socioeconomic indicators ## this measures low income boolean field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S", ## Low high school for t&wd - field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI", + field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "N_WKFC_EBSI", field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET", field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS", @@ -377,8 +373,6 @@ TILES_SCORE_FLOAT_COLUMNS = [ # Island areas HS degree attainment rate field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009, field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.COLLEGE_NON_ATTENDANCE_FIELD, - field_names.COLLEGE_ATTENDANCE_FIELD, field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.FUTURE_WILDFIRE_RISK_FIELD diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 64804322..e8631258 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -403,6 +403,7 @@ class ScoreETL(ExtractTransformLoad): df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD] ) + # Donut columns get added later numeric_columns = [ field_names.HOUSING_BURDEN_FIELD, field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD, @@ -477,12 +478,15 @@ class ScoreETL(ExtractTransformLoad): non_numeric_columns = [ self.GEOID_TRACT_FIELD_NAME, field_names.PERSISTENT_POVERTY_FIELD, - field_names.HISTORIC_REDLINING_SCORE_EXCEEDED, field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD, field_names.AGRICULTURAL_VALUE_BOOL_FIELD, - field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, + ] + + boolean_columns = [ field_names.AML_BOOLEAN, field_names.IMPUTED_INCOME_FLAG_FIELD_NAME, + field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, + field_names.HISTORIC_REDLINING_SCORE_EXCEEDED, ] # For some columns, high values are "good", so we want to reverse the percentile @@ -523,6 +527,7 @@ class ScoreETL(ExtractTransformLoad): non_numeric_columns + numeric_columns + [rp.field_name for rp in reverse_percentiles] + + boolean_columns ) df_copy = df[columns_to_keep].copy() @@ -533,6 +538,10 @@ class ScoreETL(ExtractTransformLoad): df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric) + # coerce all booleans to bools + for col in boolean_columns: + df_copy[col] = df_copy[col].astype(bool) + # Convert all columns to numeric and do math # Note that we have a few special conditions here and we handle them explicitly. # For *Linguistic Isolation*, we do NOT want to include Puerto Rico in the percentile diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py index 24348305..14f72ad2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py @@ -53,7 +53,7 @@ class GeoScoreETL(ExtractTransformLoad): self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[ field_names.SCORE_N ] - self.TARGET_SCORE_RENAME_TO = "M_SCORE" + self.TARGET_SCORE_RENAME_TO = "SCORE" # Import the shortened name for tract ("GTF") that's used on the tiles. self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index a4043668..af29b11f 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition M community, including adjacency index tracts" -01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,True,0.8571428571428571,False,False,False -01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1,True,1.0,True,True,True +GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,At least one climate threshold exceeded,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,At least one housing threshold exceeded,Housing Factor (Definition N),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for wastewater discharge,Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,At least one water threshold exceeded,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?,Greater than or equal to the 90th percentile for unemployment and has low HS attainment?,At least one workforce threshold exceeded,Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition N),Total categories exceeded,Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition N community, including adjacency index tracts" +01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,0.0,False,0,True,0.8571428571428571,False,False,False +01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,False,False,True,False,False,True,False,False,True,False,False,True,True,False,True,False,True,True,True,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,6.0,True,1,True,1.0,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 0eb41e974fc4b2b972211b7486a564ce446b7536..a91ff8cabc77f8a4f8ed5be14ee7a64b7ce5ffce 100644 GIT binary patch delta 1105 zcmX|9ZA@EL81Cu4KwD^UTUrXEU!%Y%DJzs+aBvIlxWM#I1*Sq4w3N0;5ek9gbjGik zO}jPI>?!VtOHAAk6F+8_8xwyx8EVw{5#zESCPqbnELk**OEmh&;OXJc^XGZa^E~f+ z-kfu87s$!yNyT&g#);VF3b%>ixih^GTgOM+_Fj7s1=d0%dj{^d~cVzN0bC?j>ox# zGQ-Xtua&tOCCT`Q4w;)(QjC8|^^}q>b7Pfmce${|^>dFZ^GrRYsXC_4E7`KDUUG)@ zmy)H0dS3mlQ6=8No$0o}K7M(6G-e#V1)XVKZ;1#62D zUpG0iB{>lZoFR$-)qeu^vZ@~*#vg$_*lhM-IUwL-b3fh~h|oe0vqy(vYjNX3OO}kY z+TK<(>$cI_K_<$IW$fO_e$k(KVr6-4eKo_(qx-ta!z}*6z6Pc=wM~)-neuj<4ZdIn zK9j642bJ9>I32XG+|A$+d4c94NVE?y%jQ`W`Ho3)ObgVp=IQ;%$rMw}VMkdZDY}c` zu+)v!a5HwKFkTJ2i0@p~$CZlru;|a>N*28zh0a5Gy-UDLox|h^^L9E7%sYzrx<;rs zrCnPvvU_bWM&2ZoG${&;*||*l)-tJlGM9Rijq_=Dl$@sSv$)V>!siFhk{s*$RbMsE zi7wus%%$;4PaSr|32p9Uee`vDOgw$^0EAv@^|XXewXGuOcZ3y46I1HW@d=J?BigzZik76UUdAviXP_xsq&5aTS5q*Vs@sz9wWKGICCK_$W&_)NVEtb+tuxz)P*9nbgVdKCQm*g^B zDg7HEUbl-V79D|bc_gAEpW?*{`tN<7KQ6wuFA=RyELo5Iurom~-<)%NIP!CXzBGO2 zYE9pv1pSjVzd7ejNK0VJVbeS2N-5WT{1#kseCeWP7p$xYWu1^U1&huB?HvMsaeC2` z*g~#pn|BQs$f?{cIhh^HWq6Xyj+3d$i4-{v7bt6+km3ZA%kyM5C*bdVp5({Lf-p1A zXN3Y?S~v+8dV&ydbJ)D(;) z1xM_MjJp-~yCdi*rheu&D^V2>d(FM4oH?w9b&&SdsOA+tJJ1StcgI7krVVYqfj+=A z|F$}q?QBsZwktHZbW!}n@N}mgKJV;7$7Qr#39DGf{o9^EZ~beueRTIQS(p^~g4zx; zdyk+J#(v_pDw(XHgIsXRXNA{&9jI8Qy10Un%5VxUE8q^n;jS1A28=M<^$2n`p zVibi4i$L2wC|>Sv)WQlCM`L)4CSL^Y#nH3fqcH9BBPSg4If>Fj;Ele3*w}M=1Jb~G lI-=9*q}TR}zR{8XQF_eh<0kn%)gU=238iGZ2(8hve*kTgDCYnG diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 8d726f018b231da65dee247614e24cc01247b32a..5ed5203d764f45222797c51e2ed911aeaefd899e 100644 GIT binary patch delta 707 zcmZutO=uHA7|pl4HO(5^NLx2lNkzoOXiXYXt4R}SLc8sjG=GgrG&Hsdy{HF|{ ziYH$btRV5$lcMWEdk{S+JyisSf)^uItAfQp(5k`7bSphLF!SELdEa|5pPta2Nvau> zE<01VYhr?={nQq5P{>N!FYPxYU6(|gto_j{8l5X9inv+ZOnK}Pw`RjrWGs`(L~Q^6 ztiCL5T7k<7A6s!S51}kJz$;ntNOW43f|&zx$<3NiNp4|Y&?hBXfJ?E_EZP(WQ>#>Z ziw1MI<%<#a4JXYls3;D7y0%c~o-^b(2EH?J-oVcWemCKVvRM{2VZ1nU=6u+TF~y6A z%DQIpEEnvu;k;_#*Xl%C(iXJ}QuZN1tLXRb_Z;YsyQl{g@FBEUf!aPnhuN(So&y%t zB@}pe+VCuX1h3oO{H5yKfDef#eIoIkD9Fq~H2RNHf{&F_SE!e>r-Ahp;cR4}i8?u} zPp?I5$9DEVtU|vv&?;ay=|d?bV>-Bx#!>F|p+1v_5b9)sWp$T+Et4WdLT)T%H}bYN zEbO179M8L+TcbyFGlUyqmHPQ~|DjiSA9lf$cj$K`PNF{EHJVQnb#Pix{Y1xDcoJI% zZOCh~6e?ak&4d6(2X`+E7g*eJG{*ipGQ6^&TJbWcY$p1*J`9-QQ=Ga3U6` XC?3bQ=`RXB#AwUVM|yzO!^3|8FnPr< delta 2899 zcmbuAe`s4(6vuOt*L3U8bhb&`b@gq`t&M5dgw578AxYb$%aUiB4e2^ii0P{dBrh{B znaYN(Qk81P7$*ZA^KS)_sqqj08Hl0?GMp&>1pm;f6Pfe(&pEyC=Dc>|AF{oKE=iPhWi_gHfZ$QJ6a3Y<4p<%{sl{SIY4>)I@vr0FDBx=J5UN+mT(q@ph0NN}f zETP*ncwoW=CKJ%d#D|(k8(l@sQ~zn+wQqf%_P;ur8=8%8S%#y!Rp(k@{T7;6znM1D zZ=sLZq5+f5UCsJ+HWST)lPzILaGFg*ZBA%1!-u$RO>}%gL0=Lt;Ct5T#qY`bf%qfw zC*sedFe@iOr*;uYfGL|Wb=D{>}EU-NatvMq(e-{Ef?P&i84B9}qtxeoQ=1>;d#Q zZfz1`=0~cT2UaG7*kd2TTkYGn2oH0I*hDj*Bz{G7@JM{h?y<7ntZmzO?RQxiYom^r z`xm>z_oZcdUKJxVanHzzSjrS-HB-pVS1XQ_8%UNvUFrI}egzVL{hyXQFDUJkJ!> zK(!LZ!ZDEludq+5iMF2!86ka8&4=&KjHxlCVB_7=B)MV|F-3;T4%N5=ZiQMZ>gP$?f`g#o5y0!sO)jD zBbJQY-PhWTpC%3Wb;iHM%VCTAuQR^B4cU_F@x#}af9+CyNWjJt+2&S~H}^{7 z0DIx(%SyJCl~vY7w}+Jb#oG$X5Y8kw^=o~#XG&hkYS#;j$fi@(vpIPXFYaa-J2hd;Hho|kTj^XsnN`vCIV5b0TBFlNJ766*!_BCa>RW3iC#SfPqbx?`4y eo{INr4hW!>+*&)Dyd79QKHvKde1>oDOaB8@c|HyR diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index 8119822adf52c6e280ec39aa6485f8f0250ad8f2..1d34250f254271a7d2d21e57b1ecc536b8905bf5 100644 GIT binary patch delta 697 zcmZutOK1~O6wRHMC8@m#es8K|zwhP;ZPG6>wg%9pK=iPJex$w%*Y4JV@ zMeEHs={cd9qZ}lzEh|4E^H?kvGV1^Lx>7G{z>N+YYq$&#VV>IzH{XPNT&trVY&g)t z`;E<0yq~LadWm;(u#0Y5M6)R2wh*Jw_$@>ozs#^@Om5nSm!e01V9UGwRkQuc#4je+ zOk6T?#e!$zb|0!h#ltp~#$4cgr;R@`7U{3Yc5_%tcj8?rLbJ@& zn#oX@u}a3pYbuM>E17XZuFns9*2}ip4^(Nwu=Sz6-=WWBUl1K+Q{M+fN-=g~@&h>8 zhqBxvB-=E#n|1%nD>P^_yMA{{AqrtG5z;S314O5pQps2mhc(2aX$@=Yxdy=o zHMSE^>mp|Q_u*$UxnUe%55w{hn?x=ZphGyb$BXVkpMGBX$U{wyP!jJ`?fQ>naWZIE PAxbKWg#g_^rEuXN+;Yc; delta 2874 zcmbuATWlLu5Qcr$7l@l{b59fdRHUurI&BgqP3y#VV(d7{){YwlDhfo33gY&_HJm*jn;;%gySD6a zXZFm0W@dM1uf0Yte?-hnrn8+htL8d^EJ3r-^`PHC3oENsiu+_SybcXE&Yl4&IT5og;29xSD zS_N_q9yL$ZeT`z@AlKlEIkf!;w0=bXg!~!#3s^1o{I6*JhWs7*2l7wkU&sw4R>>Mx zbO=Zz(u6c4Eyz2NcNUp9RG`C(wveA3XVhAfX%J3qxLKSO?w`~rCc*~(+AMo~DSBqmPX-2)p96kZi-!QN=z-JYj8#5b+$%|0kw8O|Ob)e~OTfQau z)T|`MWjYo;*w@oTbBU}ZCo-wHQs|3H*;y%_qlx6vgp{RHoJ#3zCY74koY`1TqDh&i zBw1!ru{7=L*`XBN6U>!)k|xu$nR$uE(s7!I$u!2DkD*{@uB2JRMCIHCQBUwX)c*g=8~B- z4R;(|-we!V&KYpIEQbhRGVM{NA=7y#es4e}f;`@Iv>;V6}pIZ4(ftrT8hYGJ*F zp4C!c>GRAx~(Rc|4ws z%}DwinB(`1Z_J{%iTsI$#jz^ALd!g(D`WNg9u?oXjXx?r@J8Ak+922F`hQ3Grs=8_ zJn(qLqrN-7mylO@8mA|>z>$t-_CQUEO})1|t6xWr-rJ0Si^qRI?!C?U<61ez*gdzF zyLwaXHo=FY3)-TG*sYw3?rsZH2YV^bFKAO!lGzm6HJVkNNH=aC3yJcloOXGR4on5x5R&&_)jOFlXov0+NrKY?Vk+Ey@5*{PQ5Wo2Fkf13& zY(U!E&3Q@>t6#jcPY9TH^@{}4Awhm%+9TKC*N z;0*TLti>nJRL=!ADc;rJ*Kr6&goB?AD$5_Kj3 delta 487 zcmaE)vQ?F(fo1BdjVwY;j6svdnY4J9FfcGMLLtLsKc+G!M#jlUn9P9EznQccCu~+@ z=4P7g$m%h{DBgj z!M>Z{@QO3lGdVj>>0$N*k?hWn@jf9&22*;tz{bbB`ulo\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10SFCFareazlfc
984602185000200AlaskaNorth Slope Borough53.3237024.45
993702290000100AlaskaYukon-Koyukuk Census Area21.6531545.50
985702188000100AlaskaNorthwest Arctic Borough21.1881595.50
993502290000200AlaskaYukon-Koyukuk Census Area20.7447705.38
993402290000300AlaskaYukon-Koyukuk Census Area17.1408260.00
993602290000400AlaskaYukon-Koyukuk Census Area14.6874485.77
989302180000100AlaskaNome Census Area13.3778170.00
984702164000100AlaskaLake and Peninsula Borough13.0616445.33
991802261000100AlaskaValdez-Cordova Census Area11.1188350.00
994502050000100AlaskaBethel Census Area10.9518880.00
984102270000100AlaskaWade Hampton Census Area8.7718060.00
983902240000100AlaskaSoutheast Fairbanks Census Area8.6136900.00
984302070000100AlaskaDillingham Census Area8.5753070.00
994702050000300AlaskaBethel Census Area8.4080400.00
989902170000101AlaskaMatanuska-Susitna Borough6.4804440.00
994402068000100AlaskaDenali Borough5.9972360.00
983602013000100AlaskaAleutians East Borough5.4877260.00
992102122000100AlaskaKenai Peninsula Borough4.8318316.10
985102150000100AlaskaKodiak Island Borough4.6640090.00
985002105000300AlaskaHoonah-Angoon Census Area4.3057160.00
983802016000100AlaskaAleutians West Census Area4.0535200.00
991702282000100AlaskaYakutat City and Borough3.9261820.00
992002261000300AlaskaValdez-Cordova Census Area3.2854820.00
984002240000400AlaskaSoutheast Fairbanks Census Area3.2339610.00
991902261000200AlaskaValdez-Cordova Census Area3.1563170.00
1035441045970900OregonMalheur County2.7317190.00
988802198000100AlaskaPrince of Wales-Hyder Census Area2.6062860.00
1021241025960200OregonHarney County2.5689437.08
984402185000300AlaskaNorth Slope Borough2.4631650.00
985802130000100AlaskaKetchikan Gateway Borough2.4400510.00
\n", - "" - ], - "text/plain": [ - " GEOID10 SF CF area zlfc\n", - "9846 02185000200 Alaska North Slope Borough 53.323702 4.45\n", - "9937 02290000100 Alaska Yukon-Koyukuk Census Area 21.653154 5.50\n", - "9857 02188000100 Alaska Northwest Arctic Borough 21.188159 5.50\n", - "9935 02290000200 Alaska Yukon-Koyukuk Census Area 20.744770 5.38\n", - "9934 02290000300 Alaska Yukon-Koyukuk Census Area 17.140826 0.00\n", - "9936 02290000400 Alaska Yukon-Koyukuk Census Area 14.687448 5.77\n", - "9893 02180000100 Alaska Nome Census Area 13.377817 0.00\n", - "9847 02164000100 Alaska Lake and Peninsula Borough 13.061644 5.33\n", - "9918 02261000100 Alaska Valdez-Cordova Census Area 11.118835 0.00\n", - "9945 02050000100 Alaska Bethel Census Area 10.951888 0.00\n", - "9841 02270000100 Alaska Wade Hampton Census Area 8.771806 0.00\n", - "9839 02240000100 Alaska Southeast Fairbanks Census Area 8.613690 0.00\n", - "9843 02070000100 Alaska Dillingham Census Area 8.575307 0.00\n", - "9947 02050000300 Alaska Bethel Census Area 8.408040 0.00\n", - "9899 02170000101 Alaska Matanuska-Susitna Borough 6.480444 0.00\n", - "9944 02068000100 Alaska Denali Borough 5.997236 0.00\n", - "9836 02013000100 Alaska Aleutians East Borough 5.487726 0.00\n", - "9921 02122000100 Alaska Kenai Peninsula Borough 4.831831 6.10\n", - "9851 02150000100 Alaska Kodiak Island Borough 4.664009 0.00\n", - "9850 02105000300 Alaska Hoonah-Angoon Census Area 4.305716 0.00\n", - "9838 02016000100 Alaska Aleutians West Census Area 4.053520 0.00\n", - "9917 02282000100 Alaska Yakutat City and Borough 3.926182 0.00\n", - "9920 02261000300 Alaska Valdez-Cordova Census Area 3.285482 0.00\n", - "9840 02240000400 Alaska Southeast Fairbanks Census Area 3.233961 0.00\n", - "9919 02261000200 Alaska Valdez-Cordova Census Area 3.156317 0.00\n", - "10354 41045970900 Oregon Malheur County 2.731719 0.00\n", - "9888 02198000100 Alaska Prince of Wales-Hyder Census Area 2.606286 0.00\n", - "10212 41025960200 Oregon Harney County 2.568943 7.08\n", - "9844 02185000300 Alaska North Slope Borough 2.463165 0.00\n", - "9858 02130000100 Alaska Ketchikan Gateway Borough 2.440051 0.00" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf['zlfc'] = 0\n", - "gdf.at[9846, 'zlfc'] = 4.45\n", - "gdf.at[10212, 'zlfc'] = 7.08\n", - "gdf.at[9937, 'zlfc'] = 5.5\n", - "gdf.at[9857, 'zlfc'] = 5.5\n", - "gdf.at[9935, 'zlfc'] = 5.38\n", - "gdf.at[9936, 'zlfc'] = 5.77\n", - "gdf.at[9921, 'zlfc'] = 6.1\n", - "gdf.at[9847, 'zlfc'] = 5.33\n", - "gdf_short = gdf[[\"GEOID10\", \"SF\", \"CF\", \"area\", \"zlfc\"]]\n", - "gdf_short_sorted = gdf_short.sort_values(by='area', ascending=False);\n", - "gdf_short_sorted.head(30)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5930de0e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb index f3619af6..6c464972 100644 --- a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb @@ -150,7 +150,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.9.10" } }, "nbformat": 4, diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index a07816ff..2029fba2 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -9,23 +9,6 @@ GEOID_TRACT_FIELD = "GEOID10_TRACT" STATE_FIELD = "State/Territory" COUNTY_FIELD = "County Name" -# Score file field names -# Definition M fields -SCORE_M = "Definition M" -FINAL_SCORE_N_BOOLEAN = ( - "Definition M community, including adjacency index tracts" -) -SCORE_M_COMMUNITIES = "Definition M (communities)" -M_CLIMATE = "Climate Factor (Definition M)" -M_ENERGY = "Energy Factor (Definition M)" -M_TRANSPORTATION = "Transportation Factor (Definition M)" -M_HOUSING = "Housing Factor (Definition M)" -M_POLLUTION = "Pollution Factor (Definition M)" -M_WATER = "Water Factor (Definition M)" -M_HEALTH = "Health Factor (Definition M)" -M_WORKFORCE = "Workforce Factor (Definition M)" -M_NON_WORKFORCE = "Any Non-Workforce Factor (Definition M)" - # Definition Narwhal fields SCORE_N = "Definition N (communities)" SCORE_N_COMMUNITIES = "Definition N (communities)" @@ -38,6 +21,9 @@ N_WATER = "Water Factor (Definition N)" N_HEALTH = "Health Factor (Definition N)" N_WORKFORCE = "Workforce Factor (Definition N)" N_NON_WORKFORCE = "Any Non-Workforce Factor (Definition N)" +FINAL_SCORE_N_BOOLEAN = ( + "Definition N community, including adjacency index tracts" +) PERCENTILE = 90 MEDIAN_HOUSE_VALUE_PERCENTILE = 90 @@ -545,22 +531,22 @@ LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD = ( # Workforce UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = ( f"Greater than or equal to the {PERCENTILE}th percentile for unemployment" - " and has low HS education?" + " and has low HS attainment?" ) LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD = ( f"Greater than or equal to the {PERCENTILE}th percentile for households in linguistic isolation" - " and has low HS education?" + " and has low HS attainment?" ) POVERTY_LOW_HS_EDUCATION_FIELD = ( f"Greater than or equal to the {PERCENTILE}th percentile for households at or below 100% federal poverty level" - " and has low HS education?" + " and has low HS attainment?" ) LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = ( f"Greater than or equal to the {PERCENTILE}th percentile for low median household income as a " - f"percent of area median income and has low HS education?" + f"percent of area median income and has low HS attainment?" ) # Score M Workforce Variables diff --git a/data/data-pipeline/data_pipeline/score/score_runner.py b/data/data-pipeline/data_pipeline/score/score_runner.py index 54b5c878..81343201 100644 --- a/data/data-pipeline/data_pipeline/score/score_runner.py +++ b/data/data-pipeline/data_pipeline/score/score_runner.py @@ -1,5 +1,4 @@ import pandas as pd -from data_pipeline.score.score_m import ScoreM from data_pipeline.score.score_narwhal import ScoreNarwhal from data_pipeline.utils import get_module_logger @@ -13,8 +12,6 @@ class ScoreRunner: self.df = df def calculate_scores(self) -> pd.DataFrame: - # Index scores - self.df = ScoreM(df=self.df).add_columns() self.df = ScoreNarwhal(df=self.df).add_columns() return self.df