mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-25 07:20:18 -07:00
Adding DOT composite to travel score (#1820)
This adds the DOT dataset to the ETL and to the score. Note that currently we take a percentile of an average of percentiles.
This commit is contained in:
parent
932179841f
commit
ebac552d75
17 changed files with 553 additions and 354 deletions
|
@ -260,6 +260,12 @@ fields:
|
|||
- score_name: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
|
||||
label: Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?
|
||||
format: bool
|
||||
- score_name: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
|
||||
label: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
|
||||
format: bool
|
||||
- score_name: DOT Travel Barriers Score (percentile)
|
||||
label: DOT Travel Barriers Score (percentile)
|
||||
format: percentage
|
||||
- score_name: Leaky underground storage tanks (percentile)
|
||||
label: Leaky underground storage tanks (percentile)
|
||||
format: percentage
|
||||
|
|
|
@ -258,6 +258,12 @@ sheets:
|
|||
- score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
|
||||
label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
|
||||
format: percentage
|
||||
- score_name: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
|
||||
label: Greater than or equal to the 90th percentile for DOT transit barriers and is low income?
|
||||
format: bool
|
||||
- score_name: DOT Travel Barriers Score (percentile)
|
||||
label: DOT Travel Barriers Score (percentile)
|
||||
format: percentage
|
||||
- score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
|
||||
label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
|
||||
format: percentage
|
||||
|
|
|
@ -9,6 +9,11 @@ DATASET_LIST = [
|
|||
"module_dir": "national_risk_index",
|
||||
"class_name": "NationalRiskIndexETL",
|
||||
},
|
||||
{
|
||||
"name": "travel_composite",
|
||||
"module_dir": "dot_travel_composite",
|
||||
"class_name": "TravelCompositeETL",
|
||||
},
|
||||
{
|
||||
"name": "tree_equity_score",
|
||||
"module_dir": "tree_equity_score",
|
||||
|
|
|
@ -156,3 +156,16 @@ datasets:
|
|||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
|
||||
- long_name: "DOT Travel Disadvantage Index"
|
||||
short_name: "DOT"
|
||||
module_name: "travel_composite"
|
||||
input_geoid_tract_field_name: "GEOID10_TRACT"
|
||||
load_fields:
|
||||
- short_name: "travel_burden"
|
||||
df_field_name: "TRAVEL_BURDEN_FIELD_NAME"
|
||||
long_name: "DOT Travel Barriers Score"
|
||||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
|
|
@ -296,6 +296,9 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.FPL_200_SERIES: "FPL200S",
|
||||
## Low high school for t&wd
|
||||
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
|
||||
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS"
|
||||
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
||||
## FPL_200 (there is no higher ed in narwhal)
|
||||
}
|
||||
|
@ -348,4 +351,5 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
|
|
|
@ -8,6 +8,9 @@ from data_pipeline.etl.base import ExtractTransformLoad
|
|||
from data_pipeline.etl.sources.national_risk_index.etl import (
|
||||
NationalRiskIndexETL,
|
||||
)
|
||||
from data_pipeline.etl.sources.dot_travel_composite.etl import (
|
||||
TravelCompositeETL,
|
||||
)
|
||||
from data_pipeline.score.score_runner import ScoreRunner
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.etl.score import constants
|
||||
|
@ -37,6 +40,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.census_2010_df: pd.DataFrame
|
||||
self.child_opportunity_index_df: pd.DataFrame
|
||||
self.hrs_df: pd.DataFrame
|
||||
self.dot_travel_disadvantage_df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
@ -115,6 +119,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Load FEMA national risk index data
|
||||
self.national_risk_index_df = NationalRiskIndexETL.get_data_frame()
|
||||
|
||||
# Load DOT Travel Disadvantage
|
||||
self.dot_travel_disadvantage_df = TravelCompositeETL.get_data_frame()
|
||||
|
||||
# Load GeoCorr Urban Rural Map
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
|
@ -334,6 +341,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.census_2010_df,
|
||||
self.child_opportunity_index_df,
|
||||
self.hrs_df,
|
||||
self.dot_travel_disadvantage_df,
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -416,6 +424,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.HEALTHY_FOOD_FIELD,
|
||||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||
field_names.UST_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||
]
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,16 @@
|
|||
# DOT travel barriers
|
||||
|
||||
The below description is taken from DOT directly:
|
||||
|
||||
Consistent with OMB’s Interim Guidance for the Justice40 Initiative, DOT’s interim definition of DACs includes (a) certain qualifying census tracts, (b) any Tribal land, or (c) any territory or possession of the United States. DOT has provided a mapping tool to assist applicants in identifying whether a project is located in a Disadvantaged Community, available at Transportation Disadvantaged Census Tracts (arcgis.com). A shapefile of the geospatial data is available as the Transportation Disadvantaged Census Tracts shapefile (version 2.0, posted 5/10/22).
|
||||
|
||||
The DOT interim definition for DACs was developed by an internal and external collaborative research process (see recordings from November 2021 public meetings). It includes data for 22 indicators collected at the census tract level and grouped into six (6) categories of transportation disadvantage. The numbers in parentheses show how many indicators fall in that category:
|
||||
|
||||
- Transportation access disadvantage identifies communities and places that spend more, and take longer, to get where they need to go. (4)
|
||||
- Health disadvantage identifies communities based on variables associated with adverse health outcomes, disability, as well as environmental exposures. (3)
|
||||
- Environmental disadvantage identifies communities with disproportionately high levels of certain air pollutants and high potential presence of lead-based paint in housing units. (6)
|
||||
- Economic disadvantage identifies areas and populations with high poverty, low wealth, lack of local jobs, low homeownership, low educational attainment, and high inequality. (7)
|
||||
- Resilience disadvantage identifies communities vulnerable to hazards caused by climate change. (1)
|
||||
- Equity disadvantage identifies communities with a high percentile of persons (age 5+) who speak English "less than well." (1)
|
||||
|
||||
The CEJST uses only Transportation Access Disadvantage.
|
|
@ -0,0 +1,59 @@
|
|||
# pylint: disable=unsubscriptable-object
|
||||
# pylint: disable=unsupported-assignment-operation
|
||||
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class TravelCompositeETL(ExtractTransformLoad):
|
||||
"""ETL class for the DOT Travel Disadvantage Dataset"""
|
||||
|
||||
NAME = "travel_composite"
|
||||
SOURCE_URL = "https://www.transportation.gov/sites/dot.gov/files/Shapefile_and_Metadata.zip"
|
||||
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||
|
||||
# Output score variables (values set on datasets.yml) for linting purposes
|
||||
TRAVEL_BURDEN_FIELD_NAME: str
|
||||
|
||||
def __init__(self):
|
||||
# define the full path for the input shapefile (.shp) under the tmp path
|
||||
self.INPUT_SHP = (
|
||||
self.get_tmp_path() / "DOT_Disadvantage_Layer_Final_April2022.shp"
|
||||
)
|
||||
|
||||
# this is the main dataframe
|
||||
self.df: pd.DataFrame
|
||||
|
||||
# Start dataset-specific vars here
|
||||
## Average of Transportation Indicator Percentiles (calculated)
|
||||
## Calculated: Average of (EPL_TCB+EPL_NWKI+EPL_NOVEH+EPL_COMMUTE) excluding NULLS
|
||||
## See metadata for more information
|
||||
self.INPUT_TRAVEL_DISADVANTAGE_FIELD_NAME = "Transp_TH"
|
||||
self.INPUT_GEOID_TRACT_FIELD_NAME = "FIPS"
|
||||
|
||||
def transform(self) -> None:
|
||||
"""Reads the unzipped data file into memory and applies the following
|
||||
transformations to prepare it for the load() method:
|
||||
|
||||
- Renames the Census Tract column to match the other datasets
|
||||
- Converts to CSV
|
||||
"""
|
||||
logger.info("Transforming DOT Travel Disadvantage Data")
|
||||
|
||||
# read in the unzipped shapefile from data source, then rename the
|
||||
# input tract ID and travel disadvantage columns to this class' field
|
||||
# names, and drop any rows that have no Census Tract ID
|
||||
df_dot: pd.DataFrame = gpd.read_file(self.INPUT_SHP)
|
||||
df_dot = df_dot.rename(
|
||||
columns={
|
||||
self.INPUT_GEOID_TRACT_FIELD_NAME: self.GEOID_TRACT_FIELD_NAME,
|
||||
self.INPUT_TRAVEL_DISADVANTAGE_FIELD_NAME: self.TRAVEL_BURDEN_FIELD_NAME,
|
||||
}
|
||||
).dropna(subset=[self.GEOID_TRACT_FIELD_NAME])
|
||||
# Assign the final df to the class' output_df for the load method
|
||||
self.output_df = df_dot
|
|
@ -344,6 +344,9 @@ CDC_SVI_INDEX_RPL_THEMES_OVERALL_FIELD: str = (
|
|||
)
|
||||
CDC_SVI_INDEX_THEMES_PRIORITY_COMMUNITY: str = "At or above 90 for overall percentile ranking according to Social Vulnerability Indices"
|
||||
|
||||
# DOT Travel Burden Data
|
||||
DOT_TRAVEL_BURDEN_FIELD: str = "DOT Travel Barriers Score"
|
||||
|
||||
# Maryland EJSCREEN Data.
|
||||
MARYLAND_EJSCREEN_SCORE_FIELD: str = "Maryland Environmental Justice Score"
|
||||
|
||||
|
@ -416,6 +419,7 @@ DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = (
|
|||
)
|
||||
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for traffic proximity and is low income?"
|
||||
|
||||
|
||||
# Affordable and Sustainable Housing
|
||||
LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for lead paint and"
|
||||
|
@ -494,6 +498,10 @@ TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD = (
|
|||
f"traffic proximity{SCORE_M_LOW_INCOME_SUFFIX}?"
|
||||
)
|
||||
|
||||
DOT_TRAVEL_BURDEN_LOW_INCOME_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile "
|
||||
f"for DOT transit barriers and is low income?"
|
||||
)
|
||||
# Affordable and Sustainable Housing
|
||||
LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for lead paint,"
|
||||
|
@ -624,6 +632,7 @@ PM25_EXCEEDS_PCTILE_THRESHOLD = (
|
|||
)
|
||||
DIESEL_EXCEEDS_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for diesel particulate matter"
|
||||
TRAFFIC_PROXIMITY_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for traffic proximity"
|
||||
DOT_BURDEN_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for DOT travel barriers"
|
||||
LEAD_PAINT_PROXY_PCTILE_THRESHOLD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for lead paint and"
|
||||
f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
|
||||
|
|
|
@ -246,6 +246,8 @@ class ScoreNarwhal(Score):
|
|||
# In Xth percentile or above for PM 2.5 (Source: EPA, Office of Air and Radiation (OAR) fusion of model and monitor data)]
|
||||
# or
|
||||
# In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data
|
||||
# or
|
||||
# In Xth percentile or above for DOT Travel Disadvantage
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
|
@ -255,6 +257,7 @@ class ScoreNarwhal(Score):
|
|||
transportion_eligibility_columns = [
|
||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
|
||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD] = (
|
||||
|
@ -264,6 +267,14 @@ class ScoreNarwhal(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.DOT_BURDEN_PCTILE_THRESHOLD] = (
|
||||
self.df[
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD] = (
|
||||
self.df[
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
|
@ -274,6 +285,7 @@ class ScoreNarwhal(Score):
|
|||
self.df[field_names.TRAFFIC_THRESHOLD_EXCEEDED] = (
|
||||
self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD]
|
||||
| self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD]
|
||||
| self.df[field_names.DOT_BURDEN_PCTILE_THRESHOLD]
|
||||
)
|
||||
|
||||
self.df[field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD] = (
|
||||
|
@ -286,6 +298,11 @@ class ScoreNarwhal(Score):
|
|||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||
)
|
||||
|
||||
self.df[field_names.DOT_TRAVEL_BURDEN_LOW_INCOME_FIELD] = (
|
||||
self.df[field_names.DOT_BURDEN_PCTILE_THRESHOLD]
|
||||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
transportion_eligibility_columns,
|
||||
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue