mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 19:21:16 -07:00
Adding DOT composite to travel score (#1820)
This adds the DOT dataset to the ETL and to the score. Note that currently we take a percentile of an average of percentiles.
This commit is contained in:
parent
932179841f
commit
ebac552d75
17 changed files with 553 additions and 354 deletions
|
@ -156,3 +156,16 @@ datasets:
|
|||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
|
||||
- long_name: "DOT Travel Disadvantage Index"
|
||||
short_name: "DOT"
|
||||
module_name: "travel_composite"
|
||||
input_geoid_tract_field_name: "GEOID10_TRACT"
|
||||
load_fields:
|
||||
- short_name: "travel_burden"
|
||||
df_field_name: "TRAVEL_BURDEN_FIELD_NAME"
|
||||
long_name: "DOT Travel Barriers Score"
|
||||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
|
|
@ -296,6 +296,9 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.FPL_200_SERIES: "FPL200S",
|
||||
## Low high school attainment, for training & workforce development (t&wd)
|
||||
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
|
||||
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS"
|
||||
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
||||
## FPL_200 (there is no higher ed in narwhal)
|
||||
}
|
||||
|
@ -348,4 +351,5 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
|
|
|
@ -8,6 +8,9 @@ from data_pipeline.etl.base import ExtractTransformLoad
|
|||
from data_pipeline.etl.sources.national_risk_index.etl import (
|
||||
NationalRiskIndexETL,
|
||||
)
|
||||
from data_pipeline.etl.sources.dot_travel_composite.etl import (
|
||||
TravelCompositeETL,
|
||||
)
|
||||
from data_pipeline.score.score_runner import ScoreRunner
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.etl.score import constants
|
||||
|
@ -37,6 +40,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.census_2010_df: pd.DataFrame
|
||||
self.child_opportunity_index_df: pd.DataFrame
|
||||
self.hrs_df: pd.DataFrame
|
||||
self.dot_travel_disadvantage_df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
@ -115,6 +119,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Load FEMA national risk index data
|
||||
self.national_risk_index_df = NationalRiskIndexETL.get_data_frame()
|
||||
|
||||
# Load DOT Travel Disadvantage
|
||||
self.dot_travel_disadvantage_df = TravelCompositeETL.get_data_frame()
|
||||
|
||||
# Load GeoCorr Urban Rural Map
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
|
@ -334,6 +341,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.census_2010_df,
|
||||
self.child_opportunity_index_df,
|
||||
self.hrs_df,
|
||||
self.dot_travel_disadvantage_df,
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -416,6 +424,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.HEALTHY_FOOD_FIELD,
|
||||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||
field_names.UST_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||
]
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue