Adding DOT composite to travel score (#1820)

This adds the DOT dataset to the ETL and to the score. Note that we currently take a percentile of an average of percentiles.
2025-09-30 15:03:17 -07:00 · 2022-08-16 14:44:39 -04:00 · 2022-08-16 14:44:39 -04:00 · ebac552d75
commit ebac552d75
parent 932179841f
17 changed files with 553 additions and 354 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
+++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
@@ -156,3 +156,16 @@ datasets:
        field_type: float
        include_in_tiles: true
        include_in_downloadable_files: true
+
+  - long_name: "DOT Travel Disadvantage Index"
+    short_name: "DOT"
+    module_name: "travel_composite"
+    input_geoid_tract_field_name: "GEOID10_TRACT"
+    load_fields:
+      - short_name: "travel_burden"
+        df_field_name: "TRAVEL_BURDEN_FIELD_NAME"
+        long_name: "DOT Travel Barriers Score" 
+        field_type: float
+        include_in_tiles: true
+        include_in_downloadable_files: true
+        create_percentile: true
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -296,6 +296,9 @@ TILES_SCORE_COLUMNS = {
    field_names.FPL_200_SERIES: "FPL200S",
    ## Low high school for t&wd
    field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
+    field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
+    field_names.DOT_TRAVEL_BURDEN_FIELD
+    + field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS"
    ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
    ## FPL_200 (there is no higher ed in narwhal)
 }
@@ -348,4 +351,5 @@ TILES_SCORE_FLOAT_COLUMNS = [
    field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.COLLEGE_NON_ATTENDANCE_FIELD,
    field_names.COLLEGE_ATTENDANCE_FIELD,
+    field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
 ]
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -8,6 +8,9 @@ from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.etl.sources.national_risk_index.etl import (
    NationalRiskIndexETL,
 )
+from data_pipeline.etl.sources.dot_travel_composite.etl import (
+    TravelCompositeETL,
+)
 from data_pipeline.score.score_runner import ScoreRunner
 from data_pipeline.score import field_names
 from data_pipeline.etl.score import constants
@@ -37,6 +40,7 @@ class ScoreETL(ExtractTransformLoad):
        self.census_2010_df: pd.DataFrame
        self.child_opportunity_index_df: pd.DataFrame
        self.hrs_df: pd.DataFrame
+        self.dot_travel_disadvantage_df: pd.DataFrame

    def extract(self) -> None:
        logger.info("Loading data sets from disk.")
@@ -115,6 +119,9 @@ class ScoreETL(ExtractTransformLoad):
        # Load FEMA national risk index data
        self.national_risk_index_df = NationalRiskIndexETL.get_data_frame()

+        # Load DOT Travel Disadvantage
+        self.dot_travel_disadvantage_df = TravelCompositeETL.get_data_frame()
+
        # Load GeoCorr Urban Rural Map
        geocorr_urban_rural_csv = (
            constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
@@ -334,6 +341,7 @@ class ScoreETL(ExtractTransformLoad):
            self.census_2010_df,
            self.child_opportunity_index_df,
            self.hrs_df,
+            self.dot_travel_disadvantage_df,
        ]

        # Sanity check each data frame before merging.
@@ -416,6 +424,7 @@ class ScoreETL(ExtractTransformLoad):
            field_names.HEALTHY_FOOD_FIELD,
            field_names.IMPENETRABLE_SURFACES_FIELD,
            field_names.UST_FIELD,
+            field_names.DOT_TRAVEL_BURDEN_FIELD,
            field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
        ]
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl