From 426328e378126760a61388552463183801dbc2f6 Mon Sep 17 00:00:00 2001
From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
Date: Wed, 7 Sep 2022 17:13:31 -0400
Subject: [PATCH 1/2] =?UTF-8?q?Updating=20traffic=20barriers=C2=A0to=20inc?=
 =?UTF-8?q?lude=20low=20pop=20threshold=20(#1889)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changing the traffic barriers to only be included for places with
recorded population
---
 .../data_pipeline/etl/score/etl_score.py      | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index 62a5006d..56682d49 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -380,7 +380,8 @@ class ScoreETL(ExtractTransformLoad):
         ), "Join against national tract list ADDED rows"
         logger.info(
             "Dropped %s tracts not in the 2010 tract data",
-            pre_join_len - census_tract_df[field_names.GEOID_TRACT_FIELD].nunique()
+            pre_join_len
+            - census_tract_df[field_names.GEOID_TRACT_FIELD].nunique(),
         )
 
         # Now sanity-check the merged df.
@@ -551,6 +552,9 @@ class ScoreETL(ExtractTransformLoad):
         # For *Non-Natural Space*, we may only want to include tracts that have at least 35 acreas, I think. This will
         # get rid of tracts that we think are aberrations statistically. Right now, we have left this out
         # pending ground-truthing.
+        #
+        # For *Traffic Barriers*, we want to exclude low population tracts, which may have high burden because they are
+        # low population alone. We set this low population constant in the if statement.
 
         for numeric_column in numeric_columns:
             drop_tracts = []
@@ -575,6 +579,17 @@ class ScoreETL(ExtractTransformLoad):
                     f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
                 )
 
+            elif numeric_column == field_names.DOT_TRAVEL_BURDEN_FIELD:
+                # Not having any people appears to be correlated with transit burden, but also doesn't represent
+                # on the ground need. For now, we remove these tracts from the percentile calculation. (To be QAed live)
+                low_population = 20
+                drop_tracts = df_copy[
+                    df_copy[field_names.TOTAL_POP_FIELD] <= low_population
+                ][field_names.GEOID_TRACT_FIELD].to_list()
+                logger.info(
+                    f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"
+                )
+
             df_copy = self._add_percentiles_to_df(
                 df=df_copy,
                 input_column_name=numeric_column,

From fb4c484e5c2bed4bd60e329092cb7c05bd9c604f Mon Sep 17 00:00:00 2001
From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
Date: Thu, 8 Sep 2022 14:55:00 -0400
Subject: [PATCH 2/2] Remove no land tracts from map (#1894)

remove from map
---
 .../data_pipeline/etl/score/etl_score_geo.py  | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
index da02beef..31eacbe1 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
@@ -60,6 +60,7 @@ class GeoScoreETL(ExtractTransformLoad):
             field_names.GEOID_TRACT_FIELD
         ]
         self.GEOMETRY_FIELD_NAME = "geometry"
+        self.LAND_FIELD_NAME = "ALAND10"
 
         # We will adjust this upwards while there is some fractional value
         # in the score. This is a starting value.
@@ -86,13 +87,22 @@ class GeoScoreETL(ExtractTransformLoad):
         )
 
         logger.info("Reading US GeoJSON (~6 minutes)")
-        self.geojson_usa_df = gpd.read_file(
+        full_geojson_usa_df = gpd.read_file(
             self.CENSUS_USA_GEOJSON,
             dtype={self.GEOID_FIELD_NAME: "string"},
-            usecols=[self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME],
+            usecols=[
+                self.GEOID_FIELD_NAME,
+                self.GEOMETRY_FIELD_NAME,
+                self.LAND_FIELD_NAME,
+            ],
             low_memory=False,
         )
 
+        # We only want to keep tracts to visualize that have non-0 land
+        self.geojson_usa_df = full_geojson_usa_df[
+            full_geojson_usa_df[self.LAND_FIELD_NAME] > 0
+        ]
+
         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
             self.TILE_SCORE_CSV,
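
Note on the percentile exclusion in PATCH 1/2: the diff only builds the
drop_tracts list and then calls self._add_percentiles_to_df, so the way
excluded tracts are treated is not visible in the hunk itself. The sketch
below is an assumption about that behavior, not the repository's
implementation; the function name, signature, and the rank-based percentile
are illustrative only. The idea is that tracts at or below the low_population
threshold (20) receive no percentile rather than being ranked alongside the
rest, so they cannot drag the DOT traffic burden distribution around.

    import pandas as pd

    def add_percentile_excluding_tracts(
        df: pd.DataFrame,
        input_column: str,
        output_column: str,
        tract_id_column: str,
        drop_tracts: list,
    ) -> pd.DataFrame:
        """Attach a 0-1 percentile for input_column, skipping drop_tracts.

        Excluded tracts (e.g. population <= 20 for the DOT travel burden
        field) get NaN, so they neither receive a percentile nor distort
        the distribution for the remaining tracts.
        """
        # Hypothetical helper: mask out the excluded tracts, rank the rest.
        keep_mask = ~df[tract_id_column].isin(drop_tracts)
        # .rank(pct=True) on the kept rows returns a Series indexed by those
        # rows only; assignment aligns on index, leaving dropped tracts NaN.
        df[output_column] = df.loc[keep_mask, input_column].rank(pct=True)
        return df

Called with drop_tracts built the same way as in the patch (tracts whose
field_names.TOTAL_POP_FIELD is at or below 20), this would yield a percentile
column that is simply empty for the excluded tracts, which matches the stated
intent of removing them from the percentile calculation.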