From 426328e378126760a61388552463183801dbc2f6 Mon Sep 17 00:00:00 2001
From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
Date: Wed, 7 Sep 2022 17:13:31 -0400
Subject: [PATCH 1/2] =?UTF-8?q?Updating=20traffic=20barriers=C2=A0to=20inc?=
 =?UTF-8?q?lude=20low=20pop=20threshold=20(#1889)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changing the traffic barriers to only be included for places with
recorded population
---
 .../data_pipeline/etl/score/etl_score.py      | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index 62a5006d..56682d49 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -380,7 +380,8 @@ class ScoreETL(ExtractTransformLoad):
         ), "Join against national tract list ADDED rows"
         logger.info(
             "Dropped %s tracts not in the 2010 tract data",
-            pre_join_len - census_tract_df[field_names.GEOID_TRACT_FIELD].nunique()
+            pre_join_len
+            - census_tract_df[field_names.GEOID_TRACT_FIELD].nunique(),
         )
 
         # Now sanity-check the merged df.
@@ -551,6 +552,9 @@ class ScoreETL(ExtractTransformLoad):
         # For *Non-Natural Space*, we may only want to include tracts that have at least 35 acreas, I think. This will
         # get rid of tracts that we think are aberrations statistically. Right now, we have left this out
         # pending ground-truthing.
+        #
+        # For *Traffic Barriers*, we want to exclude low population tracts, which may have high burden because they are
+        # low population alone. We set this low population constant in the if statement.
 
         for numeric_column in numeric_columns:
             drop_tracts = []
@@ -575,6 +579,17 @@ class ScoreETL(ExtractTransformLoad):
                     f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
                 )
 
+            elif numeric_column == field_names.DOT_TRAVEL_BURDEN_FIELD:
+                # Not having any people appears to be correlated with transit burden, but also doesn't represent
+                # on the ground need. For now, we remove these tracts from the percentile calculation. (To be QAed live)
+                low_population = 20
+                drop_tracts = df_copy[
+                    df_copy[field_names.TOTAL_POP_FIELD] <= low_population
+                ][field_names.GEOID_TRACT_FIELD].to_list()
+                logger.info(
+                    f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"
+                )
+
             df_copy = self._add_percentiles_to_df(
                 df=df_copy,
                 input_column_name=numeric_column,

From fb4c484e5c2bed4bd60e329092cb7c05bd9c604f Mon Sep 17 00:00:00 2001
From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
Date: Thu, 8 Sep 2022 14:55:00 -0400
Subject: [PATCH 2/2] Remove no land tracts from map (#1894)

remove from map
---
 .../data_pipeline/etl/score/etl_score_geo.py  | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
index da02beef..31eacbe1 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
@@ -60,6 +60,7 @@ class GeoScoreETL(ExtractTransformLoad):
             field_names.GEOID_TRACT_FIELD
         ]
         self.GEOMETRY_FIELD_NAME = "geometry"
+        self.LAND_FIELD_NAME = "ALAND10"
 
         # We will adjust this upwards while there is some fractional value
         # in the score. This is a starting value.
@@ -86,13 +87,22 @@ class GeoScoreETL(ExtractTransformLoad):
         )
 
         logger.info("Reading US GeoJSON (~6 minutes)")
-        self.geojson_usa_df = gpd.read_file(
+        full_geojson_usa_df = gpd.read_file(
             self.CENSUS_USA_GEOJSON,
             dtype={self.GEOID_FIELD_NAME: "string"},
-            usecols=[self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME],
+            usecols=[
+                self.GEOID_FIELD_NAME,
+                self.GEOMETRY_FIELD_NAME,
+                self.LAND_FIELD_NAME,
+            ],
             low_memory=False,
         )
 
+        # We only want to keep tracts to visualize that have non-0 land
+        self.geojson_usa_df = full_geojson_usa_df[
+            full_geojson_usa_df[self.LAND_FIELD_NAME] > 0
+        ]
+
         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
             self.TILE_SCORE_CSV,
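
Note on the percentile exclusion in PATCH 1/2: the diff only builds the
drop_tracts list and then calls self._add_percentiles_to_df, so the way
excluded tracts are treated is not visible in the hunk itself. The sketch
below is an assumption about that behavior, not the repository's
implementation; the function name, signature, and the rank-based percentile
are illustrative only. The idea is that tracts at or below the low_population
threshold (20) receive no percentile rather than being ranked alongside the
rest, so they cannot drag the DOT traffic burden distribution around.

    import pandas as pd

    def add_percentile_excluding_tracts(
        df: pd.DataFrame,
        input_column: str,
        output_column: str,
        tract_id_column: str,
        drop_tracts: list,
    ) -> pd.DataFrame:
        """Attach a 0-1 percentile for input_column, skipping drop_tracts.

        Excluded tracts (e.g. population <= 20 for the DOT travel burden
        field) get NaN, so they neither receive a percentile nor distort
        the distribution for the remaining tracts.
        """
        # Hypothetical helper: mask out the excluded tracts, rank the rest.
        keep_mask = ~df[tract_id_column].isin(drop_tracts)
        # .rank(pct=True) on the kept rows returns a Series indexed by those
        # rows only; assignment aligns on index, leaving dropped tracts NaN.
        df[output_column] = df.loc[keep_mask, input_column].rank(pct=True)
        return df

Called with drop_tracts built the same way as in the patch (tracts whose
field_names.TOTAL_POP_FIELD is at or below 20), this would yield a percentile
column that is simply empty for the excluded tracts, which matches the stated
intent of removing them from the percentile calculation.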