From 60164c863791af413cbc3838180f3fb7ed991d1b Mon Sep 17 00:00:00 2001 From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com> Date: Mon, 12 Sep 2022 13:48:38 -0400 Subject: [PATCH] Removing low pop tracts from FEMA population loss (#1898) dropping 0 population from FEMA --- .../data_pipeline/etl/score/etl_score.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 56682d49..3cdedf8d 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -579,12 +579,17 @@ class ScoreETL(ExtractTransformLoad): f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation" ) - elif numeric_column == field_names.DOT_TRAVEL_BURDEN_FIELD: + elif numeric_column in [ + field_names.DOT_TRAVEL_BURDEN_FIELD, + field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD, + ]: # Not having any people appears to be correlated with transit burden, but also doesn't represent - # on the ground need. For now, we remove these tracts from the percentile calculation. (To be QAed live) + # on the ground need. For now, we remove these tracts from the percentile calculation. + # Similarly, we want to exclude low population tracts from FEMA's index low_population = 20 drop_tracts = df_copy[ - df_copy[field_names.TOTAL_POP_FIELD] <= low_population + df_copy[field_names.TOTAL_POP_FIELD].fillna(0) + <= low_population ][field_names.GEOID_TRACT_FIELD].to_list() logger.info( f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"