From 60164c863791af413cbc3838180f3fb7ed991d1b Mon Sep 17 00:00:00 2001
From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
Date: Mon, 12 Sep 2022 13:48:38 -0400
Subject: [PATCH] Removing low pop tracts from FEMA population loss (#1898)

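Drop tracts with missing (treated as 0) or low (<= 20) population from the FEMA expected population loss rate percentile calculation.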
---
 .../data_pipeline/etl/score/etl_score.py              | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index 56682d49..3cdedf8d 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -579,12 +579,17 @@ class ScoreETL(ExtractTransformLoad):
                     f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
                 )
 
-            elif numeric_column == field_names.DOT_TRAVEL_BURDEN_FIELD:
+            elif numeric_column in [
+                field_names.DOT_TRAVEL_BURDEN_FIELD,
+                field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
+            ]:
                 # Not having any people appears to be correlated with transit burden, but also doesn't represent
-                # on the ground need. For now, we remove these tracts from the percentile calculation. (To be QAed live)
+                # on the ground need. For now, we remove these tracts from the percentile calculation.
+                # Similarly, we want to exclude low population tracts from FEMA's index
                 low_population = 20
                 drop_tracts = df_copy[
-                    df_copy[field_names.TOTAL_POP_FIELD] <= low_population
+                    df_copy[field_names.TOTAL_POP_FIELD].fillna(0)
+                    <= low_population
                 ][field_names.GEOID_TRACT_FIELD].to_list()
                 logger.info(
                     f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"