From 2b35a8937ac5b310c51152ed9eebfb511428782b Mon Sep 17 00:00:00 2001
From: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Date: Thu, 27 Jan 2022 17:22:39 -0500
Subject: [PATCH] Hot fix for Score M (#1182)

* fixes

* pr feedback

* tuple: annotate the island-areas threshold helper in score_m.py as returning Tuple[pd.DataFrame, str]
---
 .../data_pipeline/etl/score/etl_score_post.py | 19 +++++++------------
 .../data_pipeline/score/score_m.py            |  3 ++-
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
index 084ab495..a745e66b 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@@ -17,6 +17,9 @@ from . import constants
 
 logger = get_module_logger(__name__)
 
+# Column that flags disadvantaged communities (DAC); currently the Score M field.
+DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_M_COMMUNITIES
+
 
 class PostScoreETL(ExtractTransformLoad):
     """
@@ -184,17 +187,9 @@ class PostScoreETL(ExtractTransformLoad):
             merged_df["Total population"].fillna(0.0).astype(int)
         )
 
-        # list the null score tracts
-        null_tract_df = merged_df[
-            merged_df[field_names.SCORE_L_COMMUNITIES].isnull()
-        ]
-
-        # subtract data sets
-        # this follows the XOR pattern outlined here:
-        # https://stackoverflow.com/a/37313953
-        de_duplicated_df = pd.concat(
-            [merged_df, null_tract_df, null_tract_df]
-        ).drop_duplicates(keep=False)
+        de_duplicated_df = merged_df.dropna(
+            subset=[DISADVANTAGED_COMMUNITIES_FIELD]
+        )
 
         # set the score to the new df
         return de_duplicated_df
@@ -333,7 +328,7 @@ class PostScoreETL(ExtractTransformLoad):
         # Rename score column
         downloadable_df_copy = downloadable_df.rename(
             columns={
-                field_names.SCORE_M_COMMUNITIES: "Identified as disadvantaged (v0.1)"
+                DISADVANTAGED_COMMUNITIES_FIELD: "Identified as disadvantaged (v0.1)"
             },
             inplace=False,
         )
diff --git a/data/data-pipeline/data_pipeline/score/score_m.py b/data/data-pipeline/data_pipeline/score/score_m.py
index 7ee94645..0138a19a 100644
--- a/data/data-pipeline/data_pipeline/score/score_m.py
+++ b/data/data-pipeline/data_pipeline/score/score_m.py
@@ -1,3 +1,4 @@
+from typing import Tuple
 import numpy as np
 import pandas as pd
 
@@ -27,7 +28,7 @@ class ScoreM(Score):
         column_from_decennial_census: str,
         combined_column_name: str,
         threshold_cutoff_for_island_areas: float,
-    ) -> (pd.DataFrame, str):
+    ) -> Tuple[pd.DataFrame, str]:
         """Steps to set thresholds for island areas.
 
         This function is fairly logically complicated. It takes the following steps: