Hot fix for Score M (#1182)

* fixes
* pr feedback
* tuple
2025-09-30 03:03:17 -07:00 · 2022-01-27 17:22:39 -05:00 · 2022-01-27 17:22:39 -05:00 · 2b35a8937a
commit 2b35a8937a
parent f5f7a254af
2 changed files with 9 additions and 13 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@@ -17,6 +17,9 @@ from . import constants
 logger = get_module_logger(__name__)
+# Define the DAC variable
+DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_M_COMMUNITIES
 class PostScoreETL(ExtractTransformLoad):
    """
@@ -184,17 +187,9 @@ class PostScoreETL(ExtractTransformLoad):
            merged_df["Total population"].fillna(0.0).astype(int)
        )
-        # list the null score tracts
-        null_tract_df = merged_df[
-            merged_df[field_names.SCORE_L_COMMUNITIES].isnull()
-        ]
-        # subtract data sets
-        # this follows the XOR pattern outlined here:
-        # https://stackoverflow.com/a/37313953
-        de_duplicated_df = pd.concat(
-            [merged_df, null_tract_df, null_tract_df]
-        ).drop_duplicates(keep=False)
+        de_duplicated_df = merged_df.dropna(
+            subset=[DISADVANTAGED_COMMUNITIES_FIELD]
+        )
        # set the score to the new df
        return de_duplicated_df
@@ -333,7 +328,7 @@ class PostScoreETL(ExtractTransformLoad):
        # Rename score column
        downloadable_df_copy = downloadable_df.rename(
            columns={
-                field_names.SCORE_M_COMMUNITIES: "Identified as disadvantaged (v0.1)"
+                DISADVANTAGED_COMMUNITIES_FIELD: "Identified as disadvantaged (v0.1)"
            },
            inplace=False,
        )
--- a/data/data-pipeline/data_pipeline/score/score_m.py
+++ b/data/data-pipeline/data_pipeline/score/score_m.py
@@ -1,3 +1,4 @@
+from typing import Tuple
 import numpy as np
 import pandas as pd
@@ -27,7 +28,7 @@ class ScoreM(Score):
        column_from_decennial_census: str,
        combined_column_name: str,
        threshold_cutoff_for_island_areas: float,
-    ) -> (pd.DataFrame, str):
+    ) -> Tuple[pd.DataFrame, str]:
        """Steps to set thresholds for island areas.
        This function is fairly logically complicated. It takes the following steps: