Hot fix for Score M (#1182)

* fixes

* pr feedback

* tuple
This commit is contained in:
Jorge Escobar 2022-01-27 17:22:39 -05:00 committed by GitHub
commit 2b35a8937a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 13 deletions

View file

@@ -17,6 +17,9 @@ from . import constants
logger = get_module_logger(__name__) logger = get_module_logger(__name__)
# Define the DAC variable
DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_M_COMMUNITIES
class PostScoreETL(ExtractTransformLoad): class PostScoreETL(ExtractTransformLoad):
""" """
@@ -184,17 +187,9 @@ class PostScoreETL(ExtractTransformLoad):
merged_df["Total population"].fillna(0.0).astype(int) merged_df["Total population"].fillna(0.0).astype(int)
) )
# list the null score tracts de_duplicated_df = merged_df.dropna(
null_tract_df = merged_df[ subset=[DISADVANTAGED_COMMUNITIES_FIELD]
merged_df[field_names.SCORE_L_COMMUNITIES].isnull() )
]
# subtract data sets
# this follows the XOR pattern outlined here:
# https://stackoverflow.com/a/37313953
de_duplicated_df = pd.concat(
[merged_df, null_tract_df, null_tract_df]
).drop_duplicates(keep=False)
# set the score to the new df # set the score to the new df
return de_duplicated_df return de_duplicated_df
@@ -333,7 +328,7 @@ class PostScoreETL(ExtractTransformLoad):
# Rename score column # Rename score column
downloadable_df_copy = downloadable_df.rename( downloadable_df_copy = downloadable_df.rename(
columns={ columns={
field_names.SCORE_M_COMMUNITIES: "Identified as disadvantaged (v0.1)" DISADVANTAGED_COMMUNITIES_FIELD: "Identified as disadvantaged (v0.1)"
}, },
inplace=False, inplace=False,
) )

View file

@@ -1,3 +1,4 @@
from typing import Tuple
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@@ -27,7 +28,7 @@ class ScoreM(Score):
column_from_decennial_census: str, column_from_decennial_census: str,
combined_column_name: str, combined_column_name: str,
threshold_cutoff_for_island_areas: float, threshold_cutoff_for_island_areas: float,
) -> (pd.DataFrame, str): ) -> Tuple[pd.DataFrame, str]:
"""Steps to set thresholds for island areas. """Steps to set thresholds for island areas.
This function is fairly logically complicated. It takes the following steps: This function is fairly logically complicated. It takes the following steps: