From 6d0cb29dcd12f1f94dd94a8f21b5fc9852a3a34b Mon Sep 17 00:00:00 2001
From: Saran Ahluwalia
Date: Tue, 30 Nov 2021 14:20:29 -0500
Subject: [PATCH] create new copy and remove chained assignment (#939)

Co-authored-by: Saran Ahluwalia
---
 .../data_pipeline/etl/score/etl_score.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index a98b7b61..34803ea4 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -307,13 +307,15 @@ class ScoreETL(ExtractTransformLoad):
         ]

         columns_to_keep = non_numeric_columns + numeric_columns
-        df = df[columns_to_keep]
+
+        df_copy = df[columns_to_keep].copy()
+
+        df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)

         # Convert all columns to numeric and do math
         for col in numeric_columns:
-            df[col] = pd.to_numeric(df[col])
             # Calculate percentiles
-            df[f"{col}{field_names.PERCENTILE_FIELD_SUFFIX}"] = df[col].rank(
+            df_copy[f"{col}{field_names.PERCENTILE_FIELD_SUFFIX}"] = df_copy[col].rank(
                 pct=True
             )

@@ -327,19 +329,19 @@ class ScoreETL(ExtractTransformLoad):
             # Maximum of all values
             # - minimum of all values
             # )
-            min_value = df[col].min(skipna=True)
+            min_value = df_copy[col].min(skipna=True)

-            max_value = df[col].max(skipna=True)
+            max_value = df_copy[col].max(skipna=True)

             logger.info(
                 f"For data set {col}, the min value is {min_value} and the max value is {max_value}."
             )

-            df[f"{col}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
-                df[col] - min_value
+            df_copy[f"{col}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
+                df_copy[col] - min_value
             ) / (max_value - min_value)

-        return df
+        return df_copy

     def transform(self) -> None:
         logger.info("Transforming Score Data")