mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
create new copy and remove chained assignment (#939)
Co-authored-by: Saran Ahluwalia <sarahluw@cisco.com>
This commit is contained in:
parent
d2352c6217
commit
6d0cb29dcd
1 changed file with 10 additions and 8 deletions
|
@@ -307,13 +307,15 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
]
|
]
|
||||||
|
|
||||||
columns_to_keep = non_numeric_columns + numeric_columns
|
columns_to_keep = non_numeric_columns + numeric_columns
|
||||||
df = df[columns_to_keep]
|
|
||||||
|
df_copy = df[columns_to_keep].copy()
|
||||||
|
|
||||||
|
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
|
||||||
|
|
||||||
# Convert all columns to numeric and do math
|
# Convert all columns to numeric and do math
|
||||||
for col in numeric_columns:
|
for col in numeric_columns:
|
||||||
df[col] = pd.to_numeric(df[col])
|
|
||||||
# Calculate percentiles
|
# Calculate percentiles
|
||||||
df[f"{col}{field_names.PERCENTILE_FIELD_SUFFIX}"] = df[col].rank(
|
df_copy[f"{col}{field_names.PERCENTILE_FIELD_SUFFIX}"] = df_copy[col].rank(
|
||||||
pct=True
|
pct=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -327,19 +329,19 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
# Maximum of all values
|
# Maximum of all values
|
||||||
# - minimum of all values
|
# - minimum of all values
|
||||||
# )
|
# )
|
||||||
min_value = df[col].min(skipna=True)
|
min_value = df_copy[col].min(skipna=True)
|
||||||
|
|
||||||
max_value = df[col].max(skipna=True)
|
max_value = df_copy[col].max(skipna=True)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"For data set {col}, the min value is {min_value} and the max value is {max_value}."
|
f"For data set {col}, the min value is {min_value} and the max value is {max_value}."
|
||||||
)
|
)
|
||||||
|
|
||||||
df[f"{col}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
|
df_copy[f"{col}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
|
||||||
df[col] - min_value
|
df_copy[col] - min_value
|
||||||
) / (max_value - min_value)
|
) / (max_value - min_value)
|
||||||
|
|
||||||
return df
|
return df_copy
|
||||||
|
|
||||||
def transform(self) -> None:
|
def transform(self) -> None:
|
||||||
logger.info("Transforming Score Data")
|
logger.info("Transforming Score Data")
|
||||||
|
|
Loading…
Add table
Reference in a new issue