Fix errors and improve code quality and readability in Health Scores (#1147)

* run black on health_score.py

* use to_numpy() instead of .values - see https://pandas.pydata.org/pandas-docs/version/0.24.0rc1/api/generated/pandas.Series.to_numpy.html
This commit is contained in:
Saran Ahluwalia 2022-01-14 13:11:47 -05:00 committed by GitHub
parent 667678f20e
commit 2604b66cf7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -8,7 +8,8 @@ import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
class HealthScores():
class HealthScores:
"""
Calculates health scores by calling the final_scaled_data() method
@ -22,7 +23,16 @@ class HealthScores():
weightage(list) : weightage for each y-variable
"""
def __init__(self,weights_1,weights_2,weights_3,multiplied_data,geoid,is_weighted_average = True,weightage = [0.25,0.25,0.5]):
def __init__(
self,
weights_1,
weights_2,
weights_3,
multiplied_data,
geoid,
is_weighted_average=True,
weightage=[0.25, 0.25, 0.5],
):
self.weights_1 = weights_1
self.weights_2 = weights_2
@ -31,50 +41,64 @@ class HealthScores():
self.geoid = geoid
self.is_weighted_average = is_weighted_average
self.weightage = weightage
self.weights = np.zeros(self.weights_1.shape).reshape(-1,1)
self.weights = np.zeros(self.weights_1.shape).reshape(-1, 1)
def _get_weights(self):
    """
    Average the weights of the three models and normalise them to sum to 1.

    ``weights_1``, ``weights_2`` and ``weights_3`` are each flattened into a
    column, the three columns are averaged row by row, and the row averages
    are divided by their total.

    Arguments : None
    Returns : Averaged weights which total 1 (1-D np array). The same array
        is also stored on ``self.weights``.
    """
    # np.asarray accepts any array-like (ndarray, list, ...), so callers are
    # not restricted to objects that expose ``.reshape`` themselves.
    stacked = np.hstack(
        [
            np.asarray(model_weights).reshape(-1, 1)
            for model_weights in (self.weights_1, self.weights_2, self.weights_3)
        ]
    )
    # Row-wise mean across the models (one column per model).
    averaged = np.sum(stacked, axis=1) / stacked.shape[1]
    # Normalise so the weights add up to 1. There is no guard for a zero
    # total: numpy will emit nan/inf in that case.
    self.weights = averaged / np.sum(averaged)
    return self.weights
def _weighted_average(self):
    """
    Combine the three models' weights using a per-model weightage.

    Each model's weights are multiplied by the matching entry of
    ``self.weightage`` (index 0, 1 and 2), the three scaled columns are
    summed row by row, and the result is normalised to sum to 1.

    Arguments : None
    Returns : Averaged weights which total 1 (1-D np array). The same array
        is also stored on ``self.weights``.
    """
    # np.asarray accepts any array-like (ndarray, list, ...), so callers are
    # not restricted to objects that expose ``.reshape`` themselves.
    stacked = np.hstack(
        (
            np.asarray(self.weights_1).reshape(-1, 1) * self.weightage[0],
            np.asarray(self.weights_2).reshape(-1, 1) * self.weightage[1],
            np.asarray(self.weights_3).reshape(-1, 1) * self.weightage[2],
        )
    )
    # Row-wise sum of the already-scaled model columns.
    combined = np.sum(stacked, axis=1)
    # Normalise so the weights add up to 1. There is no guard for a zero
    # total: numpy will emit nan/inf in that case.
    self.weights = combined / np.sum(combined)
    return self.weights
def _health_score(self):
    """
    Build the 0-100 scaled feature table together with the health score.

    Every column of ``self.multiplied_data`` is min-max scaled and multiplied
    by 100. The health score is the dot product of those scaled columns with
    ``self.weights``, itself min-max scaled and multiplied by 100.

    Returns : data frame holding the scaled x variables followed by a
        ``health_scores`` column. The frame is built from a plain array, so
        it carries a fresh default index rather than the index of
        ``self.multiplied_data``.
    """
    columns = list(self.multiplied_data.columns) + ["health_scores"]

    # Scale each x variable independently to the 0-100 range.
    scaled_features = (
        MinMaxScaler().fit_transform(self.multiplied_data.to_numpy()) * 100
    )

    # Weighted combination of the scaled variables -> one score per row,
    # kept as a column vector so it can be rescaled and stacked below.
    health_scores = np.dot(scaled_features, self.weights.reshape(-1, 1))
    health_scores = MinMaxScaler().fit_transform(health_scores) * 100

    # Append the score exactly once so the array width matches ``columns``.
    combined = np.hstack((scaled_features, health_scores))
    return pd.DataFrame(data=combined, columns=columns)
@ -95,10 +119,16 @@ class HealthScores():
self._get_weights()
scaled_data_100 = self._health_score()
final_data = pd.concat([self.geoid,scaled_data_100],axis = 1)
weights_table = pd.DataFrame({'phy_health_weights' : self.weights_1, 'mntl_health_weights' : self.weights_2, 'life_expectancy_weights' : self.weights_3, 'averaged_weights' : self.weights},index = self.multiplied_data.columns)
return final_data,weights_table
final_data = pd.concat([self.geoid, scaled_data_100], axis=1)
weights_table = pd.DataFrame(
{
"phy_health_weights": self.weights_1,
"mntl_health_weights": self.weights_2,
"life_expectancy_weights": self.weights_3,
"averaged_weights": self.weights,
},
index=self.multiplied_data.columns,
)
return final_data, weights_table