mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Fix errors and improve code quality and readability in Health Scores (#1147)
* run black on health_score.py * to_numpy() versus values - see https://pandas.pydata.org/pandas-docs/version/0.24.0rc1/api/generated/pandas.Series.to_numpy.html
This commit is contained in:
parent
667678f20e
commit
2604b66cf7
1 changed files with 66 additions and 36 deletions
|
@ -8,10 +8,11 @@ import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sklearn.preprocessing import MinMaxScaler
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
class HealthScores():
|
|
||||||
|
class HealthScores:
|
||||||
"""
|
"""
|
||||||
Calculates health scores by calling the final_scaled_data() method
|
Calculates health scores by calling the final_scaled_data() method
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
weights_1(np array) : weights of model 1.
|
weights_1(np array) : weights of model 1.
|
||||||
weights_2(np array) : weights of model 2.
|
weights_2(np array) : weights of model 2.
|
||||||
|
@ -21,9 +22,18 @@ class HealthScores():
|
||||||
is_weighted_average(boolean) : weights calculation methodology(default is True)
|
is_weighted_average(boolean) : weights calculation methodology(default is True)
|
||||||
weightage(list) : weightage for each y-variable
|
weightage(list) : weightage for each y-variable
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,weights_1,weights_2,weights_3,multiplied_data,geoid,is_weighted_average = True,weightage = [0.25,0.25,0.5]):
|
def __init__(
|
||||||
|
self,
|
||||||
|
weights_1,
|
||||||
|
weights_2,
|
||||||
|
weights_3,
|
||||||
|
multiplied_data,
|
||||||
|
geoid,
|
||||||
|
is_weighted_average=True,
|
||||||
|
weightage=[0.25, 0.25, 0.5],
|
||||||
|
):
|
||||||
|
|
||||||
self.weights_1 = weights_1
|
self.weights_1 = weights_1
|
||||||
self.weights_2 = weights_2
|
self.weights_2 = weights_2
|
||||||
self.weights_3 = weights_3
|
self.weights_3 = weights_3
|
||||||
|
@ -31,74 +41,94 @@ class HealthScores():
|
||||||
self.geoid = geoid
|
self.geoid = geoid
|
||||||
self.is_weighted_average = is_weighted_average
|
self.is_weighted_average = is_weighted_average
|
||||||
self.weightage = weightage
|
self.weightage = weightage
|
||||||
self.weights = np.zeros(self.weights_1.shape).reshape(-1,1)
|
self.weights = np.zeros(self.weights_1.shape).reshape(-1, 1)
|
||||||
|
|
||||||
def _get_weights(self):
|
def _get_weights(self):
|
||||||
'''
|
"""
|
||||||
Averages weights of all the models and performs transformation so that sum of all weights will be equal to 1.
|
Averages weights of all the models and performs transformation so that sum of all weights will be equal to 1.
|
||||||
|
|
||||||
Arguments : None
|
Arguments : None
|
||||||
Returns : Averaged weights which totals to 1.
|
Returns : Averaged weights which totals to 1.
|
||||||
'''
|
"""
|
||||||
weights = np.hstack((self.weights_1.reshape(-1,1),self.weights_2.reshape(-1,1),self.weights_3.reshape(-1,1)))
|
weights = np.hstack(
|
||||||
weights = np.sum(weights,axis = 1) / weights.shape[1]
|
(
|
||||||
|
self.weights_1.reshape(-1, 1),
|
||||||
|
self.weights_2.reshape(-1, 1),
|
||||||
|
self.weights_3.reshape(-1, 1),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
weights = np.sum(weights, axis=1) / weights.shape[1]
|
||||||
self.weights = weights / np.sum(weights)
|
self.weights = weights / np.sum(weights)
|
||||||
|
|
||||||
return self.weights
|
return self.weights
|
||||||
|
|
||||||
def _weighted_average(self):
|
def _weighted_average(self):
|
||||||
'''
|
"""
|
||||||
weighted average:
|
weighted average:
|
||||||
Averages weights of all the models with specified weightage for each model. And performs transformation so that sum of
|
Averages weights of all the models with specified weightage for each model. And performs transformation so that sum of
|
||||||
all weights will be equal to 1.
|
all weights will be equal to 1.
|
||||||
|
|
||||||
Arguments : None
|
Arguments : None
|
||||||
Returns : Averaged weights which totals to 1.(np array)
|
Returns : Averaged weights which totals to 1.(np array)
|
||||||
'''
|
"""
|
||||||
weights = np.hstack((self.weights_1.reshape(-1,1) * self.weightage[0],self.weights_2.reshape(-1,1) * self.weightage[1],self.weights_3.reshape(-1,1) * self.weightage[2]))
|
weights = np.hstack(
|
||||||
weights = np.sum(weights,axis = 1)
|
(
|
||||||
|
self.weights_1.reshape(-1, 1) * self.weightage[0],
|
||||||
|
self.weights_2.reshape(-1, 1) * self.weightage[1],
|
||||||
|
self.weights_3.reshape(-1, 1) * self.weightage[2],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
weights = np.sum(weights, axis=1)
|
||||||
self.weights = weights / np.sum(weights)
|
self.weights = weights / np.sum(weights)
|
||||||
|
|
||||||
return self.weights
|
return self.weights
|
||||||
|
|
||||||
def _health_score(self):
|
def _health_score(self):
|
||||||
'''
|
"""
|
||||||
Converts data in (0 to 100)scale using min max scaler and multiiplying with 100.
|
Converts data in (0 to 100)scale using min max scaler and multiiplying with 100.
|
||||||
Then it calculates health scores by multiplying with the weights
|
Then it calculates health scores by multiplying with the weights
|
||||||
|
|
||||||
Returns : data frame with health score and x variables in (0 - 100)scale.
|
Returns : data frame with health score and x variables in (0 - 100)scale.
|
||||||
'''
|
"""
|
||||||
columns = list(self.multiplied_data.columns) + ['health_scores']
|
columns = list(self.multiplied_data.columns) + ["health_scores"]
|
||||||
scaled_data = MinMaxScaler().fit_transform(self.multiplied_data.values) * 100
|
scaled_data = (
|
||||||
health_scores = np.dot(scaled_data,self.weights.reshape(-1,1))
|
MinMaxScaler().fit_transform(self.multiplied_data.to_numpy()) * 100
|
||||||
|
)
|
||||||
|
health_scores = np.dot(scaled_data, self.weights.reshape(-1, 1))
|
||||||
health_scores = MinMaxScaler().fit_transform(health_scores) * 100
|
health_scores = MinMaxScaler().fit_transform(health_scores) * 100
|
||||||
scaled_data = np.hstack((scaled_data,health_scores))
|
scaled_data = np.hstack((scaled_data, health_scores))
|
||||||
|
|
||||||
scaled_data = pd.DataFrame(data = scaled_data, columns = columns)
|
scaled_data = pd.DataFrame(data=scaled_data, columns=columns)
|
||||||
|
|
||||||
return scaled_data
|
return scaled_data
|
||||||
|
|
||||||
def final_scaled_data(self):
|
def final_scaled_data(self):
|
||||||
"""
|
"""
|
||||||
Calls appropriate methods in class based on arguments. Concatenates geoids and health scores.
|
Calls appropriate methods in class based on arguments. Concatenates geoids and health scores.
|
||||||
|
|
||||||
Arguments : None
|
Arguments : None
|
||||||
Returns :
|
Returns :
|
||||||
final_data(df) : dataframe with all health scores and geoids
|
final_data(df) : dataframe with all health scores and geoids
|
||||||
weights_tables(df) : dataframe with each y-variable weights and averaged weights
|
weights_tables(df) : dataframe with each y-variable weights and averaged weights
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# final scaled data
|
# final scaled data
|
||||||
if self.is_weighted_average:
|
if self.is_weighted_average:
|
||||||
self._weighted_average()
|
self._weighted_average()
|
||||||
else:
|
else:
|
||||||
self._get_weights()
|
self._get_weights()
|
||||||
|
|
||||||
scaled_data_100 = self._health_score()
|
scaled_data_100 = self._health_score()
|
||||||
final_data = pd.concat([self.geoid,scaled_data_100],axis = 1)
|
final_data = pd.concat([self.geoid, scaled_data_100], axis=1)
|
||||||
|
|
||||||
weights_table = pd.DataFrame({'phy_health_weights' : self.weights_1, 'mntl_health_weights' : self.weights_2, 'life_expectancy_weights' : self.weights_3, 'averaged_weights' : self.weights},index = self.multiplied_data.columns)
|
|
||||||
|
|
||||||
return final_data,weights_table
|
|
||||||
|
|
||||||
|
weights_table = pd.DataFrame(
|
||||||
|
{
|
||||||
|
"phy_health_weights": self.weights_1,
|
||||||
|
"mntl_health_weights": self.weights_2,
|
||||||
|
"life_expectancy_weights": self.weights_3,
|
||||||
|
"averaged_weights": self.weights,
|
||||||
|
},
|
||||||
|
index=self.multiplied_data.columns,
|
||||||
|
)
|
||||||
|
|
||||||
|
return final_data, weights_table
|
||||||
|
|
Loading…
Add table
Reference in a new issue