mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
Add experimental Jupyter notebook with Health Scoring Methodology Example for Health Scores (#989)
Co-authored-by: Saran Ahluwalia <sarahluw@cisco.com>
This commit is contained in:
parent
4cec1bb37e
commit
98ff4bd9d8
2 changed files with 1045 additions and 0 deletions
|
@ -0,0 +1,104 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[ ]:
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
|
class HealthScores:
    """
    Combines the weights of three models into a single weight vector and uses
    it to calculate health scores on a 0 - 100 scale.

    Calculates health scores by calling the final_scaled_data() method

    Arguments:
        weights_1(np array) : weights of model 1.
        weights_2(np array) : weights of model 2.
        weights_3(np array) : weights of model 3.
        multiplied_data(df) : all census tracts data which needs to be multiplied with weights to get health scores
        geoid(Series) : geoids of census tracts to concatenate with our health scores data.
        is_weighted_average(boolean) : weights calculation methodology(default is True)
        weightage(list) : weightage for each y-variable (default is [0.25, 0.25, 0.5])

    The weights may be 1-D arrays, (n, 1) column vectors or plain sequences;
    they are flattened to one value per column of multiplied_data.
    """

    # Kept as an immutable tuple; every instance gets its own list copy so
    # that mutating one instance's weightage cannot leak into another.
    _DEFAULT_WEIGHTAGE = (0.25, 0.25, 0.5)

    def __init__(self, weights_1, weights_2, weights_3, multiplied_data, geoid,
                 is_weighted_average=True, weightage=None):
        self.weights_1 = weights_1
        self.weights_2 = weights_2
        self.weights_3 = weights_3
        self.multiplied_data = multiplied_data
        self.geoid = geoid
        self.is_weighted_average = is_weighted_average
        # None means "use the default"; a caller-supplied object is stored as is.
        if weightage is None:
            weightage = list(self._DEFAULT_WEIGHTAGE)
        self.weightage = weightage
        # Placeholder column of zeros until one of the averaging methods runs.
        self.weights = np.zeros(np.shape(weights_1)).reshape(-1, 1)

    @staticmethod
    def _as_column(weights):
        '''
        Returns the given weights as an (n, 1) numpy column vector.
        '''
        return np.asarray(weights).reshape(-1, 1)

    def _get_weights(self):
        '''
        Averages weights of all the models and performs transformation so that sum of all weights will be equal to 1.

        Arguments : None
        Returns : Averaged weights which totals to 1.(np array)
        '''
        stacked = np.hstack((
            self._as_column(self.weights_1),
            self._as_column(self.weights_2),
            self._as_column(self.weights_3),
        ))
        averaged = np.sum(stacked, axis=1) / stacked.shape[1]
        self.weights = averaged / np.sum(averaged)

        return self.weights

    def _weighted_average(self):
        '''
        weighted average:
        Averages weights of all the models with specified weightage for each model. And performs transformation so that sum of
        all weights will be equal to 1.

        Arguments : None
        Returns : Averaged weights which totals to 1.(np array)
        '''
        # Only the first three weightage entries are used, one per model.
        stacked = np.hstack((
            self._as_column(self.weights_1) * self.weightage[0],
            self._as_column(self.weights_2) * self.weightage[1],
            self._as_column(self.weights_3) * self.weightage[2],
        ))
        combined = np.sum(stacked, axis=1)
        self.weights = combined / np.sum(combined)

        return self.weights

    def _health_score(self):
        '''
        Converts data in (0 to 100)scale using min max scaler and multiplying with 100.
        Then it calculates health scores by multiplying with the weights
        and rescales the scores themselves to (0 to 100).

        Returns : data frame with health score and x variables in (0 - 100)scale.
                  Rows are in the same order as multiplied_data and carry a
                  fresh 0..n-1 index.
        '''
        columns = list(self.multiplied_data.columns) + ['health_scores']
        scaled_data = MinMaxScaler().fit_transform(self.multiplied_data.values) * 100
        health_scores = np.dot(scaled_data, np.asarray(self.weights).reshape(-1, 1))
        health_scores = MinMaxScaler().fit_transform(health_scores) * 100
        scaled_data = np.hstack((scaled_data, health_scores))

        return pd.DataFrame(data=scaled_data, columns=columns)

    def final_scaled_data(self):
        """
        Calls appropriate methods in class based on arguments. Concatenates geoids and health scores.

        Arguments : None
        Returns :
            final_data(df) : dataframe with all health scores and geoids
            weights_tables(df) : dataframe with each y-variable weights and averaged weights
        """
        if self.is_weighted_average:
            self._weighted_average()
        else:
            self._get_weights()

        scaled_data_100 = self._health_score()

        # The scaled frame is rebuilt from raw values, so its rows are only
        # positionally related to the inputs. Drop the geoid index so the two
        # are paired row by row instead of by index label (label alignment
        # yields NaN-padded rows whenever geoid has a non-default index).
        geoid = self.geoid.reset_index(drop=True)
        final_data = pd.concat([geoid, scaled_data_100], axis=1)

        # np.ravel lets (n, 1) column-vector weights fit into a single column.
        weights_table = pd.DataFrame(
            {
                'phy_health_weights': np.ravel(self.weights_1),
                'mntl_health_weights': np.ravel(self.weights_2),
                'life_expectancy_weights': np.ravel(self.weights_3),
                'averaged_weights': np.ravel(self.weights),
            },
            index=self.multiplied_data.columns,
        )

        return final_data, weights_table
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,941 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from health_scores import HealthScores\n",
|
||||||
|
"from Model import Model\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Load the master dataset in raw format (all variables we have collected). Many variables have nulls. In the code, null values are handled by imputing with the group means of the first 8 digits of the geoid (which represent the nearest census tracts). "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"(1172, 55)\n",
|
||||||
|
"Index(['Unnamed: 0', 'COI_FOOD', 'COI_GREEN', 'COI_WALK', 'COI_VACANCY',\n",
|
||||||
|
" 'COI_SUPRFND', 'COI_RSEI', 'COI_PM25', 'COI_OZONE', 'COI_HEAT',\n",
|
||||||
|
" 'COI_HLTHINS', 'latitude', 'longitude', 'geoid', 'countyfips',\n",
|
||||||
|
" 'TractFIPS', 'County', 'StateAbbr', 'PlaceName', 'PlaceFIPS',\n",
|
||||||
|
" 'Place_TractID', 'Population2010', 'ACCESS2_CrudePrev',\n",
|
||||||
|
" 'ARTHRITIS_CrudePrev', 'BINGE_CrudePrev', 'BPHIGH_CrudePrev',\n",
|
||||||
|
" 'BPMED_CrudePrev', 'CANCER_CrudePrev', 'CASTHMA_CrudePrev',\n",
|
||||||
|
" 'CHD_CrudePrev', 'CHECKUP_CrudePrev', 'CHOLSCREEN_CrudePrev',\n",
|
||||||
|
" 'COLON_SCREEN_CrudePrev', 'COPD_CrudePrev', 'COREM_CrudePrev',\n",
|
||||||
|
" 'COREW_CrudePrev', 'CSMOKING_CrudePrev', 'DENTAL_CrudePrev',\n",
|
||||||
|
" 'DIABETES_CrudePrev', 'HIGHCHOL_CrudePrev', 'KIDNEY_CrudePrev',\n",
|
||||||
|
" 'LPA_CrudePrev', 'MAMMOUSE_CrudePrev', 'MHLTH_CrudePrev',\n",
|
||||||
|
" 'OBESITY_CrudePrev', 'PAPTEST_CrudePrev', 'PHLTH_CrudePrev',\n",
|
||||||
|
" 'SLEEP_CrudePrev', 'STROKE_CrudePrev', 'TEETHLOST_CrudePrev',\n",
|
||||||
|
" 'life expectancy', 'Alcohol Test', 'Drug Test', 'Pedalcyclist',\n",
|
||||||
|
" 'Pedastrian'],\n",
|
||||||
|
" dtype='object')\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_raw = pd.read_csv('master_raw_data.csv') #file from the 'Data collection.ipynb'\n",
|
||||||
|
"print(df_raw.shape)\n",
|
||||||
|
"print(df_raw.columns)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From these variables you can pick any variables to include in the model and get the results.(Method is explained below)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### More advanced imputation method "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Missing data are set equal to the value from the nearest neighborhood (the distance between latitude/longitude points was used; see https://www.movable-type.co.uk/scripts/latlong.html for details)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>geoid</th>\n",
|
||||||
|
" <th>longitude</th>\n",
|
||||||
|
" <th>latitude</th>\n",
|
||||||
|
" <th>ED_PRXECE</th>\n",
|
||||||
|
" <th>ED_PRXHQECE</th>\n",
|
||||||
|
" <th>ED_ECENROL</th>\n",
|
||||||
|
" <th>ED_READING</th>\n",
|
||||||
|
" <th>ED_MATH</th>\n",
|
||||||
|
" <th>ED_HSGRAD</th>\n",
|
||||||
|
" <th>ED_APENR</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PAPTEST_CrudePrev</th>\n",
|
||||||
|
" <th>PHLTH_CrudePrev</th>\n",
|
||||||
|
" <th>SLEEP_CrudePrev</th>\n",
|
||||||
|
" <th>STROKE_CrudePrev</th>\n",
|
||||||
|
" <th>TEETHLOST_CrudePrev</th>\n",
|
||||||
|
" <th>Alcohol Test</th>\n",
|
||||||
|
" <th>Drug Test</th>\n",
|
||||||
|
" <th>Pedalcyclist</th>\n",
|
||||||
|
" <th>Pedastrian</th>\n",
|
||||||
|
" <th>life expectancy</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>48085030100</td>\n",
|
||||||
|
" <td>-96.39820</td>\n",
|
||||||
|
" <td>33.29592</td>\n",
|
||||||
|
" <td>1.343954</td>\n",
|
||||||
|
" <td>-13.815511</td>\n",
|
||||||
|
" <td>30.9</td>\n",
|
||||||
|
" <td>217.85674</td>\n",
|
||||||
|
" <td>250.22748</td>\n",
|
||||||
|
" <td>94.002556</td>\n",
|
||||||
|
" <td>0.278373</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>78.30</td>\n",
|
||||||
|
" <td>11.90</td>\n",
|
||||||
|
" <td>35.40</td>\n",
|
||||||
|
" <td>2.70</td>\n",
|
||||||
|
" <td>18.80</td>\n",
|
||||||
|
" <td>0.821918</td>\n",
|
||||||
|
" <td>0.821918</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>1.095890</td>\n",
|
||||||
|
" <td>76.7</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>48085030201</td>\n",
|
||||||
|
" <td>-96.53734</td>\n",
|
||||||
|
" <td>33.26331</td>\n",
|
||||||
|
" <td>2.069664</td>\n",
|
||||||
|
" <td>-13.815511</td>\n",
|
||||||
|
" <td>61.0</td>\n",
|
||||||
|
" <td>220.04181</td>\n",
|
||||||
|
" <td>246.44695</td>\n",
|
||||||
|
" <td>87.928993</td>\n",
|
||||||
|
" <td>0.287710</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>78.30</td>\n",
|
||||||
|
" <td>11.90</td>\n",
|
||||||
|
" <td>35.40</td>\n",
|
||||||
|
" <td>2.70</td>\n",
|
||||||
|
" <td>18.80</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>48085030202</td>\n",
|
||||||
|
" <td>-96.64279</td>\n",
|
||||||
|
" <td>33.34124</td>\n",
|
||||||
|
" <td>1.751906</td>\n",
|
||||||
|
" <td>-13.815511</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>233.74568</td>\n",
|
||||||
|
" <td>262.12021</td>\n",
|
||||||
|
" <td>80.740799</td>\n",
|
||||||
|
" <td>0.568353</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>81.40</td>\n",
|
||||||
|
" <td>8.80</td>\n",
|
||||||
|
" <td>35.00</td>\n",
|
||||||
|
" <td>1.60</td>\n",
|
||||||
|
" <td>9.50</td>\n",
|
||||||
|
" <td>4.464286</td>\n",
|
||||||
|
" <td>1.116071</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>2.232143</td>\n",
|
||||||
|
" <td>78.8</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>48085030203</td>\n",
|
||||||
|
" <td>-96.54547</td>\n",
|
||||||
|
" <td>33.34176</td>\n",
|
||||||
|
" <td>2.338918</td>\n",
|
||||||
|
" <td>-13.815511</td>\n",
|
||||||
|
" <td>32.7</td>\n",
|
||||||
|
" <td>226.88499</td>\n",
|
||||||
|
" <td>261.43530</td>\n",
|
||||||
|
" <td>95.360466</td>\n",
|
||||||
|
" <td>0.290443</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>81.40</td>\n",
|
||||||
|
" <td>8.80</td>\n",
|
||||||
|
" <td>35.00</td>\n",
|
||||||
|
" <td>1.60</td>\n",
|
||||||
|
" <td>9.50</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.163881</td>\n",
|
||||||
|
" <td>0.327761</td>\n",
|
||||||
|
" <td>78.2</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>48085030302</td>\n",
|
||||||
|
" <td>-96.75005</td>\n",
|
||||||
|
" <td>33.24045</td>\n",
|
||||||
|
" <td>3.588619</td>\n",
|
||||||
|
" <td>-13.815511</td>\n",
|
||||||
|
" <td>62.3</td>\n",
|
||||||
|
" <td>250.81639</td>\n",
|
||||||
|
" <td>274.56683</td>\n",
|
||||||
|
" <td>96.399155</td>\n",
|
||||||
|
" <td>0.650187</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>82.05</td>\n",
|
||||||
|
" <td>9.15</td>\n",
|
||||||
|
" <td>32.95</td>\n",
|
||||||
|
" <td>1.95</td>\n",
|
||||||
|
" <td>7.75</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.000000</td>\n",
|
||||||
|
" <td>0.407166</td>\n",
|
||||||
|
" <td>0.407166</td>\n",
|
||||||
|
" <td>82.0</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 65 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" geoid longitude latitude ED_PRXECE ED_PRXHQECE ED_ECENROL \\\n",
|
||||||
|
"0 48085030100 -96.39820 33.29592 1.343954 -13.815511 30.9 \n",
|
||||||
|
"1 48085030201 -96.53734 33.26331 2.069664 -13.815511 61.0 \n",
|
||||||
|
"2 48085030202 -96.64279 33.34124 1.751906 -13.815511 0.0 \n",
|
||||||
|
"3 48085030203 -96.54547 33.34176 2.338918 -13.815511 32.7 \n",
|
||||||
|
"4 48085030302 -96.75005 33.24045 3.588619 -13.815511 62.3 \n",
|
||||||
|
"\n",
|
||||||
|
" ED_READING ED_MATH ED_HSGRAD ED_APENR ... \\\n",
|
||||||
|
"0 217.85674 250.22748 94.002556 0.278373 ... \n",
|
||||||
|
"1 220.04181 246.44695 87.928993 0.287710 ... \n",
|
||||||
|
"2 233.74568 262.12021 80.740799 0.568353 ... \n",
|
||||||
|
"3 226.88499 261.43530 95.360466 0.290443 ... \n",
|
||||||
|
"4 250.81639 274.56683 96.399155 0.650187 ... \n",
|
||||||
|
"\n",
|
||||||
|
" PAPTEST_CrudePrev PHLTH_CrudePrev SLEEP_CrudePrev STROKE_CrudePrev \\\n",
|
||||||
|
"0 78.30 11.90 35.40 2.70 \n",
|
||||||
|
"1 78.30 11.90 35.40 2.70 \n",
|
||||||
|
"2 81.40 8.80 35.00 1.60 \n",
|
||||||
|
"3 81.40 8.80 35.00 1.60 \n",
|
||||||
|
"4 82.05 9.15 32.95 1.95 \n",
|
||||||
|
"\n",
|
||||||
|
" TEETHLOST_CrudePrev Alcohol Test Drug Test Pedalcyclist Pedastrian \\\n",
|
||||||
|
"0 18.80 0.821918 0.821918 0.000000 1.095890 \n",
|
||||||
|
"1 18.80 0.000000 0.000000 0.000000 0.000000 \n",
|
||||||
|
"2 9.50 4.464286 1.116071 0.000000 2.232143 \n",
|
||||||
|
"3 9.50 0.000000 0.000000 0.163881 0.327761 \n",
|
||||||
|
"4 7.75 0.000000 0.000000 0.407166 0.407166 \n",
|
||||||
|
"\n",
|
||||||
|
" life expectancy \n",
|
||||||
|
"0 76.7 \n",
|
||||||
|
"1 NaN \n",
|
||||||
|
"2 78.8 \n",
|
||||||
|
"3 78.2 \n",
|
||||||
|
"4 82.0 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 65 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#More advanced imputation method\n",
|
||||||
|
"df_imputed_x = pd.read_excel('20200420_input_final.xlsx')\n",
|
||||||
|
"df_imputed_x.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Steps to get the model output:\n",
|
||||||
|
"\n",
|
||||||
|
"#### 1. Initialize Model() class by passing the data file.\n",
|
||||||
|
"#### 2. Call the model_output() method with the below arguments.\n",
|
||||||
|
" \n",
|
||||||
|
" columns_regress(list) : x variables to include in the regression model\n",
|
||||||
|
" target(string) : target variable(y) in the regression model\n",
|
||||||
|
" multiply_cols(dict): dictionary with columns and their multiplier(-1,1) as key-value pairs to rescale all variables\n",
|
||||||
|
" as high is good.\n",
|
||||||
|
"\n",
|
||||||
|
" Default Arguments : Has default values that can be changed as per requirement.\n",
|
||||||
|
" \n",
|
||||||
|
" columns_impute(list) : columns_impute to be imputed(default is None)\n",
|
||||||
|
" winsorize_outliers(dict) : dictionary of limits for the respective columns{'col' : limit} (default is None)\n",
|
||||||
|
" winsorize_with_95(boolean) : winsorize all columns with 95 percentile(True or False) (default - False)\n",
|
||||||
|
" target_multiplier(int) : to change the direction of y variable if needed(default is 1)\n",
|
||||||
|
"\n",
|
||||||
|
"#### IMPORTANT : Follow the arguments order or specify the argument name when calling the method.\n",
|
||||||
|
"Example : model1.model_output(columns_regress,target,multiply_cols,target_multiplier = target_multiplier)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" Returns :returns all census tracts transformed data(high is good).\n",
|
||||||
|
" :prints model summary.\n",
|
||||||
|
" :returns model weights.\n",
|
||||||
|
" \n",
|
||||||
|
"##### Store the data and model weights to get the cumulative health score"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Example :"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" OLS Regression Results \n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"Dep. Variable: y R-squared: 0.558\n",
|
||||||
|
"Model: OLS Adj. R-squared: 0.553\n",
|
||||||
|
"Method: Least Squares F-statistic: 103.9\n",
|
||||||
|
"Date: Tue, 28 Apr 2020 Prob (F-statistic): 1.35e-165\n",
|
||||||
|
"Time: 09:32:30 Log-Likelihood: -1009.2\n",
|
||||||
|
"No. Observations: 999 AIC: 2044.\n",
|
||||||
|
"Df Residuals: 986 BIC: 2108.\n",
|
||||||
|
"Df Model: 12 \n",
|
||||||
|
"Covariance Type: nonrobust \n",
|
||||||
|
"=====================================================================================\n",
|
||||||
|
" coef std err t P>|t| [0.025 0.975]\n",
|
||||||
|
"-------------------------------------------------------------------------------------\n",
|
||||||
|
"const 5.117e-16 0.021 2.42e-14 1.000 -0.042 0.042\n",
|
||||||
|
"HE_FOOD 0.0891 0.028 3.134 0.002 0.033 0.145\n",
|
||||||
|
"HE_WALK 0.0926 0.025 3.646 0.000 0.043 0.142\n",
|
||||||
|
"HE_VACANCY 0.1124 0.026 4.300 0.000 0.061 0.164\n",
|
||||||
|
"HE_SUPRFND 0.1049 0.029 3.579 0.000 0.047 0.162\n",
|
||||||
|
"HE_HLTHINS 0.3111 0.056 5.533 0.000 0.201 0.421\n",
|
||||||
|
"BINGE_CrudePrev 0.1404 0.055 2.573 0.010 0.033 0.247\n",
|
||||||
|
"CHECKUP_CrudePrev 0.3370 0.119 2.841 0.005 0.104 0.570\n",
|
||||||
|
"BPHIGH_CrudePrev 0.2403 0.141 1.704 0.089 -0.036 0.517\n",
|
||||||
|
"SLEEP_CrudePrev 0.3075 0.037 8.209 0.000 0.234 0.381\n",
|
||||||
|
"STROKE_CrudePrev 0.2741 0.096 2.841 0.005 0.085 0.463\n",
|
||||||
|
"Drug Test 0.0294 0.027 1.107 0.269 -0.023 0.081\n",
|
||||||
|
"Pedalcyclist 0.0648 0.027 2.408 0.016 0.012 0.118\n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"Omnibus: 17.440 Durbin-Watson: 1.833\n",
|
||||||
|
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 30.414\n",
|
||||||
|
"Skew: -0.074 Prob(JB): 2.49e-07\n",
|
||||||
|
"Kurtosis: 3.842 Cond. No. 18.5\n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"\n",
|
||||||
|
"Warnings:\n",
|
||||||
|
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"data = df_imputed_x.copy() #data\n",
|
||||||
|
"model1 = Model(data) #Initializing class variable\n",
|
||||||
|
"\n",
|
||||||
|
"# variables list to include in our regression model.\n",
|
||||||
|
"columns_regress = ['HE_FOOD', 'HE_WALK', 'HE_VACANCY', 'HE_SUPRFND','HE_HLTHINS','BINGE_CrudePrev',\n",
|
||||||
|
" 'CHECKUP_CrudePrev','BPHIGH_CrudePrev','SLEEP_CrudePrev', 'STROKE_CrudePrev', 'Drug Test', 'Pedalcyclist']\n",
|
||||||
|
"\n",
|
||||||
|
"# multipliers for each variable to rescale the variables as higher the value better for the health score.\n",
|
||||||
|
"multiply_cols = {'HE_FOOD': -1,'HE_WALK': 1,'HE_VACANCY': -1,'HE_SUPRFND':-1 , 'HE_HLTHINS': 1 ,'BINGE_CrudePrev': -1 , \n",
|
||||||
|
" 'CHECKUP_CrudePrev': 1,'BPHIGH_CrudePrev': -1,'SLEEP_CrudePrev': -1,'STROKE_CrudePrev' : -1, 'Drug Test' : -1, 'Pedalcyclist' : -1,}\n",
|
||||||
|
"\n",
|
||||||
|
"#target variable\n",
|
||||||
|
"target = 'life expectancy'\n",
|
||||||
|
"\n",
|
||||||
|
"#storing the data and model weights to calculate health score.\n",
|
||||||
|
"multiplied_zscore_data_le, params_le = model1.model_output(columns_regress,target,multiply_cols)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" OLS Regression Results \n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"Dep. Variable: y R-squared: 0.917\n",
|
||||||
|
"Model: OLS Adj. R-squared: 0.916\n",
|
||||||
|
"Method: Least Squares F-statistic: 906.4\n",
|
||||||
|
"Date: Tue, 28 Apr 2020 Prob (F-statistic): 0.00\n",
|
||||||
|
"Time: 09:32:30 Log-Likelihood: -175.01\n",
|
||||||
|
"No. Observations: 999 AIC: 376.0\n",
|
||||||
|
"Df Residuals: 986 BIC: 439.8\n",
|
||||||
|
"Df Model: 12 \n",
|
||||||
|
"Covariance Type: nonrobust \n",
|
||||||
|
"=====================================================================================\n",
|
||||||
|
" coef std err t P>|t| [0.025 0.975]\n",
|
||||||
|
"-------------------------------------------------------------------------------------\n",
|
||||||
|
"const -4.337e-18 0.009 -4.72e-16 1.000 -0.018 0.018\n",
|
||||||
|
"HE_FOOD -0.0565 0.012 -4.585 0.000 -0.081 -0.032\n",
|
||||||
|
"HE_WALK -0.0731 0.011 -6.640 0.000 -0.095 -0.052\n",
|
||||||
|
"HE_VACANCY -0.0380 0.011 -3.355 0.001 -0.060 -0.016\n",
|
||||||
|
"HE_SUPRFND -0.0610 0.013 -4.795 0.000 -0.086 -0.036\n",
|
||||||
|
"HE_HLTHINS 0.2803 0.024 11.491 0.000 0.232 0.328\n",
|
||||||
|
"BINGE_CrudePrev 0.3286 0.024 13.881 0.000 0.282 0.375\n",
|
||||||
|
"CHECKUP_CrudePrev 0.4692 0.051 9.118 0.000 0.368 0.570\n",
|
||||||
|
"BPHIGH_CrudePrev 0.5301 0.061 8.664 0.000 0.410 0.650\n",
|
||||||
|
"SLEEP_CrudePrev 0.3841 0.016 23.633 0.000 0.352 0.416\n",
|
||||||
|
"STROKE_CrudePrev 0.5059 0.042 12.089 0.000 0.424 0.588\n",
|
||||||
|
"Drug Test -0.0137 0.012 -1.191 0.234 -0.036 0.009\n",
|
||||||
|
"Pedalcyclist -0.0416 0.012 -3.560 0.000 -0.064 -0.019\n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"Omnibus: 46.489 Durbin-Watson: 1.112\n",
|
||||||
|
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 134.286\n",
|
||||||
|
"Skew: -0.136 Prob(JB): 6.92e-30\n",
|
||||||
|
"Kurtosis: 4.775 Cond. No. 18.5\n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"\n",
|
||||||
|
"Warnings:\n",
|
||||||
|
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"columns_regress = ['HE_FOOD', 'HE_WALK', 'HE_VACANCY', 'HE_SUPRFND','HE_HLTHINS','BINGE_CrudePrev',\n",
|
||||||
|
" 'CHECKUP_CrudePrev','BPHIGH_CrudePrev','SLEEP_CrudePrev', 'STROKE_CrudePrev', 'Drug Test', 'Pedalcyclist']\n",
|
||||||
|
"\n",
|
||||||
|
"multiply_cols = {'HE_FOOD': -1,'HE_WALK': 1,'HE_VACANCY': -1,'HE_SUPRFND':-1 , 'HE_HLTHINS': 1 ,'BINGE_CrudePrev': -1 , \n",
|
||||||
|
" 'CHECKUP_CrudePrev': 1,'BPHIGH_CrudePrev': -1,'SLEEP_CrudePrev': -1,'STROKE_CrudePrev' : -1, 'Drug Test' : -1, 'Pedalcyclist' : -1,}\n",
|
||||||
|
"\n",
|
||||||
|
"target = 'PHLTH_CrudePrev'\n",
|
||||||
|
"target_multiplier = -1\n",
|
||||||
|
"\n",
|
||||||
|
"multiplied_zscore_data_1, params_1 = model1.model_output(columns_regress,target,multiply_cols,target_multiplier = target_multiplier)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" OLS Regression Results \n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"Dep. Variable: y R-squared: 0.880\n",
|
||||||
|
"Model: OLS Adj. R-squared: 0.878\n",
|
||||||
|
"Method: Least Squares F-statistic: 600.6\n",
|
||||||
|
"Date: Tue, 28 Apr 2020 Prob (F-statistic): 0.00\n",
|
||||||
|
"Time: 09:32:30 Log-Likelihood: -359.89\n",
|
||||||
|
"No. Observations: 999 AIC: 745.8\n",
|
||||||
|
"Df Residuals: 986 BIC: 809.6\n",
|
||||||
|
"Df Model: 12 \n",
|
||||||
|
"Covariance Type: nonrobust \n",
|
||||||
|
"=====================================================================================\n",
|
||||||
|
" coef std err t P>|t| [0.025 0.975]\n",
|
||||||
|
"-------------------------------------------------------------------------------------\n",
|
||||||
|
"const 5.169e-16 0.011 4.68e-14 1.000 -0.022 0.022\n",
|
||||||
|
"HE_FOOD -0.0148 0.015 -1.001 0.317 -0.044 0.014\n",
|
||||||
|
"HE_WALK -0.0438 0.013 -3.306 0.001 -0.070 -0.018\n",
|
||||||
|
"HE_VACANCY -0.0091 0.014 -0.666 0.506 -0.036 0.018\n",
|
||||||
|
"HE_SUPRFND -0.0544 0.015 -3.553 0.000 -0.084 -0.024\n",
|
||||||
|
"HE_HLTHINS 0.2667 0.029 9.086 0.000 0.209 0.324\n",
|
||||||
|
"BINGE_CrudePrev 0.4644 0.028 16.305 0.000 0.408 0.520\n",
|
||||||
|
"CHECKUP_CrudePrev 0.6262 0.062 10.113 0.000 0.505 0.748\n",
|
||||||
|
"BPHIGH_CrudePrev 0.2980 0.074 4.047 0.000 0.154 0.443\n",
|
||||||
|
"SLEEP_CrudePrev 0.7537 0.020 38.535 0.000 0.715 0.792\n",
|
||||||
|
"STROKE_CrudePrev 0.4109 0.050 8.161 0.000 0.312 0.510\n",
|
||||||
|
"Drug Test -0.0091 0.014 -0.656 0.512 -0.036 0.018\n",
|
||||||
|
"Pedalcyclist -0.0420 0.014 -2.994 0.003 -0.070 -0.014\n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"Omnibus: 104.229 Durbin-Watson: 1.055\n",
|
||||||
|
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 329.031\n",
|
||||||
|
"Skew: -0.500 Prob(JB): 3.56e-72\n",
|
||||||
|
"Kurtosis: 5.628 Cond. No. 18.5\n",
|
||||||
|
"==============================================================================\n",
|
||||||
|
"\n",
|
||||||
|
"Warnings:\n",
|
||||||
|
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#regression of MHLTH_CrudePrev \n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"columns_regress = ['HE_FOOD', 'HE_WALK', 'HE_VACANCY', 'HE_SUPRFND','HE_HLTHINS','BINGE_CrudePrev',\n",
|
||||||
|
" 'CHECKUP_CrudePrev','BPHIGH_CrudePrev','SLEEP_CrudePrev', 'STROKE_CrudePrev', 'Drug Test', 'Pedalcyclist']\n",
|
||||||
|
"\n",
|
||||||
|
"multiply_cols = {'HE_FOOD': -1,'HE_WALK': 1,'HE_VACANCY': -1,'HE_SUPRFND':-1 , 'HE_HLTHINS': 1 ,'BINGE_CrudePrev': -1 , \n",
|
||||||
|
" 'CHECKUP_CrudePrev': 1,'BPHIGH_CrudePrev': -1,'SLEEP_CrudePrev': -1,'STROKE_CrudePrev' : -1, 'Drug Test' : -1, 'Pedalcyclist' : -1,}\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"target = 'MHLTH_CrudePrev'\n",
|
||||||
|
"target_multiplier = -1\n",
|
||||||
|
"\n",
|
||||||
|
"multiplied_zscore_data_2, params_2 = model1.model_output(columns_regress,target,multiply_cols,target_multiplier = target_multiplier)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Steps to get the health scores :\n",
|
||||||
|
"\n",
|
||||||
|
"#### 1. Initialize the HealthScores() by passing the below arguments.\n",
|
||||||
|
"\n",
|
||||||
|
" Arguments:\n",
|
||||||
|
" weights_1(np array) : weights of model 1 (Physical health).\n",
|
||||||
|
" weights_2(np array) : weights of model 2 (Mental health).\n",
|
||||||
|
" weights_3(np array) : weights of model 3 (Life Expectancy).\n",
|
||||||
|
" multiplied_data(df) : all census tracts data which needs to be multiplied with weights to get health scores\n",
|
||||||
|
" geoid(Series) : geoids of census tracts to concatenate with our health scores data.\n",
|
||||||
|
" \n",
|
||||||
|
" Default Arguments :\n",
|
||||||
|
" \n",
|
||||||
|
" is_weighted_average(boolean) : weights calculation methodology(default is True)\n",
|
||||||
|
" weightage(list) : weightage for each y-variable(default : [0.25,0.25,0.5])\n",
|
||||||
|
" \n",
|
||||||
|
"#### 2. Call the final_scaled_data() method\n",
|
||||||
|
"\n",
|
||||||
|
" Returns : \n",
|
||||||
|
" final_data(df) : dataframe with all health scores and geoids\n",
|
||||||
|
" weights_tables(df) : dataframe with each y-variable weights and averaged weights"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([0.0117727 , 0.00751751, 0.01959428, 0.01042273, 0.128936 ,\n",
|
||||||
|
" 0.1184022 , 0.19513146, 0.14433487, 0.19330818, 0.16154886,\n",
|
||||||
|
" 0.00396387, 0.00506735])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"health_scores1 = HealthScores(params_1,params_2,params_le,multiplied_zscore_data_1,data['geoid'])\n",
|
||||||
|
"final_data,weights_table = health_scores1.final_scaled_data()\n",
|
||||||
|
"\n",
|
||||||
|
"health_scores1.weights #Can access the weights directly like this."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>geoid</th>\n",
|
||||||
|
" <th>HE_FOOD</th>\n",
|
||||||
|
" <th>HE_WALK</th>\n",
|
||||||
|
" <th>HE_VACANCY</th>\n",
|
||||||
|
" <th>HE_SUPRFND</th>\n",
|
||||||
|
" <th>HE_HLTHINS</th>\n",
|
||||||
|
" <th>BINGE_CrudePrev</th>\n",
|
||||||
|
" <th>CHECKUP_CrudePrev</th>\n",
|
||||||
|
" <th>BPHIGH_CrudePrev</th>\n",
|
||||||
|
" <th>SLEEP_CrudePrev</th>\n",
|
||||||
|
" <th>STROKE_CrudePrev</th>\n",
|
||||||
|
" <th>Drug Test</th>\n",
|
||||||
|
" <th>Pedalcyclist</th>\n",
|
||||||
|
" <th>health_scores</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>48085030100</td>\n",
|
||||||
|
" <td>96.091281</td>\n",
|
||||||
|
" <td>2.544479</td>\n",
|
||||||
|
" <td>76.981603</td>\n",
|
||||||
|
" <td>50.0</td>\n",
|
||||||
|
" <td>13.890877</td>\n",
|
||||||
|
" <td>74.926254</td>\n",
|
||||||
|
" <td>9.292503</td>\n",
|
||||||
|
" <td>78.357236</td>\n",
|
||||||
|
" <td>78.620690</td>\n",
|
||||||
|
" <td>79.207921</td>\n",
|
||||||
|
" <td>98.520548</td>\n",
|
||||||
|
" <td>100.000000</td>\n",
|
||||||
|
" <td>69.027647</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>48085030201</td>\n",
|
||||||
|
" <td>98.439143</td>\n",
|
||||||
|
" <td>11.524690</td>\n",
|
||||||
|
" <td>92.643096</td>\n",
|
||||||
|
" <td>50.0</td>\n",
|
||||||
|
" <td>15.042605</td>\n",
|
||||||
|
" <td>74.926254</td>\n",
|
||||||
|
" <td>9.292503</td>\n",
|
||||||
|
" <td>78.357236</td>\n",
|
||||||
|
" <td>78.620690</td>\n",
|
||||||
|
" <td>79.207921</td>\n",
|
||||||
|
" <td>100.000000</td>\n",
|
||||||
|
" <td>100.000000</td>\n",
|
||||||
|
" <td>71.151708</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>48085030202</td>\n",
|
||||||
|
" <td>95.919226</td>\n",
|
||||||
|
" <td>7.482860</td>\n",
|
||||||
|
" <td>67.903118</td>\n",
|
||||||
|
" <td>50.0</td>\n",
|
||||||
|
" <td>18.821577</td>\n",
|
||||||
|
" <td>57.227139</td>\n",
|
||||||
|
" <td>5.385428</td>\n",
|
||||||
|
" <td>85.658409</td>\n",
|
||||||
|
" <td>79.310345</td>\n",
|
||||||
|
" <td>90.099010</td>\n",
|
||||||
|
" <td>97.991071</td>\n",
|
||||||
|
" <td>100.000000</td>\n",
|
||||||
|
" <td>71.239647</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>48085030203</td>\n",
|
||||||
|
" <td>98.935264</td>\n",
|
||||||
|
" <td>13.103215</td>\n",
|
||||||
|
" <td>83.265171</td>\n",
|
||||||
|
" <td>50.0</td>\n",
|
||||||
|
" <td>22.212623</td>\n",
|
||||||
|
" <td>57.227139</td>\n",
|
||||||
|
" <td>5.385428</td>\n",
|
||||||
|
" <td>85.658409</td>\n",
|
||||||
|
" <td>79.310345</td>\n",
|
||||||
|
" <td>90.099010</td>\n",
|
||||||
|
" <td>100.000000</td>\n",
|
||||||
|
" <td>99.414712</td>\n",
|
||||||
|
" <td>74.373851</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>48085030302</td>\n",
|
||||||
|
" <td>96.612195</td>\n",
|
||||||
|
" <td>13.885353</td>\n",
|
||||||
|
" <td>79.536820</td>\n",
|
||||||
|
" <td>50.0</td>\n",
|
||||||
|
" <td>28.758720</td>\n",
|
||||||
|
" <td>70.058997</td>\n",
|
||||||
|
" <td>9.926082</td>\n",
|
||||||
|
" <td>80.312907</td>\n",
|
||||||
|
" <td>82.844828</td>\n",
|
||||||
|
" <td>86.633663</td>\n",
|
||||||
|
" <td>100.000000</td>\n",
|
||||||
|
" <td>98.545835</td>\n",
|
||||||
|
" <td>83.926690</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" geoid HE_FOOD HE_WALK HE_VACANCY HE_SUPRFND HE_HLTHINS \\\n",
|
||||||
|
"0 48085030100 96.091281 2.544479 76.981603 50.0 13.890877 \n",
|
||||||
|
"1 48085030201 98.439143 11.524690 92.643096 50.0 15.042605 \n",
|
||||||
|
"2 48085030202 95.919226 7.482860 67.903118 50.0 18.821577 \n",
|
||||||
|
"3 48085030203 98.935264 13.103215 83.265171 50.0 22.212623 \n",
|
||||||
|
"4 48085030302 96.612195 13.885353 79.536820 50.0 28.758720 \n",
|
||||||
|
"\n",
|
||||||
|
" BINGE_CrudePrev CHECKUP_CrudePrev BPHIGH_CrudePrev SLEEP_CrudePrev \\\n",
|
||||||
|
"0 74.926254 9.292503 78.357236 78.620690 \n",
|
||||||
|
"1 74.926254 9.292503 78.357236 78.620690 \n",
|
||||||
|
"2 57.227139 5.385428 85.658409 79.310345 \n",
|
||||||
|
"3 57.227139 5.385428 85.658409 79.310345 \n",
|
||||||
|
"4 70.058997 9.926082 80.312907 82.844828 \n",
|
||||||
|
"\n",
|
||||||
|
" STROKE_CrudePrev Drug Test Pedalcyclist health_scores \n",
|
||||||
|
"0 79.207921 98.520548 100.000000 69.027647 \n",
|
||||||
|
"1 79.207921 100.000000 100.000000 71.151708 \n",
|
||||||
|
"2 90.099010 97.991071 100.000000 71.239647 \n",
|
||||||
|
"3 90.099010 100.000000 99.414712 74.373851 \n",
|
||||||
|
"4 86.633663 100.000000 98.545835 83.926690 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"final_data.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>phy_health_weights</th>\n",
|
||||||
|
" <th>mntl_health_weights</th>\n",
|
||||||
|
" <th>life_expectancy_weights</th>\n",
|
||||||
|
" <th>averaged_weights</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>HE_FOOD</th>\n",
|
||||||
|
" <td>-0.056542</td>\n",
|
||||||
|
" <td>-0.014846</td>\n",
|
||||||
|
" <td>0.089070</td>\n",
|
||||||
|
" <td>0.011773</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>HE_WALK</th>\n",
|
||||||
|
" <td>-0.073147</td>\n",
|
||||||
|
" <td>-0.043824</td>\n",
|
||||||
|
" <td>0.092569</td>\n",
|
||||||
|
" <td>0.007518</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>HE_VACANCY</th>\n",
|
||||||
|
" <td>-0.038049</td>\n",
|
||||||
|
" <td>-0.009083</td>\n",
|
||||||
|
" <td>0.112404</td>\n",
|
||||||
|
" <td>0.019594</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>HE_SUPRFND</th>\n",
|
||||||
|
" <td>-0.060990</td>\n",
|
||||||
|
" <td>-0.054371</td>\n",
|
||||||
|
" <td>0.104936</td>\n",
|
||||||
|
" <td>0.010423</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>HE_HLTHINS</th>\n",
|
||||||
|
" <td>0.280300</td>\n",
|
||||||
|
" <td>0.266710</td>\n",
|
||||||
|
" <td>0.311076</td>\n",
|
||||||
|
" <td>0.128936</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>BINGE_CrudePrev</th>\n",
|
||||||
|
" <td>0.328551</td>\n",
|
||||||
|
" <td>0.464386</td>\n",
|
||||||
|
" <td>0.140354</td>\n",
|
||||||
|
" <td>0.118402</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>CHECKUP_CrudePrev</th>\n",
|
||||||
|
" <td>0.469246</td>\n",
|
||||||
|
" <td>0.626203</td>\n",
|
||||||
|
" <td>0.336980</td>\n",
|
||||||
|
" <td>0.195131</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>BPHIGH_CrudePrev</th>\n",
|
||||||
|
" <td>0.530143</td>\n",
|
||||||
|
" <td>0.298016</td>\n",
|
||||||
|
" <td>0.240319</td>\n",
|
||||||
|
" <td>0.144335</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>SLEEP_CrudePrev</th>\n",
|
||||||
|
" <td>0.384115</td>\n",
|
||||||
|
" <td>0.753666</td>\n",
|
||||||
|
" <td>0.307548</td>\n",
|
||||||
|
" <td>0.193308</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>STROKE_CrudePrev</th>\n",
|
||||||
|
" <td>0.505871</td>\n",
|
||||||
|
" <td>0.410902</td>\n",
|
||||||
|
" <td>0.274058</td>\n",
|
||||||
|
" <td>0.161549</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>Drug Test</th>\n",
|
||||||
|
" <td>-0.013707</td>\n",
|
||||||
|
" <td>-0.009082</td>\n",
|
||||||
|
" <td>0.029366</td>\n",
|
||||||
|
" <td>0.003964</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>Pedalcyclist</th>\n",
|
||||||
|
" <td>-0.041551</td>\n",
|
||||||
|
" <td>-0.042050</td>\n",
|
||||||
|
" <td>0.064775</td>\n",
|
||||||
|
" <td>0.005067</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" phy_health_weights mntl_health_weights \\\n",
|
||||||
|
"HE_FOOD -0.056542 -0.014846 \n",
|
||||||
|
"HE_WALK -0.073147 -0.043824 \n",
|
||||||
|
"HE_VACANCY -0.038049 -0.009083 \n",
|
||||||
|
"HE_SUPRFND -0.060990 -0.054371 \n",
|
||||||
|
"HE_HLTHINS 0.280300 0.266710 \n",
|
||||||
|
"BINGE_CrudePrev 0.328551 0.464386 \n",
|
||||||
|
"CHECKUP_CrudePrev 0.469246 0.626203 \n",
|
||||||
|
"BPHIGH_CrudePrev 0.530143 0.298016 \n",
|
||||||
|
"SLEEP_CrudePrev 0.384115 0.753666 \n",
|
||||||
|
"STROKE_CrudePrev 0.505871 0.410902 \n",
|
||||||
|
"Drug Test -0.013707 -0.009082 \n",
|
||||||
|
"Pedalcyclist -0.041551 -0.042050 \n",
|
||||||
|
"\n",
|
||||||
|
" life_expectancy_weights averaged_weights \n",
|
||||||
|
"HE_FOOD 0.089070 0.011773 \n",
|
||||||
|
"HE_WALK 0.092569 0.007518 \n",
|
||||||
|
"HE_VACANCY 0.112404 0.019594 \n",
|
||||||
|
"HE_SUPRFND 0.104936 0.010423 \n",
|
||||||
|
"HE_HLTHINS 0.311076 0.128936 \n",
|
||||||
|
"BINGE_CrudePrev 0.140354 0.118402 \n",
|
||||||
|
"CHECKUP_CrudePrev 0.336980 0.195131 \n",
|
||||||
|
"BPHIGH_CrudePrev 0.240319 0.144335 \n",
|
||||||
|
"SLEEP_CrudePrev 0.307548 0.193308 \n",
|
||||||
|
"STROKE_CrudePrev 0.274058 0.161549 \n",
|
||||||
|
"Drug Test 0.029366 0.003964 \n",
|
||||||
|
"Pedalcyclist 0.064775 0.005067 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"weights_table"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue