added revised formulation

This commit is contained in:
Saran Ahluwalia 2021-12-13 05:03:41 -05:00
parent c7422ca15a
commit 94faab0c4c

View file

@ -478,16 +478,16 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(73056, 4)"
"(73056, 280)"
]
},
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -755,7 +755,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@ -769,7 +769,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@ -779,7 +779,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@ -1095,7 +1095,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@ -1104,52 +1104,6 @@
" 'current_methodology_denominator', 'current_methodology_percent']]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 67811\n",
"True 2\n",
"Name: current_methodology_percent, dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_percentile = 90\n",
"\n",
"(final_df['current_methodology_percent'] >= 90).value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/0m/ppxy6yr56jx1mk52p_9sf2sw0000gn/T/ipykernel_40643/3972884231.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)\n"
]
}
],
"source": [
"final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -1159,7 +1113,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@ -1168,7 +1122,20 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Cutoff applied to 'current_methodology_percent'; named once so the\n",
"# comparison below does not repeat a magic number.\n",
"common_percentile = 90\n",
"\n",
"# Boolean flag: True where the current-methodology percent is at or above the cutoff.\n",
"final_df[\"current_threshold_exceeded\"] = (\n",
"    final_df['current_methodology_percent'] >= common_percentile\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@ -1177,7 +1144,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@ -1188,7 +1155,7 @@
"Name: new_threshold_exceeded, dtype: int64"
]
},
"execution_count": 56,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@ -1199,7 +1166,29 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 67811\n",
"True 2\n",
"Name: current_threshold_exceeded, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_df[\"current_threshold_exceeded\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
@ -1210,22 +1199,21 @@
"22446 100.0\n",
"39484 100.0\n",
"61182 100.0\n",
" ... \n",
"40143 0.0\n",
"66932 0.0\n",
"44151 0.0\n",
"46733 0.0\n",
"62933 0.0\n",
"Name: hbrd_rank, Length: 67813, dtype: float64"
"27460 100.0\n",
"59657 100.0\n",
"31732 100.0\n",
"12998 100.0\n",
"29681 100.0\n",
"Name: hbrd_rank, dtype: float64"
]
},
"execution_count": 57,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_df[\"hbrd_rank\"].sort_values(ascending=False)"
"final_df[\"hbrd_rank\"].sort_values(ascending=False)[:10]"
]
},
{
@ -1378,6 +1366,7 @@
}
],
"source": [
"# find the tracts where the current and new threshold flags disagree\n",
"final_df.loc[final_df[\n",
" 'current_threshold_exceeded'] != final_df['new_threshold_exceeded']].head()"
]