added revised formulation

This commit is contained in:
Saran Ahluwalia 2021-12-13 05:03:41 -05:00
parent c7422ca15a
commit 94faab0c4c

View file

@ -478,16 +478,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(73056, 4)" "(73056, 280)"
] ]
}, },
"execution_count": 5, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -755,7 +755,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -769,7 +769,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -779,7 +779,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1095,7 +1095,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1104,52 +1104,6 @@
" 'current_methodology_denominator', 'current_methodology_percent']]" " 'current_methodology_denominator', 'current_methodology_percent']]"
] ]
}, },
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 67811\n",
"True 2\n",
"Name: current_methodology_percent, dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_percentile = 90\n",
"\n",
"(final_df['current_methodology_percent'] >= 90).value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/0m/ppxy6yr56jx1mk52p_9sf2sw0000gn/T/ipykernel_40643/3972884231.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)\n"
]
}
],
"source": [
"final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -1159,7 +1113,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1168,7 +1122,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Cutoff applied to current_methodology_percent; rows at or above it are flagged.\n",
"common_percentile = 90\n",
"\n",
"# Compare against the named cutoff instead of repeating the literal, so the\n",
"# constant and the comparison cannot drift apart.\n",
"final_df[\"current_threshold_exceeded\"] = (\n",
"    final_df['current_methodology_percent'] >= common_percentile\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1177,7 +1144,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1188,7 +1155,7 @@
"Name: new_threshold_exceeded, dtype: int64" "Name: new_threshold_exceeded, dtype: int64"
] ]
}, },
"execution_count": 56, "execution_count": 15,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1199,7 +1166,29 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False 67811\n",
"True 2\n",
"Name: current_threshold_exceeded, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_df[\"current_threshold_exceeded\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1210,22 +1199,21 @@
"22446 100.0\n", "22446 100.0\n",
"39484 100.0\n", "39484 100.0\n",
"61182 100.0\n", "61182 100.0\n",
" ... \n", "27460 100.0\n",
"40143 0.0\n", "59657 100.0\n",
"66932 0.0\n", "31732 100.0\n",
"44151 0.0\n", "12998 100.0\n",
"46733 0.0\n", "29681 100.0\n",
"62933 0.0\n", "Name: hbrd_rank, dtype: float64"
"Name: hbrd_rank, Length: 67813, dtype: float64"
] ]
}, },
"execution_count": 57, "execution_count": 17,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"final_df[\"hbrd_rank\"].sort_values(ascending=False)" "final_df[\"hbrd_rank\"].sort_values(ascending=False)[:10]"
] ]
}, },
{ {
@ -1378,6 +1366,7 @@
} }
], ],
"source": [ "source": [
"# Find the tracts where current_threshold_exceeded and new_threshold_exceeded disagree\n",
"final_df.loc[final_df[\n", "final_df.loc[final_df[\n",
" 'current_threshold_exceeded'] != final_df['new_threshold_exceeded']].head()" " 'current_threshold_exceeded'] != final_df['new_threshold_exceeded']].head()"
] ]