diff --git a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb index ac894cfd..9265275c 100644 --- a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb @@ -478,16 +478,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(73056, 4)" + "(73056, 280)" ] }, - "execution_count": 5, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -755,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -769,7 +769,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -779,7 +779,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1095,7 +1095,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -1104,52 +1104,6 @@ " 'current_methodology_denominator', 'current_methodology_percent']]" ] }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False 67811\n", - "True 2\n", - "Name: current_methodology_percent, dtype: int64" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "common_percentile = 90\n", - "\n", - "(final_df['current_methodology_percent'] >= 90).value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/0m/ppxy6yr56jx1mk52p_9sf2sw0000gn/T/ipykernel_40643/3972884231.py:1: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)\n" - ] - } - ], - "source": [ - "final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1159,7 +1113,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -1168,7 +1122,20 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "common_percentile = 90\n", + "\n", + "(final_df['current_methodology_percent'] >= 90).value_counts()\n", + "\n", + "final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1177,7 +1144,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1188,7 +1155,7 @@ "Name: new_threshold_exceeded, dtype: int64" ] }, - "execution_count": 56, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1199,7 +1166,29 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 67811\n", + "True 2\n", + "Name: current_threshold_exceeded, dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df[\"current_threshold_exceeded\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1210,22 +1199,21 @@ "22446 100.0\n", "39484 100.0\n", "61182 100.0\n", - " ... \n", - "40143 0.0\n", - "66932 0.0\n", - "44151 0.0\n", - "46733 0.0\n", - "62933 0.0\n", - "Name: hbrd_rank, Length: 67813, dtype: float64" + "27460 100.0\n", + "59657 100.0\n", + "31732 100.0\n", + "12998 100.0\n", + "29681 100.0\n", + "Name: hbrd_rank, dtype: float64" ] }, - "execution_count": 57, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df[\"hbrd_rank\"].sort_values(ascending=False)" + "final_df[\"hbrd_rank\"].sort_values(ascending=False)[:10]" ] }, { @@ -1378,6 +1366,7 @@ } ], "source": [ + "# find the corresponding tracts that are different between the two\n", "final_df.loc[final_df[\n", " 'current_threshold_exceeded'] != final_df['new_threshold_exceeded']].head()" ]