added revised formulation

2025-08-28 09:21:40 -07:00 · 2021-12-13 05:03:41 -05:00 · 2021-12-13 05:03:41 -05:00 · 94faab0c4c
commit 94faab0c4c
parent c7422ca15a
1 changed file with 56 additions and 67 deletions
--- a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb
@@ -478,16 +478,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "(73056, 4)"
+       "(73056, 280)"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -755,7 +755,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -769,7 +769,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -779,7 +779,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1095,7 +1095,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1104,52 +1104,6 @@
    "       'current_methodology_denominator', 'current_methodology_percent']]"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False    67811\n",
-       "True         2\n",
-       "Name: current_methodology_percent, dtype: int64"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "common_percentile = 90\n",
-    "\n",
-    "(final_df['current_methodology_percent'] >= 90).value_counts()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/0m/ppxy6yr56jx1mk52p_9sf2sw0000gn/T/ipykernel_40643/3972884231.py:1: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)\n"
-     ]
-    }
-   ],
-   "source": [
-    "final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)"
-   ]
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -1159,7 +1113,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1168,7 +1122,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
+    "common_percentile = 90\n",
+    "\n",
+    "(final_df['current_methodology_percent'] >= 90).value_counts()\n",
+    "\n",
+    "final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
@@ -1177,7 +1144,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@@ -1188,7 +1155,7 @@
       "Name: new_threshold_exceeded, dtype: int64"
      ]
     },
-     "execution_count": 56,
+     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -1199,7 +1166,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
+     "data": {
+      "text/plain": [
+       "False    67811\n",
+       "True         2\n",
+       "Name: current_threshold_exceeded, dtype: int64"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "final_df[\"current_threshold_exceeded\"].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
@@ -1210,22 +1199,21 @@
       "22446    100.0\n",
       "39484    100.0\n",
       "61182    100.0\n",
-       "         ...  \n",
+       "27460    100.0\n",
-       "40143      0.0\n",
+       "59657    100.0\n",
-       "66932      0.0\n",
+       "31732    100.0\n",
-       "44151      0.0\n",
+       "12998    100.0\n",
-       "46733      0.0\n",
+       "29681    100.0\n",
-       "62933      0.0\n",
+       "Name: hbrd_rank, dtype: float64"
-       "Name: hbrd_rank, Length: 67813, dtype: float64"
      ]
     },
-     "execution_count": 57,
+     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "final_df[\"hbrd_rank\"].sort_values(ascending=False)"
+    "final_df[\"hbrd_rank\"].sort_values(ascending=False)[:10]"
   ]
  },
  {
@@ -1378,6 +1366,7 @@
    }
   ],
   "source": [
+    "# find the corresponding tracts that are different between the two\n",
    "final_df.loc[final_df[\n",
    "    'current_threshold_exceeded'] != final_df['new_threshold_exceeded']].head()"
   ]