added revised formulation

2025-07-08 20:39:59 -07:00 · 2021-12-13 05:03:41 -05:00 · 2021-12-13 05:03:41 -05:00 · 94faab0c4c
commit 94faab0c4c
parent c7422ca15a
1 changed files with 56 additions and 67 deletions
--- a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_12_2011_relative_differences_between_methodologies-ranking-percentile.ipynb
@@ -478,16 +478,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "(73056, 4)"
+       "(73056, 280)"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -755,7 +755,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -769,7 +769,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -779,7 +779,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1095,7 +1095,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1104,52 +1104,6 @@
    "       'current_methodology_denominator', 'current_methodology_percent']]"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False    67811\n",
-       "True         2\n",
-       "Name: current_methodology_percent, dtype: int64"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "common_percentile = 90\n",
-    "\n",
-    "(final_df['current_methodology_percent'] >= 90).value_counts()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/0m/ppxy6yr56jx1mk52p_9sf2sw0000gn/T/ipykernel_40643/3972884231.py:1: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)\n"
-     ]
-    }
-   ],
-   "source": [
-    "final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)"
-   ]
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -1159,7 +1113,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1168,7 +1122,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "common_percentile = 90\n",
+    "\n",
+    "(final_df['current_methodology_percent'] >= 90).value_counts()\n",
+    "\n",
+    "final_df[\"current_threshold_exceeded\"] = (final_df['current_methodology_percent'] >= 90)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1177,7 +1144,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@@ -1188,7 +1155,7 @@
       "Name: new_threshold_exceeded, dtype: int64"
      ]
     },
-     "execution_count": 56,
+     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -1199,7 +1166,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False    67811\n",
+       "True         2\n",
+       "Name: current_threshold_exceeded, dtype: int64"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "final_df[\"current_threshold_exceeded\"].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
@@ -1210,22 +1199,21 @@
       "22446    100.0\n",
       "39484    100.0\n",
       "61182    100.0\n",
-       "         ...  \n",
-       "40143      0.0\n",
-       "66932      0.0\n",
-       "44151      0.0\n",
-       "46733      0.0\n",
-       "62933      0.0\n",
-       "Name: hbrd_rank, Length: 67813, dtype: float64"
+       "27460    100.0\n",
+       "59657    100.0\n",
+       "31732    100.0\n",
+       "12998    100.0\n",
+       "29681    100.0\n",
+       "Name: hbrd_rank, dtype: float64"
      ]
     },
-     "execution_count": 57,
+     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "final_df[\"hbrd_rank\"].sort_values(ascending=False)"
+    "final_df[\"hbrd_rank\"].sort_values(ascending=False)[:10]"
   ]
  },
  {
@@ -1378,6 +1366,7 @@
    }
   ],
   "source": [
+    "# find the corresponding tracts that are different between the two\n",
    "final_df.loc[final_df[\n",
    "    'current_threshold_exceeded'] != final_df['new_threshold_exceeded']].head()"
   ]