From 26427323341426765a4712fa441384f35df328fb Mon Sep 17 00:00:00 2001 From: Saran Ahluwalia Date: Sat, 11 Dec 2021 13:40:13 -0500 Subject: [PATCH] comments --- ...ve_differences_between_methodologies.ipynb | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_11_2011_relative_differences_between_methodologies.ipynb b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_11_2011_relative_differences_between_methodologies.ipynb index 9e10921b..3c83cf34 100644 --- a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_11_2011_relative_differences_between_methodologies.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_11_2011_relative_differences_between_methodologies.ipynb @@ -496,6 +496,13 @@ "housingburden.shape" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### As desired, we see a uniform distribution for the percentile rank for burdened households" + ] + }, { "cell_type": "code", "execution_count": 28, @@ -535,6 +542,13 @@ "sns.histplot(housingburden[\"hbrd_rank\"])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now we compute a baseline for comparison" + ] + }, { "cell_type": "code", "execution_count": 20, @@ -750,13 +764,20 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "housingburden.to_csv(\"housing_burden.csv\", index=False)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now we construct the distribution of differences in the number of owned and rented burdened households\n" + ] + }, { "cell_type": "code", "execution_count": 27, @@ -786,7 +807,6 @@ } ], "source": [ - "### Now compare to relative \n", "\n", "plt.figure(figsize=(12, 8))\n", "plt.title('Distribution of differences between two methodologies')\n", @@ -802,7 +822,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Below 
are KDE plots of the respective densities. Green is the current methodology; red represents the revision." + "### Plotting both distributions to account for different aggregations of owned and rented burdened households. Red is the revised version; green is the current methodology." ] }, { @@ -1040,6 +1060,13 @@ "p = np.sum(bs_replicates >= empirical_diff_means) / len(bs_replicates)\n", "print(\"P Value: {:.4f}\".format(round(p, 2)))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {