added revisions including ETL process for Table 8 acquisition

This commit is contained in:
Saran Ahluwalia 2021-12-11 08:28:01 -05:00
parent aa27f5d6e2
commit f80d8c1880
2 changed files with 407 additions and 20 deletions

File diff suppressed because one or more lines are too long

View file

@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Methodology to address fundamental problem 1 tiemized in Issue 1024"
"## Methodology to address fundamental problem 1 itemized in Issue 1024"
]
},
{
@ -26,7 +26,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -36,6 +36,68 @@
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ETL process for acquiring relevant tables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### NOTE: If you ran the ETL Process to acquire Table 8 in the other notebook of this draft PR you do not need to run the ETL cell block again"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copy and adapt certain sections of code from data_pipeline.utils\n",
"# Imports repeated here so this ETL cell is runnable on its own\n",
"# (the notebook's import cell only brings in pandas).\n",
"import os\n",
"import shutil\n",
"import sys\n",
"import zipfile\n",
"from pathlib import Path\n",
"\n",
"import requests\n",
"\n",
"def download_hud_dataset():\n",
"    \"\"\"Download the zipped HUD CHAS 2014-2018 dataset into the working directory.\"\"\"\n",
"    DOWNLOAD_FILENAME = \"HUD_ZIPPED.csv\"\n",
"    HOUSING_FTP_URL = \"https://www.huduser.gov/portal/datasets/cp/2014thru2018-140-csv.zip\"\n",
"    response = requests.get(HOUSING_FTP_URL, verify=True)\n",
"    if response.status_code == 200:\n",
"        file_contents = response.content\n",
"    else:\n",
"        # Bug fix: original referenced undefined name `file_url` here,\n",
"        # which would raise NameError instead of reporting the failed URL.\n",
"        sys.exit(\n",
"            f\"HTTP response {response.status_code} from url {HOUSING_FTP_URL}. Info: {response.content}\"\n",
"        )\n",
"\n",
"    # Write the contents to disk; the context manager guarantees the handle is closed.\n",
"    with open(DOWNLOAD_FILENAME, \"wb\") as file:\n",
"        file.write(file_contents)\n",
"\n",
"def extract_zipped_download(zip_file_path, unzipped_path):\n",
"    \"\"\"Extract the archive at `zip_file_path` into `unzipped_path`, then delete it.\"\"\"\n",
"    with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
"        zip_ref.extractall(unzipped_path)\n",
"    # cleanup temporary file\n",
"    os.remove(zip_file_path)\n",
"\n",
"def up_one_directory(path):\n",
"    \"\"\"Move `path` up one directory level; no-op when there is no upper directory.\"\"\"\n",
"    try:\n",
"        # Path.parents requires Python 3.6+ for use with shutil.move\n",
"        parent_dir = Path(path).parents[1]\n",
"        shutil.move(path, parent_dir)\n",
"    except IndexError:\n",
"        # no upper directory\n",
"        pass\n",
"\n",
"CURRENT_DIRECTORY = os.getcwd()\n",
"download_hud_dataset()\n",
"extract_zipped_download(CURRENT_DIRECTORY + \"/HUD_ZIPPED.csv\", CURRENT_DIRECTORY)\n",
"up_one_directory(CURRENT_DIRECTORY + \"/140/Table8.csv\")\n",
"shutil.rmtree(\"./140/\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -93,7 +155,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@ -205,7 +267,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -284,7 +346,7 @@
"4 01 01001020500 0.142515 68.221154"
]
},
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -315,10 +377,12 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": []
"source": [
"housingburden.to_csv(\"housing_burden.csv\", index=False)"
]
}
],
"metadata": {