mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
added revisions including ETL process for table 8 acquisition
This commit is contained in:
parent
aa27f5d6e2
commit
f80d8c1880
2 changed files with 407 additions and 20 deletions
File diff suppressed because one or more lines are too long
|
@ -4,7 +4,7 @@
|
|||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Methodology to address fundamental problem 1 tiemized in Issue 1024"
|
||||
"## Methodology to address fundamental problem 1 itemized in Issue 1024"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -26,7 +26,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -36,6 +36,68 @@
|
|||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ETL process for acquiring relevant tables"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### NOTE: If you ran the ETL process to acquire Table 8 in the other notebook of this draft PR, you do not need to run the ETL cell block again."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Copy and adapt certain sections of code from data_pipeline.utils \n",
|
||||
"\n",
|
||||
"def download_hud_dataset():\n",
|
||||
" DOWNLOAD_FILENAME = \"HUD_ZIPPED.csv\"\n",
|
||||
" HOUSING_FTP_URL = \"https://www.huduser.gov/portal/datasets/cp/2014thru2018-140-csv.zip\" \n",
|
||||
" response = requests.get(HOUSING_FTP_URL, verify=True)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" file_contents = response.content\n",
|
||||
" else:\n",
|
||||
" sys.exit(\n",
|
||||
" f\"HTTP response {response.status_code} from url {file_url}. Info: {response.content}\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Write the contents to disk.\n",
|
||||
" file = open(DOWNLOAD_FILENAME, \"wb\")\n",
|
||||
" file.write(file_contents)\n",
|
||||
" file.close()\n",
|
||||
" \n",
|
||||
"def extract_zipped_download(zip_file_path, unzipped_path):\n",
|
||||
" with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
|
||||
" zip_ref.extractall(unzipped_path)\n",
|
||||
" # cleanup temporary file\n",
|
||||
" os.remove(zip_file_path)\n",
|
||||
" \n",
|
||||
"def up_one_directory(path):\n",
|
||||
" try:\n",
|
||||
" # from Python 3.6\n",
|
||||
" parent_dir = Path(path).parents[1]\n",
|
||||
" # for Python 3.4/3.5, use str to convert the path to string\n",
|
||||
" # parent_dir = str(Path(path).parents[1])\n",
|
||||
" shutil.move(path, parent_dir)\n",
|
||||
" except IndexError:\n",
|
||||
" # no upper directory\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"CURRENT_DIRECTORY = os.getcwd()\n",
|
||||
"download_hud_dataset()\n",
|
||||
"extract_zipped_download(CURRENT_DIRECTORY + \"/HUD_ZIPPED.csv\", CURRENT_DIRECTORY) \n",
|
||||
"up_one_directory(CURRENT_DIRECTORY + \"/140/Table8.csv\")\n",
|
||||
"shutil.rmtree(\"./140/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
@ -93,7 +155,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -205,7 +267,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -284,7 +346,7 @@
|
|||
"4 01 01001020500 0.142515 68.221154"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -315,10 +377,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"housingburden.to_csv(\"housing_burden.csv\", index=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Loading…
Add table
Reference in a new issue