adding housing CSV to score

2025-08-24 08:51:40 -07:00 · 2021-06-23 12:26:45 -07:00 · 2021-06-23 12:26:45 -07:00 · 678ab8c081
commit 678ab8c081
parent ec75b732cb
3 changed files with 165 additions and 12 deletions
--- a/score/ipython/housing_and_transportation_etl.ipynb
+++ b/score/ipython/housing_and_transportation_etl.ipynb
@ -0,0 +1,124 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c21b63a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import censusdata\n",
+    "import csv\n",
+    "import requests\n",
+    "import zipfile\n",
+    "\n",
+    "from pathlib import Path\n",
+    "\n",
+    "ACS_YEAR = 2019\n",
+    "\n",
+    "DATA_PATH = Path.cwd().parent / \"data\"\n",
+    "FIPS_CSV_PATH = DATA_PATH / \"fips_states_2010.csv\"\n",
+    "OUTPUT_PATH = DATA_PATH / \"dataset\" / \"housing_and_transportation_index\"\n",
+    "\n",
+    "GEOID_FIELD_NAME = \"GEOID10\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6696bc66",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data source, one zip archive per state / territory, e.g.:\n",
+    "# https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid=01\n",
+    "\n",
+    "# Directory that holds the downloaded archives and their extracted contents.\n",
+    "# Make the directory if it doesn't exist\n",
+    "zip_file_dir = DATA_PATH / \"tmp\" / \"housing_and_transportation_index\"\n",
+    "zip_file_dir.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# Download each state / territory individually\n",
+    "dfs = []\n",
+    "with open(FIPS_CSV_PATH) as csv_file:\n",
+    "    csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
+    "\n",
+    "    # Skip the first row (treated as a header, not a FIPS code)\n",
+    "    next(csv_reader, None)\n",
+    "\n",
+    "    for row in csv_reader:\n",
+    "        # Blank lines come back from csv.reader as empty lists\n",
+    "        if not row:\n",
+    "            continue\n",
+    "\n",
+    "        fips = row[0].strip()\n",
+    "\n",
+    "        print(f\"Downloading data for state/territory with FIPS code {fips}\")\n",
+    "\n",
+    "        # NOTE(review): verify=False turns off TLS certificate verification;\n",
+    "        # confirm it is still needed for this host.\n",
+    "        download = requests.get(\n",
+    "            f\"https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid={fips}\",\n",
+    "            verify=False,\n",
+    "        )\n",
+    "        # Fail here on an HTTP error instead of writing the error page to disk\n",
+    "        download.raise_for_status()\n",
+    "\n",
+    "        # Save the archive, then unpack it next to itself\n",
+    "        zip_file_path = zip_file_dir / f\"{fips}-downloaded.zip\"\n",
+    "        with open(zip_file_path, \"wb\") as zip_file:\n",
+    "            zip_file.write(download.content)\n",
+    "\n",
+    "        with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n",
+    "            zip_ref.extractall(zip_file_dir)\n",
+    "\n",
+    "        # The archive is expected to contain a CSV with this name\n",
+    "        tmp_csv_file_path = zip_file_dir / f\"htaindex_data_blkgrps_{fips}.csv\"\n",
+    "        tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)\n",
+    "\n",
+    "        dfs.append(tmp_df)\n",
+    "\n",
+    "# Stack the per-state frames into one national frame\n",
+    "df = pd.concat(dfs)\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "244e0d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rename the block group ID column, then strip literal double quotes from its values\n",
+    "df = df.rename(columns={\"blkgrp\": GEOID_FIELD_NAME})\n",
+    "df[GEOID_FIELD_NAME] = df[GEOID_FIELD_NAME].str.replace('\"', \"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8275c1ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the combined frame to one CSV, creating the output directory if needed\n",
+    "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "output_csv_path = OUTPUT_PATH / \"usa.csv\"\n",
+    "df.to_csv(path_or_buf=output_csv_path, index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/score/ipython/score_calc.ipynb
+++ b/score/ipython/score_calc.ipynb
@ -7,11 +7,13 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Before running this notebook, you must run the following notebooks:\n",
+    "# Before running this notebook, you must run the following notebooks (in any order):\n",
    "# 1. `ejscreen_etl.ipynb`\n",
    "# 2. `census_etl.ipynb`\n",
+    "# 3. `housing_and_transportation_etl.ipynb`\n",
    "\n",
    "import collections\n",
+    "import functools\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "import csv\n",
@ -74,6 +76,25 @@
    "census_df.head()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "144bdde2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the housing and transportation index CSV, keeping the GEOID column as strings\n",
+    "housing_and_transportation_index_csv = data_path.joinpath(\n",
+    "    \"dataset\", \"housing_and_transportation_index\", \"usa.csv\"\n",
+    ")\n",
+    "housing_and_transportation_df = pd.read_csv(\n",
+    "    filepath_or_buffer=housing_and_transportation_index_csv,\n",
+    "    low_memory=False,\n",
+    "    dtype={GEOID_FIELD_NAME: \"string\"},\n",
+    ")\n",
+    "housing_and_transportation_df.head()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -81,11 +102,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Join the two datasets\n",
-    "df = ejscreen_df.merge(\n",
-    "    census_df,\n",
-    "    how=\"left\",\n",
-    "    on=GEOID_FIELD_NAME,\n",
+    "# Join all the data sources that use census block groups\n",
+    "dfs = [ejscreen_df, census_df, housing_and_transportation_df]\n",
+    "\n",
+    "df = functools.reduce(\n",
+    "    lambda left, right: pd.merge(\n",
+    "        left=left, right=right, on=GEOID_FIELD_NAME, how=\"outer\"\n",
+    "    ),\n",
+    "    dfs,\n",
    ")\n",
    "\n",
    "df.head()"
@ -195,6 +219,11 @@
    "        renamed_field=\"Unemployed Civilians (percent)\",\n",
    "        bucket=BUCKET_SOCIOECONOMIC,\n",
    "    ),\n",
+    "    DataSet(\n",
+    "        input_field=\"ht_ami\",\n",
+    "        renamed_field=\"Housing + Transportation Costs % Income for the Regional Typical Household\",\n",
+    "        bucket=BUCKET_SOCIOECONOMIC,\n",
+    "    ),\n",
    "]"
   ]
  },
--- a/score/ipython/scoring_comparison.ipynb
+++ b/score/ipython/scoring_comparison.ipynb
@ -301,12 +301,12 @@
   "id": "0c534966",
   "metadata": {
    "variables": {
-     "all_100_sum": {},
-     "all_100_sum_percent": {},
-     "at_least_one_sum": {},
-     "at_least_one_sum_percent": {},
-     "cejst_cbgs_ca_only": {},
-     "ces_tracts_count": {}
+     "all_100_sum": "1373",
+     "all_100_sum_percent": "69%",
+     "at_least_one_sum": "1866",
+     "at_least_one_sum_percent": "94%",
+     "cejst_cbgs_ca_only": "10849",
+     "ces_tracts_count": "1983"
    }
   },
   "source": [