j40-cejst-2/data/data-pipeline/data_pipeline/ipython/explore_adjacency.ipynb

719 lines
43 KiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "8d2dd0fc-5bd6-4d28-9ef3-f7cf7403f0be",
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import sys\n",
"\n",
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)\n",
"\n",
"from data_pipeline.config import settings\n",
2022-09-28 13:35:52 -04:00
"from data_pipeline.etl.sources.geo_utils import (\n",
" add_tracts_for_geometries,\n",
" get_tract_geojson,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f36fe7ef-2717-48be-ae94-a3f0aa33acc1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%load_ext lab_black"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "207209d1-f95a-4b84-bc34-d37235a6dab4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-08-17 11:45:46,587 [data_pipeline.etl.sources.geo_utils] INFO Loading tract geometry data from census ETL\n",
"2022-08-17 11:45:46,588 [data_pipeline.etl.sources.geo_utils] DEBUG Loading existing tract geojson\n"
]
}
],
"source": [
"df = get_tract_geojson()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a6cd76db-9dff-4e18-aa26-bab9a841ce1f",
"metadata": {},
"outputs": [],
"source": [
"center = \"6030.04\"\n",
"center_GEOID10_TRACT = \"24027603004\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ce9487d8-5e27-4d07-8f9e-467ac7cdf7e0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>STATEFP10</th>\n",
" <th>COUNTYFP10</th>\n",
" <th>TRACTCE10</th>\n",
" <th>GEOID10_TRACT</th>\n",
" <th>NAME10</th>\n",
" <th>NAMELSAD10</th>\n",
" <th>MTFCC10</th>\n",
" <th>FUNCSTAT10</th>\n",
" <th>ALAND10</th>\n",
" <th>AWATER10</th>\n",
" <th>INTPTLAT10</th>\n",
" <th>INTPTLON10</th>\n",
" <th>geometry</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>53415</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>602100</td>\n",
" <td>24027602100</td>\n",
" <td>6021</td>\n",
" <td>Census Tract 6021</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>13769934</td>\n",
" <td>3674</td>\n",
" <td>+39.3076905</td>\n",
" <td>-076.8349752</td>\n",
" <td>POLYGON ((-76.86305 39.31484, -76.86308 39.315...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53416</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>602303</td>\n",
" <td>24027602303</td>\n",
" <td>6023.03</td>\n",
" <td>Census Tract 6023.03</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>11740756</td>\n",
" <td>26901</td>\n",
" <td>+39.2600506</td>\n",
" <td>-076.8754102</td>\n",
" <td>POLYGON ((-76.86750 39.25170, -76.86754 39.251...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53424</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>605503</td>\n",
" <td>24027605503</td>\n",
" <td>6055.03</td>\n",
" <td>Census Tract 6055.03</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>3158645</td>\n",
" <td>8444</td>\n",
" <td>+39.2274469</td>\n",
" <td>-076.8803625</td>\n",
" <td>POLYGON ((-76.88405 39.23543, -76.88398 39.235...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53429</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>605502</td>\n",
" <td>24027605502</td>\n",
" <td>6055.02</td>\n",
" <td>Census Tract 6055.02</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>5260984</td>\n",
" <td>16405</td>\n",
" <td>+39.2257617</td>\n",
" <td>-076.9054701</td>\n",
" <td>POLYGON ((-76.90881 39.21739, -76.90882 39.217...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53452</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>603004</td>\n",
" <td>24027603004</td>\n",
" <td>6030.04</td>\n",
" <td>Census Tract 6030.04</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>44425097</td>\n",
" <td>99882</td>\n",
" <td>+39.2817022</td>\n",
" <td>-076.9188315</td>\n",
" <td>POLYGON ((-76.98539 39.26610, -76.98557 39.266...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53453</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>605104</td>\n",
" <td>24027605104</td>\n",
" <td>6051.04</td>\n",
" <td>Census Tract 6051.04</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>39969526</td>\n",
" <td>1553493</td>\n",
" <td>+39.2369323</td>\n",
" <td>-076.9735549</td>\n",
" <td>POLYGON ((-76.97979 39.23322, -76.98018 39.233...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53454</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>603003</td>\n",
" <td>24027603003</td>\n",
" <td>6030.03</td>\n",
" <td>Census Tract 6030.03</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>45161338</td>\n",
" <td>174904</td>\n",
" <td>+39.3192333</td>\n",
" <td>-076.9680454</td>\n",
" <td>POLYGON ((-76.93350 39.35760, -76.93341 39.357...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53455</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>603001</td>\n",
" <td>24027603001</td>\n",
" <td>6030.01</td>\n",
" <td>Census Tract 6030.01</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>24726303</td>\n",
" <td>57708</td>\n",
" <td>+39.3279761</td>\n",
" <td>-076.8942412</td>\n",
" <td>POLYGON ((-76.86308 39.31501, -76.86305 39.314...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53464</th>\n",
" <td>24</td>\n",
" <td>027</td>\n",
" <td>602201</td>\n",
" <td>24027602201</td>\n",
" <td>6022.01</td>\n",
" <td>Census Tract 6022.01</td>\n",
" <td>G5020</td>\n",
" <td>S</td>\n",
" <td>4611549</td>\n",
" <td>6125</td>\n",
" <td>+39.2900228</td>\n",
" <td>-076.8721238</td>\n",
" <td>POLYGON ((-76.87750 39.30290, -76.87747 39.302...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" STATEFP10 COUNTYFP10 TRACTCE10 GEOID10_TRACT NAME10 \\\n",
"53415 24 027 602100 24027602100 6021 \n",
"53416 24 027 602303 24027602303 6023.03 \n",
"53424 24 027 605503 24027605503 6055.03 \n",
"53429 24 027 605502 24027605502 6055.02 \n",
"53452 24 027 603004 24027603004 6030.04 \n",
"53453 24 027 605104 24027605104 6051.04 \n",
"53454 24 027 603003 24027603003 6030.03 \n",
"53455 24 027 603001 24027603001 6030.01 \n",
"53464 24 027 602201 24027602201 6022.01 \n",
"\n",
" NAMELSAD10 MTFCC10 FUNCSTAT10 ALAND10 AWATER10 \\\n",
"53415 Census Tract 6021 G5020 S 13769934 3674 \n",
"53416 Census Tract 6023.03 G5020 S 11740756 26901 \n",
"53424 Census Tract 6055.03 G5020 S 3158645 8444 \n",
"53429 Census Tract 6055.02 G5020 S 5260984 16405 \n",
"53452 Census Tract 6030.04 G5020 S 44425097 99882 \n",
"53453 Census Tract 6051.04 G5020 S 39969526 1553493 \n",
"53454 Census Tract 6030.03 G5020 S 45161338 174904 \n",
"53455 Census Tract 6030.01 G5020 S 24726303 57708 \n",
"53464 Census Tract 6022.01 G5020 S 4611549 6125 \n",
"\n",
" INTPTLAT10 INTPTLON10 \\\n",
"53415 +39.3076905 -076.8349752 \n",
"53416 +39.2600506 -076.8754102 \n",
"53424 +39.2274469 -076.8803625 \n",
"53429 +39.2257617 -076.9054701 \n",
"53452 +39.2817022 -076.9188315 \n",
"53453 +39.2369323 -076.9735549 \n",
"53454 +39.3192333 -076.9680454 \n",
"53455 +39.3279761 -076.8942412 \n",
"53464 +39.2900228 -076.8721238 \n",
"\n",
" geometry \n",
"53415 POLYGON ((-76.86305 39.31484, -76.86308 39.315... \n",
"53416 POLYGON ((-76.86750 39.25170, -76.86754 39.251... \n",
"53424 POLYGON ((-76.88405 39.23543, -76.88398 39.235... \n",
"53429 POLYGON ((-76.90881 39.21739, -76.90882 39.217... \n",
"53452 POLYGON ((-76.98539 39.26610, -76.98557 39.266... \n",
"53453 POLYGON ((-76.97979 39.23322, -76.98018 39.233... \n",
"53454 POLYGON ((-76.93350 39.35760, -76.93341 39.357... \n",
"53455 POLYGON ((-76.86308 39.31501, -76.86305 39.314... \n",
"53464 POLYGON ((-76.87750 39.30290, -76.87747 39.302... "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_subset = df.query(\n",
" 'STATEFP10==\"24\" and COUNTYFP10==\"027\" and NAME10 in [\"6030.03\", \"6051.04\", \"6055.02\", \"6055.03\", \"6023.03\", \"6022.01\", \"6030.01\", \"6022.01\", \"6021\", \"6030.04\"]'\n",
")\n",
"df_subset"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "64e8c34d-df3a-4011-afad-c8a04af71d39",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAD4CAYAAACE9dGgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA9eklEQVR4nO2deXhV1bn/P++ZcjIPZCAkIQkggyAECEFRHHAo2sFacW4dqrXe3vZ2tGrtcGtvf7debbWztWrV2jq2WsdSqjgLIWFGZggkBEgg85ycs35/nB0I4SQ5J9lnXp/nOU9O9t5r73ef7HzPWu961/uKUgqNRqOJVCyhNkCj0WjGghYxjUYT0WgR02g0EY0WMY1GE9FoEdNoNBGNLdQGmEFmZqYqKioKtRkajSZAVFZWHlFKZXnbFxUiVlRUREVFRajN0Gg0AUJE9g21Tw8nNRpNRKNFTKPRRDRaxDQaTUSjRUyj0UQ0WsQ0Gk1Eo0VMo9FENFrENBpNRKNFTKPRRDRaxGIEpRQ7D7fyx3f38NW/ruVQc1eoTdJoTCEqIvZjmfXVTdS1dHHRzPFDHnOkrZv/enodH+4+emzbtkOt/PVLC8lOdgbDTI0mYGgRi2D+VlnD7S9sQAGvfW0xmckOAJo7ejnc0k1agp1Zean8/F/bTxAwgF11bXz61+9z46Jils3PJys5LgR3oNGMHYmG9NSlpaUq2tdO1rV28T+vbmVjTRNOu5WSgjT+vu4APX3uYdt98cxinlq1jx7X0MdlJjlY+Z1zSXbaR21f+d4GyoozRt1eoxkOEalUSpV626d7YhGA2634+tPr+WjPicNBX3jsg70jHnOkrYc3Nh/iytICv+zaVNPMP9YfoLGjl7+treHvX1nEvInpfp1DoxkrWsTCnPrWbi7+5XscaesO6HXueeVjzpySSV5avE/H/+Gd3dy3fDt97uM9+Uff38u8a7WIaYKLnp0Mc9IS7ARjyN/W3cefPxoy28lJNHf2niBgAP/acohn1+ynubP32DaXW/G9FzdxxUMfsvlAs2n2ajT9aJ9YBPDm1sPc9lQlva7A/q1E4I6l07lxURFOuxWXW7F8yyGqjrbT3t1Hn0sxLsnB7rp2UuJt/PE970PVFKeN73xiGp+ePYHHPtjLr9/adWzf9PHJnD5pHDedWUThuMSA3o8mehjOJ6ZFLEL4wqOreW/nkaBca0p2Er+5di7/8+pW3t8VmGumJdh58StnUpyphUwzMsOJmB5ORgj56b75qsxgV10bSx98L2ACBtDU0cv9y7cH7Pya2EGLWIQwITV4IhYsXtt0kG88s47tPs60ajTe0LOTEYLVKqE2ISC8tL6W+rZunrp5ISLm3uPmA81srGnGrRQJDivTx6dw6oQUU6+hCT1axCKEnCheHvTBrqOc//N3+NTsXHbVtzEhNZ7vXTIDi2V0otbU0cNv3trFnz6swjVgBtVuFZ74YhmLJmcO235XXRs//9d2vrbkFC16EYAWsQjh8vn5jEty8P9e38qOw22hNsd09hxp51cDZjE31jTz2E0LSIrz7xH9cNcRrn+s/KTwD4Bel+KH/9jCP7++GJv1uCfF5Vb8a8sh3t5eT+X+RnbVeT7fhcUZnDohhQ93HSEtwaEFLUwZ8QkRESfwLhBnHP+CUupHIrIEuB9wAJXAzUqpvkFtC4EX8fje7MCvlVIPGfveBnKBTuPwi5RSdSISBzwJzAeOAlcpparGeJ9RwbnTsjlrSibPVlTzwIodHGnrCbVJAaO8qoFLf/M+507L5hMzxzMrL4UEx8iCtq+hw6uA9bOrro0LfvEOX11yCoeaO3m+sobmzl6aOnpPOnZqTjIAD7+3h7e31/PNC6by9QtOGf1NaQLCiCEW4nFUJCql2kTEDrwPfBN4FjhfKbVDRO4B9imlHh3U1mFco1tEkoDNwCKlVK0hYt9RSlUMavMVYLZS6jYRuRq4TCl11XA2xkKIxWBaunr5zV
u7ePyDqmHXRUYLNotw2dw8zjolk8lZSfzpgyoS46zkp8ezaHIms/JSAXhq1T6+/9JmU66ZHGfjV9fM5bEP9vLeziNYLcJfb1nIwknjTDm/xndMixMTkQQ8IvYfwF+VUpON7YuBu5RSlwzTdhywDjh9BBFbDvy3UuojEbEBh4AsNYyhsShi/VQ3dHDNH1dR09g58sFRxsLiDFbvbcBpt7D5vz+BzWqhprGDqx8O3OcxNSeJf33znICcWzM0Y44TExGriKwH6oAVQDlgE5H+ky4DvK4eFpECEdkIVAP3KqVqB+z+k4isF5EfyPGpqTzjWIzhaTNw0lefiNwqIhUiUlFfX+/LbUQlcXYLtU2xJ2AA/d9qXb1ufvr6VlxuxYTUeCZlJQXsmjvr2ujucwXs/Br/8clrqpRyASUikobHxzUTuBp4wPBh/Qvw+pdVSlUDs0VkAvCSiLyglDoMXKeUOiAiycDfgC/g8YX5hFLqYeBh8PTEfG0Xbfyt8gDDuICillOykzgwQLz/9EEVG6qbSHDYAhqkqxS8v/MI58/ICdg1NP7hV7CrUqoJWAksVUp9pJRarJQqw+P43zFC21o8PrHFxu8HjJ+twF+BMuPQAxi9OmM4mYrHwa/xwjs76kJtQkjo7nNzYNCQce3+poAKWD+3v7CR1q6TJwI0oWFEERORLKMHhojEAxcC20Qk29gWB9wBPOSlbb7RBhFJB84CtouITUQyje124FN4BA7gZeAG4/0y4K3h/GGxTFNHD5X7GkNtRtApK8pgf0NHyK7f0N7DI0MsftcEH196YrnASsOvtQZYoZR6FbhdRLYCG4FXlFJvAYhIqYg8YrSdAawWkQ3AO8D9SqlNeMI1lhvnXI+n9/VHo82jwDgR2QV8C7jThPuMSv6+9kDAM1uEG/MmplFe1RBqM/jlmzt5P0gL8jXDo7NYRCA7D7fy8Lt7eHHdgWFjoqKN7OQ4OntctHb3jXxwELhsbh4PXFUSajNiAp3FIsp49P29PF9ZE1MClpvqxK1U2AgYwJqqhqAkrNQMjxaxCMRpt4bahKCTkegIuxUKNY2dMRmfF25oEYtA/F1PGOlMzkpkS21LqM3wyvOVNaE2IebRIhaBjDa7Q6QSzj3PX725k68/s04HwIYQLWIRSFaSI9QmBI05+alh2wvr5x/ra/n+i5u1fyxEaBGLQEoKYqMsWrzdwsHmrlCb4RPPV9bEZMxeOKBFLAKZkZtMgiN8h1hmcVpeGnWtga23aSbbD+s026FAi1gEYrNamJ2fGmozAk5nb/iEU/hCog/5zjTmoz/1CGV+YTqr9oQ+cj1QTM5KZNOB8PaF9XPetCySnHYWTdZ5xkKBFrEIpax4HL9duTvUZgSM1Hh7qE3widvOmcydF08PtRkxjR5ORijzC9OxRWmoRVZSHBtrmkNthk+s3FZHc6fOaBFKtIhFKElxNmbkRmfhirz0+IhZUrX9cCvXPbKKrl4dJxYqtIhFKL0uNwebo2vJi8MqzMlPZX11U6hN8YvNB1r4gUl5/TX+o0UsQlm5rS7s1hKOlZKJ6WyIkGHkYJZvOXRCjUtN8NAiFqFE45q93r7IrdrU0tXHi+sOoJSitqmTupbICNKNBvTsZARS39rNym3Rl5Z6z5E2puYkRWxx4O++sIH7l2/nkCFgp+amcO60LOpau9lS20JWchzXlk3kEzNzOF4XRzNWtIhFIC9FaTLE5s4+WjrbWFCUzvZDrbR0RVawq1txTMAAPj7YwscHj8e6bT0I7+6o57xpWdx3xRwyk+JCYWbUoYeTEYZSiucrq0NtRsBQwJqqRvLTE0JtSsBYub2ey3//IfURtKQqnNEiFmFsrGmO2OGWP9Q0ha4QSDDYd7SDr/ylUqfwMQEtYhHGcxXR2wsbSGdP9P9zr6lq5LmK6JugCTa+lGxziki5iGwQkS0i8mNj+xIRWSsim0XkCaNG5OC2hcYx6422txnbE0TkNRHZZmz/2YA2N4pIvdFmvY
jcYuYNRzLNnb28uO5AqM0ICrFSxelnr29l+ZZDoTYjovGlJ9YNLFFKzQFKgKUisgh4ArhaKTUL2MfxWpEDOQicoZQ
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"df_subset.plot()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "44c0c3d9-fa1e-405c-9887-29a0a69801ba",
"metadata": {},
"outputs": [],
"source": [
"tract_subset = df_subset.copy()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1d7645f9-22b2-4fec-9514-d777dae32b29",
"metadata": {},
"outputs": [],
"source": [
"df_subset = df_subset[[\"GEOID10_TRACT\", \"geometry\"]].copy()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "cfff1e45-3b3f-453b-a33b-8cf20f0152b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([False, False, True, True, False, False, False, True, True])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.random.choice([True, False], len(df_subset))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4f01f4fa-69cf-4c9a-8acc-fddf3d8ab39d",
"metadata": {},
"outputs": [],
"source": [
"df_subset_scores = df_subset.copy()\n",
"# df_subset_scores[\"included\"] = np.random.choice([True, False], len(df_subset))\n",
"df_subset_scores[\"included\"] = True\n",
"df_subset_scores.loc[df.GEOID10_TRACT == \"24027603004\", \"included\"] = False"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8cdda9e3-9c73-405e-8c35-e40e8ff2d29b",
"metadata": {},
"outputs": [],
"source": [
"tract_data = df_subset.rename(columns={\"GEOID10_TRACT\": \"ORIGINAL_TRACT\"})"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f43ffa4a-3503-4497-a247-8cc02b264f7d",
"metadata": {},
"outputs": [],
"source": [
"df = df_subset_scores"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8e1797ed-4838-4139-943c-809e4df0ab70",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10_TRACT</th>\n",
" <th>geometry</th>\n",
" <th>included</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>53415</th>\n",
" <td>24027602100</td>\n",
" <td>POLYGON ((-76.86305 39.31484, -76.86308 39.315...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53416</th>\n",
" <td>24027602303</td>\n",
" <td>POLYGON ((-76.86750 39.25170, -76.86754 39.251...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53424</th>\n",
" <td>24027605503</td>\n",
" <td>POLYGON ((-76.88405 39.23543, -76.88398 39.235...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53429</th>\n",
" <td>24027605502</td>\n",
" <td>POLYGON ((-76.90881 39.21739, -76.90882 39.217...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53452</th>\n",
" <td>24027603004</td>\n",
" <td>POLYGON ((-76.98539 39.26610, -76.98557 39.266...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53453</th>\n",
" <td>24027605104</td>\n",
" <td>POLYGON ((-76.97979 39.23322, -76.98018 39.233...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53454</th>\n",
" <td>24027603003</td>\n",
" <td>POLYGON ((-76.93350 39.35760, -76.93341 39.357...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53455</th>\n",
" <td>24027603001</td>\n",
" <td>POLYGON ((-76.86308 39.31501, -76.86305 39.314...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53464</th>\n",
" <td>24027602201</td>\n",
" <td>POLYGON ((-76.87750 39.30290, -76.87747 39.302...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10_TRACT geometry \\\n",
"53415 24027602100 POLYGON ((-76.86305 39.31484, -76.86308 39.315... \n",
"53416 24027602303 POLYGON ((-76.86750 39.25170, -76.86754 39.251... \n",
"53424 24027605503 POLYGON ((-76.88405 39.23543, -76.88398 39.235... \n",
"53429 24027605502 POLYGON ((-76.90881 39.21739, -76.90882 39.217... \n",
"53452 24027603004 POLYGON ((-76.98539 39.26610, -76.98557 39.266... \n",
"53453 24027605104 POLYGON ((-76.97979 39.23322, -76.98018 39.233... \n",
"53454 24027603003 POLYGON ((-76.93350 39.35760, -76.93341 39.357... \n",
"53455 24027603001 POLYGON ((-76.86308 39.31501, -76.86305 39.314... \n",
"53464 24027602201 POLYGON ((-76.87750 39.30290, -76.87747 39.302... \n",
"\n",
" included \n",
"53415 True \n",
"53416 True \n",
"53424 True \n",
"53429 True \n",
"53452 False \n",
"53453 True \n",
"53454 True \n",
"53455 True \n",
"53464 True "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "799b883e-1f45-4f79-9b3f-65361aec881c",
"metadata": {},
"outputs": [],
"source": [
"adjacent_tracts: gpd.GeoDataFrame = df.sjoin(\n",
" tract_data, how=\"left\", predicate=\"touches\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "21672763-02e0-4608-82b2-3e256591c567",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10_TRACT</th>\n",
" <th>included</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>24027602100</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>24027602201</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>24027602303</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>24027603001</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>24027603003</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>24027603004</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>24027605104</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>24027605502</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>24027605503</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10_TRACT included\n",
"0 24027602100 0.666667\n",
"1 24027602201 0.666667\n",
"2 24027602303 0.666667\n",
"3 24027603001 0.666667\n",
"4 24027603003 0.666667\n",
"5 24027603004 1.000000\n",
"6 24027605104 0.666667\n",
"7 24027605502 0.666667\n",
"8 24027605503 0.666667"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For each tract (grouped by ORIGINAL_TRACT), average the `included` flag\n",
"# of the tracts that touch it, then expose the group key again under the\n",
"# GEOID10_TRACT column name.\n",
"(\n",
"    adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[[\"included\"]]\n",
"    .mean()\n",
"    .reset_index()\n",
"    .rename(columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"})\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e6f4289b-944c-489e-955a-a7a177ec6dd5",
"metadata": {},
"source": [
"# Make test data"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "78772a00-3185-4d28-9755-7ba72c170282",
"metadata": {},
"outputs": [],
"source": [
"tract_subset.to_file(\"../tests/score/test_utils/data/us.geojson\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "425a5cd3-5929-4560-89e7-2dd9233bf303",
"metadata": {},
"outputs": [],
"source": [
"df[[\"GEOID10_TRACT\", \"included\"]].to_csv(\n",
" \"../tests/score/test_utils/data/scores.csv\", index=False\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}