mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-24 10:34:18 -08:00
* Configure and run `black` and other pre-commit hooks Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
130 lines
8.4 KiB
Text
130 lines
8.4 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "e0b801f9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import geopandas as gpd\n",
|
|
"import pyogrio\n",
|
|
"from data_pipeline.etl.sources.census.etl import CensusETL\n",
|
|
"\n",
|
|
"import time"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "c4cbab25",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Time taken to execute the function using pyogrio is 63.07696199417114\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"begin = time.time()\n",
|
|
"census_tract_gdf = gpd.read_file(\n",
|
|
" CensusETL.NATIONAL_TRACT_JSON_PATH,\n",
|
|
" # Use `pyogrio` because it's vectorized and faster.\n",
|
|
" engine=\"pyogrio\",\n",
|
|
")\n",
|
|
"end = time.time()\n",
|
|
"\n",
|
|
"print(\"Time taken to execute the function using pyogrio is\", end - begin)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "372ab939",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Time taken to execute the function using include fields is 67.33577013015747\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"begin2 = time.time()\n",
|
|
"census_tract_gdf = gpd.read_file(\n",
|
|
" CensusETL.NATIONAL_TRACT_JSON_PATH,\n",
|
|
" engine=\"fiona\",\n",
|
|
" include_fields=[\"GEOID10\"],\n",
|
|
")\n",
|
|
"end2 = time.time()\n",
|
|
"\n",
|
|
"print(\n",
|
|
" \"Time taken to execute the function using include fields is\", end2 - begin2\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "32fb7d4b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "KeyboardInterrupt",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[0;32m/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_21074/2531126572.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mbegin2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m census_tract_gdf = gpd.read_file(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mCensusETL\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNATIONAL_TRACT_JSON_PATH\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"fiona\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0minclude_fields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"GEOID10\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;32m~/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/io/file.py\u001b[0m in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"fiona\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 253\u001b[0;31m return _read_file_fiona(\n\u001b[0m\u001b[1;32m 254\u001b[0m \u001b[0mpath_or_bytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfrom_bytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbbox\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbbox\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrows\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 255\u001b[0m )\n",
|
|
"\u001b[0;32m~/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/io/file.py\u001b[0m in \u001b[0;36m_read_file_fiona\u001b[0;34m(path_or_bytes, from_bytes, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m 338\u001b[0m )\n\u001b[1;32m 339\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 340\u001b[0;31m df = GeoDataFrame.from_features(\n\u001b[0m\u001b[1;32m 341\u001b[0m \u001b[0mf_filt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"geometry\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m )\n",
|
|
"\u001b[0;32m~/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/geodataframe.py\u001b[0m in \u001b[0;36mfrom_features\u001b[0;34m(cls, features, crs, columns)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0mrows\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mfeature\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfeatures_lst\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;31m# load geometry\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"__geo_interface__\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;32mfiona/ogrext.pyx\u001b[0m in \u001b[0;36mfiona.ogrext.Iterator.__next__\u001b[0;34m()\u001b[0m\n",
|
|
"\u001b[0;32mfiona/ogrext.pyx\u001b[0m in \u001b[0;36mfiona.ogrext.Iterator._next\u001b[0;34m()\u001b[0m\n",
|
|
"\u001b[0;32m~/.pyenv/versions/3.9.6/lib/python3.9/logging/__init__.py\u001b[0m in \u001b[0;36mdebug\u001b[0;34m(self, msg, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1422\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmanager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1425\u001b[0m \"\"\"\n\u001b[1;32m 1426\u001b[0m \u001b[0mLog\u001b[0m \u001b[0;34m'msg % args'\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mseverity\u001b[0m \u001b[0;34m'DEBUG'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"begin2 = time.time()\n",
|
|
"census_tract_gdf = gpd.read_file(\n",
|
|
" CensusETL.NATIONAL_TRACT_JSON_PATH,\n",
|
|
" engine=\"fiona\",\n",
|
|
" include_fields=[\"GEOID10\"],\n",
|
|
" rows=slice(0, 76322, 100),\n",
|
|
")\n",
|
|
"end2 = time.time()\n",
|
|
"\n",
|
|
"print(\"Time taken to execute the function using slice is\", end2 - begin2)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|