{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e0b801f9", "metadata": {}, "outputs": [], "source": [ "import geopandas as gpd\n", "import pyogrio\n", "from data_pipeline.etl.sources.census.etl import CensusETL\n", "\n", "import time" ] }, { "cell_type": "code", "execution_count": 2, "id": "c4cbab25", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to execute the function using pyogrio is 63.07696199417114\n" ] } ], "source": [ "begin = time.time()\n", "census_tract_gdf = gpd.read_file(\n", " CensusETL.NATIONAL_TRACT_JSON_PATH,\n", " # Use `pyogrio` because it's vectorized and faster.\n", " engine=\"pyogrio\",\n", ")\n", "end = time.time()\n", "\n", "print(\"Time taken to execute the function using pyogrio is\", end - begin)" ] }, { "cell_type": "code", "execution_count": 3, "id": "372ab939", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to execute the function using include fields is 67.33577013015747\n" ] } ], "source": [ "begin2 = time.time()\n", "census_tract_gdf = gpd.read_file(\n", " CensusETL.NATIONAL_TRACT_JSON_PATH,\n", " engine=\"fiona\",\n", " include_fields=[\"GEOID10\"],\n", ")\n", "end2 = time.time()\n", "\n", "print(\n", " \"Time taken to execute the function using include fields is\", end2 - begin2\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "id": "32fb7d4b", "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_21074/2531126572.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mbegin2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m census_tract_gdf = gpd.read_file(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mCensusETL\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNATIONAL_TRACT_JSON_PATH\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"fiona\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0minclude_fields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"GEOID10\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/io/file.py\u001b[0m in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"fiona\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 253\u001b[0;31m return _read_file_fiona(\n\u001b[0m\u001b[1;32m 254\u001b[0m \u001b[0mpath_or_bytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfrom_bytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbbox\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbbox\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrows\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 255\u001b[0m )\n", "\u001b[0;32m~/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/io/file.py\u001b[0m in \u001b[0;36m_read_file_fiona\u001b[0;34m(path_or_bytes, from_bytes, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m 338\u001b[0m )\n\u001b[1;32m 339\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 340\u001b[0;31m df = GeoDataFrame.from_features(\n\u001b[0m\u001b[1;32m 341\u001b[0m \u001b[0mf_filt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"geometry\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m )\n", "\u001b[0;32m~/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/geodataframe.py\u001b[0m in \u001b[0;36mfrom_features\u001b[0;34m(cls, features, crs, columns)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0mrows\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mfeature\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfeatures_lst\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;31m# load geometry\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"__geo_interface__\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32mfiona/ogrext.pyx\u001b[0m in \u001b[0;36mfiona.ogrext.Iterator.__next__\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mfiona/ogrext.pyx\u001b[0m in \u001b[0;36mfiona.ogrext.Iterator._next\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m~/.pyenv/versions/3.9.6/lib/python3.9/logging/__init__.py\u001b[0m in \u001b[0;36mdebug\u001b[0;34m(self, msg, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1422\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmanager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1425\u001b[0m \"\"\"\n\u001b[1;32m 1426\u001b[0m \u001b[0mLog\u001b[0m \u001b[0;34m'msg % args'\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mseverity\u001b[0m \u001b[0;34m'DEBUG'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "begin2 = time.time()\n", "census_tract_gdf = gpd.read_file(\n", " CensusETL.NATIONAL_TRACT_JSON_PATH,\n", " engine=\"fiona\",\n", " include_fields=[\"GEOID10\"],\n", " rows=slice(0, 76322, 100),\n", ")\n", "end2 = time.time()\n", "\n", "print(\"Time taken to execute the function using slice is\", end2 - begin2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }