{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "f0b6f7e2", "metadata": {}, "outputs": [], "source": [ "import geopandas as gpd\n", "import pyogrio\n", "from data_pipeline.etl.sources.census.etl import CensusETL\n", "from data_pipeline.etl.sources.tribal.etl import TribalETL\n", "\n", "import time\n", "\n", "begin = time.time()" ] }, { "cell_type": "code", "execution_count": 3, "id": "1e3e65af", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tribalIdlandAreaNameClassificationgeometry
0LAR0001Cheyenne River LAR1MULTIPOLYGON (((-100.49935 45.47125, -100.4993...
1LAR0002Crow Creek LAR1POLYGON ((-99.42137 44.27733, -99.42138 44.273...
2LAR0003Flandreau LAR1MULTIPOLYGON (((-96.56655 44.08786, -96.57165 ...
3LAR0004Fort Berthold LAR1POLYGON ((-102.78362 47.99900, -102.78192 47.9...
4LAR0005Lake Traverse (Sisseton) LAR1MULTIPOLYGON (((-97.28946 45.76084, -97.28955 ...
...............
592{0886416F-643E-497E-89D3-E9CC0240158D}ChilkatNonePOINT (-135.88440 59.40390)
593{2029C35B-86D7-4751-A946-EA0772C81A80}ChilkootNonePOINT (-135.44500 59.23580)
594{24DF6536-95CB-4964-94DF-16E440ABCA92}CraigNonePOINT (-133.14830 55.47640)
595{ACDE097A-9BDA-4FCA-9DB7-297DA6B73F88}DouglasNonePOINT (-134.41970 58.30190)
596{5E1D1895-FF41-4B11-9EDB-0C1254A360C4}AgdaaguxNonePOINT (-162.31030 55.06170)
\n", "

597 rows × 4 columns

\n", "
" ], "text/plain": [ " tribalId landAreaName \\\n", "0 LAR0001 Cheyenne River LAR \n", "1 LAR0002 Crow Creek LAR \n", "2 LAR0003 Flandreau LAR \n", "3 LAR0004 Fort Berthold LAR \n", "4 LAR0005 Lake Traverse (Sisseton) LAR \n", ".. ... ... \n", "592 {0886416F-643E-497E-89D3-E9CC0240158D} Chilkat \n", "593 {2029C35B-86D7-4751-A946-EA0772C81A80} Chilkoot \n", "594 {24DF6536-95CB-4964-94DF-16E440ABCA92} Craig \n", "595 {ACDE097A-9BDA-4FCA-9DB7-297DA6B73F88} Douglas \n", "596 {5E1D1895-FF41-4B11-9EDB-0C1254A360C4} Agdaagux \n", "\n", " Classification geometry \n", "0 1 MULTIPOLYGON (((-100.49935 45.47125, -100.4993... \n", "1 1 POLYGON ((-99.42137 44.27733, -99.42138 44.273... \n", "2 1 MULTIPOLYGON (((-96.56655 44.08786, -96.57165 ... \n", "3 1 POLYGON ((-102.78362 47.99900, -102.78192 47.9... \n", "4 1 MULTIPOLYGON (((-97.28946 45.76084, -97.28955 ... \n", ".. ... ... \n", "592 None POINT (-135.88440 59.40390) \n", "593 None POINT (-135.44500 59.23580) \n", "594 None POINT (-133.14830 55.47640) \n", "595 None POINT (-134.41970 58.30190) \n", "596 None POINT (-162.31030 55.06170) \n", "\n", "[597 rows x 4 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load Tribal geojson\n", "tribal_gdf = gpd.read_file(\n", " TribalETL().NATIONAL_TRIBAL_GEOJSON_PATH,\n", " # Use `pyogrio` because it's vectorized and faster.\n", " engine=\"pyogrio\",\n", ")\n", "\n", "tribal_gdf" ] }, { "cell_type": "code", "execution_count": 4, "id": "89fedd44", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tribalIdlandAreaNameClassificationgeometry
0LAR0001Cheyenne River LAR1MULTIPOLYGON (((-100.49935 45.47125, -100.4993...
1LAR0002Crow Creek LAR1POLYGON ((-99.42137 44.27733, -99.42138 44.273...
2LAR0003Flandreau LAR1MULTIPOLYGON (((-96.56655 44.08786, -96.57165 ...
3LAR0004Fort Berthold LAR1POLYGON ((-102.78362 47.99900, -102.78192 47.9...
4LAR0005Lake Traverse (Sisseton) LAR1MULTIPOLYGON (((-97.28946 45.76084, -97.28955 ...
...............
365TSA0354Seminole TSANonePOLYGON ((-96.49048 34.90423, -96.49146 34.903...
366TSA0355Seneca Cayuga TSANonePOLYGON ((-94.61803 36.62531, -94.62083 36.625...
367TSA0356Tonkawa TSANonePOLYGON ((-97.24698 36.68082, -97.24697 36.677...
368TSA0357Wichita Caddo and Delaware TSANonePOLYGON ((-97.99931 35.36425, -97.99948 35.360...
369TSA0358Wyandotte TSANonePOLYGON ((-94.61820 36.82030, -94.61821 36.816...
\n", "

370 rows × 4 columns

\n", "
" ], "text/plain": [ " tribalId landAreaName Classification \\\n", "0 LAR0001 Cheyenne River LAR 1 \n", "1 LAR0002 Crow Creek LAR 1 \n", "2 LAR0003 Flandreau LAR 1 \n", "3 LAR0004 Fort Berthold LAR 1 \n", "4 LAR0005 Lake Traverse (Sisseton) LAR 1 \n", ".. ... ... ... \n", "365 TSA0354 Seminole TSA None \n", "366 TSA0355 Seneca Cayuga TSA None \n", "367 TSA0356 Tonkawa TSA None \n", "368 TSA0357 Wichita Caddo and Delaware TSA None \n", "369 TSA0358 Wyandotte TSA None \n", "\n", " geometry \n", "0 MULTIPOLYGON (((-100.49935 45.47125, -100.4993... \n", "1 POLYGON ((-99.42137 44.27733, -99.42138 44.273... \n", "2 MULTIPOLYGON (((-96.56655 44.08786, -96.57165 ... \n", "3 POLYGON ((-102.78362 47.99900, -102.78192 47.9... \n", "4 MULTIPOLYGON (((-97.28946 45.76084, -97.28955 ... \n", ".. ... \n", "365 POLYGON ((-96.49048 34.90423, -96.49146 34.903... \n", "366 POLYGON ((-94.61803 36.62531, -94.62083 36.625... \n", "367 POLYGON ((-97.24698 36.68082, -97.24697 36.677... \n", "368 POLYGON ((-97.99931 35.36425, -97.99948 35.360... \n", "369 POLYGON ((-94.61820 36.82030, -94.61821 36.816... \n", "\n", "[370 rows x 4 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop the points from the Tribal data (because these cannot be joined to a (Multi)Polygon tract data frame)\n", "tribal_gdf = tribal_gdf[tribal_gdf.geom_type != \"Point\"]\n", "tribal_gdf" ] }, { "cell_type": "code", "execution_count": 5, "id": "5940556f", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STATEFP10COUNTYFP10TRACTCE10GEOID10NAME10NAMELSAD10MTFCC10FUNCSTAT10ALAND10AWATER10INTPTLAT10INTPTLON10geometry
020071958100200719581009581Census Tract 9581G5020S20161768140+38.4804076-101.8059837POLYGON ((-101.79971 38.69806, -101.79097 38.6...
120175965600201759656009656Census Tract 9656G5020S16035757012204351+37.1805849-100.8547406POLYGON ((-101.06766 37.20440, -101.06768 37.2...
220175965700201759657009657Census Tract 9657G5020S9466451358282+37.0625361-100.9131437POLYGON ((-100.94250 37.06497, -100.94251 37.0...
32004302030020043020300203Census Tract 203G5020S2115932067045771+39.7881238-094.9734666POLYGON ((-94.95518 39.90129, -94.95475 39.901...
42004302020020043020200202Census Tract 202G5020S3526870262968059+39.7540484-095.1060098POLYGON ((-95.02575 39.88295, -95.02585 39.883...
..........................................
7412935049000600350490006006Census Tract 6G5020S16294710+35.6758519-105.9446097POLYGON ((-105.95207 35.67367, -105.95215 35.6...
7413035049000700350490007007Census Tract 7G5020S12855970+35.6802004-105.9558818POLYGON ((-105.96221 35.67223, -105.96245 35.6...
7413135049000800350490008008Census Tract 8G5020S19167970+35.6805095-105.9703558POLYGON ((-105.98159 35.67739, -105.98143 35.6...
7413235049000900350490009009Census Tract 9G5020S25455630+35.6692966-105.9755351POLYGON ((-105.96362 35.67616, -105.96365 35.6...
74133350490010013504900100110.01Census Tract 10.01G5020S26172810+35.6647341-105.9468629POLYGON ((-105.94510 35.65705, -105.94563 35.6...
\n", "

74134 rows × 13 columns

\n", "
" ], "text/plain": [ " STATEFP10 COUNTYFP10 TRACTCE10 GEOID10 NAME10 NAMELSAD10 \\\n", "0 20 071 958100 20071958100 9581 Census Tract 9581 \n", "1 20 175 965600 20175965600 9656 Census Tract 9656 \n", "2 20 175 965700 20175965700 9657 Census Tract 9657 \n", "3 20 043 020300 20043020300 203 Census Tract 203 \n", "4 20 043 020200 20043020200 202 Census Tract 202 \n", "... ... ... ... ... ... ... \n", "74129 35 049 000600 35049000600 6 Census Tract 6 \n", "74130 35 049 000700 35049000700 7 Census Tract 7 \n", "74131 35 049 000800 35049000800 8 Census Tract 8 \n", "74132 35 049 000900 35049000900 9 Census Tract 9 \n", "74133 35 049 001001 35049001001 10.01 Census Tract 10.01 \n", "\n", " MTFCC10 FUNCSTAT10 ALAND10 AWATER10 INTPTLAT10 INTPTLON10 \\\n", "0 G5020 S 2016176814 0 +38.4804076 -101.8059837 \n", "1 G5020 S 1603575701 2204351 +37.1805849 -100.8547406 \n", "2 G5020 S 9466451 358282 +37.0625361 -100.9131437 \n", "3 G5020 S 211593206 7045771 +39.7881238 -094.9734666 \n", "4 G5020 S 352687026 2968059 +39.7540484 -095.1060098 \n", "... ... ... ... ... ... ... \n", "74129 G5020 S 1629471 0 +35.6758519 -105.9446097 \n", "74130 G5020 S 1285597 0 +35.6802004 -105.9558818 \n", "74131 G5020 S 1916797 0 +35.6805095 -105.9703558 \n", "74132 G5020 S 2545563 0 +35.6692966 -105.9755351 \n", "74133 G5020 S 2617281 0 +35.6647341 -105.9468629 \n", "\n", " geometry \n", "0 POLYGON ((-101.79971 38.69806, -101.79097 38.6... \n", "1 POLYGON ((-101.06766 37.20440, -101.06768 37.2... \n", "2 POLYGON ((-100.94250 37.06497, -100.94251 37.0... \n", "3 POLYGON ((-94.95518 39.90129, -94.95475 39.901... \n", "4 POLYGON ((-95.02575 39.88295, -95.02585 39.883... \n", "... ... \n", "74129 POLYGON ((-105.95207 35.67367, -105.95215 35.6... \n", "74130 POLYGON ((-105.96221 35.67223, -105.96245 35.6... \n", "74131 POLYGON ((-105.98159 35.67739, -105.98143 35.6... \n", "74132 POLYGON ((-105.96362 35.67616, -105.96365 35.6... \n", "74133 POLYGON ((-105.94510 35.65705, -105.94563 35.6... 
\n", "\n", "[74134 rows x 13 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load Census tracts geojson\n", "census_tract_gdf = gpd.read_file(\n", " CensusETL.NATIONAL_TRACT_JSON_PATH,\n", " # Use `pyogrio` because it's vectorized and faster.\n", " engine=\"pyogrio\",\n", ")\n", "\n", "census_tract_gdf" ] }, { "cell_type": "code", "execution_count": 6, "id": "595b2a2a", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_768/2956500515.py:2: UserWarning: Geometry is in a geographic CRS. Results from 'area' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", "\n", " census_tract_gdf[\"area_tract\"] = census_tract_gdf.area\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STATEFP10COUNTYFP10TRACTCE10GEOID10NAME10NAMELSAD10MTFCC10FUNCSTAT10ALAND10AWATER10INTPTLAT10INTPTLON10geometryarea_tract
020071958100200719581009581Census Tract 9581G5020S20161768140+38.4804076-101.8059837POLYGON ((-101.79971 38.69806, -101.79097 38.6...0.208156
120175965600201759656009656Census Tract 9656G5020S16035757012204351+37.1805849-100.8547406POLYGON ((-101.06766 37.20440, -101.06768 37.2...0.162976
220175965700201759657009657Census Tract 9657G5020S9466451358282+37.0625361-100.9131437POLYGON ((-100.94250 37.06497, -100.94251 37.0...0.000995
32004302030020043020300203Census Tract 203G5020S2115932067045771+39.7881238-094.9734666POLYGON ((-94.95518 39.90129, -94.95475 39.901...0.022990
42004302020020043020200202Census Tract 202G5020S3526870262968059+39.7540484-095.1060098POLYGON ((-95.02575 39.88295, -95.02585 39.883...0.037373
.............................................
7412935049000600350490006006Census Tract 6G5020S16294710+35.6758519-105.9446097POLYGON ((-105.95207 35.67367, -105.95215 35.6...0.000162
7413035049000700350490007007Census Tract 7G5020S12855970+35.6802004-105.9558818POLYGON ((-105.96221 35.67223, -105.96245 35.6...0.000128
7413135049000800350490008008Census Tract 8G5020S19167970+35.6805095-105.9703558POLYGON ((-105.98159 35.67739, -105.98143 35.6...0.000191
7413235049000900350490009009Census Tract 9G5020S25455630+35.6692966-105.9755351POLYGON ((-105.96362 35.67616, -105.96365 35.6...0.000253
74133350490010013504900100110.01Census Tract 10.01G5020S26172810+35.6647341-105.9468629POLYGON ((-105.94510 35.65705, -105.94563 35.6...0.000261
\n", "

74134 rows × 14 columns

\n", "
" ], "text/plain": [ " STATEFP10 COUNTYFP10 TRACTCE10 GEOID10 NAME10 NAMELSAD10 \\\n", "0 20 071 958100 20071958100 9581 Census Tract 9581 \n", "1 20 175 965600 20175965600 9656 Census Tract 9656 \n", "2 20 175 965700 20175965700 9657 Census Tract 9657 \n", "3 20 043 020300 20043020300 203 Census Tract 203 \n", "4 20 043 020200 20043020200 202 Census Tract 202 \n", "... ... ... ... ... ... ... \n", "74129 35 049 000600 35049000600 6 Census Tract 6 \n", "74130 35 049 000700 35049000700 7 Census Tract 7 \n", "74131 35 049 000800 35049000800 8 Census Tract 8 \n", "74132 35 049 000900 35049000900 9 Census Tract 9 \n", "74133 35 049 001001 35049001001 10.01 Census Tract 10.01 \n", "\n", " MTFCC10 FUNCSTAT10 ALAND10 AWATER10 INTPTLAT10 INTPTLON10 \\\n", "0 G5020 S 2016176814 0 +38.4804076 -101.8059837 \n", "1 G5020 S 1603575701 2204351 +37.1805849 -100.8547406 \n", "2 G5020 S 9466451 358282 +37.0625361 -100.9131437 \n", "3 G5020 S 211593206 7045771 +39.7881238 -094.9734666 \n", "4 G5020 S 352687026 2968059 +39.7540484 -095.1060098 \n", "... ... ... ... ... ... ... \n", "74129 G5020 S 1629471 0 +35.6758519 -105.9446097 \n", "74130 G5020 S 1285597 0 +35.6802004 -105.9558818 \n", "74131 G5020 S 1916797 0 +35.6805095 -105.9703558 \n", "74132 G5020 S 2545563 0 +35.6692966 -105.9755351 \n", "74133 G5020 S 2617281 0 +35.6647341 -105.9468629 \n", "\n", " geometry area_tract \n", "0 POLYGON ((-101.79971 38.69806, -101.79097 38.6... 0.208156 \n", "1 POLYGON ((-101.06766 37.20440, -101.06768 37.2... 0.162976 \n", "2 POLYGON ((-100.94250 37.06497, -100.94251 37.0... 0.000995 \n", "3 POLYGON ((-94.95518 39.90129, -94.95475 39.901... 0.022990 \n", "4 POLYGON ((-95.02575 39.88295, -95.02585 39.883... 0.037373 \n", "... ... ... \n", "74129 POLYGON ((-105.95207 35.67367, -105.95215 35.6... 0.000162 \n", "74130 POLYGON ((-105.96221 35.67223, -105.96245 35.6... 0.000128 \n", "74131 POLYGON ((-105.98159 35.67739, -105.98143 35.6... 
0.000191 \n", "74132 POLYGON ((-105.96362 35.67616, -105.96365 35.6... 0.000253 \n", "74133 POLYGON ((-105.94510 35.65705, -105.94563 35.6... 0.000261 \n", "\n", "[74134 rows x 14 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create a measure for the entire census tract area\n", "census_tract_gdf[\"area_tract\"] = census_tract_gdf.area\n", "census_tract_gdf" ] }, { "cell_type": "code", "execution_count": 7, "id": "0ea396ed", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_768/1353983773.py:2: UserWarning: `keep_geom_type=True` in overlay resulted in 1123 dropped geometries of different geometry types than df1 has. Set `keep_geom_type=False` to retain all geometries\n", " gdf_joined = gpd.overlay(census_tract_gdf, tribal_gdf, how=\"union\")\n" ] } ], "source": [ "# Perform the overlay: union of the census tracts and the tribal areas\n", "gdf_joined = gpd.overlay(census_tract_gdf, tribal_gdf, how=\"union\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "7fb3ef69", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_768/2727120487.py:3: UserWarning: Geometry is in a geographic CRS. Results from 'area' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", "\n", " gdf_joined['area_joined'] = gdf_joined.area\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STATEFP10COUNTYFP10TRACTCE10GEOID10NAME10NAMELSAD10MTFCC10FUNCSTAT10ALAND10AWATER10INTPTLAT10INTPTLON10area_tracttribalIdlandAreaNameClassificationgeometryarea_joinedtribal_area_as_a_share_of_tract_area
02004302010020043020100201Census Tract 201G5020S454634616.02601186.0+39.8206800-095.25672790.048098LAR0210Iowa LAR1POLYGON ((-95.33994 39.97506, -95.33994 39.975...4.998139e-040.010391
120013480600200134806004806Census Tract 4806G5020S882293538.01376818.0+39.8596443-095.62551870.093019LAR0210Iowa LAR1POLYGON ((-95.45656 40.00025, -95.45528 40.000...3.209294e-030.034502
231147964500311479645009645Census Tract 9645G5020S677848509.06076731.0+40.1522236-095.58588700.072289LAR0210Iowa LAR1MULTIPOLYGON (((-95.38162 40.02744, -95.38119 ...1.476624e-030.020427
329087960300290879603009603Census Tract 9603G5020S412869716.06745159.0+39.9730230-095.14797010.044239LAR0210Iowa LAR1POLYGON ((-95.38119 40.02755, -95.38162 40.027...1.965514e-070.000004
42008508260020085082600826Census Tract 826G5020S690868809.0947758.0+39.4553966-095.67314040.072404LAR0211Kickapoo (Kansas) LAR1POLYGON ((-95.71031 39.65308, -95.69902 39.653...5.285627e-060.000073
............................................................
76317NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTSA0353Sac and Fox TSANoneMULTIPOLYGON (((-96.62002 35.75143, -96.62001 ...6.560647e-17NaN
76318NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTSA0354Seminole TSANoneMULTIPOLYGON (((-96.77536 35.03300, -96.77536 ...7.207055e-18NaN
76319NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTSA0355Seneca Cayuga TSANonePOLYGON ((-94.61836 36.74340, -94.61836 36.743...7.016721e-18NaN
76320NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTSA0356Tonkawa TSANoneMULTIPOLYGON (((-97.24698 36.69942, -97.24692 ...2.612218e-17NaN
76321NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTSA0358Wyandotte TSANonePOLYGON ((-94.61828 36.78970, -94.61834 36.795...1.555259e-18NaN
\n", "

76322 rows × 19 columns

\n", "
" ], "text/plain": [ " STATEFP10 COUNTYFP10 TRACTCE10 GEOID10 NAME10 NAMELSAD10 \\\n", "0 20 043 020100 20043020100 201 Census Tract 201 \n", "1 20 013 480600 20013480600 4806 Census Tract 4806 \n", "2 31 147 964500 31147964500 9645 Census Tract 9645 \n", "3 29 087 960300 29087960300 9603 Census Tract 9603 \n", "4 20 085 082600 20085082600 826 Census Tract 826 \n", "... ... ... ... ... ... ... \n", "76317 NaN NaN NaN NaN NaN NaN \n", "76318 NaN NaN NaN NaN NaN NaN \n", "76319 NaN NaN NaN NaN NaN NaN \n", "76320 NaN NaN NaN NaN NaN NaN \n", "76321 NaN NaN NaN NaN NaN NaN \n", "\n", " MTFCC10 FUNCSTAT10 ALAND10 AWATER10 INTPTLAT10 INTPTLON10 \\\n", "0 G5020 S 454634616.0 2601186.0 +39.8206800 -095.2567279 \n", "1 G5020 S 882293538.0 1376818.0 +39.8596443 -095.6255187 \n", "2 G5020 S 677848509.0 6076731.0 +40.1522236 -095.5858870 \n", "3 G5020 S 412869716.0 6745159.0 +39.9730230 -095.1479701 \n", "4 G5020 S 690868809.0 947758.0 +39.4553966 -095.6731404 \n", "... ... ... ... ... ... ... \n", "76317 NaN NaN NaN NaN NaN NaN \n", "76318 NaN NaN NaN NaN NaN NaN \n", "76319 NaN NaN NaN NaN NaN NaN \n", "76320 NaN NaN NaN NaN NaN NaN \n", "76321 NaN NaN NaN NaN NaN NaN \n", "\n", " area_tract tribalId landAreaName Classification \\\n", "0 0.048098 LAR0210 Iowa LAR 1 \n", "1 0.093019 LAR0210 Iowa LAR 1 \n", "2 0.072289 LAR0210 Iowa LAR 1 \n", "3 0.044239 LAR0210 Iowa LAR 1 \n", "4 0.072404 LAR0211 Kickapoo (Kansas) LAR 1 \n", "... ... ... ... ... \n", "76317 NaN TSA0353 Sac and Fox TSA None \n", "76318 NaN TSA0354 Seminole TSA None \n", "76319 NaN TSA0355 Seneca Cayuga TSA None \n", "76320 NaN TSA0356 Tonkawa TSA None \n", "76321 NaN TSA0358 Wyandotte TSA None \n", "\n", " geometry area_joined \\\n", "0 POLYGON ((-95.33994 39.97506, -95.33994 39.975... 4.998139e-04 \n", "1 POLYGON ((-95.45656 40.00025, -95.45528 40.000... 3.209294e-03 \n", "2 MULTIPOLYGON (((-95.38162 40.02744, -95.38119 ... 1.476624e-03 \n", "3 POLYGON ((-95.38119 40.02755, -95.38162 40.027... 
1.965514e-07 \n", "4 POLYGON ((-95.71031 39.65308, -95.69902 39.653... 5.285627e-06 \n", "... ... ... \n", "76317 MULTIPOLYGON (((-96.62002 35.75143, -96.62001 ... 6.560647e-17 \n", "76318 MULTIPOLYGON (((-96.77536 35.03300, -96.77536 ... 7.207055e-18 \n", "76319 POLYGON ((-94.61836 36.74340, -94.61836 36.743... 7.016721e-18 \n", "76320 MULTIPOLYGON (((-97.24698 36.69942, -97.24692 ... 2.612218e-17 \n", "76321 POLYGON ((-94.61828 36.78970, -94.61834 36.795... 1.555259e-18 \n", "\n", " tribal_area_as_a_share_of_tract_area \n", "0 0.010391 \n", "1 0.034502 \n", "2 0.020427 \n", "3 0.000004 \n", "4 0.000073 \n", "... ... \n", "76317 NaN \n", "76318 NaN \n", "76319 NaN \n", "76320 NaN \n", "76321 NaN \n", "\n", "[76322 rows x 19 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Calculate overlap\n", "# Calculating the areas of the newly-created geometries\n", "gdf_joined[\"area_joined\"] = gdf_joined.area\n", "\n", "# Calculating the areas of the newly-created geometries as a share\n", "# of the area of the original census tracts\n", "gdf_joined[\"tribal_area_as_a_share_of_tract_area\"] = (\n", " gdf_joined[\"area_joined\"] / gdf_joined[\"area_tract\"]\n", ")\n", "gdf_joined" ] }, { "cell_type": "code", "execution_count": 9, "id": "042da05e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " GEOID10 landAreaName tribal_area_as_a_share_of_tract_area\n", "0 01051030800 Poarch Creek LAR 0.002467\n", "1 01053970400 Poarch Creek LAR 0.002367\n", "2 01053970500 Poarch Creek LAR 0.000682\n", "3 01101005408 Poarch Creek LAR 0.001391\n", "4 02130000100 Annette Island LAR 0.000038\n", "... ... ... 
...\n", "2585 56013940300 Wind River LAR 0.204039\n", "2586 56013940400 Wind River LAR 0.053289\n", "2587 56017967900 Wind River LAR 0.191189\n", "2588 56033000600 Crow LAR 0.000565\n", "2589 56035000102 Wind River LAR 0.000140\n", "\n", "[2590 rows x 3 columns]\n" ] } ], "source": [ "# Aggregating the results\n", "results = gdf_joined.groupby([\"GEOID10\", \"landAreaName\"]).agg(\n", " {\"tribal_area_as_a_share_of_tract_area\": \"sum\"}\n", ")\n", "\n", "results = results.reset_index()\n", "\n", "results.to_csv(\n", " \"~/Downloads/tribal_area_as_a_share_of_tract_area.csv\", index=False\n", ")\n", "\n", "# Printing results\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 10, "id": "34524a94", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Time taken to execute the function is 140.10310292243958\n" ] } ], "source": [ "end = time.time()\n", "\n", "print(\"Time taken to execute the ETL is\", end - begin)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }