j40-cejst-2/data/data-pipeline/data_pipeline/ipython/EJScreen Validate.ipynb
Lucas Merrill Brown b1a4d26be8
Adding persistent poverty tracts (#738)
* persistent poverty working

* fixing left-padding

* running black and adding persistent poverty to comp tool

* fixing bug

* running black and fixing linter

* fixing linter

* fixing linter error
2021-09-22 17:57:08 -04:00

1113 lines
37 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3ab8f7c1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import csv\n",
"from pathlib import Path\n",
"import os\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8c22494f",
"metadata": {},
"outputs": [],
"source": [
"# Add the parent of the current working directory to sys.path, at most once.\n",
"module_path = os.path.abspath(\"..\")\n",
"if module_path not in sys.path:\n",
"    sys.path.append(module_path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "eb31e9a1",
"metadata": {},
"outputs": [],
"source": [
"# All locations hang off the `data` directory in the parent of the current working directory.\n",
"DATA_PATH = Path.cwd().parent.joinpath(\"data\")\n",
"TMP_PATH: Path = DATA_PATH.joinpath(\"tmp\")\n",
"OUTPUT_PATH = DATA_PATH.joinpath(\"dataset\", \"ejscreen_2019\")\n",
"CENSUS_USA_CSV = DATA_PATH.joinpath(\"census\", \"csv\", \"us.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "95a5f8d8",
"metadata": {},
"outputs": [],
"source": [
"# header=None treats the first row as data; the column is labelled GEOID10 and kept as text.\n",
"cbg_usa_df = pd.read_csv(\n",
"    CENSUS_USA_CSV,\n",
"    header=None,\n",
"    names=[\"GEOID10\"],\n",
"    dtype={\"GEOID10\": \"string\"},\n",
"    low_memory=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bdd9ab60",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10\n",
"0 100010414002\n",
"1 100010415002\n",
"2 100010417011\n",
"3 100010417012\n",
"4 100010422011"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the census GEOID10 frame.\n",
"cbg_usa_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "05a40080",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GEOID10 string\n",
"dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check the dtype that read_csv assigned to GEOID10.\n",
"cbg_usa_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "114af777",
"metadata": {},
"outputs": [],
"source": [
"# Read the EJSCREEN `ID` column as text rather than letting pandas infer a numeric dtype.\n",
"ejscreen_csv_path = OUTPUT_PATH.joinpath(\"usa.csv\")\n",
"ejscreen_df = pd.read_csv(ejscreen_csv_path, low_memory=False, dtype={\"ID\": \"string\"})"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "4f070999",
"metadata": {},
"outputs": [],
"source": [
"# Rename EJSCREEN's `ID` column to `GEOID10` so it shares a key name with cbg_usa_df.\n",
"# Reassigning (instead of inplace=True) avoids mutating the frame loaded by the previous cell.\n",
"ejscreen_df = ejscreen_df.rename(columns={\"ID\": \"GEOID10\"})"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d5f3ebd4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>OBJECTID</th>\n",
" <th>GEOID10</th>\n",
" <th>STATE_NAME</th>\n",
" <th>ST_ABBREV</th>\n",
" <th>REGION</th>\n",
" <th>ACSTOTPOP</th>\n",
" <th>D_PM25_2</th>\n",
" <th>B_PM25_D2</th>\n",
" <th>P_PM25_D2</th>\n",
" <th>D_OZONE_2</th>\n",
" <th>...</th>\n",
" <th>T_PNPL</th>\n",
" <th>T_PNPL_D2</th>\n",
" <th>T_PRMP</th>\n",
" <th>T_PRMP_D2</th>\n",
" <th>T_PTSDF</th>\n",
" <th>T_PTSDF_D2</th>\n",
" <th>T_PWDIS</th>\n",
" <th>T_PWDIS_D2</th>\n",
" <th>Shape_Length</th>\n",
" <th>Shape_Area</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>010010201001</td>\n",
" <td>Alabama</td>\n",
" <td>AL</td>\n",
" <td>4</td>\n",
" <td>692</td>\n",
" <td>-1161.544049</td>\n",
" <td>5</td>\n",
" <td>43.0</td>\n",
" <td>-4661.186378</td>\n",
" <td>...</td>\n",
" <td>0.071 facilities/km distance (79%ile)</td>\n",
" <td>26%ile</td>\n",
" <td>0.085 facilities/km distance (24%ile)</td>\n",
" <td>47%ile</td>\n",
" <td>0.066 facilities/km distance (21%ile)</td>\n",
" <td>48%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>62%ile</td>\n",
" <td>13435.975560</td>\n",
" <td>6.026828e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>010010201002</td>\n",
" <td>Alabama</td>\n",
" <td>AL</td>\n",
" <td>4</td>\n",
" <td>1153</td>\n",
" <td>-2084.690717</td>\n",
" <td>4</td>\n",
" <td>31.0</td>\n",
" <td>-8365.702519</td>\n",
" <td>...</td>\n",
" <td>0.064 facilities/km distance (76%ile)</td>\n",
" <td>19%ile</td>\n",
" <td>0.074 facilities/km distance (18%ile)</td>\n",
" <td>41%ile</td>\n",
" <td>0.06 facilities/km distance (18%ile)</td>\n",
" <td>42%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>62%ile</td>\n",
" <td>11945.584679</td>\n",
" <td>7.848121e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>010010202001</td>\n",
" <td>Alabama</td>\n",
" <td>AL</td>\n",
" <td>4</td>\n",
" <td>1020</td>\n",
" <td>2641.389659</td>\n",
" <td>9</td>\n",
" <td>81.0</td>\n",
" <td>10550.793324</td>\n",
" <td>...</td>\n",
" <td>0.069 facilities/km distance (78%ile)</td>\n",
" <td>87%ile</td>\n",
" <td>0.078 facilities/km distance (20%ile)</td>\n",
" <td>71%ile</td>\n",
" <td>0.065 facilities/km distance (20%ile)</td>\n",
" <td>71%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>62%ile</td>\n",
" <td>7770.915121</td>\n",
" <td>2.900774e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>010010202002</td>\n",
" <td>Alabama</td>\n",
" <td>AL</td>\n",
" <td>4</td>\n",
" <td>1152</td>\n",
" <td>693.118534</td>\n",
" <td>7</td>\n",
" <td>65.0</td>\n",
" <td>2768.599617</td>\n",
" <td>...</td>\n",
" <td>0.076 facilities/km distance (81%ile)</td>\n",
" <td>75%ile</td>\n",
" <td>0.087 facilities/km distance (25%ile)</td>\n",
" <td>63%ile</td>\n",
" <td>0.07 facilities/km distance (23%ile)</td>\n",
" <td>63%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>62%ile</td>\n",
" <td>6506.804784</td>\n",
" <td>1.793332e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>010010203001</td>\n",
" <td>Alabama</td>\n",
" <td>AL</td>\n",
" <td>4</td>\n",
" <td>2555</td>\n",
" <td>1034.343525</td>\n",
" <td>7</td>\n",
" <td>68.0</td>\n",
" <td>4120.531837</td>\n",
" <td>...</td>\n",
" <td>0.074 facilities/km distance (80%ile)</td>\n",
" <td>79%ile</td>\n",
" <td>0.08 facilities/km distance (21%ile)</td>\n",
" <td>64%ile</td>\n",
" <td>0.07 facilities/km distance (23%ile)</td>\n",
" <td>65%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>62%ile</td>\n",
" <td>11070.367848</td>\n",
" <td>5.461602e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 128 columns</p>\n",
"</div>"
],
"text/plain": [
" OBJECTID GEOID10 STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
"0 1 010010201001 Alabama AL 4 692 \n",
"1 2 010010201002 Alabama AL 4 1153 \n",
"2 3 010010202001 Alabama AL 4 1020 \n",
"3 4 010010202002 Alabama AL 4 1152 \n",
"4 5 010010203001 Alabama AL 4 2555 \n",
"\n",
" D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n",
"0 -1161.544049 5 43.0 -4661.186378 ... \n",
"1 -2084.690717 4 31.0 -8365.702519 ... \n",
"2 2641.389659 9 81.0 10550.793324 ... \n",
"3 693.118534 7 65.0 2768.599617 ... \n",
"4 1034.343525 7 68.0 4120.531837 ... \n",
"\n",
" T_PNPL T_PNPL_D2 \\\n",
"0 0.071 facilities/km distance (79%ile) 26%ile \n",
"1 0.064 facilities/km distance (76%ile) 19%ile \n",
"2 0.069 facilities/km distance (78%ile) 87%ile \n",
"3 0.076 facilities/km distance (81%ile) 75%ile \n",
"4 0.074 facilities/km distance (80%ile) 79%ile \n",
"\n",
" T_PRMP T_PRMP_D2 \\\n",
"0 0.085 facilities/km distance (24%ile) 47%ile \n",
"1 0.074 facilities/km distance (18%ile) 41%ile \n",
"2 0.078 facilities/km distance (20%ile) 71%ile \n",
"3 0.087 facilities/km distance (25%ile) 63%ile \n",
"4 0.08 facilities/km distance (21%ile) 64%ile \n",
"\n",
" T_PTSDF T_PTSDF_D2 \\\n",
"0 0.066 facilities/km distance (21%ile) 48%ile \n",
"1 0.06 facilities/km distance (18%ile) 42%ile \n",
"2 0.065 facilities/km distance (20%ile) 71%ile \n",
"3 0.07 facilities/km distance (23%ile) 63%ile \n",
"4 0.07 facilities/km distance (23%ile) 65%ile \n",
"\n",
" T_PWDIS T_PWDIS_D2 \\\n",
"0 0 toxicity-weighted concentration/meters dista... 62%ile \n",
"1 0 toxicity-weighted concentration/meters dista... 62%ile \n",
"2 0 toxicity-weighted concentration/meters dista... 62%ile \n",
"3 0 toxicity-weighted concentration/meters dista... 62%ile \n",
"4 0 toxicity-weighted concentration/meters dista... 62%ile \n",
"\n",
" Shape_Length Shape_Area \n",
"0 13435.975560 6.026828e+06 \n",
"1 11945.584679 7.848121e+06 \n",
"2 7770.915121 2.900774e+06 \n",
"3 6506.804784 1.793332e+06 \n",
"4 11070.367848 5.461602e+06 \n",
"\n",
"[5 rows x 128 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the EJSCREEN frame after the ID -> GEOID10 rename.\n",
"ejscreen_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "f84f9e1d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OBJECTID int64\n",
"GEOID10 string\n",
"STATE_NAME object\n",
"ST_ABBREV object\n",
"REGION int64\n",
" ... \n",
"T_PTSDF_D2 object\n",
"T_PWDIS object\n",
"T_PWDIS_D2 object\n",
"Shape_Length float64\n",
"Shape_Area float64\n",
"Length: 128, dtype: object"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check the dtypes pandas assigned to the EJSCREEN columns (GEOID10 should be string).\n",
"ejscreen_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "8d61e29e",
"metadata": {},
"outputs": [],
"source": [
"# Left join: every census GEOID10 row is kept; EJSCREEN columns are null where no ID matches.\n",
"merged_df = pd.merge(\n",
"    cbg_usa_df,\n",
"    ejscreen_df,\n",
"    how=\"left\",\n",
"    on=\"GEOID10\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "7e8c2f2a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>OBJECTID</th>\n",
" <th>STATE_NAME</th>\n",
" <th>ST_ABBREV</th>\n",
" <th>REGION</th>\n",
" <th>ACSTOTPOP</th>\n",
" <th>D_PM25_2</th>\n",
" <th>B_PM25_D2</th>\n",
" <th>P_PM25_D2</th>\n",
" <th>D_OZONE_2</th>\n",
" <th>...</th>\n",
" <th>T_PNPL</th>\n",
" <th>T_PNPL_D2</th>\n",
" <th>T_PRMP</th>\n",
" <th>T_PRMP_D2</th>\n",
" <th>T_PTSDF</th>\n",
" <th>T_PTSDF_D2</th>\n",
" <th>T_PWDIS</th>\n",
" <th>T_PWDIS_D2</th>\n",
" <th>Shape_Length</th>\n",
" <th>Shape_Area</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" <td>39652.0</td>\n",
" <td>Delaware</td>\n",
" <td>DE</td>\n",
" <td>3.0</td>\n",
" <td>1187.0</td>\n",
" <td>3655.279721</td>\n",
" <td>10.0</td>\n",
" <td>90.0</td>\n",
" <td>22778.314495</td>\n",
" <td>...</td>\n",
" <td>1.7 facilities/km distance (99%ile)</td>\n",
" <td>100%ile</td>\n",
" <td>0.23 facilities/km distance (40%ile)</td>\n",
" <td>80%ile</td>\n",
" <td>1.6 facilities/km distance (63%ile)</td>\n",
" <td>87%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>69%ile</td>\n",
" <td>4866.135943</td>\n",
" <td>1.156165e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" <td>39654.0</td>\n",
" <td>Delaware</td>\n",
" <td>DE</td>\n",
" <td>3.0</td>\n",
" <td>1088.0</td>\n",
" <td>100.877666</td>\n",
" <td>7.0</td>\n",
" <td>65.0</td>\n",
" <td>629.604923</td>\n",
" <td>...</td>\n",
" <td>0.32 facilities/km distance (69%ile)</td>\n",
" <td>66%ile</td>\n",
" <td>0.14 facilities/km distance (20%ile)</td>\n",
" <td>64%ile</td>\n",
" <td>1 facilities/km distance (52%ile)</td>\n",
" <td>66%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>69%ile</td>\n",
" <td>7972.275657</td>\n",
" <td>2.821805e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" <td>39656.0</td>\n",
" <td>Delaware</td>\n",
" <td>DE</td>\n",
" <td>3.0</td>\n",
" <td>1554.0</td>\n",
" <td>-1256.221548</td>\n",
" <td>5.0</td>\n",
" <td>45.0</td>\n",
" <td>-7833.701886</td>\n",
" <td>...</td>\n",
" <td>0.21 facilities/km distance (52%ile)</td>\n",
" <td>31%ile</td>\n",
" <td>0.11 facilities/km distance (11%ile)</td>\n",
" <td>53%ile</td>\n",
" <td>1.3 facilities/km distance (58%ile)</td>\n",
" <td>22%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>69%ile</td>\n",
" <td>17643.717513</td>\n",
" <td>8.143206e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" <td>39657.0</td>\n",
" <td>Delaware</td>\n",
" <td>DE</td>\n",
" <td>3.0</td>\n",
" <td>4543.0</td>\n",
" <td>-2095.065215</td>\n",
" <td>4.0</td>\n",
" <td>32.0</td>\n",
" <td>-13064.667094</td>\n",
" <td>...</td>\n",
" <td>0.17 facilities/km distance (43%ile)</td>\n",
" <td>25%ile</td>\n",
" <td>0.1 facilities/km distance (7%ile)</td>\n",
" <td>48%ile</td>\n",
" <td>1.1 facilities/km distance (54%ile)</td>\n",
" <td>18%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>69%ile</td>\n",
" <td>15645.341219</td>\n",
" <td>9.723460e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" <td>39671.0</td>\n",
" <td>Delaware</td>\n",
" <td>DE</td>\n",
" <td>3.0</td>\n",
" <td>5153.0</td>\n",
" <td>-723.497337</td>\n",
" <td>6.0</td>\n",
" <td>53.0</td>\n",
" <td>-4534.212814</td>\n",
" <td>...</td>\n",
" <td>0.24 facilities/km distance (58%ile)</td>\n",
" <td>41%ile</td>\n",
" <td>0.11 facilities/km distance (8%ile)</td>\n",
" <td>58%ile</td>\n",
" <td>0.3 facilities/km distance (33%ile)</td>\n",
" <td>50%ile</td>\n",
" <td>0 toxicity-weighted concentration/meters dista...</td>\n",
" <td>69%ile</td>\n",
" <td>20959.959236</td>\n",
" <td>2.066192e+07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 128 columns</p>\n",
"</div>"
],
"text/plain": [
" GEOID10 OBJECTID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
"0 100010414002 39652.0 Delaware DE 3.0 1187.0 \n",
"1 100010415002 39654.0 Delaware DE 3.0 1088.0 \n",
"2 100010417011 39656.0 Delaware DE 3.0 1554.0 \n",
"3 100010417012 39657.0 Delaware DE 3.0 4543.0 \n",
"4 100010422011 39671.0 Delaware DE 3.0 5153.0 \n",
"\n",
" D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... \\\n",
"0 3655.279721 10.0 90.0 22778.314495 ... \n",
"1 100.877666 7.0 65.0 629.604923 ... \n",
"2 -1256.221548 5.0 45.0 -7833.701886 ... \n",
"3 -2095.065215 4.0 32.0 -13064.667094 ... \n",
"4 -723.497337 6.0 53.0 -4534.212814 ... \n",
"\n",
" T_PNPL T_PNPL_D2 \\\n",
"0 1.7 facilities/km distance (99%ile) 100%ile \n",
"1 0.32 facilities/km distance (69%ile) 66%ile \n",
"2 0.21 facilities/km distance (52%ile) 31%ile \n",
"3 0.17 facilities/km distance (43%ile) 25%ile \n",
"4 0.24 facilities/km distance (58%ile) 41%ile \n",
"\n",
" T_PRMP T_PRMP_D2 \\\n",
"0 0.23 facilities/km distance (40%ile) 80%ile \n",
"1 0.14 facilities/km distance (20%ile) 64%ile \n",
"2 0.11 facilities/km distance (11%ile) 53%ile \n",
"3 0.1 facilities/km distance (7%ile) 48%ile \n",
"4 0.11 facilities/km distance (8%ile) 58%ile \n",
"\n",
" T_PTSDF T_PTSDF_D2 \\\n",
"0 1.6 facilities/km distance (63%ile) 87%ile \n",
"1 1 facilities/km distance (52%ile) 66%ile \n",
"2 1.3 facilities/km distance (58%ile) 22%ile \n",
"3 1.1 facilities/km distance (54%ile) 18%ile \n",
"4 0.3 facilities/km distance (33%ile) 50%ile \n",
"\n",
" T_PWDIS T_PWDIS_D2 \\\n",
"0 0 toxicity-weighted concentration/meters dista... 69%ile \n",
"1 0 toxicity-weighted concentration/meters dista... 69%ile \n",
"2 0 toxicity-weighted concentration/meters dista... 69%ile \n",
"3 0 toxicity-weighted concentration/meters dista... 69%ile \n",
"4 0 toxicity-weighted concentration/meters dista... 69%ile \n",
"\n",
" Shape_Length Shape_Area \n",
"0 4866.135943 1.156165e+06 \n",
"1 7972.275657 2.821805e+06 \n",
"2 17643.717513 8.143206e+06 \n",
"3 15645.341219 9.723460e+06 \n",
"4 20959.959236 2.066192e+07 \n",
"\n",
"[5 rows x 128 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the result of the left join on GEOID10.\n",
"merged_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "e81b1321",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>OBJECTID</th>\n",
" <th>STATE_NAME</th>\n",
" <th>ST_ABBREV</th>\n",
" <th>REGION</th>\n",
" <th>ACSTOTPOP</th>\n",
" <th>D_PM25_2</th>\n",
" <th>B_PM25_D2</th>\n",
" <th>P_PM25_D2</th>\n",
" <th>D_OZONE_2</th>\n",
" <th>...</th>\n",
" <th>T_PNPL</th>\n",
" <th>T_PNPL_D2</th>\n",
" <th>T_PRMP</th>\n",
" <th>T_PRMP_D2</th>\n",
" <th>T_PTSDF</th>\n",
" <th>T_PTSDF_D2</th>\n",
" <th>T_PWDIS</th>\n",
" <th>T_PWDIS_D2</th>\n",
" <th>Shape_Length</th>\n",
" <th>Shape_Area</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10614</th>\n",
" <td>515150501002</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10615</th>\n",
" <td>515150501003</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10627</th>\n",
" <td>515150501001</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10628</th>\n",
" <td>515150501005</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10629</th>\n",
" <td>515150501004</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174140</th>\n",
" <td>040190029031</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174143</th>\n",
" <td>040190027012</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174184</th>\n",
" <td>040190027011</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174242</th>\n",
" <td>040194105021</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174243</th>\n",
" <td>040194105011</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>73 rows × 128 columns</p>\n",
"</div>"
],
"text/plain": [
" GEOID10 OBJECTID STATE_NAME ST_ABBREV REGION ACSTOTPOP \\\n",
"10614 515150501002 NaN NaN NaN NaN NaN \n",
"10615 515150501003 NaN NaN NaN NaN NaN \n",
"10627 515150501001 NaN NaN NaN NaN NaN \n",
"10628 515150501005 NaN NaN NaN NaN NaN \n",
"10629 515150501004 NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... ... \n",
"174140 040190029031 NaN NaN NaN NaN NaN \n",
"174143 040190027012 NaN NaN NaN NaN NaN \n",
"174184 040190027011 NaN NaN NaN NaN NaN \n",
"174242 040194105021 NaN NaN NaN NaN NaN \n",
"174243 040194105011 NaN NaN NaN NaN NaN \n",
"\n",
" D_PM25_2 B_PM25_D2 P_PM25_D2 D_OZONE_2 ... T_PNPL T_PNPL_D2 \\\n",
"10614 NaN NaN NaN NaN ... NaN NaN \n",
"10615 NaN NaN NaN NaN ... NaN NaN \n",
"10627 NaN NaN NaN NaN ... NaN NaN \n",
"10628 NaN NaN NaN NaN ... NaN NaN \n",
"10629 NaN NaN NaN NaN ... NaN NaN \n",
"... ... ... ... ... ... ... ... \n",
"174140 NaN NaN NaN NaN ... NaN NaN \n",
"174143 NaN NaN NaN NaN ... NaN NaN \n",
"174184 NaN NaN NaN NaN ... NaN NaN \n",
"174242 NaN NaN NaN NaN ... NaN NaN \n",
"174243 NaN NaN NaN NaN ... NaN NaN \n",
"\n",
" T_PRMP T_PRMP_D2 T_PTSDF T_PTSDF_D2 T_PWDIS T_PWDIS_D2 \\\n",
"10614 NaN NaN NaN NaN NaN NaN \n",
"10615 NaN NaN NaN NaN NaN NaN \n",
"10627 NaN NaN NaN NaN NaN NaN \n",
"10628 NaN NaN NaN NaN NaN NaN \n",
"10629 NaN NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... ... \n",
"174140 NaN NaN NaN NaN NaN NaN \n",
"174143 NaN NaN NaN NaN NaN NaN \n",
"174184 NaN NaN NaN NaN NaN NaN \n",
"174242 NaN NaN NaN NaN NaN NaN \n",
"174243 NaN NaN NaN NaN NaN NaN \n",
"\n",
" Shape_Length Shape_Area \n",
"10614 NaN NaN \n",
"10615 NaN NaN \n",
"10627 NaN NaN \n",
"10628 NaN NaN \n",
"10629 NaN NaN \n",
"... ... ... \n",
"174140 NaN NaN \n",
"174143 NaN NaN \n",
"174184 NaN NaN \n",
"174242 NaN NaN \n",
"174243 NaN NaN \n",
"\n",
"[73 rows x 128 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the merged rows whose Shape_Area is null (presumably census IDs with no EJSCREEN match).\n",
"missing_shape_area = merged_df[\"Shape_Area\"].isna()\n",
"merged_df.loc[missing_shape_area]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1a7b71d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}