{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "3ab8f7c1", "metadata": {}, "outputs": [], "source": [ "import csv\n", "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "8c22494f", "metadata": {}, "outputs": [], "source": [ "# Put the parent of the working directory on sys.path (once) so modules there can be imported.\n", "module_path = os.path.abspath(\"..\")\n", "if module_path not in sys.path:\n", "    sys.path.append(module_path)" ] }, { "cell_type": "code", "execution_count": 3, "id": "eb31e9a1", "metadata": {}, "outputs": [], "source": [ "# Every location below hangs off the data directory next to the working directory.\n", "DATA_PATH = Path.cwd().parent.joinpath(\"data\")\n", "TMP_PATH: Path = DATA_PATH.joinpath(\"tmp\")\n", "OUTPUT_PATH = DATA_PATH.joinpath(\"score\", \"csv\", \"tiles\")\n", "CENSUS_USA_CSV = DATA_PATH.joinpath(\"census\", \"csv\", \"us.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "95a5f8d8", "metadata": {}, "outputs": [], "source": [ "# The census file is read without a header row; the column is named GEOID10\n", "# and kept as text rather than parsed as a number.\n", "cbg_usa_df = pd.read_csv(\n", "    CENSUS_USA_CSV,\n", "    header=None,\n", "    names=[\"GEOID10\"],\n", "    dtype={\"GEOID10\": \"string\"},\n", "    low_memory=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "id": "bdd9ab60", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GEOID10
0100010414002
1100010415002
2100010417011
3100010417012
4100010422011
\n", "
" ], "text/plain": [ " GEOID10\n", "0 100010414002\n", "1 100010415002\n", "2 100010417011\n", "3 100010417012\n", "4 100010422011" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# First rows of the census block group list.\n", "cbg_usa_df.head(n=5)" ] }, { "cell_type": "code", "execution_count": 6, "id": "05a40080", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GEOID10 string\n", "dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Confirm the identifier column was loaded with the string dtype.\n", "cbg_usa_df.dtypes" ] }, { "cell_type": "code", "execution_count": 7, "id": "114af777", "metadata": {}, "outputs": [], "source": [ "# Load the score table; GEOID10 is kept as text so it has the same dtype as the census list.\n", "score_df = pd.read_csv(\n", "    OUTPUT_PATH.joinpath(\"usa.csv\"),\n", "    low_memory=False,\n", "    dtype={\"GEOID10\": \"string\"},\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "id": "d5f3ebd4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GEOID10Score E (percentile)Score E (top 25th percentile)GEOIDState AbbreviationCounty Name
01000104140020.808889True10001DEKent County
11000104150020.555160False10001DEKent County
21000104170110.272392False10001DEKent County
31000104170120.345686False10001DEKent County
41000104220110.472567False10001DEKent County
.....................
2202563400760200040.921941True34007NJCamden County
2202573400760170020.934490True34007NJCamden County
2202583400760150050.889613True34007NJCamden County
2202593400760910320.627822False34007NJCamden County
2202603400760530020.762237True34007NJCamden County
\n", "

220261 rows × 6 columns

\n", "
" ], "text/plain": [ " GEOID10 Score E (percentile) Score E (top 25th percentile) \\\n", "0 100010414002 0.808889 True \n", "1 100010415002 0.555160 False \n", "2 100010417011 0.272392 False \n", "3 100010417012 0.345686 False \n", "4 100010422011 0.472567 False \n", "... ... ... ... \n", "220256 340076020004 0.921941 True \n", "220257 340076017002 0.934490 True \n", "220258 340076015005 0.889613 True \n", "220259 340076091032 0.627822 False \n", "220260 340076053002 0.762237 True \n", "\n", " GEOID State Abbreviation County Name \n", "0 10001 DE Kent County \n", "1 10001 DE Kent County \n", "2 10001 DE Kent County \n", "3 10001 DE Kent County \n", "4 10001 DE Kent County \n", "... ... ... ... \n", "220256 34007 NJ Camden County \n", "220257 34007 NJ Camden County \n", "220258 34007 NJ Camden County \n", "220259 34007 NJ Camden County \n", "220260 34007 NJ Camden County \n", "\n", "[220261 rows x 6 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "score_df" ] }, { "cell_type": "code", "execution_count": 9, "id": "f84f9e1d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GEOID10 string\n", "Score E (percentile) float64\n", "Score E (top 25th percentile) bool\n", "GEOID int64\n", "State Abbreviation object\n", "County Name object\n", "dtype: object" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "score_df.dtypes" ] }, { "cell_type": "code", "execution_count": 10, "id": "8d61e29e", "metadata": {}, "outputs": [], "source": [ "# Left-join so every census block group is kept, whether or not it has a score row.\n", "# validate=\"one_to_one\" makes pandas raise a MergeError if GEOID10 repeats on either\n", "# side; without it a duplicated key would silently multiply rows and distort the\n", "# row counts inspected in the following cells.\n", "merged_df = cbg_usa_df.merge(\n", "    score_df,\n", "    on=\"GEOID10\",\n", "    how=\"left\",\n", "    validate=\"one_to_one\",\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "id": "7e8c2f2a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GEOID10Score E (percentile)Score E (top 25th percentile)GEOIDState AbbreviationCounty Name
01000104140020.808889True10001.0DEKent County
11000104150020.555160False10001.0DEKent County
21000104170110.272392False10001.0DEKent County
31000104170120.345686False10001.0DEKent County
41000104220110.472567False10001.0DEKent County
.....................
2203293400760200040.921941True34007.0NJCamden County
2203303400760170020.934490True34007.0NJCamden County
2203313400760150050.889613True34007.0NJCamden County
2203323400760910320.627822False34007.0NJCamden County
2203333400760530020.762237True34007.0NJCamden County
\n", "

220334 rows × 6 columns

\n", "
" ], "text/plain": [ " GEOID10 Score E (percentile) Score E (top 25th percentile) \\\n", "0 100010414002 0.808889 True \n", "1 100010415002 0.555160 False \n", "2 100010417011 0.272392 False \n", "3 100010417012 0.345686 False \n", "4 100010422011 0.472567 False \n", "... ... ... ... \n", "220329 340076020004 0.921941 True \n", "220330 340076017002 0.934490 True \n", "220331 340076015005 0.889613 True \n", "220332 340076091032 0.627822 False \n", "220333 340076053002 0.762237 True \n", "\n", " GEOID State Abbreviation County Name \n", "0 10001.0 DE Kent County \n", "1 10001.0 DE Kent County \n", "2 10001.0 DE Kent County \n", "3 10001.0 DE Kent County \n", "4 10001.0 DE Kent County \n", "... ... ... ... \n", "220329 34007.0 NJ Camden County \n", "220330 34007.0 NJ Camden County \n", "220331 34007.0 NJ Camden County \n", "220332 34007.0 NJ Camden County \n", "220333 34007.0 NJ Camden County \n", "\n", "[220334 rows x 6 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_df" ] }, { "cell_type": "code", "execution_count": 12, "id": "e81b1321", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GEOID10Score E (percentile)Score E (top 25th percentile)GEOIDState AbbreviationCounty Name
10614515150501002NaNNaNNaNNaNNaN
10615515150501003NaNNaNNaNNaNNaN
10627515150501001NaNNaNNaNNaNNaN
10628515150501005NaNNaNNaNNaNNaN
10629515150501004NaNNaNNaNNaNNaN
.....................
174140040190029031NaNNaNNaNNaNNaN
174143040190027012NaNNaNNaNNaNNaN
174184040190027011NaNNaNNaNNaNNaN
174242040194105021NaNNaNNaNNaNNaN
174243040194105011NaNNaNNaNNaNNaN
\n", "

73 rows × 6 columns

\n", "
" ], "text/plain": [ " GEOID10 Score E (percentile) Score E (top 25th percentile) \\\n", "10614 515150501002 NaN NaN \n", "10615 515150501003 NaN NaN \n", "10627 515150501001 NaN NaN \n", "10628 515150501005 NaN NaN \n", "10629 515150501004 NaN NaN \n", "... ... ... ... \n", "174140 040190029031 NaN NaN \n", "174143 040190027012 NaN NaN \n", "174184 040190027011 NaN NaN \n", "174242 040194105021 NaN NaN \n", "174243 040194105011 NaN NaN \n", "\n", " GEOID State Abbreviation County Name \n", "10614 NaN NaN NaN \n", "10615 NaN NaN NaN \n", "10627 NaN NaN NaN \n", "10628 NaN NaN NaN \n", "10629 NaN NaN NaN \n", "... ... ... ... \n", "174140 NaN NaN NaN \n", "174143 NaN NaN NaN \n", "174184 NaN NaN NaN \n", "174242 NaN NaN NaN \n", "174243 NaN NaN NaN \n", "\n", "[73 rows x 6 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows whose score percentile is missing after the left merge.\n", "merged_df.loc[merged_df[\"Score E (percentile)\"].isna()]" ] }, { "cell_type": "code", "execution_count": null, "id": "d1a7b71d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" } }, "nbformat": 4, "nbformat_minor": 5 }