mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
cleanup and add comments
This commit is contained in:
parent
6eeadf4636
commit
3503a94d61
7 changed files with 4020 additions and 427 deletions
|
@ -0,0 +1,229 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "17f525d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sodapy import Socrata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "47eab8ca",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Requests made without an app_token will be subject to strict throttling limits.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"# Unauthenticated client only works with public data sets. Note 'None'\n",
|
||||
"# in place of application token, and no username or password:\n",
|
||||
"client = Socrata(\"data.cdc.gov\", None)\n",
|
||||
"\n",
|
||||
"# Example authenticated client (needed for non-public datasets):\n",
|
||||
"# client = Socrata(\"data.cdc.gov\",\n",
|
||||
"# MyAppToken,\n",
|
||||
"# username=\"user@example.com\",\n",
|
||||
"# password=\"AFakePassword\")\n",
|
||||
"\n",
|
||||
"# First 2000 results, returned as JSON from API / converted to Python list of\n",
|
||||
"# dictionaries by sodapy.\n",
|
||||
"results = client.get(\"kn79-hsxy\", limit=2000)\n",
|
||||
"\n",
|
||||
"# Convert to pandas DataFrame\n",
|
||||
"results_df = pd.DataFrame.from_records(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "5872b77a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'data_as_of': '2021-12-08T00:00:00.000',\n",
|
||||
" 'start_week': '2020-01-01T00:00:00.000',\n",
|
||||
" 'end_week': '2021-12-04T00:00:00.000',\n",
|
||||
" 'state_name': 'AK',\n",
|
||||
" 'county_name': 'Aleutians East Borough',\n",
|
||||
" 'county_fips_code': '2013',\n",
|
||||
" 'urban_rural_code': 'Noncore',\n",
|
||||
" 'total_death': '15',\n",
|
||||
" 'footnote': 'One or more data cells have counts between 1-9 and have been suppressed in accordance with NCHS confidentiality standards.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "1ee41455",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>data_as_of</th>\n",
|
||||
" <th>start_week</th>\n",
|
||||
" <th>end_week</th>\n",
|
||||
" <th>county_name</th>\n",
|
||||
" <th>county_fips_code</th>\n",
|
||||
" <th>total_death</th>\n",
|
||||
" <th>covid_death</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2021-12-08T00:00:00.000</td>\n",
|
||||
" <td>2020-01-01T00:00:00.000</td>\n",
|
||||
" <td>2021-12-04T00:00:00.000</td>\n",
|
||||
" <td>Aleutians East Borough</td>\n",
|
||||
" <td>2013</td>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2021-12-08T00:00:00.000</td>\n",
|
||||
" <td>2020-01-01T00:00:00.000</td>\n",
|
||||
" <td>2021-12-04T00:00:00.000</td>\n",
|
||||
" <td>Anchorage Municipality</td>\n",
|
||||
" <td>2020</td>\n",
|
||||
" <td>4685</td>\n",
|
||||
" <td>510</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2021-12-08T00:00:00.000</td>\n",
|
||||
" <td>2020-01-01T00:00:00.000</td>\n",
|
||||
" <td>2021-12-04T00:00:00.000</td>\n",
|
||||
" <td>Bethel Census Area</td>\n",
|
||||
" <td>2050</td>\n",
|
||||
" <td>227</td>\n",
|
||||
" <td>26</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2021-12-08T00:00:00.000</td>\n",
|
||||
" <td>2020-01-01T00:00:00.000</td>\n",
|
||||
" <td>2021-12-04T00:00:00.000</td>\n",
|
||||
" <td>Denali Borough</td>\n",
|
||||
" <td>2068</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2021-12-08T00:00:00.000</td>\n",
|
||||
" <td>2020-01-01T00:00:00.000</td>\n",
|
||||
" <td>2021-12-04T00:00:00.000</td>\n",
|
||||
" <td>Dillingham Census Area</td>\n",
|
||||
" <td>2070</td>\n",
|
||||
" <td>56</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" data_as_of start_week end_week \\\n",
|
||||
"0 2021-12-08T00:00:00.000 2020-01-01T00:00:00.000 2021-12-04T00:00:00.000 \n",
|
||||
"1 2021-12-08T00:00:00.000 2020-01-01T00:00:00.000 2021-12-04T00:00:00.000 \n",
|
||||
"2 2021-12-08T00:00:00.000 2020-01-01T00:00:00.000 2021-12-04T00:00:00.000 \n",
|
||||
"3 2021-12-08T00:00:00.000 2020-01-01T00:00:00.000 2021-12-04T00:00:00.000 \n",
|
||||
"4 2021-12-08T00:00:00.000 2020-01-01T00:00:00.000 2021-12-04T00:00:00.000 \n",
|
||||
"\n",
|
||||
" county_name county_fips_code total_death covid_death \n",
|
||||
"0 Aleutians East Borough 2013 15 NaN \n",
|
||||
"1 Anchorage Municipality 2020 4685 510 \n",
|
||||
"2 Bethel Census Area 2050 227 26 \n",
|
||||
"3 Denali Borough 2068 12 NaN \n",
|
||||
"4 Dillingham Census Area 2070 56 NaN "
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results_df.head()[['data_as_of', 'start_week', \n",
|
||||
" 'end_week', 'county_name',\n",
|
||||
" 'county_fips_code', 'total_death', 'covid_death']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3b0ed244",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -0,0 +1,665 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "71c4acd0",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import collections\n",
|
||||
"import functools\n",
|
||||
"import IPython\n",
|
||||
"import itertools\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import pathlib\n",
|
||||
"import pypandoc\n",
|
||||
"import requests\n",
|
||||
"import string\n",
|
||||
"import sys\n",
|
||||
"import typing\n",
|
||||
"import us\n",
|
||||
"import zipfile\n",
|
||||
"\n",
|
||||
"from datetime import datetime\n",
|
||||
"from tqdm.notebook import tqdm_notebook\n",
|
||||
"\n",
|
||||
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
|
||||
"if module_path not in sys.path:\n",
|
||||
" sys.path.append(module_path)\n",
|
||||
"\n",
|
||||
"from data_pipeline.utils import remove_all_from_dir, get_excel_column_name\n",
|
||||
"from data_pipeline.etl.base import ExtractTransformLoad\n",
|
||||
"from data_pipeline.etl.sources.census.etl_utils import get_state_information\n",
|
||||
"from data_pipeline.etl.sources.ejscreen_areas_of_concern.etl import (\n",
|
||||
" EJSCREENAreasOfConcernETL,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from data_pipeline.score import field_names\n",
|
||||
"\n",
|
||||
"# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
|
||||
"tqdm_notebook.pandas()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "2ce3170c",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Suppress scientific notation in pandas (this shows up for census tract IDs)\n",
|
||||
"pd.options.display.float_format = \"{:.2f}\".format\n",
|
||||
"\n",
|
||||
"# Set some global parameters\n",
|
||||
"DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
|
||||
"TEMP_DATA_DIR = DATA_DIR / \"tmp\"\n",
|
||||
"COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\"\n",
|
||||
"\n",
|
||||
"# Make the dirs if they don't exist\n",
|
||||
"TEMP_DATA_DIR.mkdir(parents=True, exist_ok=True)\n",
|
||||
"COMPARISON_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"CEJST_PRIORITY_COMMUNITY_THRESHOLD = 0.75\n",
|
||||
"\n",
|
||||
"# Name fields using variables. (This makes it easy to reference the same fields frequently without using strings\n",
|
||||
"# and introducing the risk of misspelling the field name.)\n",
|
||||
"GEOID_STATE_FIELD_NAME = \"GEOID10_STATE\"\n",
|
||||
"COUNTRY_FIELD_NAME = \"Country\"\n",
|
||||
"\n",
|
||||
"# Define some suffixes\n",
|
||||
"POPULATION_SUFFIX = \" (priority population)\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "8bd39090",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>GEOID10_TRACT</th>\n",
|
||||
" <th>Total Population</th>\n",
|
||||
" <th>California County</th>\n",
|
||||
" <th>ZIP</th>\n",
|
||||
" <th>Nearby City \\n(to help approximate location only)</th>\n",
|
||||
" <th>Longitude</th>\n",
|
||||
" <th>Latitude</th>\n",
|
||||
" <th>calenviroscreen_score</th>\n",
|
||||
" <th>calenviroscreen_percentile</th>\n",
|
||||
" <th>DRAFT CES 4.0\\nPercentile Range</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>Poverty Pctl</th>\n",
|
||||
" <th>Unemployment</th>\n",
|
||||
" <th>Unemployment Pctl</th>\n",
|
||||
" <th>Housing Burden</th>\n",
|
||||
" <th>Housing Burden Pctl</th>\n",
|
||||
" <th>Pop. Char.</th>\n",
|
||||
" <th>Pop. Char. Score</th>\n",
|
||||
" <th>Pop. Char. Pctl</th>\n",
|
||||
" <th>calenviroscreen_priority_community</th>\n",
|
||||
" <th>GEOID10_STATE</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>06019001100</td>\n",
|
||||
" <td>2760</td>\n",
|
||||
" <td>Fresno</td>\n",
|
||||
" <td>93706</td>\n",
|
||||
" <td>Fresno</td>\n",
|
||||
" <td>-119.78</td>\n",
|
||||
" <td>36.71</td>\n",
|
||||
" <td>94.61</td>\n",
|
||||
" <td>100.00</td>\n",
|
||||
" <td>95-100% (highest scores)</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>98.43</td>\n",
|
||||
" <td>16.20</td>\n",
|
||||
" <td>97.15</td>\n",
|
||||
" <td>30.70</td>\n",
|
||||
" <td>90.61</td>\n",
|
||||
" <td>93.73</td>\n",
|
||||
" <td>9.72</td>\n",
|
||||
" <td>99.87</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>06</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>06077000700</td>\n",
|
||||
" <td>4177</td>\n",
|
||||
" <td>San Joaquin</td>\n",
|
||||
" <td>95206</td>\n",
|
||||
" <td>Stockton</td>\n",
|
||||
" <td>-121.29</td>\n",
|
||||
" <td>37.94</td>\n",
|
||||
" <td>90.83</td>\n",
|
||||
" <td>99.99</td>\n",
|
||||
" <td>95-100% (highest scores)</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>96.43</td>\n",
|
||||
" <td>18.50</td>\n",
|
||||
" <td>98.45</td>\n",
|
||||
" <td>35.20</td>\n",
|
||||
" <td>95.61</td>\n",
|
||||
" <td>93.40</td>\n",
|
||||
" <td>9.68</td>\n",
|
||||
" <td>99.84</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>06</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>06077000100</td>\n",
|
||||
" <td>4055</td>\n",
|
||||
" <td>San Joaquin</td>\n",
|
||||
" <td>95202</td>\n",
|
||||
" <td>Stockton</td>\n",
|
||||
" <td>-121.29</td>\n",
|
||||
" <td>37.95</td>\n",
|
||||
" <td>85.75</td>\n",
|
||||
" <td>99.97</td>\n",
|
||||
" <td>95-100% (highest scores)</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>99.50</td>\n",
|
||||
" <td>17.90</td>\n",
|
||||
" <td>98.17</td>\n",
|
||||
" <td>36.40</td>\n",
|
||||
" <td>96.51</td>\n",
|
||||
" <td>95.71</td>\n",
|
||||
" <td>9.92</td>\n",
|
||||
" <td>99.97</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>06</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>06071001600</td>\n",
|
||||
" <td>5527</td>\n",
|
||||
" <td>San Bernardino</td>\n",
|
||||
" <td>91761</td>\n",
|
||||
" <td>Ontario</td>\n",
|
||||
" <td>-117.62</td>\n",
|
||||
" <td>34.06</td>\n",
|
||||
" <td>83.56</td>\n",
|
||||
" <td>99.96</td>\n",
|
||||
" <td>95-100% (highest scores)</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>94.82</td>\n",
|
||||
" <td>6.70</td>\n",
|
||||
" <td>57.20</td>\n",
|
||||
" <td>32.10</td>\n",
|
||||
" <td>92.65</td>\n",
|
||||
" <td>80.59</td>\n",
|
||||
" <td>8.36</td>\n",
|
||||
" <td>93.06</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>06</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>06037204920</td>\n",
|
||||
" <td>2639</td>\n",
|
||||
" <td>Los Angeles</td>\n",
|
||||
" <td>90023</td>\n",
|
||||
" <td>Los Angeles</td>\n",
|
||||
" <td>-118.20</td>\n",
|
||||
" <td>34.02</td>\n",
|
||||
" <td>82.90</td>\n",
|
||||
" <td>99.95</td>\n",
|
||||
" <td>95-100% (highest scores)</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>93.51</td>\n",
|
||||
" <td>5.60</td>\n",
|
||||
" <td>43.81</td>\n",
|
||||
" <td>25.00</td>\n",
|
||||
" <td>77.95</td>\n",
|
||||
" <td>83.95</td>\n",
|
||||
" <td>8.70</td>\n",
|
||||
" <td>95.78</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>06</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5 rows × 60 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" GEOID10_TRACT Total Population California County ZIP \\\n",
|
||||
"0 06019001100 2760 Fresno 93706 \n",
|
||||
"1 06077000700 4177 San Joaquin 95206 \n",
|
||||
"2 06077000100 4055 San Joaquin 95202 \n",
|
||||
"3 06071001600 5527 San Bernardino 91761 \n",
|
||||
"4 06037204920 2639 Los Angeles 90023 \n",
|
||||
"\n",
|
||||
" Nearby City \\n(to help approximate location only) Longitude Latitude \\\n",
|
||||
"0 Fresno -119.78 36.71 \n",
|
||||
"1 Stockton -121.29 37.94 \n",
|
||||
"2 Stockton -121.29 37.95 \n",
|
||||
"3 Ontario -117.62 34.06 \n",
|
||||
"4 Los Angeles -118.20 34.02 \n",
|
||||
"\n",
|
||||
" calenviroscreen_score calenviroscreen_percentile \\\n",
|
||||
"0 94.61 100.00 \n",
|
||||
"1 90.83 99.99 \n",
|
||||
"2 85.75 99.97 \n",
|
||||
"3 83.56 99.96 \n",
|
||||
"4 82.90 99.95 \n",
|
||||
"\n",
|
||||
" DRAFT CES 4.0\\nPercentile Range ... Poverty Pctl Unemployment \\\n",
|
||||
"0 95-100% (highest scores) ... 98.43 16.20 \n",
|
||||
"1 95-100% (highest scores) ... 96.43 18.50 \n",
|
||||
"2 95-100% (highest scores) ... 99.50 17.90 \n",
|
||||
"3 95-100% (highest scores) ... 94.82 6.70 \n",
|
||||
"4 95-100% (highest scores) ... 93.51 5.60 \n",
|
||||
"\n",
|
||||
" Unemployment Pctl Housing Burden Housing Burden Pctl Pop. Char. \\\n",
|
||||
"0 97.15 30.70 90.61 93.73 \n",
|
||||
"1 98.45 35.20 95.61 93.40 \n",
|
||||
"2 98.17 36.40 96.51 95.71 \n",
|
||||
"3 57.20 32.10 92.65 80.59 \n",
|
||||
"4 43.81 25.00 77.95 83.95 \n",
|
||||
"\n",
|
||||
" Pop. Char. Score Pop. Char. Pctl calenviroscreen_priority_community \\\n",
|
||||
"0 9.72 99.87 True \n",
|
||||
"1 9.68 99.84 True \n",
|
||||
"2 9.92 99.97 True \n",
|
||||
"3 8.36 93.06 True \n",
|
||||
"4 8.70 95.78 True \n",
|
||||
"\n",
|
||||
" GEOID10_STATE \n",
|
||||
"0 06 \n",
|
||||
"1 06 \n",
|
||||
"2 06 \n",
|
||||
"3 06 \n",
|
||||
"4 06 \n",
|
||||
"\n",
|
||||
"[5 rows x 60 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load CalEnviroScreen 4.0 data (kept in the `cejst_df` variable)\n",
|
||||
"cal_environ_4_dot_0_data_path = DATA_DIR / \"dataset\" / \"calenviroscreen4\" / \"data06.csv\"\n",
|
||||
"cejst_df = pd.read_csv(\n",
|
||||
" cal_environ_4_dot_0_data_path,\n",
|
||||
" dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create the state ID by taking the first two digits of the FIPS CODE of the tract.\n",
|
||||
"# For more information, see https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html.\n",
|
||||
"cejst_df.loc[:, GEOID_STATE_FIELD_NAME] = (\n",
|
||||
" cejst_df.loc[:, ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]\n",
|
||||
" .astype(str)\n",
|
||||
" .str[0:2]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"cejst_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "3a54bd52",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(8035, 60)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cejst_df.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "869810a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hud_data_path = DATA_DIR / \"dataset\" / \"hud_housing\" / \"housing_burden.csv\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "d7d234cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_burden = pd.read_csv(\n",
|
||||
" hud_data_path,\n",
|
||||
" dtype= {\"FIPS_tract_id\": \"string\"},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "297cb1f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['state', 'FIPS_tract_id', 'hbrd_score', 'hbrd_rank'], dtype='object')"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"housing_burden.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "1435a822",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_burden = housing_burden[housing_burden.FIPS_tract_id.isin(list(cejst_df.GEOID10_TRACT.unique()))]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "dc5684ff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(8034, 4)"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"housing_burden.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "25a99131",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"housing_burden = housing_burden.sort_values(by='hbrd_rank', ascending = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "1f40b4c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cejst_df = cejst_df[['GEOID10_TRACT', 'Housing Burden', 'Housing Burden Pctl']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "af09b013",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>state</th>\n",
|
||||
" <th>FIPS_tract_id</th>\n",
|
||||
" <th>hbrd_score</th>\n",
|
||||
" <th>hbrd_rank</th>\n",
|
||||
" <th>GEOID10_TRACT</th>\n",
|
||||
" <th>Housing Burden</th>\n",
|
||||
" <th>Housing Burden Pctl</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06037575500</td>\n",
|
||||
" <td>0.88</td>\n",
|
||||
" <td>100.00</td>\n",
|
||||
" <td>06037575500</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06079010902</td>\n",
|
||||
" <td>0.72</td>\n",
|
||||
" <td>99.99</td>\n",
|
||||
" <td>06079010902</td>\n",
|
||||
" <td>64.60</td>\n",
|
||||
" <td>99.97</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06083002926</td>\n",
|
||||
" <td>0.70</td>\n",
|
||||
" <td>99.97</td>\n",
|
||||
" <td>06083002926</td>\n",
|
||||
" <td>66.10</td>\n",
|
||||
" <td>99.99</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06083002924</td>\n",
|
||||
" <td>0.69</td>\n",
|
||||
" <td>99.96</td>\n",
|
||||
" <td>06083002924</td>\n",
|
||||
" <td>63.90</td>\n",
|
||||
" <td>99.95</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06037980010</td>\n",
|
||||
" <td>0.69</td>\n",
|
||||
" <td>99.95</td>\n",
|
||||
" <td>06037980010</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8029</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06111007507</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>06111007507</td>\n",
|
||||
" <td>6.20</td>\n",
|
||||
" <td>2.59</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8030</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06111007513</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>06111007513</td>\n",
|
||||
" <td>7.40</td>\n",
|
||||
" <td>4.65</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8031</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06111007609</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>06111007609</td>\n",
|
||||
" <td>3.40</td>\n",
|
||||
" <td>0.34</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8032</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06111007610</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>06111007610</td>\n",
|
||||
" <td>9.60</td>\n",
|
||||
" <td>11.03</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8033</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>06111980000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>06111980000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>8034 rows × 7 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" state FIPS_tract_id hbrd_score hbrd_rank GEOID10_TRACT \\\n",
|
||||
"0 6 06037575500 0.88 100.00 06037575500 \n",
|
||||
"1 6 06079010902 0.72 99.99 06079010902 \n",
|
||||
"2 6 06083002926 0.70 99.97 06083002926 \n",
|
||||
"3 6 06083002924 0.69 99.96 06083002924 \n",
|
||||
"4 6 06037980010 0.69 99.95 06037980010 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"8029 6 06111007507 NaN NaN 06111007507 \n",
|
||||
"8030 6 06111007513 NaN NaN 06111007513 \n",
|
||||
"8031 6 06111007609 NaN NaN 06111007609 \n",
|
||||
"8032 6 06111007610 NaN NaN 06111007610 \n",
|
||||
"8033 6 06111980000 NaN NaN 06111980000 \n",
|
||||
"\n",
|
||||
" Housing Burden Housing Burden Pctl \n",
|
||||
"0 NaN NaN \n",
|
||||
"1 64.60 99.97 \n",
|
||||
"2 66.10 99.99 \n",
|
||||
"3 63.90 99.95 \n",
|
||||
"4 NaN NaN \n",
|
||||
"... ... ... \n",
|
||||
"8029 6.20 2.59 \n",
|
||||
"8030 7.40 4.65 \n",
|
||||
"8031 3.40 0.34 \n",
|
||||
"8032 9.60 11.03 \n",
|
||||
"8033 NaN NaN \n",
|
||||
"\n",
|
||||
"[8034 rows x 7 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"housing_burden.merge(cejst_df, left_on = \"FIPS_tract_id\", right_on = \"GEOID10_TRACT\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -1541,7 +1541,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.9.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -4192,7 +4192,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
"version": "3.9.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Add table
Reference in a new issue