diff --git a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_26_2011_relative_differences_between_methodologies-ranking-percentile.ipynb b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_26_2011_relative_differences_between_methodologies-ranking-percentile.ipynb new file mode 100644 index 00000000..41ec1ecb --- /dev/null +++ b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_26_2011_relative_differences_between_methodologies-ranking-percentile.ipynb @@ -0,0 +1,2549 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Methodology to address fundamental problem 1 itemized in Issue 1024 - follow-up compare tabulations and relative household burden. This time I extend the 12-11 notebook to look at how the percentile ranks affects the proportion of tracts considered as burdened versus the current methodology." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indicator reviewed: \n", + "\n", + "Socioeconomic Factors Indicator reviewed\n", + "* [Extreme Housing Burden](#housingburden)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ETL process for acquiring relevant tables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### NOTE: If you ran the ETL Process to acquire Table 8 in the other notebook of this draft PR you do not need to run the ETL cell block again" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'requests' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mCURRENT_DIRECTORY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetcwd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0mdownload_hud_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0mextract_zipped_download\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCURRENT_DIRECTORY\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/HUD_ZIPPED.csv\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCURRENT_DIRECTORY\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mup_one_directory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCURRENT_DIRECTORY\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/140/Table8.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mdownload_hud_dataset\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mDOWNLOAD_FILENAME\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"HUD_ZIPPED.csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mHOUSING_FTP_URL\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"https://www.huduser.gov/portal/datasets/cp/2014thru2018-140-csv.zip\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mHOUSING_FTP_URL\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverify\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus_code\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m200\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mfile_contents\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'requests' is not defined" + ] + } + ], + "source": [ + "# Copy and adapt certain sections of code from data_pipeline.utils \n", + "\n", + "def download_hud_dataset():\n", + " DOWNLOAD_FILENAME = \"HUD_ZIPPED.csv\"\n", + " HOUSING_FTP_URL = \"https://www.huduser.gov/portal/datasets/cp/2014thru2018-140-csv.zip\" \n", + " response = requests.get(HOUSING_FTP_URL, verify=True)\n", + " if response.status_code == 200:\n", + " file_contents = response.content\n", + " else:\n", + " sys.exit(\n", + " f\"HTTP response {response.status_code} from url {file_url}. Info: {response.content}\"\n", + " )\n", + "\n", + " # Write the contents to disk.\n", + " file = open(DOWNLOAD_FILENAME, \"wb\")\n", + " file.write(file_contents)\n", + " file.close()\n", + " \n", + "def extract_zipped_download(zip_file_path, unzipped_path):\n", + " with zipfile.ZipFile(zip_file_path, \"r\") as zip_ref:\n", + " zip_ref.extractall(unzipped_path)\n", + " # cleanup temporary file\n", + " os.remove(zip_file_path)\n", + " \n", + "def up_one_directory(path):\n", + " try:\n", + " # from Python 3.6\n", + " parent_dir = Path(path).parents[1]\n", + " # for Python 3.4/3.5, use str to convert the path to string\n", + " # parent_dir = str(Path(path).parents[1])\n", + " shutil.move(path, parent_dir)\n", + " except IndexError:\n", + " # no upper directory\n", + " pass\n", + "\n", + "CURRENT_DIRECTORY = os.getcwd()\n", + "download_hud_dataset()\n", + "extract_zipped_download(CURRENT_DIRECTORY + \"/HUD_ZIPPED.csv\", CURRENT_DIRECTORY) \n", + "up_one_directory(CURRENT_DIRECTORY + \"/140/Table8.csv\")\n", + "shutil.rmtree(\"./140/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extreme Housing Burden \n", + "\n", + "The Extreme Housing Burden indicator represents the proportion of low-income households that have to spend more than half their income on rent. These households experience higher levels of stress, report lower health, and may delay medical treatment because of its high cost.\n", + "\n", + "The Extreme Housing Burden indicator measures the percent of households in a census tract that are:\n", + "\n", + "1. Making less than 80% of the Area Median Family Income as determined by the Department of Housing and Urban Development (HUD), and\n", + "2. Paying greater than 50% of their income to housing costs. \n", + "\n", + "This data is sourced from the 2014-2018 Comprehensive Housing Affordability Strategy dataset from the Department of Housing and Urban Development (HUD) using the census tract geographic summary level, and contains cost burdens for households by percent HUD-adjusted median family income (HAMFI) category. This data can be found [here](https://www.huduser.gov/portal/datasets/cp.html). \n", + "\n", + "Because CHAS data is based on American Communities Survey (ACS) estimates, which come from a sample of the population, they may be unreliable if based on a small sample or population size.\n", + "\n", + "The standard error and relative standard error were used to evaluate the reliability of each estimate using CalEnviroScreen’s methodology. \n", + "\n", + "Census tract estimates that met either of the following criteria were considered reliable and included in the analysis [(CalEnviroScreen, 2017, page 129)](https://oehha.ca.gov/media/downloads/calenviroscreen/report/ces3report.pdf ):\n", + "\n", + "- Relative standard error less than 50 (meaning the standard error was less than half of the estimate), OR \n", + "- Standard error less than the mean standard error of all census tract estimates \n", + "\n", + "Formulas for calculating the standard error of sums, proportions, and ratio come from the [American Communities Survey Office](https://www2.census.gov/programs-surveys/acs/tech_docs/accuracy/MultiyearACSAccuracyofData2013.pdf).\n", + "\n", + "Note that this code creates a score and rank by state, for every state." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The relevant variables in table 8 of the CHAS dataset are the following (CHAS data dictionary available [here](https://www.huduser.gov/portal/datasets/cp/CHAS-data-dictionary-14-18.xlsx)):\n", + "\n", + "| Name | Label |\n", + "|---------|-----------------------------------------------------|\n", + "|T1_est1 | Total Occupied housing units | \n", + "|T8_est10 | Owner occupied less than or equal to 30% of HAMFI cost burden greater than 50% |\n", + "|T8_est23 |Owner occupied greater than 30% but less than or equal to 50% of HAMFI\tcost burden greater than 50%|\n", + "|T8_est36 |Owner occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tcost burden greater than 50%|\n", + "|T8_est76 | Renter occupied less than or equal to 30% of HAMFI cost burden greater than 50% |\n", + "|T8_est89 |Renter occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tcost burden greater than 50%|\n", + "|T8_est102|Renter occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tcost burden greater than 50%|\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Below I also propose an alternate means for ranking census tracts\n", + "### These steps are outlined and commented below" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/pandas/core/series.py:726: RuntimeWarning: invalid value encountered in sqrt\n", + " result = getattr(ufunc, method)(*inputs, **kwargs)\n", + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/pandas/core/indexing.py:670: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " iloc._setitem_with_indexer(indexer, value)\n" + ] + } + ], + "source": [ + "# Read in the data from https://www.huduser.gov/portal/datasets/cp.html\n", + "housing = pd.read_csv(\"Table8.csv\", \n", + " encoding = \"ISO-8859-1\", \n", + " dtype = {'Tract_ID': object, 'st': object, 'geoid': object})\n", + "\n", + "# Remove data for states that aren't included in the census (e.g. American Samoa, Guam, etc.):\n", + "housing.drop(housing.loc[housing['st'] == '72'].index, inplace = True)\n", + "\n", + "# Combine owner and renter occupied low-income households that make less than 80% of HAMFI into one variable\n", + "housing['summed'] = (housing['T8_est10'] + \n", + " housing['T8_est23'] + \n", + " housing['T8_est36'] + \n", + " housing['T8_est76'] + \n", + " housing['T8_est89'] + \n", + " housing['T8_est102'])\n", + "\n", + "# Create a variable for the standard error of the summed variables\n", + "housing['summed_se'] = np.sqrt((housing['T8_moe10'] / 1.645)**2 + \n", + " (housing['T8_moe23'] / 1.645)**2 + \n", + " (housing['T8_moe36'] / 1.645)**2 + \n", + " (housing['T8_moe76'] / 1.645)**2 + \n", + " (housing['T8_moe89'] / 1.645)**2 + \n", + " (housing['T8_moe102'] / 1.645)**2)\n", + "\n", + "# Remove the first 7 digits in the FIPS Census Tract ID \n", + "housing['geoid'] = housing['geoid'].str[-11:]\n", + "\n", + "# Find the estimate of the proportion of the population that is heavily rent burdened\n", + "housing['hbrd_score'] = housing['summed'] / housing['T8_est1']\n", + "\n", + "# Change rates where the population is 0 to nan\n", + "housing['hbrd_score'].replace(np.inf, np.nan, inplace = True)\n", + "\n", + "# Create function for calculating the standard error, using the proportions standard error formula\n", + "# if the value under the radical is negative, use the ratio standard error formula\n", + "def se_prop(x, y, se_x, moe_y): \n", + " se_y = moe_y / 1.645\n", + " test = se_x**2 - (((x**2)/(y**2))*((se_y)**2))\n", + " se = np.where(test < 0,\n", + " (1/y) * np.sqrt(se_x**2 + (((x**2)/(y**2))*(se_y**2))), \n", + " (1/y) * np.sqrt(se_x**2 - (((x**2)/(y**2))*(se_y**2))))\n", + " return se\n", + "\n", + "housing['se'] = se_prop(housing['summed'], housing['T8_est1'], housing['summed_se'], housing['T8_moe1'])\n", + "\n", + "# Calculate the relative standard error\n", + "housing['rse'] = housing['se'] / housing['hbrd_score']*100\n", + "\n", + "# Change infinite rse's where the housing burden is 0 to np.nan\n", + "housing['rse'].replace(np.inf, np.nan, inplace = True)\n", + "\n", + "# Calculate the mean standard error for each state\n", + "housing['mean_state_se'] = np.zeros(len(housing))\n", + "\n", + "for state in housing['st'].unique():\n", + " mean_se = np.mean(housing[housing['st'] == state]['se'])\n", + " housing['mean_state_se'].loc[housing['st'] == state] = mean_se\n", + " \n", + "# Find census tract estimates that meet both of the following criteria and are thus considered unreliable estimates: \n", + "# RSE less than 50 AND\n", + "# SE less than the mean state SE or housing burdened low income households\n", + "# Convert these scores to nan\n", + "housing.loc[(housing['rse'] >= 50) & (housing['rse'] >= housing['mean_state_se']), 'hbrd_score'] = np.nan\n", + "\n", + "# Rename columns\n", + "housing = housing.rename(columns = {'geoid' :'FIPS_tract_id',\n", + " 'st' : 'state'\n", + " })\n", + "\n", + "# Calculate percentile rank for census tracts with a score above 0, set percentile to 0 if score is 0, for each state\n", + "housing['hbrd_rank'] = housing[\n", + " housing['hbrd_score'] != 0][['hbrd_score',\n", + " 'state']].groupby('state').rank( \n", + " na_option = 'keep', \n", + " pct = True) * 100\n", + "\n", + "housing.loc[housing['hbrd_score'] == 0, 'hbrd_rank'] = 0\n", + "\n", + "# Create final housing burden df\n", + "housingburden = housing.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sourcesumlevelFIPS_tract_idnamestatecntytractT8_est1T8_est2T8_est3...T8_moe131T8_moe132T8_moe133summedsummed_sehbrd_scoresersemean_state_sehbrd_rank
02014thru201814001001020100Census Tract 201, Autauga County, Alabama0112010076557050...1212128031.7218070.1045750.04103239.2373140.03660446.298077
12014thru201814001001020200Census Tract 202, Autauga County, Alabama0112020072046565...12121213845.5318740.1916670.06161432.1466590.03660483.269231
22014thru201814001001020300Census Tract 203, Autauga County, Alabama01120300129584060...12121217053.7229210.1312740.04092731.1769990.03660463.653846
32014thru201814001001020400Census Tract 204, Autauga County, Alabama011204001640126015...12121214546.2885100.0884150.02782231.4673970.03660434.615385
42014thru201814001001020500Census Tract 205, Autauga County, Alabama0112050041752320175...171717595147.2216930.1425150.03476024.3901930.03660468.221154
\n", + "

5 rows Ă— 280 columns

\n", + "
" + ], + "text/plain": [ + " source sumlevel FIPS_tract_id \\\n", + "0 2014thru2018 140 01001020100 \n", + "1 2014thru2018 140 01001020200 \n", + "2 2014thru2018 140 01001020300 \n", + "3 2014thru2018 140 01001020400 \n", + "4 2014thru2018 140 01001020500 \n", + "\n", + " name state cnty tract T8_est1 \\\n", + "0 Census Tract 201, Autauga County, Alabama 01 1 20100 765 \n", + "1 Census Tract 202, Autauga County, Alabama 01 1 20200 720 \n", + "2 Census Tract 203, Autauga County, Alabama 01 1 20300 1295 \n", + "3 Census Tract 204, Autauga County, Alabama 01 1 20400 1640 \n", + "4 Census Tract 205, Autauga County, Alabama 01 1 20500 4175 \n", + "\n", + " T8_est2 T8_est3 ... T8_moe131 T8_moe132 T8_moe133 summed summed_se \\\n", + "0 570 50 ... 12 12 12 80 31.721807 \n", + "1 465 65 ... 12 12 12 138 45.531874 \n", + "2 840 60 ... 12 12 12 170 53.722921 \n", + "3 1260 15 ... 12 12 12 145 46.288510 \n", + "4 2320 175 ... 17 17 17 595 147.221693 \n", + "\n", + " hbrd_score se rse mean_state_se hbrd_rank \n", + "0 0.104575 0.041032 39.237314 0.036604 46.298077 \n", + "1 0.191667 0.061614 32.146659 0.036604 83.269231 \n", + "2 0.131274 0.040927 31.176999 0.036604 63.653846 \n", + "3 0.088415 0.027822 31.467397 0.036604 34.615385 \n", + "4 0.142515 0.034760 24.390193 0.036604 68.221154 \n", + "\n", + "[5 rows x 280 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "housingburden.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(73056, 280)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "housingburden.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### As desired we see a uniform distribution for the percentile rank for burdened households" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now we compute for a baseline comparison " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Owner occupied numerator fields\n", + "OWNER_OCCUPIED_NUMERATOR_FIELDS = [\n", + " # Column Name\n", + " # Line_Type\n", + " # Tenure\n", + " # Household income\n", + " # Cost burden\n", + " # Facilities\n", + " \"T8_est7\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # less than or equal to 30% of HAMFI\n", + " # greater than 30% but less than or equal to 50%\n", + " # All\n", + " \"T8_est10\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # less than or equal to 30% of HAMFI\n", + " # greater than 50%\n", + " # All\n", + " \"T8_est20\",\n", + " \n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 30% but less than or equal to 50% of HAMFI\n", + " # greater than 30% but less than or equal to 50%\n", + " # All\n", + " \"T8_est23\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 30% but less than or equal to 50% of HAMFI\n", + " # greater than 50%\n", + " # All\n", + " \"T8_est33\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 50% but less than or equal to 80% of HAMFI\n", + " # greater than 30% but less than or equal to 50%\n", + " # All\n", + " \"T8_est36\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 50% but less than or equal to 80% of HAMFI\n", + " # greater than 50%\n", + " # All\n", + "]\n", + "\n", + "# These rows have the values where HAMFI was not computed, b/c of no or negative income.\n", + "OWNER_OCCUPIED_NOT_COMPUTED_FIELDS = [\n", + " # Column Name\n", + " # Line_Type\n", + " # Tenure\n", + " # Household income\n", + " # Cost burden\n", + " # Facilities\n", + " \"T8_est13\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # less than or equal to 30% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est26\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 30% but less than or equal to 50% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est39\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 50% but less than or equal to 80% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est52\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 80% but less than or equal to 100% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est65\",\n", + " # Subtotal\n", + " # Owner occupied\n", + " # greater than 100% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + "]\n", + "\n", + "OWNER_OCCUPIED_POPULATION_FIELD = \"T8_est2\"\n", + "# Subtotal\n", + "# Owner occupied\n", + "# All\n", + "# All\n", + "# All\n", + "\n", + "OWNER_OCCUPIED_POPULATION_HAMFI_FIELD = \"T8_est3\"\n", + "# Subtotal\n", + "# Owner occupied \n", + "# All\n", + "# All\n", + "# All\n", + "\n", + "# Renter occupied numerator fields\n", + "RENTER_OCCUPIED_NUMERATOR_FIELDS = [\n", + " # Column Name\n", + " # Line_Type\n", + " # Tenure\n", + " # Household income\n", + " # Cost burden\n", + " # Facilities\n", + " \"T8_est73\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # less than or equal to 30% of HAMFI\n", + " # greater than 30% but less than or equal to 50%\n", + " # All\n", + " \"T8_est76\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # less than or equal to 30% of HAMFI\n", + " # greater than 50%\n", + " # All\n", + " \"T8_est86\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # greater than 30% but less than or equal to 50% of HAMFI\n", + " # greater than 30% but less than or equal to 50%\n", + " # All\n", + " \"T8_est89\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # greater than 30% but less than or equal to 50% of HAMFI\n", + " # greater than 50%\n", + " # All\n", + " \"T8_est99\",\n", + " # Subtotal\n", + " # Renter occupied\tgreater than 50% but less than or equal to 80% of HAMFI\n", + " # greater than 30% but less than or equal to 50%\n", + " # All\n", + " \"T8_est102\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # greater than 50% but less than or equal to 80% of HAMFI\n", + " # greater than 50%\n", + " # All\n", + "]\n", + "\n", + "# These rows have the values where HAMFI was not computed, b/c of no or negative income.\n", + "RENTER_OCCUPIED_NOT_COMPUTED_FIELDS = [\n", + " # Column Name\n", + " # Line_Type\n", + " # Tenure\n", + " # Household income\n", + " # Cost burden\n", + " # Facilities\n", + " \"T8_est79\",\n", + " # Subtotal\n", + " # Renter occupied\tless than or equal to 30% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est92\",\n", + " # Subtotal\n", + " # Renter occupied\tgreater than 30% but less than or equal to 50% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est105\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # greater than 50% but less than or equal to 80% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est118\",\n", + " # Subtotal\n", + " # Renter occupied\tgreater than 80% but less than or equal to 100% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + " \"T8_est131\",\n", + " # Subtotal\n", + " # Renter occupied\n", + " # greater than 100% of HAMFI\n", + " # not computed (no/negative income)\n", + " # All\n", + "]\n", + "\n", + "# T8_est68\tSubtotalRenter occupied\tAll\tAll\tAll\n", + "RENTER_OCCUPIED_POPULATION_FIELD = \"T8_est68\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "housingburden[\"current_summed_methodology\"] = housingburden[\n", + " OWNER_OCCUPIED_NUMERATOR_FIELDS\n", + "].sum(axis=1) + housingburden[RENTER_OCCUPIED_NUMERATOR_FIELDS].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "housingburden[\"current_methodology_denominator\"] = (\n", + " housingburden[OWNER_OCCUPIED_POPULATION_FIELD]\n", + " + housingburden[RENTER_OCCUPIED_POPULATION_FIELD]\n", + " - housingburden[OWNER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1)\n", + " - housingburden[RENTER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "housingburden[\"current_methodology_denominator_sans_not_computed\"] = (\n", + " housingburden[OWNER_OCCUPIED_POPULATION_FIELD]\n", + " + housingburden[RENTER_OCCUPIED_POPULATION_FIELD]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "housingburden[\"current_methodology_percent\"] = np.round(\n", + " (housingburden[\"current_summed_methodology\"] / housingburden[\"current_methodology_denominator\"] ), 2) * 100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now we construct the distribution of differences in the number of owned and rented burdened households\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Percentiles Comparison" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "final_df = housingburden[['FIPS_tract_id', 'state','hbrd_rank','hbrd_score', 'summed', \n", + " 'current_summed_methodology', 'T8_est1', \n", + " \"current_methodology_denominator_sans_not_computed\",\n", + " 'current_methodology_denominator', 'current_methodology_percent']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### First notice here that **T8_est1** and **current_methodology_denominator** should represent same or similar aggregates. In general, we cen see that the current computation performed results in a differerntial that undercounts the total occupied and rental households." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ], + "source": [ + "final_df[\"differences_aggregate_denominator\"] = (\n", + " final_df[\"current_methodology_denominator\"] - final_df[\"T8_est1\"] \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ], + "source": [ + "final_df[\"differences_aggregate_denominator_sans_not_computed\"] = (\n", + " final_df[\"current_methodology_denominator\"] - final_df[\"T8_est1\"] \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAuAAAAHwCAYAAADn4NoPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABLg0lEQVR4nO3dd5hkVZn48e/LMCCDhEHGRBAEDGBaRMSMoICuCrqo+HN3QFFcc84BFnQXXSO6oihIUEHEAEZkQcKqSM6IjqACEp0eogID7++Pc5q5U1PVXd3TdXu65vt5nnr61rnpvefeuvX2qXPvjcxEkiRJUjtWme4AJEmSpJWJCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZ9mEfGViPjoFC1r44i4PSJm1fenRsTrpmLZdXk/i4g9p2p5E1jvxyPi5oi4vs/pMyI2r8NL1W9EvDEibqj19KCIeEZE/KG+321AmzAtImK/iPjmdMehmS0i9oqI/5uiZd3/2VyZdNZhPd88cjpj6mV5zhsRsX1EXLMc656y78MJrnep74W219+WFe3ztzw5SkT8KSKe12Pcch2HbTEBH6B6gPw9Im6LiEUR8euI+PeIuL/eM/PfM/OAPpfV9WBrLOsvmfnAzLx3CmJf5iScmS/IzCOWd9kTjGNj4N3Alpn50InO36zfiJgNfBbYqdbT34D9gS/V9z+cwtBnNJP39kTE4RHx8bbmX971TWA9U90AMO45cKao55sr21jXVO+HqdLtH7t+vw+nOI5u3wsrpBV1X2pyTMAH78WZuRbwCOBA4P3AoVO9kohYdaqXuYLYGPhbZt44Bct6CPAA4NJG2SM63vdtiOtcDe7nlY/7fKXS7XthhRKF+dqwyUxfA3oBfwKe11G2LXAf8Lj6/nDg43V4feDHwCJgIXAG5Z+ko+o8fwduB94HbAIksDfwF+D0RtmqdXmnAv8FnAXcChwPrFfHbQ9c0y1eYBfgbuCeur4LG8t7XR1eBfgI8GfgRuBIYJ06bjSOPWtsNwMfHqOe1qnz31SX95G6/OfVbb6vxnF4j/nfC1wH/BV4bV335s36BR4F3FHH3Q6cAvyxo15Xr7EcWpd3bZ13Vl3WXsCvgM8Bf6vjVgc+XbfzBuArwBrNOqa04N9Yl/maRtxrAJ+p23wL8H+NebcDfk05Fi4Etm/MtxdwJXAbcBXw6h71sh9wHPCdOu15wBMb4x8OfK/W+1XA22r5MvsfeC5wcWPek4CzG+/PAHYba7mN4+YDte7/BhzLkmNyEyZ23PwzcD7l2L4a2K9j/Pxat38DPkrj81jr/ghgBLic8pm6pjHvnyj/LF8E3AWsOs4+2ZTyGbwN+F/gf4BvNsZ/F7i+7ufTga1q+T61nu+udf2j8eqwYxt7zf9Yyud1ESWxeMk404/uk9uAy4CXdhxv/1eHg3L831jr/WLquawjrk8A9wL/qOv5Ui1P4N+BP9TY/geIOm4zyufyb3XffwtYt45b5hzYZZ3bM/bnret5ZozP9uHAl4Gf1XX+Cngo8HnKcfM74J8ay++rDhv1sHndz7c3XncC2ZjutZTjcwQ4EXjEFO2HL1A+M7cC5wLPmsB54/7za5fvsO1Z+nPUtU4ox+c/amy3A4s6l1Xfvx5YQPk+PAF4eEccXY+lLvWwet1vf62vz9eyZb4Xusy7SR2/T533OuA94y27jrsceFFj2lUpx9/WfZznT63771eU4/5bnfuybvNnOuI9AXhnj3oY6/M31nf6Uvu1cY4cPZ9uC5xDOZ5uAD7bmG68bTygbuNtwC+A9RvjX0I5fy2q0z62x/rXoBw7I5Tj7L0sfRy+n/J9fhtwBbBjr++VNl/THsAwv+iSgNfyvwBvrMOHs+Tk9V+UBG52fT2r8eFYalksOSkcCaxZD8DRsmYCfi3wuDrN96hJQR8fqP1oJBCN5Y0m4K+lnBgfCTwQ+D5wVEdsX6txPZGSxDy2Rz0dSfnnYK067++BvXvF2THvLpQP/Og2fpsuCXhHXKv22kfAD4Cv1mU9mPLPyxvquL2AxcBbKSfSNShfgicA69X4fwT8VyP2xZRuLrOBF1K+YOfW8f9T63QDYBbwdMrJfANKEvBCyknx+fX9vBrXrcCj6zIeRk3mutTNfpRka/e6/vdQkrnZdbnnAh8DVqv78Upg5277v27rPyj/JM6udX5t3eY1KF8QD+pjuW8HzgQ2rNv6VeDoSR432wOPr+t8Qo1ptzpuS8qX1DNrHJ+udTF6fB8InAbMrbFcxLIJ+AXARjWWnvukTv+buo7V6jpv7ai/19a6Gv2yvqAx7nCWTjrGrMMu9dA5/2zKZ/NDdf4dKF88j+42fS17OSUZXAV4JSUpeVjjuB9NwHeusa1LSQIfOzpdl7hOpZ4vGmVJaWRYl/Lr1k3ALnXc5rVeV6cc66cDnx/vfNpxPIz1eRvrPLMXy362D6f8I/BkSgvpKZTPz3zK5/XjwC8nWoeNeti8yzZ8iyWfh13rfnxsjekjwK+naD/8K+XzuirlH5brgQeMd97oFjtjJ+B910mXZe1Q63/rekx8ETi9n2OpSx3sTznvPJhybP0aOKDX90LHvKPjj6acfx9f1/W8Ppb9MeBbjWX9M3B5HR7vnHIqJVfYqu6n2Z37kpL4/pUl/0iuTznmH9JjW8b6/I31nb7Ufu38PFLOf/9Whx8IbDeBbfwj5R+hNer7A+u40X+Onl+3/X01vtW6rP9ASiPQepRz9iWj8QKPpvyz+fDG/tys13mkzde0BzDML3on4GdSW/ZY+oSzP+ULotuJeallseSk8MguZc0E/MDG+C0pLV+z+vhA7cfYCfjJwJsa4x5NOWmv2ohjw8b4s4A9umzXrBrTlo2yNwCn1uFl4uyY/7CObXwUk0zAKT9F3kVtha5lr6J+yVK+MP7SGBeUE8RmjbKnAVc1Yv97x/pupLQIrFLHPbHLNr2feuJrlJ1IaRlek9Ia8C/NOHvUzX7AmY33q1Bab54FPLW5LXX8B4FvjLH/zwBeVuP/BaX1ehdK6/hFdZrxlns5jdYHyj8QEz5uemzv54HP1eGPUROZ+n5OPc5G9/VSCS3wOpZNwF/b5z7ZmJK8zWmM+2Zn/TXGrVu3c53OY7SfOuyyvM75n0VJqFZplB1N/YWgc/oey7wA2LVx3I8m4DtQEtftmsvvsYxT6Z6AP7Px/ljgAz3m3w04v9tntcf029P78zbeeWavLnV+OPC1xvu3UpOn+v7x1JbbidRhox4275j+/ZSkevRXsJ9R/0HIJZ/fOynd5pZrP3SZZoR6LmKM80a32BkjAZ9InXRZ1qHApxrjHkg5V2wyiWPpj8ALG+93Bv5UhzehvwT8MY2yTwGH9rHszSn//M6p778FfKyxv7ueUxr7bf8+PlOXA8+vw28BfjpG/fesM8b+Tl9mv7L0d+fpwH/QaL2ewDZ+pDHuTcDP6/BHgWM7jsNrqS3oHeu/ksY/X5RfK0YT8M0p54HnUf+JXFFe9imaHhtQflLr9N+U//B+ERFXRsQH+ljW1RMY/2fKf5Lr9xXl2B5el9dc9qqUJHZU864ld1JOoJ1GW1Q7l7XBBOLo3MbJekSN5bp60ewiSgvtgxvTNNc1j5LYnduY/ue1fNTfMnNx4/1oPaxPaVX7Y484Xj66zLrcZ1Jaju6gtCT9e43zJxHxmDG26f54M/M+yk/0D6/reHjHOj7E0vuv02mUE/Gz6/CpwHPq67RG7GMt9xHADxrjLqf8rDrR44aIeGpE/DIiboqIWyh1MnpsL3VcZOadlJYXuo2n++eoWdZzn9RlLazrWGbeiJgVEQdGxB8j4lbKFwf0/hxOZt80PRy4uu7vUWN+piJifkRc0Fjf47rFl5mnsOSn7xsj4pCIWLvPuEZ13b8R8ZCIOCYirq319M1uMYxjrM/beOeZbsfADY3hv3d5f/+x2W8ddhMRL6D8OrRbZv69Fj8C+EJjeQsp//RvsLz7ISLeExGXR8QtddnrdMTa67wxIctTJ3R8x2Tm7ZTPcHOf9XWu6FxWHZ7o9nR+z4zO33PZmbmAco57cUTMoXSp+HadbqxzSrd19nIE5RcN6t+jxpm+V531853ey96Uxq/fRcTZEfGiWt7PNvYVTz0Or6b7eaxnHlD3wTso/1jeWM8xEz6WB8EEvGUR8RTKAbTMbb0y87bMfHdmPpLyQX1XROw4OrrHInuVj9qoMbwx5T/amyktt3Macc1i6cRxvOX+lfLhai57MUt/QfXj5hpT57Ku7XP+61h2GyfrakoL+PqZuW59rZ2ZWzWmadbLzZQv4a0a06+Tmb2+BJpupnTp2KxHHEc1lrluZq6ZmQcCZOaJmfl8yknsd5QuG73cXzf1Ip4NKfvuakpLfXMda2XmC7ts56jOBPw0lk3Ax1vu1cALOsY/IDP73d9N36Z0/9koM9ehdN+KOu66uq2j274G5Sd3uo1n6WNoVLMOxton1wHr1S/Ybsv7f5TuBM+jJDqbjIbVZT2j6xqrDseKE8r+3ajjoq3mZ2qp6SPiEZRj6C3AgzJzXcpPuEEXmXlQZj6Z8ovaoyj9LfuJazz/Wed5fGauTUkmmjFMdHlN/ZxnJr38idZhx7yPpiRRr8jMzn8K39BxHKyRmb+Gye+HiHgW5ef8V1C656xLuTahGWuv8waUJKl5rHe9O1UfdTKh75iIWJPyGZ7MuaLb99Vfe0zbS+f3zOj84y37aMovqbsCl9WEEMY5z1edddStzr4J7BoRT6R0RfrhBLapaazv9DHzhcz8Q2a+itJY9UnguLq/+tnGvuKJiKDsg277f8w8IDO/nZnPrMvLGuO0MwFvSUSsXf8rPIby0/TFXaZ5UURsXg+0Wygtg6OtWDdQ+mZN1L9GxJY1OdgfOC7LbQp/DzwgIv653obpI5R+dqNuADYZ48rro4F3RsSmEfFAypfndzpan8ZVYzkW+ERErFVP2u+inFT6cSywV2Mb953I+jtiuY7SteIzdX+tEhGbRcRzekx/H+UL5nMR8WCAiNggInbuY133UbrPfDYiHl5bSZ8WEatTtv3FEbFzLX9AlPuablhbCXetJ7e7KP2c7xtjVU+OiJdFuavDO+o8Z1K6dtwWEe+PiDXqeh5X/0GE7vv/15SfJbcFzsrMSykntKdSfoKkj+V+hbKvH1Hra15E7DpeffWwFqXl+R8RsS0l0R11HKUOnx4Rq1FaP5oJxrHAByNibkRsQEkSxtJzn2TmnykXIO0XEatFxNOAF3fEeRel9W4O5bPS1PnZHq8OO3XO/1tKkvS+iJgdEdvXeI7pMf2alC+lmwAi4jWUlsplRMRTovzyMJvypfwPeh9/Ez1nrUU5nm+p+6QzoZzsOXAqzjPj6bsOm2qr9fGULomdjTJfoRyjW9Vp14mIl9fh5dkPa1ESq5uAVSPiY0Bn63mv8waUbiT/rx6Xu1D+Ae9mvDq5Adiwfj67ORp4TUQ8qZ4X/xP4bWb+qcf0Yzka+Eg936xP6aI20X3/0YiYU/fHaygXqfaz7GOAnYA3sqT1G8Y4p4wRwzKfgcy8Bjib0vL9vcYvKBM11nf6mPlCRPxrRMyr32uLavF9k9zGUccC/xwRO9Z1vptyHP66x7Sj5/MNKd3FRmN7dETsUI+hf7Dkxg7TzgR88H4UEbdR/hP8MOV+o6/pMe0WlDso3E65qOHLmfnLOu6/KB/yRRHxngms/yhKv7rrKV0e3gaQmbdQ+lt9nfIf5R2UnxlHfbf+/VtEnNdluYfVZZ9OuUDnHzQO+gl6a13/lZRfBr5dlz+uzPwZpe/vKZTuO6dMMoZR8ykXrl1G6Rd5HEv/XNbp/XW9Z0b52fx/KUlqP95DuXvB2ZSflz9J6c95NaW15EOUL6+rKcnIKvX1LkrrwELKl98bx1jH8ZQuKyPAvwEvy8x7akLyIuBJlP13M+VYWKfOt8z+r91fzgMuzcy76/jfAH/OepvIPpb7BUqr9S/q5+JMSgI/GW8C9q/L+RjlJEyN41LKcXUMpXXkdko/wLvqJPtTjverKPvsuMa4ZYyzTwBeTen/P3oHje80lnck5SfRaynH1Zks7VBgy/rZ/mEfddipc/67KQn3C+q8XwbmZ+bvekx/GeVuPL+hfME/nnJXgm7WpvzTOcKSO8z8d49pvwDsHhEjEXFQj2ma/oNywd0twE8oF4E1TfYcOGrS55nxTLAOm7amnC8+F+UhMLdHxO11mT+gnBOOqeeWSyj7FJZvP5xI6Sr3+zrvP1i2q0PX80Yd93bK8bWIctz/sNtK+6iTUyh3uLg+Im7uMv//UvoBf4/yGd4M2KPHNo7n45R/ki+inHPPq2UTcRrlXH8y8OnM/EU/y64NO7+hXGT/nUb5eOeUbnp9po6g1O943U/G0vM7vY98YRfg0nrsfoFy3c7fJ7mN1HVeQfkV7IuU89iLKbd1vrvL5P9BOZavojSiNethdcpFmjdT8qAHU66pmXajd9iQpKFVW3QWAVtk5lVdxr+R8qXRqzVvouv7DvC7zJz0LzKSpl9EbMKSu8BM6BfetkTEsymtzY9Ik7oZwxZwSUMpIl5cfzJek3KLwIupF0BGxMMi4hlRuhk9mvLz5g+WY11PidJdaZX6s/yuTL4vpiT1pXbPeDvwdZPvmcUEXNKw2pUlD8fYgtLCPfoFtRrlDje3UX4KP57SVWOyHkq5K8ztwEGU+/yfvxzLk6QxRcRjKb/sPYzSFVMzyMC6oETEYZR+jDdm5uMa5W8F3ky5wPAnmfm+Wv5Byq1s7qU89e3EWr4LpU/RLMp/eAfW8k0p/TsfRLl36r/16BskSZIkrTAG2QJ+OKVj/v0i4rmUVqknZrm126dr+ZaUiyu2qvN8uV4xO4tyn9MXUG619Ko6LZSLUz6XmZtTLhTZe4DbIkmSJE2JgSXgmXk6yz5s5o2UpxbeVae5sZbvChyTmXfVC6QWUG51ti2wIDOvrK3bx1DudxmUJ4EdV+c/gvLUNEmSJGmFtmrL63sU8KyI+ATlFjfvycyzKQ+mad6a6xqWPO3o6o7yp1K6nSxqXJHcnH4ZEbEP5dGkrLnmmk9+zGPGenCgJEmStHzOPffcmzNzXrdxbSfgqwLrAdsBTwGOjYhJPVhhIjLzEOAQgG222SbPOeecQa9SkiRJK7GI+HOvcW0n4NcA3693IjgrIu4D1qfc2L35GNENWfK40W7lfwPWjYhVayt4c3pJkiRphdX2bQh/CDwXICIeRbkV2M2UJ+PtERGr17ubbEF5HPPZwBb10airUS7UPKEm8L8Edq/L3ZNyGzFJkiRphTawFvCIOBrYHlg/Iq4B9qU86vSwiLgEuBvYsybTl0bEsZTHNC8G3lwfx0xEvIXy6NxZwGH1EdNQHgF+TER8HDif8nhlSZIkaYW20j2K3j7gkiRJGrSIODczt+k2zidhSpIkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUWrTncAkiRJ0vLITEZGRgCYO3cuETHNEY3NFnBJkiTNaCMjI8w/+GTmH3zy/Yn4iswWcEmSJM14s+esPd0h9M0WcEmSJKlFJuCSJElSi0zAJUmSpBaZgEuSJEktMgGXJEmSWmQCLkmSJLXIBFySJElqkQm4JEmS1CITcEmSJKlFJuCSJElSi0zAJUmSpBaZgEuSJEktMgGXJEmSWmQCLkmSJLXIBFySJElqkQm4JEmS1CITcEmSJKlFJuCSJElSi0zAJUmSpBaZgEuSJEktMgGXJEmSWmQCLkmSJLXIBFySJElqkQm4JEmS1CITcEmSJKlFJuCSJElSi0zAJUmSpBaZgEuSJEktGlgCHhGHRcSNEXFJl3HvjoiMiPXr+4iIgyJiQURcFBFbN6bdMyL+UF97NsqfHBEX13kOiogY1LZIkiRJU2WQLeCHA7t0FkbERsBOwF8axS8AtqivfYCD67TrAfsCTwW2BfaNiLl1noOB1zfmW2ZdkiRJ0opmYAl4Zp4OLOwy6nPA+4BslO0KHJnFmcC6EfEwYGfgpMxcmJkjwEnALnXc2pl5ZmYmcCSw26C2RZIkSZoqrfYBj4hdgWsz88KOURsAVzfeX1PLxiq/pku5JEmStEJbta0VRcQc4EOU7ietioh9KF1b2HjjjdtevSRJknS/NlvANwM2BS6MiD8BGwLnRcRDgWuBjRrTbljLxirfsEt5V5l5SGZuk5nbzJs3bwo2RZIkSZqc1hLwzLw4Mx+cmZtk5iaUbiNbZ+b1wAnA/Ho3lO2AWzLzOuBEYKeImFsvvtwJOLGOuzUitqt3P5kPHN/WtkiSJEmTNcjbEB4N/AZ4dERcExF7jzH5T4ErgQXA14A3AWTmQuAA4Oz62r+WUaf5ep3nj8DPBrEdkiRJ0lQaWB/wzHzVOOM3aQwn8OYe0x0GHNal/BzgccsXpSRJktQun4QpSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLBpaAR8RhEXFjRFzSKPvviPhdRFwUET+IiHUb4z4YEQsi4oqI2LlRvkstWxARH2iUbxoRv63l34mI1Qa1LZIkSdJUGWQL+OHALh1lJwGPy8wnAL8HPggQEVsCewBb1Xm+HBGzImIW8D/AC4AtgVfVaQE+CXwuMzcHRoC9B7gtkiRJ0pQYWAKemacDCzvKfpGZi+vbM4EN6/CuwDGZeVdmXgUsALatrwWZeWVm3g0cA+waEQHsABxX5z8C2G1Q2yJJkiRNlensA/5a4Gd1eAPg6sa4a2pZr/IHAYsayfxouSRJkrRCm5YEPCI+DCwGvtXS+vaJiHMi4pybbrqpjVVKkiRJXbWegEfEXsCLgFdnZtbia4GNGpNtWMt6lf8NWDciVu0o7yozD8nMbTJzm3nz5k3JdkiSJEmT0WoCHhG7AO8DXpKZdzZGnQDsERGrR8SmwBbAWcDZwBb1jierUS7UPKEm7r8Edq/z7wkc39Z2SJIkSZM1yNsQHg38Bnh0RFwTEXsDXwLWAk6KiAsi4isAmXkpcCxwGfBz4M2ZeW/t4/0W4ETgcuDYOi3A+4F3RcQCSp/wQwe1LZIkSdJUWXX8SSYnM1/VpbhnkpyZnwA+0aX8p8BPu5RfSblLiiRJkjRj+CRMSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaNG4CHhEv76dMkiRJ0vj6aQH/YJ9lkiRJksaxaq8REfEC4IXABhFxUGPU2sDiQQcmSZIkDaOeCTjwV+Ac4CXAuY3y24B3DjIoSZIkaVj1TMAz88KIuATYOTOPaDEmSZIkaWiN2Qc8M+8FNoqI1VqKR5IkSRpqY3VBGXUV8KuIOAG4Y7QwMz87sKgkSZKkIdVPAv7H+loFWGuw4UiSJEnDbdwEPDP/AyAiHljf3z7ooCRJkqRh1c+DeB4XEecDlwKXRsS5EbHV4EOTJEmShk8/D+I5BHhXZj4iMx8BvBv42mDDkiRJkoZTPwn4mpn5y9E3mXkqsObAIpIkSZKGWD8XYV4ZER8Fjqrv/xW4cnAhSZIkScOrnxbw1wLzgO/X17xaJkmSJGmC+rkLygjwtohYB7gvM28bfFiSJEnScOrnLihPiYiLgQuBiyPiwoh48uBDkyRJkoZPP11QDgXelJmbZOYmwJuBb4w3U0QcFhE3RsQljbL1IuKkiPhD/Tu3lkdEHBQRCyLioojYujHPnnX6P0TEno3yJ0fExXWegyIiJrDdkiRJ0rToJwG/NzPPGH2Tmf8HLO5jvsOBXTrKPgCcnJlbACfX9wAvALaor32Ag6Ek7MC+wFOBbYF9R5P2Os3rG/N1rkuSJEla4fSTgJ8WEV+NiO0j4jkR8WXg1IjYutlS3SkzTwcWdhTvChxRh48AdmuUH5nFmcC6EfEwYGfgpMxcWPuinwTsUsetnZlnZmYCRzaWJUmSJK2w+rkN4RPr3307yv8JSGCHCazvIZl5XR2+HnhIHd4AuLox3TW1bKzya7qUS5IkSSu0fu6C8txBrDgzMyJyEMvuFBH7ULq2sPHGG7exSkmSJKmrfu6C8qB6keN5EXFuRHwhIh40yfXdULuPUP/eWMuvBTZqTLdhLRurfMMu5V1l5iGZuU1mbjNv3rxJhi5JkiQtv376gB8D3AT8C7B7Hf7OJNd3AjB6J5M9geMb5fPr3VC2A26pXVVOBHaKiLn14sudgBPruFsjYrt695P5jWVJkiRJK6x++oA/LDMPaLz/eES8cryZIuJoYHtg/Yi4htKH/EDg2IjYG/gz8Io6+U+BFwILgDuB1wBk5sKIOAA4u063f2aOXtj5JsqdVtYAflZfkiRJ0gqtnwT8FxGxB3Bsfb87pWV6TJn5qh6jduwybVLuL95tOYcBh3UpPwd43HhxSJIkSSuSfrqgvB74NnB3fR0DvCEibouIWwcZnCRJkjRs+rkLylptBCJJkiStDMZNwCPi2d3K64N2JEmSJE1AP33A39sYfgDlkfDnMrEH8EiSJEmivy4oL26+j4iNgM8PKiBJkiRpmPVzEWana4DHTnUgkiRJ0sqgnz7gXwRGHxm/CvAk4LwBxiRJkiQNrX76gJ/TGF4MHJ2ZvxpQPJIkSdJQ66cP+BEAETGb8uCbawcdlCRJkjSsevYBj4ivRMRWdXgd4ELgSOD8iOj1lEtJkiRJYxjrIsxnZealdfg1wO8z8/HAk4H3DTwySZIkaQiNlYDf3Rh+PvBDgMy8fpABSZIkScNsrAR8UUS8KCL+CXgG8HOAiFgVWKON4CRJkqRhM9ZFmG8ADgIeCryj0fK9I/CTQQcmSZIkDaOeCXhm/h7YpUv5icCJgwxKkiRJGlaTeRKmJEmSpEkyAZckSZJaZAIuSZIktWjcJ2FGxOrAvwCbNKfPzP0HF5YkSZI0nMZNwIHjgVuAc4G7BhuOJEmSNNz6ScA3zMxl7oYiSZIkaeL66QP+64h4/MAjkSRJklYC/bSAPxPYKyKuonRBCSAz8wkDjUySJEkaQv0k4C8YeBSSJEnSSqJnAh4Ra2fmrcBtLcYjSZIkDbWxWsC/DbyIcveTpHQ9GZXAIwcYlyRJkjSUeibgmfmi+nfT9sKRJEmShptPwpQkSZJaZAIuSZIktcgEXJIkSWrRuAl4RHwmIrZqIxhJkiRp2PXTAn45cEhE/DYi/j0i1hl0UJIkSdKwGjcBz8yvZ+YzgPnAJsBFEfHtiHjuoIOTJEmShk1ffcAjYhbwmPq6GbgQeFdEHDPA2CRJkqShM+6j6CPic5QH8pwC/GdmnlVHfTIirhhkcJIkSdKwGTcBBy4CPpKZd3QZt+0UxyNJkiQNtX66oCyikahHxLoRsRtAZt4ymLAkSZKk4dRPAr5vM9HOzEXAvgOLSJIkSRpi/STg3abpp+uKJEmSpA79JODnRMRnI2Kz+voscO6gA5MkSZKGUT8J+FuBu4Hv1NddwJsHGZQkSZI0rMbtSlLvfvKBFmKRJEmShl4/9wF/FPAeylMw758+M3cYXFiSJEnScOrnYsrvAl8Bvg7cO9hwJEmSpOHWTwK+ODMPHngkkiRJ0kqgn4swfxQRb4qIh0XEeqOvgUcmSZIkDaF+EvA9gfcCv6bcfvBc4JzlWWlEvDMiLo2ISyLi6Ih4QERsGhG/jYgFEfGdiFitTrt6fb+gjt+ksZwP1vIrImLn5YlJkiRJasO4CXhmbtrl9cjJrjAiNgDeBmyTmY8DZgF7AJ8EPpeZmwMjwN51lr2BkVr+uTodEbFlnW8rYBfgyxExa7JxSZIkSW0YNwGPiDkR8ZGIOKS+3yIiXrSc610VWCMiVgXmANcBOwDH1fFHALvV4V3re+r4HSMiavkxmXlXZl4FLAC2Xc64JEmSpIHqpwvKNygP4nl6fX8t8PHJrjAzrwU+DfyFknjfQunWsigzF9fJrgE2qMMbAFfXeRfX6R/ULO8yjyRJkrRC6icB3ywzPwXcA5CZdwIx2RVGxFxK6/WmwMOBNSldSAYmIvaJiHMi4pybbrppkKuSJEmSxtRPAn53RKwBJEBEbEZ5HP1kPQ+4KjNvysx7gO8DzwDWrV1SADaktLRT/25U170qsA7wt2Z5l3mWkpmHZOY2mbnNvHnzliN0SZIkafn0k4DvC/wc2CgivgWcDLxvOdb5F2C72rc8gB2By4BfArvXafYEjq/DJ9T31PGnZGbW8j3qXVI2BbYAzlqOuCRJkqSBG/dBPJl5UkScB2xH6Xry9sy8ebIrzMzfRsRxwHnAYuB84BDgJ8AxEfHxWnZoneVQ4KiIWAAspNz5hMy8NCKOpSTvi4E3Z6ZP6pQkSdIKbdwEPCKeXQdvq3+3jAgy8/TJrjQz96W0rDddSZe7mGTmP4CX91jOJ4BPTDYOSZIkqW39PIr+vY3hB1CS5HMptw2UJEmSNAH9dEF5cfN9RGwEfH5QAUmSJEnDrJ+LMDtdAzx2qgORJEmSVgb99AH/IvUWhJSE/UmUCyglSZIkTVA/fcDPaQwvBo7OzF8NKB5JkiRpqPXTB/yINgKRJEmSVgb9dEG5mCVdUJYaBWRmPmHKo5IkSZKGVD9dUH5W/x5V/766/j146sORJEmSxpaZjIyMMHfuXMqD1WeWfu6C8vzMfF9mXlxfHwB2ysw/Z+afBx2gJEmS1DQyMsIen/4hIyMj0x3KpPSTgEdEPKPx5ul9zidJkiQNxOw5a013CJPWTxeUvYHDImKd+n4R8NqBRSRJkiQNsX7ugnIu8MTRBDwzbxl4VJIkSdKQGrcrSUQ8JCIOBY7JzFsiYsuI2LuF2CRJkqSh009f7sOBE4GH1/e/B94xoHgkSZKkodZPAr5+Zh4L3AeQmYuBewcalSRJkjSk+knA74iIB1EfxhMR2wH2A5ckSZImoZ+7oLwLOAHYLCJ+BcwDdh9oVJIkSdKQGjMBj4hZwHPq69GUx89fkZn3tBCbJEmSNHTG7IKSmfcCr8rMxZl5aWZeYvItSZIkTV4/XVB+FRFfAr4D3DFamJnnDSwqSZIkaUj1k4A/qf7dv1GWwA5THo0kSZI05Hom4BHxlsz8UmY+NyK2ysxL2wxMkiRJGkZj9QF/bWP4qEEHIkmSJK0M+rkPOJS7n0iSJElaTmP1AV83Il5KSdLXjoiXNUdm5vcHGpkkSZI0hMZKwE8DXlKHTwde3BiXgAm4JEmSNEE9E/DMfE2bgUiSJEkrg35uQyhJkiS1LjMZGRkBYO7cuUQMx2WJ/V6EKUmSJLVqZGSE+QefzPyDT74/ER8GtoBLkiRphTV7ztrTHcKUG7cFPCLmRMRHI+Jr9f0WEfGiwYcmSZIkDZ9+uqB8A7gLeFp9fy3w8YFFJEmSJA2xfhLwzTLzU8A9AJl5Jz6YR5IkSZqUfhLwuyNiDcq9v4mIzSgt4pIkSZImqJ+LMPcDfg5sFBHfAp4B7DXAmCRJkqShNW4Cnpm/iIhzge0oXU/enpk3DzwySZIkaQiNm4BHxI+AbwMnZOYdgw9JkiRJGl799AH/NPAs4LKIOC4ido+IBww4LkmSJGko9dMF5TTgtIiYBewAvB44DBi+u6JLkiRJA9bXkzDrXVBeDLwS2Bo4YpBBSZIkScOqnz7gxwLbUu6E8iXgtMy8b9CBSZIkScOonxbwQ4FXZea9gw5GkiRJGnY9E/CI2CEzTwHWBHaNWPrhl5n5/QHHJkmSJA2dsVrAnwOcQun73SkBE3BJkiRpgnom4Jm5bx3cPzOvao6LiE0HGpUkSZI0pPq5D/j3upQdtzwrjYh16z3FfxcRl0fE0yJivYg4KSL+UP/OrdNGRBwUEQsi4qKI2LqxnD3r9H+IiD2XJyZJkiSpDWP1AX8MsBWwTkS8rDFqbWB5H8TzBeDnmbl7RKwGzAE+BJycmQdGxAeADwDvB14AbFFfTwUOBp4aEesB+wLbULrEnBsRJ2TmyHLGJkmSJA3MWH3AHw28CFiXpfuB30Z5GM+kRMQ6wLOBvQAy827g7ojYFdi+TnYEcColAd8VODIzEziztp4/rE57UmYurMs9CdgFOHqysUmSJEmDNlYf8OOB4yPiaZn5mylc56bATcA3IuKJwLnA24GHZOZ1dZrrgYfU4Q2AqxvzX1PLepVLkiRJK6x+7gN+fkS8mdId5f6uJ5n52uVY59bAWzPztxHxBUp3k/tlZkZETnL5y4iIfYB9ADbeeOOpWqwkSZI0Yf1chHkU8FBgZ+A0YENKN5TJuga4JjN/W98fR0nIb6hdS6h/b6zjrwU2asy/YS3rVb6MzDwkM7fJzG3mzZu3HKFLkiRJy6efBHzzzPwocEdmHgH8M+ViyEnJzOuBqyPi0bVoR+Ay4ARg9E4mewLH1+ETgPn1bijbAbfUrionAjtFxNx6x5SdapkkSZK0wuqnC8o99e+iiHgcpX/2g5dzvW8FvlXvgHIl8BrKPwPHRsTewJ+BV9Rpfwq8EFgA3FmnJTMXRsQBwNl1uv1HL8iUJEmSVlT9JOCH1Bbmj1Jaox8IfGx5VpqZF1BuH9hpxy7TJvDmHss5DDhseWKRJEmS2jRuAp6ZX6+DpwGPHGw4kiRJ0nAb60E87xprxsz87NSHI0mSJA23sVrA12otCkmSJGklMdaDeP6jzUAkSZKklcG4tyGMiEdFxMkRcUl9/4SI+MjgQ5MkSZKGTz/3Af8a8EHq7Qgz8yJgj0EGJUmSpJVPZrJw4UIWLlxIuRHecOrnNoRzMvOsiGiWLR5QPJIkSVpJjYyMMP/gkwE48o3L3J16aPSTgN8cEZsBCRARuwPXDTQqSZIkrZRmz1l7ukMYuH4S8DcDhwCPiYhrgauAVw80KkmSJGlIjdsHPDOvzMznAfOAxwDPAZ456MAkSZI03Eb7fA9zf+9ueibgEbF2RHwwIr4UEc8H7gT2BBYAr2grQEmSJA2nkZER9vj0DxkZGZnuUFo1VheUo4AR4DfA64EPAwG8NDMvGHxokiRJGnaz56x8z34cKwF/ZGY+HiAivk658HLjzPxHK5FJkiRJQ2isPuD3jA5k5r3ANSbfkiRJ0vIZqwX8iRFxax0OYI36PoDMzOG/R4wkSZI0xXom4Jk5q81AJEmSpJVBP4+ilyRJkjRFTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktWjaEvCImBUR50fEj+v7TSPitxGxICK+ExGr1fLV6/sFdfwmjWV8sJZfERE7T9OmSJIkSX2bzhbwtwOXN95/EvhcZm4OjAB71/K9gZFa/rk6HRGxJbAHsBWwC/DliJjVUuySJEnSpExLAh4RGwL/DHy9vg9gB+C4OskRwG51eNf6njp+xzr9rsAxmXlXZl4FLAC2bWUDJEmSpEmarhbwzwPvA+6r7x8ELMrMxfX9NcAGdXgD4GqAOv6WOv395V3mkSRJklZIrSfgEfEi4MbMPLfFde4TEedExDk33XRTW6uVJEmSljEdLeDPAF4SEX8CjqF0PfkCsG5ErFqn2RC4tg5fC2wEUMevA/ytWd5lnqVk5iGZuU1mbjNv3ryp3RpJkiRpAlpPwDPzg5m5YWZuQrmI8pTMfDXwS2D3OtmewPF1+IT6njr+lMzMWr5HvUvKpsAWwFktbYYkSZI0KauOP0lr3g8cExEfB84HDq3lhwJHRcQCYCElaSczL42IY4HLgMXAmzPz3vbDliRJkvo3rQl4Zp4KnFqHr6TLXUwy8x/Ay3vM/wngE4OLUJIkSZpaPglTkiRJapEJuCRJktQiE3BJkiSpRSbgkiRJUotMwCVJkqQWmYBLkiRJLTIBlyRJklpkAi5JkiS1aEV6EqYkSZKGUGYyMjICwNy5c4mIaY5oetkCLkmSpIEaGRlh/sEnM//gk+9PxFdmtoBLkiRp4GbPWXu6Q1hh2AIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUVehClJkqQp1XnbQS3NBFySJElTavS2gwBHvnHHaY5mxWMCLkmSpCnnbQd7sw+4JEmS1CITcEmSJKlFJuCSJElSi0zAJUmSpBaZgEuSJGm5ZCYLFy4kM6c7lBnBBFySJEnLZWRkhD0+/cP77/2tsZmAS5IkabnNnrPWdIcwY5iAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIktcgEXJIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJatOp0ByBJkqSZITMZGRkBYO7cuUTENEfU3Yoepy3gkiRJ6svIyAjzDz6Z+QeffH+CuyJa0eO0BVySJEl9mz1n7ekOoS8rcpytt4BHxEYR8cuIuCwiLo2It9fy9SLipIj4Q/07t5ZHRBwUEQsi4qKI2LqxrD3r9H+IiD3b3hZJkiRpoqajC8pi4N2ZuSWwHfDmiNgS+ABwcmZuAZxc3wO8ANiivvYBDoaSsAP7Ak8FtgX2HU3aJUmSpBVV6wl4Zl6XmefV4duAy4ENgF2BI+pkRwC71eFdgSOzOBNYNyIeBuwMnJSZCzNzBDgJ2KW9LZEkSZImblovwoyITYB/An4LPCQzr6ujrgceUoc3AK5uzHZNLetV3m09+0TEORFxzk033TR1GyBJkjTEMpOFCxeycOFCMnO6wxka05aAR8QDge8B78jMW5vjsuzhKdvLmXlIZm6TmdvMmzdvqhYrSZI01Fb0u4nMVNOSgEfEbEry/a3M/H4tvqF2LaH+vbGWXwts1Jh9w1rWq1ySJElTZPactVfoO4rMRNNxF5QADgUuz8zPNkadAIzeyWRP4PhG+fx6N5TtgFtqV5UTgZ0iYm69+HKnWiZJkiStsKbjPuDPAP4NuDgiLqhlHwIOBI6NiL2BPwOvqON+CrwQWADcCbwGIDMXRsQBwNl1uv0zc2ErWyBJkiRNUusJeGb+H9DreaA7dpk+gTf3WNZhwGFTF50kSZI0WD6KXpIkSWqRCbgkSZLUIhNwSZIkqUUm4JIkSVKLTMAlSZKkFpmAS5IkSS0yAZckSZJaZAIuSZIkMpOFCxdSHsGiQTIBlyRJEiMjI+zx6R8yMjIy3aEMvel4FL0kSZKmUWben2jPnTuXiPKQ8tlz1prOsFYatoBLkiStZEZGRph/8MnMP/hkW7yngQm4JEnSSmj2nLWZPWft6Q6jq9EW+mHtj24CLkmSpBXKPXfezj5fPYVFixZNdygDYQIuSZKkFc7sOQ+c7hAGxgRckiRJapEJuCRJktQib0MoSZI05DpvO6jpZQIuSZI05EZvOwhw5Bt3nOZoZAIuSZK0ElhRbzm4MrIPuCRJktQiE3BJkiSpRXZBkSRJ0govM1m4cOH9wzOZCbgkSdIQGb3jydy5c4mI6Q5nyixatIi3H3MeAF/YY+tpjmb52AVFkiRpiIyMjLDHp394/20Hh0FmsmjRImbPWXsoLiY1AZckSRoys+esNd0hTKl77ryddx5+Bvfes3i6Q5kSJuCSJEla4a06Z83pDmHKmIBLkiRJLfIiTEmSpBmq8xHzw3TR5TCzBVySJGmGGn3E/PyDTx6qiy4na/QfkhX9NoUm4JIkSTPYsNwZZCrcc+ft7PPVU1i0aNF0hzImu6BIkiTNEJ1dTrSs2XMeON0hjMsEXJIkaYYY7XICcOQbd5zmaDRZdkGRJEmaQWZyl5OZ0kd70EzAJUmSVkCZycKFC4cqWR3to72yXzBqAi5JkrQCGsZHysPM6KM9aCbgkiRJK4BuLd7D9kh5FSbgkiRJK4BhbfHWskzAJUmSWjRW3+5ha/H2osvuTMAlSZJatDK1dM+UB+O0zQRckiSpZcPW0j0WL7pclgm4JEnSJPTqStJZPoy3E9TyMQGXJEmahF5dSTrLV6YuJ+qPCbgkSVIfJnKbwM7yYety4sWVy8cEXJIkrdT67UpiS/YSPtFy+ZiAS5KkoTSaQHcm1/0m1t3Kh60le3lM98WVo63wM7F//YxPwCNil4i4IiIWRMQHpjseSZLUjvEudhwZGWH+wScz/+CTl0qiJ5JYr2wJ90zqWnLP3+/gzUedzfyDT55xtzmc0Ql4RMwC/gd4AbAl8KqI2HJ6o5IkSdA7QR6vRbrf+fq52HH2nLWZPWftZWJb2RLrXjoT7nv+fseM6lqy2py1WHWNtUzAW7YtsCAzr8zMu4FjgF2nOSZJ0pDqlgh26z/cOd1Uztdt3hV1vm4Jcj8t0v3OBzP3Ysd+u0/02yLdOV2/83Xryz3dXUvGk5lLJdz33Hk77zz8DO69Z/H0BTVBq053AMtpA+DqxvtrgKdOUyxjWrhw4XSHIElaTiMjI7zx0NMAOHjv5zB37lxGRkZ43Zd+ytff8kLmzp3bdTpgyuYbna4574o8X7e/vYbH+9tt+J47b1tqumXf3zrh+Sa2/MnN9/eF17PPV09h1qqzljomuk33mi/8iG+8/cWNaZbepnvuvJW777yd13zhd3zj7S8G6Hu+e/5++1Jli/9+G/ctXjxmHY7Od+89ndMtW/ej0y1atGi551t85x3cvcZt3HfP33nLV89l9uprMmu1Nbjvnr8DcPedZVxzvhVVzIQ+Pr1ExO7ALpn5uvr+34CnZuZbOqbbB9invn00cEWrga7Y1gdunu4gVkLW+/Sx7qeH9T49rPfpY91PjxWp3h+RmfO6jZjpLeDXAhs13m9Yy5aSmYcAh7QV1EwSEedk5jbTHcfKxnqfPtb99LDep4f1Pn2s++kxU+p9pvcBPxvYIiI2jYjVgD2AE6Y5JkmSJKmnGd0CnpmLI+ItwInALOCwzLx0msOSJEmSeprRCThAZv4U+Ol0xzGD2TVneljv08e6nx7W+/Sw3qePdT89ZkS9z+iLMCVJkqSZZqb3AZckSZJmFBPwlUhEvDUifhcRl0bEpxrlH4yIBRFxRUTs3CjfpZYtiIgPTE/UM19E7BcR10bEBfX1wsY4637AIuLdEZERsX59HxFxUK3biyJi68a0e0bEH+prz+mLemaLiANq3V4QEb+IiIfXcut+gCLiv+s5/qKI+EFErNsY57lmQCLi5fV79b6I2KZjnPXeohlVr6NPsPI13C/gucD/AqvX9w+uf7cELgRWBzYF/ki5oHVWHX4ksFqdZsvp3o6Z+AL2A97Tpdy6H3zdb0S5SPvPwPq17IXAz4AAtgN+W8vXA66sf+fW4bnTvQ0z8QWs3Rh+G/AV676Vet8JWLUOfxL4ZB32XDPYen8s5RkjpwLbNMqt93b3w4yqV1vAVx5vBA7MzLsAMvPGWr4rcExm3pWZVwELgG3ra0FmXpmZdwPH1Gk1daz7wfsc8D6gebHLrsCRWZwJrBsRDwN2Bk7KzIWZOQKcBOzSesRDIDObj59bkyX1b90PUGb+IjNHn8V9JuXZGOC5ZqAy8/LM7PaAP+u9XTOqXk3AVx6PAp4VEb+NiNMi4im1fAPg6sZ019SyXuWanLfUn4UPi4i5tcy6H6CI2BW4NjMv7BhlvbcgIj4REVcDrwY+Vout+/a8lvJrA1jv08V6b9eMqtcZfxtCLRER/ws8tMuoD1P29XqUn32fAhwbEY9sMbyhNk7dHwwcQGkFPAD4DOXLUctpnHr/EOUneQ3AWHWfmcdn5oeBD0fEB4G3APu2GuCQGq/e6zQfBhYD32oztmHWT71LE2ECPkQy83m9xkXEG4HvZ+kodVZE3AesD1xL6Sc7asNaxhjl6jBW3TdFxNeAH9e31v1y6lXvEfF4Sp/LCyMCSh2eFxHb0rverwW27yg/dcqDHhL9HvOUJPCnlATcul9O49V7ROwFvAjYsZ7vwXPNcpvA8d5kvbdrrPpe4dgFZeXxQ8qFmETEoygXKNwMnADsERGrR8SmwBbAWcDZwBYRsWlErAbsUafVBNU+rqNeClxSh637AcnMizPzwZm5SWZuQvkpcuvMvJ5Sl/PrHTm2A27JzOsoF2vuFBFzazehnWqZJigitmi83RX4XR227gcoInahXPPwksy8szHKc830sN7bNaPq1RbwlcdhwGERcQlwN7BnbR25NCKOBS6j/GT55sy8FyAi3kL5EpwFHJaZl05P6DPepyLiSZQuKH8C3gCQmdb99Pgp5W4cC4A7gdcAZObCiDiAchIH2D8zF05PiDPegRHxaOA+yh1o/r2WW/eD9SXKHTdOqr/8nJmZ/+65ZrAi4qXAF4F5wE8i4oLM3Nl6b1dmLp5J9eqTMCVJkqQW2QVFkiRJapEJuCRJktQiE3BJkiSpRSbgkiRJUotMwCVJkqQWmYBrxoqI3SIiI+Ix0x1LLxHxoeWcf/uI+HEdfklEfKAOz4uI30bE+RHxrIh4eURcHhG/nIq42xIRX4+ILScw/fYR8fQ+ptutn+VGxH4R8Z5+1z/TRMThEbH7AJb7pIh44VQvtw3Nz9EE5vlTRKzfb/lyxLZNRBw0VcubahFxakRsMw3rndBxHBGb1Fvudhs3sG2IiE9FxKX1XHxQ1HtBRsSTI+LiiFjQUf7JiLgoIo5sLONfI+Idg4hPKxYTcM1krwL+r/6dEhEx1ffGX64EvCkzT8jMA+vbHYGLM/OfMvMMYG/g9Zn53H6WNYDtnJTMfF1mXjaBWbYHxk3Agd2AvhP7FUVEzJqGdU7mWHgS5X7eg17PlOv4HK1QMvOczHzbdMehsUXEel3Kng48A3gC8DjgKcBz6uiDgddTHsSzBbBLRKxDeTjYE4C7I+LxEbEG5d74/zP4rdB0MwHXjBQRDwSeSUk892iUrxIRX46I30XESRHx09GWk4h4YS0/t7ZCjLYs7xcRR0XEr4Cjauvy9yLi7Pp6Rp1uXl3mpbXl9s+jrV8R8cO63EsjYp9adiCwRkRcEBHfqmX/GhFn1bKvdku4ImKXGud5wMsa5XtFxJfqQ30+Bexal7NvrYtDI+K/I2JW/Xt2bV15Q51/+4g4IyJOAC4bZ7pTI+K4Gse3Gi02T4mIX0fEhXU71hpjOQ+LiNNrjJdExLO6bOv9rVERcXtEfKIu+8yIeEjHtJtQHujyzrrMZ9WWrlPqek+OiI3rF+FLgP+u020WEa+v8V1Y9+2ccY6vbsudFRFXRbFuRNwbEc+u058eEVvUY+mwul1XRsTbGsvsuu/rdn8mIi4EntYRR9e4o7QIHlT3xZWx5BiPeoxcERH/Czy4x/adGhGfj4hzgLdHaaE7LcoxfGLUp7fW6T5Z4/59rfPVgP2BV9ZteWVErFm3+6wov8rs2jhmT4iIU4CTu8TxrnpsXBKNVr+ImF/r/sKIOKqWPSQiflDLLoyIp0dHS2dEvCci9mvE/oXG8bdtI6Yv1eFen/UHRcQvon7WgRjreOm1LRHx3tFjICI+V+uBiNgh6jmhYxnNX7zGOpa61c8yx2wtPzwiDo7ymbqyruOwKK20hzeWuVNE/CYizouI70Y5x3bz8ubxUOd9QER8I0or7/kRMfrU5fvrur7/cV3/rBrXJXWed9bxm0XEz+txeEYs/evms6P78f7fjeW8skudrhERx9Tt/QGwRi3vGsMY+/cBEfHqKL8ydvuVIoEHUJ4yvTowG7ghymdp7cw8sz787khKA8F9wOyICGAOcA/wHuCLmXnPWLFoSGSmL18z7gW8Gji0Dv8aeHId3p3ytL1VgIcCI7XsAcDVwKZ1uqOBH9fh/YBzgTXq+28Dz6zDGwOX1+EvAR+sw7tQTrjr1/fr1b9rUB41/6D6/vZGzI8FfgTMru+/DMzv2K7ROLegfOkf24hzL+BLncP1/anANnV4H+AjdXh14BxgU0rr8R2NOhhruluADWs9/oaS4K8GXAk8pc6zNuVpur2W827gw7V8FrBWl/3YjDuBF9fhT40us2P6/YD3NN7/iPJUV4DXAj+sw4cDuzeme1Bj+OPAW7str4/l/hzYCngR5amNH67bfFVjeb+uZesDf6N8Effc93W7X9HjOO8V9+HAd+v+2RJYUMtfBpxU6/vhwKJmPXTU+5fr8Owa87z6/pWUJ8iNTveZOvxC4H97HH//CfxrHV4X+D2wZp3uGurnoyOGJwMX1+keCFwK/FOt39+z7GfrO8A7GsfTOsAmwCWNZb4H2K8R+9fq8LNHp2Ppz1Gvz/pBwMfq8D/T+Kx3bMOf6n7utS3bAd+t055BeQz5bGBf4A1dlrc9S5+Xuh1LvepnrM/CMZTzya7ArcDjKcfOuZRfM9YHTgfWrPO8f3T7uxw33Y6Hd7PkmHkM8BfKuez+uq7jfly38cnASY3ydevfk4Et6vBTgVPGOd7/hSXH+0Pqeh9G47gA3tWI7QmUJ1Ju0yuGLtv8RMpTLhdQvgO27jZdnfbTlM/cLcAnatk2o/VU3z+rsY/fB1wAfKbG/eNey/Y1fK8V4idBaRJeBXyhDh9T359LSRS/m5n3AdfHkj7RjwGuzMyr6vujKYnjqBMy8+91+HnAlqVhAoC1Y0mL+0sBMvPnETHSmP9tUR5HDLARJYH+W0fMO1JO+mfXZa8B3NgxzWMoydwfACLimx1x9mMn4AmxpM/kOjWeu4GzGnUw3nTX1BguoHyh3QJcl5ln1zq4tY7vtZyzgcMiYjYlGbhgnLjvpnxBQ9mXz+9jW5/Gkl8JjqIk7t08LiI+TkkOH0h5VPFklnsGJZnbFPgvys/Kp7HkEeoAP8nMu4C7IuJGSmIw1r6/F/jeJOL+YT3OL4slvxY8Gzg6y+Ou/zra4trDd+rfR1N+Mh99fPks4LrGdN+vf8+lHAfd7AS8JJb0p38AJaGFkuR0e6z8M4EfZOYdABHxfUpykpTP8M1QHlNfp98BmF/L7gVuiYi5Y2wflM85mXl6RKwdEet2jO/1WX82df9n5k86Puvd9NqWg4EnR8TawF3AeZSE7FlAP11Nuh1LO9C9fsb6LPwoMzMiLgZuyMyLa5yXUvbphpTE9le1Llaj/OPdTbfj4ZmUJJXM/F1E/Bl41BjbdSXwyIj4IvAT4Be13p8OfLexP1ZvzNPteH8mS473GyLiNErXj4sa8z2b2mKdmRdFxOi4ZWLoDDIi3kX55/K9lH/U7+q1QRGxOeUf7Q1r0Un1F4K/95onMz9F3U9Rfmn5WES8jvJ5uigzP95rXs18JuCacaL0v9sBeHxEJCVhyIh473Is9o7G8CrAdpn5j4719opne8oX+dMy886IOJWSgCwzKXBEZn5wOeLsR1BaSpdKMmucd/Q5XfOL5l7GPld0XU5d1rMpLYiHR8RnM/PIZeZe4p7M0izUxzon6nBgt8y8MCL2orTCTcbpwBsprcsfo3wxb09JzEd1q7ux9v0/agIx0bib6xm3i0QXo8dCAJdm5tN6TDe6nrH2SQD/kplXLFUY8VSWPuam2mKW7krZ+bnLcd5P6LM+UZl5T0RcRWkJ/jUlMXwusDlwef2nfd86+eu6LGIin8OxjC7nvo5l3leXeS/lH6V+rqfp53gY1XX/ZOZIRDwR2JnSrewVwDuARZn5pHHWC5M73pfSI4bXdkz2TcqvDm8AnhsR3wB+lpmLuyzypcCZmXk7QET8jPJP0VEsScqpw9c2Z4yIf6rbdAXwX5m5c+3Ss8VoY4yGj33ANRPtDhyVmY/IzE0ycyPgKkqr0q+Af4nSF/whLElYrqC0dmxS3y/TV7DhF8BbR99E6XNNXfYratlOwGjr2zrASE2+H0P52XnUPbUFGMrPq7tHxIPrMtaLiEd0rPt3wCYRsVl9P5kLTE8E3ji63oh4VESsuRzTjboCeFhEPKVOv1aUC+u6Lqdu2w2Z+TXg68DWk9iWTrcBazXe/5ol1wC8miWJcOd0awHX1Rhf3cd6ei33LEor3X01abuA8uV8+jjL62ffdzPRuE+n9M2eVfue9nNR7hXAvIh4Wo1tdkRsNc48nfV7IvDW2p91NKEYzxnAbhExpx53L61lp1D6GT+oLmv0greTKf/8jPbfXQe4AXhwlD7bq1O6BjW9sk7/TOCWzLylY3yvz/rpwP+rZS9gyWd9otsyOu49dZlnUJK987P4QWY+qb7OGWcdo3rVT69jth9nAs+orbjUz+9YLdidzqjrpM63MeW4+hPwpHo+3ggY7Ye/PrBKZn4P+AilW8etwFUR8fI6TdQEebz1jh7v8yit3Wd1TNPcl4+jdEPpGkPnwjPzxsz8ZGY+Dvg85bvn97VlvNNfgOdExKr18/ocSpem64BbI2K7+vmYDxzfMe8BwEcpyf7odUH3UfqGa0iZgGsmehXwg46y79Xy71H6nF5Gab04j/LF+3fgTcDPI+JcSgLR+WU86m3ANlEuZrqM8oUJ8B/ATlEu+no5cH1dzs+BVSPicuBAypfZqEOAiyLiW1nu9vERys+tF1H6Lj6sueKa1O0D/CTKRZidXVT68fW6/efVWL9K95aqfqcbje1uSkLzxSgXDJ5EadHqtZztgQsj4vw63xe6LXeCfgS8NOpFmJTk6TW1Pv8NeHud7hjgvVEuCNuM8uX2W8o/Ub/rYz1dl1t/gr6aJfv4DEoievFYC+tn3/cw0bh/APyBsj+OpHc3gmZsd1MSi0/W/XoB499p5peUrhsXRLnw7QBK8nBRlG4NB/Sx3vMoLfxnUbbx65l5fmZeCnwCOK3G89k6y9sprZAXU7o/bJnlYrX96zJOYtk6+kc9/r5CuWC701if9WfXbXkZJbma8LbU0WdQ9vVvMvMG4B9MLDnuXFev+un1WehnmTdRWumPrvP/htIdrl9fBlap++Y7wF71s/IrSuPIZZRuIOfV6TcATo3Sve2bwOgvQ68G9q7bdSmlz/pYfkD5VeFCyj8m78vM6zumORh4YD0/7085dsaKoavMPD0z96T0mb+oyyTHAX+knAsuBC7MzB/VcW+inCcX1Gl+NjpTROwGnJOZf83MRcAFtR4fkJkXjrP9msFiyS++0nCIiAdm5u21hegs4BmZeX2jPCi3efpDZn5uAstdHbg3MxfX1sKDx/i5VNI0itIV7D0TaFmWpNbYB1zD6MdRLrZaDTig0SLy+ojYs5afT2mpnYiNgWMjYhXKBYOvn6J4JUnSSsQWcEmSJKlF9gGXJEmSWmQCLkmSJLXIBFySJElqkQm4JEmS1CITcEmSJKlFJuCSJElSi/4/7weBX9A3W+0AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12, 8))\n", + "plt.title('Distribution of differences between aggregate totals that normalizes tabulation of poverty households')\n", + "# Set x-axis label\n", + "plt.xlabel('Aggregate differences in total owner and renter occupied low-income households < 80%')\n", + "# Set y-axis label\n", + "plt.ylabel('Relative Frequency in Support')\n", + "\n", + "sns.histplot(final_df[\"differences_aggregate_denominator\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12, 8))\n", + "plt.title('Distribution of differences between aggregate totals that normalizes tabulation of poverty households (with removal of not computed fields) ')\n", + "# Set x-axis label\n", + "plt.xlabel('Aggregate differences in total owner and renter occupied low-income households')\n", + "# Set y-axis label\n", + "plt.ylabel('Relative Frequency in Support')\n", + "\n", + "sns.histplot(final_df[\"differences_aggregate_denominator_sans_not_computed\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " after removing the cwd from sys.path.\n" + ] + } + ], + "source": [ + "final_df[\"current_methodology_percentile_rank\"] = final_df[\"current_methodology_percent\"].rank(\n", + " pct=True,\n", + " # Set ascending to the parameter value.\n", + " ascending=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Only include non-NA tracts for comparison purposes" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# first save NA tracts that were considered unreliable\n", + "ineligible_tracts = list(final_df[final_df[\"hbrd_rank\"].isna()][\"FIPS_tract_id\"].values)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### How many tracts are ineligible according to CalEnvironScreen but are considerd in Score L?\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "final_current_methodology = final_df[final_df[\"current_methodology_percentile_rank\"] >= 0.90]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7323, 13)" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_current_methodology.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(53, 13)" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 53 tracts\n", + "final_current_methodology[final_current_methodology.FIPS_tract_id.isin(ineligible_tracts)].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "non_null_df = final_df[~final_df[\"hbrd_rank\"].isna()]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7270, 13)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# number of tracks eligible\n", + "non_null_df[non_null_df[\"current_methodology_percentile_rank\"] >= 0.90].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12, 8))\n", + "plt.title(\"Distribution of Percentiles (Score L)\")\n", + "# Set x-axis label\n", + "plt.xlabel('Percentile (although currently not represented as a percentage)')\n", + "# Set y-axis label\n", + "plt.ylabel('Relative Frequency in Support')\n", + "\n", + "sns.histplot(non_null_df[\"current_methodology_percentile_rank\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "non_null_df[\"new_threshold_exceeded\"] = (final_df['hbrd_rank'] >= 90)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarahluw/.pyenv/versions/3.6.2/envs/my-virtual-env-3.6.2/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ], + "source": [ + "non_null_df[\"current_threshold_exceeded\"] = (non_null_df[\n", + " 'current_methodology_percentile_rank'] >= 0.90)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 60543\n", + "True 7270\n", + "Name: current_threshold_exceeded, dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_null_df[\"current_threshold_exceeded\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 61012\n", + "True 6801\n", + "Name: new_threshold_exceeded, dtype: int64" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_null_df[\"new_threshold_exceeded\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FIPS_tract_idstatehbrd_rankhbrd_scoresummedcurrent_summed_methodologyT8_est1current_methodology_denominator_sans_not_computedcurrent_methodology_denominatorcurrent_methodology_percentdifferences_aggregate_denominatordifferences_aggregate_denominator_sans_not_computedcurrent_methodology_percentile_ranknew_threshold_exceededcurrent_threshold_exceeded
0010010201000146.2980770.1045758017476576576523.0000.512833FalseFalse
1010010202000183.2692310.19166713817772072072025.0000.575315FalseFalse
2010010203000163.6538460.13127417027912951295129122.0-4-40.479242FalseFalse
3010010204000134.6153850.08841514527416401635163517.0-5-50.289696FalseFalse
4010010205000168.2211540.14251559588541754175413521.0-40-400.444306FalseFalse
\n", + "
" + ], + "text/plain": [ + " FIPS_tract_id state hbrd_rank hbrd_score summed \\\n", + "0 01001020100 01 46.298077 0.104575 80 \n", + "1 01001020200 01 83.269231 0.191667 138 \n", + "2 01001020300 01 63.653846 0.131274 170 \n", + "3 01001020400 01 34.615385 0.088415 145 \n", + "4 01001020500 01 68.221154 0.142515 595 \n", + "\n", + " current_summed_methodology T8_est1 \\\n", + "0 174 765 \n", + "1 177 720 \n", + "2 279 1295 \n", + "3 274 1640 \n", + "4 885 4175 \n", + "\n", + " current_methodology_denominator_sans_not_computed \\\n", + "0 765 \n", + "1 720 \n", + "2 1295 \n", + "3 1635 \n", + "4 4175 \n", + "\n", + " current_methodology_denominator current_methodology_percent \\\n", + "0 765 23.0 \n", + "1 720 25.0 \n", + "2 1291 22.0 \n", + "3 1635 17.0 \n", + "4 4135 21.0 \n", + "\n", + " differences_aggregate_denominator \\\n", + "0 0 \n", + "1 0 \n", + "2 -4 \n", + "3 -5 \n", + "4 -40 \n", + "\n", + " differences_aggregate_denominator_sans_not_computed \\\n", + "0 0 \n", + "1 0 \n", + "2 -4 \n", + "3 -5 \n", + "4 -40 \n", + "\n", + " current_methodology_percentile_rank new_threshold_exceeded \\\n", + "0 0.512833 False \n", + "1 0.575315 False \n", + "2 0.479242 False \n", + "3 0.289696 False \n", + "4 0.444306 False \n", + "\n", + " current_threshold_exceeded \n", + "0 False \n", + "1 False \n", + "2 False \n", + "3 False \n", + "4 False " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_null_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "net_difference = non_null_df[(non_null_df[\n", + " \"current_threshold_exceeded\"] != non_null_df[\"new_threshold_exceeded\"])]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# Inserted after the basic stats definition.\n", + "# Load demographic data\n", + "import pathlib\n", + "\n", + "DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n", + "COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\"\n", + "\n", + "demographics_path = DATA_DIR / \"dataset\" / \"census_acs_2019\" / \"usa.csv\"\n", + "\n", + "demographics_df = pd.read_csv(\n", + " demographics_path,\n", + " dtype={\"GEOID10_TRACT\": \"string\"},\n", + ")\n", + "\n", + "# Set some field names\n", + "BLACK_FIELD_NAME = \"Black or African American alone\"\n", + "AMERICAN_INDIAN_FIELD_NAME = \"American Indian and Alaska Native alone\"\n", + "ASIAN_FIELD_NAME = \"Asian alone\"\n", + "HAWAIIAN_FIELD_NAME = \"Native Hawaiian and Other Pacific alone\"\n", + "TWO_OR_MORE_RACES_FIELD_NAME = \"Two or more races\"\n", + "NON_HISPANIC_WHITE_FIELD_NAME = \"Non-Hispanic White\"\n", + "HISPANIC_FIELD_NAME = \"Hispanic or Latino\"\n", + "PERCENT_PREFIX = \"Percent \"\n", + "\n", + "RE_OUTPUT_FIELDS = [\n", + " BLACK_FIELD_NAME,\n", + " AMERICAN_INDIAN_FIELD_NAME,\n", + " ASIAN_FIELD_NAME,\n", + " HAWAIIAN_FIELD_NAME,\n", + " TWO_OR_MORE_RACES_FIELD_NAME,\n", + " NON_HISPANIC_WHITE_FIELD_NAME,\n", + " HISPANIC_FIELD_NAME,\n", + "]\n", + "\n", + "RE_PERCENT_OUTPUT_FIELDS = [PERCENT_PREFIX + field for field in RE_OUTPUT_FIELDS]\n", + "\n", + "columns_to_keep = (\n", + " [\"GEOID10_TRACT\"]\n", + " + RE_OUTPUT_FIELDS\n", + " + RE_PERCENT_OUTPUT_FIELDS\n", + " + ['Percent of individuals < 200% Federal Poverty Line', \n", + " 'Median value ($) of owner-occupied housing units',\n", + " 'Percent individuals age 25 or over with less than high school degree',\n", + " 'Percent enrollment in college or graduate school',\n", + " 'Linguistic isolation (percent)']\n", + ")\n", + "\n", + "# Join the demographics in.\n", + "merged_df = net_difference.merge(\n", + " demographics_df[columns_to_keep],\n", + " left_on=\"FIPS_tract_id\",\n", + " right_on=\"GEOID10_TRACT\",\n", + " how=\"inner\"\n", + ")\n", + "\n", + "\n", + "# Group bys.\n", + "demographics_directory = COMPARISON_OUTPUTS_DIR / \"demographics_basic_stats\"\n", + "demographics_directory.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# these are not converted into percent 0 - 100 scale\n", + "percent_cols = [x for x in merged_df.columns if \n", + " 'Percent' in x or '(percent)' in x\n", + " ]\n", + "\n", + "merged_df[\n", + " percent_cols] = merged_df[\n", + " percent_cols].apply(lambda x: x * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FIPS_tract_idstatehbrd_rankhbrd_scoresummedcurrent_summed_methodologyT8_est1current_methodology_denominator_sans_not_computedcurrent_methodology_denominatorcurrent_methodology_percent...Percent Asian alonePercent Native Hawaiian and Other Pacific alonePercent Two or more racesPercent Non-Hispanic WhitePercent Hispanic or LatinoPercent of individuals < 200% Federal Poverty LineMedian value ($) of owner-occupied housing unitsPercent individuals age 25 or over with less than high school degreePercent enrollment in college or graduate schoolLinguistic isolation (percent)
0010059507000192.0192310.23414614421861561560736.0...0.0000000.03.28731143.1952660.00000057.11094613750031.6770191.6191210.000000
1010119521000196.1538460.26976717422264564564135.0...0.0000000.00.0000003.1032300.00000059.5946806690024.9512675.6472633.145695
2010150002000191.3461540.22983928547512401245122539.0...1.0327020.00.38726341.1790026.19621349.9139417700013.2335334.2894061.862828
3010150003000191.0576920.22950828048812201215117542.0...0.0000000.00.00000017.5010521.76693365.6289445130029.1840386.3500260.000000
4010550010000198.4615380.30804613417743544043041.0...0.0000000.01.26671426.60098515.48205568.4882906540023.3372232.4108001.489362
\n", + "

5 rows Ă— 35 columns

\n", + "
" + ], + "text/plain": [ + " FIPS_tract_id state hbrd_rank hbrd_score summed \\\n", + "0 01005950700 01 92.019231 0.234146 144 \n", + "1 01011952100 01 96.153846 0.269767 174 \n", + "2 01015000200 01 91.346154 0.229839 285 \n", + "3 01015000300 01 91.057692 0.229508 280 \n", + "4 01055001000 01 98.461538 0.308046 134 \n", + "\n", + " current_summed_methodology T8_est1 \\\n", + "0 218 615 \n", + "1 222 645 \n", + "2 475 1240 \n", + "3 488 1220 \n", + "4 177 435 \n", + "\n", + " current_methodology_denominator_sans_not_computed \\\n", + "0 615 \n", + "1 645 \n", + "2 1245 \n", + "3 1215 \n", + "4 440 \n", + "\n", + " current_methodology_denominator current_methodology_percent ... \\\n", + "0 607 36.0 ... \n", + "1 641 35.0 ... \n", + "2 1225 39.0 ... \n", + "3 1175 42.0 ... \n", + "4 430 41.0 ... \n", + "\n", + " Percent Asian alone Percent Native Hawaiian and Other Pacific alone \\\n", + "0 0.000000 0.0 \n", + "1 0.000000 0.0 \n", + "2 1.032702 0.0 \n", + "3 0.000000 0.0 \n", + "4 0.000000 0.0 \n", + "\n", + " Percent Two or more races Percent Non-Hispanic White \\\n", + "0 3.287311 43.195266 \n", + "1 0.000000 3.103230 \n", + "2 0.387263 41.179002 \n", + "3 0.000000 17.501052 \n", + "4 1.266714 26.600985 \n", + "\n", + " Percent Hispanic or Latino \\\n", + "0 0.000000 \n", + "1 0.000000 \n", + "2 6.196213 \n", + "3 1.766933 \n", + "4 15.482055 \n", + "\n", + " Percent of individuals < 200% Federal Poverty Line \\\n", + "0 57.110946 \n", + "1 59.594680 \n", + "2 49.913941 \n", + "3 65.628944 \n", + "4 68.488290 \n", + "\n", + " Median value ($) of owner-occupied housing units \\\n", + "0 137500 \n", + "1 66900 \n", + "2 77000 \n", + "3 51300 \n", + "4 65400 \n", + "\n", + " Percent individuals age 25 or over with less than high school degree \\\n", + "0 31.677019 \n", + "1 24.951267 \n", + "2 13.233533 \n", + "3 29.184038 \n", + "4 23.337223 \n", + "\n", + " Percent enrollment in college or graduate school \\\n", + "0 1.619121 \n", + "1 5.647263 \n", + "2 4.289406 \n", + "3 6.350026 \n", + "4 2.410800 \n", + "\n", + " Linguistic isolation (percent) \n", + "0 0.000000 \n", + "1 3.145695 \n", + "2 1.862828 \n", + "3 0.000000 \n", + "4 1.489362 \n", + "\n", + "[5 rows x 35 columns]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "import us\n", + "\n", + "mapping = us.states.mapping('fips', 'abbr')\n", + "\n", + "for idx, row in merged_df.iterrows():\n", + " current_row = str(merged_df.loc[idx, 'state'])\n", + " state = mapping.get(current_row, None)\n", + " merged_df.loc[idx, 'state_name'] = state" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5013, 36)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Compute the correlation matrix\n", + "import seaborn as sns\n", + "corr = merged_df[[\"hbrd_rank\", \n", + " \"current_methodology_percentile_rank\"] + percent_cols].corr()\n", + "\n", + "# Generate a mask for the upper triangle\n", + "mask = np.triu(np.ones_like(corr, dtype=bool))\n", + "\n", + "# Set up the matplotlib figure\n", + "f, ax = plt.subplots(figsize=(15, 12))\n", + "\n", + "# Generate a custom diverging colormap\n", + "cmap = sns.diverging_palette(230, 20, as_cmap=True)\n", + "\n", + "# Draw the heatmap with the mask and correct aspect ratio\n", + "sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,\n", + " square=True, linewidths=.5, cbar_kws={\"shrink\": .5})" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_stats = merged_df.groupby([\"state_name\"]).agg({\n", + " 'GEOID10_TRACT': 'nunique',\n", + " 'Percent of individuals < 200% Federal Poverty Line': [np.median, np.std],\n", + " 'Median value ($) of owner-occupied housing units': [np.median, np.std],\n", + " 'Percent individuals age 25 or over with less than high school degree': [np.median, np.std],\n", + " 'Percent enrollment in college or graduate school': [np.median, np.std],\n", + " 'Percent Black or African American alone': [np.median, np.std],\n", + " 'Percent American Indian and Alaska Native alone': [np.median, np.std],\n", + " 'Percent Non-Hispanic White': [np.median, np.std], \n", + " 'Linguistic isolation (percent)': [np.median, np.std],\n", + " 'Percent Hispanic or Latino': [np.median, np.std],\n", + " 'hbrd_rank': [np.median, np.std],\n", + " 'current_methodology_percent': [np.median, np.std],\n", + " 'current_summed_methodology': [np.median, np.std, np.sum]\n", + "}).reset_index()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_stats.columns = [' '.join(col).strip() for col in grouped_stats.columns.values]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['state_name', 'GEOID10_TRACT nunique',\n", + " 'Percent of individuals < 200% Federal Poverty Line median',\n", + " 'Percent of individuals < 200% Federal Poverty Line std',\n", + " 'Median value ($) of owner-occupied housing units median',\n", + " 'Median value ($) of owner-occupied housing units std',\n", + " 'Percent individuals age 25 or over with less than high school degree median',\n", + " 'Percent individuals age 25 or over with less than high school degree std',\n", + " 'Percent enrollment in college or graduate school median',\n", + " 'Percent enrollment in college or graduate school std',\n", + " 'Percent Black or African American alone median',\n", + " 'Percent Black or African American alone std',\n", + " 'Percent American Indian and Alaska Native alone median',\n", + " 'Percent American Indian and Alaska Native alone std',\n", + " 'Percent Non-Hispanic White median', 'Percent Non-Hispanic White std',\n", + " 'Linguistic isolation (percent) median',\n", + " 'Linguistic isolation (percent) std',\n", + " 'Percent Hispanic or Latino median', 'Percent Hispanic or Latino std',\n", + " 'hbrd_rank median', 'hbrd_rank std',\n", + " 'current_methodology_percent median', 'current_methodology_percent std',\n", + " 'current_summed_methodology median', 'current_summed_methodology std',\n", + " 'current_summed_methodology sum'],\n", + " dtype='object')" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grouped_stats.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_stats_states = grouped_stats[[x for x in grouped_stats \n", + " if \"median\" in x and \n", + " \"Percent\" in x] + [\"GEOID10_TRACT nunique\", \"state_name\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_stats_states.set_index(\"state_name\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Percent of individuals < 200% Federal Poverty Line median Percent individuals age 25 or over with less than high school degree median Percent enrollment in college or graduate school median Percent Black or African American alone median Percent American Indian and Alaska Native alone median Percent Non-Hispanic White median Percent Hispanic or Latino median GEOID10_TRACT nunique
state_name
AK33.07046910.5860986.5016045.09620012.63370352.24087210.26414212
AL59.38757516.9137957.56397874.4757010.00000016.8269372.38608256
AR60.04119517.9231865.21064355.7103060.00000034.8001015.39374343
AZ57.70065126.5266437.0756255.6077602.75893326.17866056.66125899
CA51.26895231.9902329.1743124.0784490.4722289.84325065.462248897
CO46.30058814.2493257.9066043.8809621.09258646.52824034.48032772
CT50.17689818.9679858.59463123.3867030.00000027.06270836.69563058
DC50.41085415.4830456.94641390.3525770.0000003.9939143.97022317
DE42.44744711.2353687.16747548.9519210.47055836.9102358.75762914
FL51.10930217.0430947.58889422.7603000.00000022.10462426.459155234
GA58.09138817.4952208.78173866.0021550.00000016.7491174.751314121
HI34.6405239.9914248.0929962.0005260.00000011.6621984.94736817
IA45.76192710.9478677.41301111.2195120.02554969.9254357.93507755
ID52.5401366.0718259.1437310.5292651.06339583.4355838.60478525
IL55.34478318.7627467.25308638.0509940.0000009.25583112.976765203
IN58.68210818.0456815.95164435.0849010.00000039.4594119.06376482
KS56.97732518.2332798.18659411.9044760.45919356.78561816.89101050
KY56.32823418.1950865.83430618.9645030.00000063.1455404.23620071
LA61.15131620.0523317.09914376.7875130.00000017.4779322.48051059
MA47.83000120.9422967.76858611.8248780.00000034.79487327.95240884
MD39.75211215.3517317.62244149.9278960.00000022.1150727.09447088
ME42.4021237.4773148.4269661.3470680.36040490.6806281.30890127
MI60.18348616.2672816.96378869.9781660.00000017.6454671.912181145
MN42.50854112.09616810.21069716.9693820.54782058.1542808.03099791
MO55.19430615.2006196.15305936.8479800.00000035.8847104.61922152
MS58.97967617.0385406.71198072.4340180.00000025.2270430.70575543
MT45.0789855.35533213.3026610.1291611.84455488.8817273.65063820
NC56.81125419.2268577.08257542.4155180.25968433.41225210.714959109
ND43.2432436.99017916.4667397.1492562.12194877.9402937.50226713
NE55.4258249.50728713.6915898.2790700.53516864.62978310.86384829
NH39.67807813.1355246.5123402.7739100.00000086.0097285.64457124
NJ46.34573320.8433017.07307525.0784090.00000012.42603643.496410165
NM53.60195416.7253529.3477502.7661803.11614729.44009657.23481727
NV56.08303622.5641365.47547911.8682720.55708528.14339243.94739948
NY47.53685724.0067428.26279025.8594920.00000011.30094728.649040603
OH64.85730319.1609985.71620761.0523220.00000027.2796353.455150157
OK57.61886716.7902917.02701213.9696743.42083848.52185013.89988570
OR45.05710911.4072497.8693561.7578830.74264567.68737214.95778051
PA54.89406815.2657607.53012031.8856820.00000027.3596186.151832165
RI55.85143720.0353277.63214712.3701610.40053429.26877933.46851719
SC53.74211015.1065456.65387146.2740380.00000039.1304354.81528767
SD47.8048787.6506438.2104653.6046323.99484575.2877454.77278519
TN55.41666718.7124467.28065853.9672130.00000035.9933964.44444473
TX56.93950225.4102206.78726514.3435370.00000014.62720549.238149309
UT45.8097409.5987419.4932431.4449290.38485264.48710320.42816633
VA44.75079214.1815067.65054136.8306090.00000032.1986966.607595102
VT43.1128236.8371867.7231700.9235940.05055693.7005651.60550515
WA39.37531712.5430817.4483175.8803020.90327159.32113613.893070102
WI57.28758215.1812696.31025423.5638920.34694422.6303328.54177843
WV52.37579511.7412986.8748136.8052340.00000082.4479241.63657126
WY33.4140924.8757176.5539660.5780351.17455181.37824211.8082799
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def highlight_medians(s):\n", + " # highliht if the current median is greater than the median of medians for that series\n", + " \n", + " is_greater_than_median = s > s.median()\n", + " return ['color: pink; background-color:darkblue' \n", + " if cell else '' for cell in is_greater_than_median]\n", + "grouped_stats_states.style.apply(highlight_medians)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}