From d5d2c947a00482f8c5da83a4a7b04b9ab62157d0 Mon Sep 17 00:00:00 2001 From: Saran Ahluwalia Date: Mon, 20 Dec 2021 08:55:10 -0500 Subject: [PATCH] wip ny, ca, tx, fl comparisons --- ...candidate_tracts_revised_methodology.ipynb | 662 +++++++++--------- 1 file changed, 337 insertions(+), 325 deletions(-) diff --git a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_19_2011_investigate_sixthousand_candidate_tracts_revised_methodology.ipynb b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_19_2011_investigate_sixthousand_candidate_tracts_revised_methodology.ipynb index da085b58..8d1fed62 100644 --- a/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_19_2011_investigate_sixthousand_candidate_tracts_revised_methodology.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/hud_eda_se_12_19_2011_investigate_sixthousand_candidate_tracts_revised_methodology.ipynb @@ -931,6 +931,13 @@ "# final_df.to_csv(\"housing_burdencomparison_two_methods_12192021.csv\", index=False)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Read Final Dataframe for all tracts" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -946,14 +953,56 @@ "metadata": {}, "outputs": [], "source": [ + "# select only those tracts that meet the new housing burden definition\n", "final_df = final_df[final_df[\"new_threshold_exceeded\"] == True]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(6801, 14)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6 781\n", + "48 466\n", + "36 466\n", + "12 399\n", + "42 305\n", + "17 299\n", + "39 276\n", + "26 257\n", + "37 203\n", + "34 196\n", + "Name: state, dtype: int64" + ] + }, + "execution_count": 
5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# california is number 1\n", "final_df.state.value_counts()[:10]" @@ -961,7 +1010,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -970,7 +1019,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -1003,18 +1052,18 @@ "dtype: object" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "census_acs_2019.dtypes\n" + "census_acs_2019.dtypes" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1028,7 +1077,7 @@ "Name: GEOID10_TRACT, dtype: int64" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1039,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1048,7 +1097,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -1071,7 +1120,7 @@ "dtype: object" ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1082,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -1093,7 +1142,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1375,7 +1424,7 @@ "[5 rows x 38 columns]" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1386,7 +1435,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1395,7 +1444,7 @@ "(6801, 38)" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1406,7 
+1455,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1415,7 +1464,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -1424,7 +1473,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -1434,7 +1483,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1444,7 +1493,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1454,7 +1503,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1559,7 +1608,7 @@ "4 1 " ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1570,7 +1619,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -1581,7 +1630,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -1590,7 +1639,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1851,7 +1900,7 @@ "[5 rows x 74 columns]" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1862,7 +1911,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1929,7 +1978,7 @@ " dtype='object')" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1969,7 +2018,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -1978,7 +2027,7 @@ }, { "cell_type": "code", - 
"execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1989,10 +2038,6 @@ " 'Median value ($) of owner-occupied housing units',\n", " 'Percent individuals age 25 or over with less than high school degree',\n", " 'Percent enrollment in college or graduate school',\n", - "# 'Black or African American alone',\n", - "# 'American Indian and Alaska Native alone', \n", - "# 'Non-Hispanic White', \n", - "# 'Hispanic or Latino',\n", " 'Percent Black or African American alone',\n", " 'Percent American Indian and Alaska Native alone',\n", " 'Percent Non-Hispanic White',\n", @@ -2008,7 +2053,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -2031,7 +2076,7 @@ " dtype='object')" ] }, - "execution_count": 31, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -2042,7 +2087,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -2061,7 +2106,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -2244,7 +2289,7 @@ "4 18.9 " ] }, - "execution_count": 33, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2255,7 +2300,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -2267,7 +2312,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -2291,7 +2336,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -2302,7 +2347,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -2311,7 +2356,7 @@ "" ] }, - "execution_count": 37, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, @@ -2348,16 +2393,16 @@ }, { 
"cell_type": "code", - "execution_count": 38, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(5726, 19)" + "(5726, 20)" ] }, - "execution_count": 38, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2368,7 +2413,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -2377,7 +2422,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -2386,7 +2431,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -2398,7 +2443,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -2407,7 +2452,7 @@ "(5726, 20)" ] }, - "execution_count": 43, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -2418,7 +2463,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -2677,7 +2722,7 @@ "4 1 AL " ] }, - "execution_count": 44, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -2688,7 +2733,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -2701,7 +2746,7 @@ "Name: state_name, dtype: int64" ] }, - "execution_count": 45, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2712,7 +2757,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -2722,7 +2767,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -2731,31 +2776,21 @@ "1879" ] }, - "execution_count": 51, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# 1879 out of 5728\n", 
"sum(list(combined_df.state_name.value_counts()[:4].values))" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(5726, 20)" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "combined_df.shape" ] @@ -2771,7 +2806,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -3030,7 +3065,7 @@ "4 1 AL " ] }, - "execution_count": 56, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -3041,7 +3076,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -3050,7 +3085,7 @@ "(119, 20)" ] }, - "execution_count": 57, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -3061,7 +3096,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -3070,7 +3105,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -3079,7 +3114,7 @@ "(5607, 20)" ] }, - "execution_count": 59, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -3090,7 +3125,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -3099,7 +3134,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -3109,7 +3144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -3118,7 +3153,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -3128,7 +3163,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 54, "metadata": {}, 
"outputs": [ { @@ -3151,7 +3186,7 @@ " dtype='object')" ] }, - "execution_count": 68, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -3162,7 +3197,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -3171,7 +3206,7 @@ "685" ] }, - "execution_count": 69, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -3189,7 +3224,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -3222,7 +3257,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -3335,7 +3370,7 @@ "max 70.925427 99.442231 " ] }, - "execution_count": 71, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -3348,7 +3383,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -3358,7 +3393,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 61, "metadata": {}, "outputs": [ { @@ -3471,7 +3506,7 @@ "max 96.529499 92.430905 " ] }, - "execution_count": 73, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -3484,7 +3519,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 62, "metadata": {}, "outputs": [ { @@ -3518,7 +3553,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 63, "metadata": {}, "outputs": [], "source": [ @@ -3528,7 +3563,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -3641,7 +3676,7 @@ "max 94.683544 100.000000 " ] }, - "execution_count": 76, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -3654,7 +3689,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -3664,7 +3699,7 @@ }, { "cell_type": 
"code", - "execution_count": 78, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -3777,7 +3812,7 @@ "max 98.522167 98.819109 " ] }, - "execution_count": 78, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -3792,17 +3827,161 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### For the CalEnvironScreen's Housing Burden Rank (CHBR) we identify some global trends across all of the tracts in this sample:\n", - " 1. Poverty is most correlated with CHBR and current methodology (< 200 %)\n", - " 2. The current methodology is negativeley correlated with non-Hispanic white \n", - " 3. CHBR is more \"race-neutral\" than current methodology in the sense that there is little to no relationship with racial identification across all states. However, if you look at it state by state, there is a clear pattern of stratification in Latin-X communities (see below for Califonia where the median percentage of the tract that identifies as Latin-X is ~ 64.6 %). This is equivalent to 515, 571 households. oreover, Texas and Florida predominarntly represent tracts comprised of LatinX (\t51% and 35% respectively, and 286, 259 and 302834 households, respectively)\n", - " 4. CHBR is less positively correlated than the current current methodology as it pertains to high school education\n", - " 5. In this sample of New York state's 191 the median percentage of a tract that is ~55.2% (n = 434 tracts)." + "\n", + "### After applying the CalEnvironScreen's ranked percentile methodology for Housing Burden Rank (CHBR), we identify some global trends across all of the tracts considered with reliable populative estimates, in this sample where greater than 90 percent of owned and rented households are considered burdened (n = 5607 tracts)\n", + "\n", + " 1. 
Poverty (percent of individuals < 200% Federal Poverty Line) has the highest correlation (using the Pearson correlation coefficient) with CHBR and with the current methodology's percentile method\n", + " 2. The current methodology is negatively correlated with the percentage of non-Hispanic white populations across all tracts\n", + " 3. CHBR is more \"race-neutral\" than the current methodology in the sense that there is little to no relationship with racial self-identification across all states. However, if one looks at descriptive statistics state-by-state, there is a clear pattern of stratification in Latin-X communities. Focusing on just the most representative states (Florida, Texas, New York, and California - which comprise 33.8 % of the sample considered burdened) we observe the following:\n", + " * In California the median percentage of the tract that identifies as Latin-X is ~ 64.6 %. Moreover, the sample for California represents 506,990 households in aggregate.\n", + " \n", + " * In addition, Texas and Florida predominantly represent tracts comprised of Latin-X (51% and 35% respectively, and 271,370 and 295,318 households, respectively).\n", + " 4. CHBR is less positively correlated than the current methodology as it pertains to high school education\n", + " 5. In this sample of New York state's 191 the median percent of individuals < 200% Federal Poverty Line, of a tract that is ~55.2% (n = 432 tracts). Moreover, in New York - where all tracts are considered \"urban\" by the definition L criterion - the median percent of the population that identifies as African American is 25.86%.\n", + " 6. Similarly to (5), in Florida (n = 351 tracts, with one tract considered \"rural\"), we observe that the median percent of the population that identifies as African American is 30.89%. 
Just to provide some context on why this is nontrivial - the global median of medians for the percent of the population that identifies as African American is ~3.2% of the tracts (n = 67813 tracts) considered as reliable by the CalEnvironScreen methodology." ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['GEOID10_TRACT', 'state', 'Life expectancy (years)',\n", + " 'Unemployed civilians (percent)',\n", + " 'Percent of individuals < 200% Federal Poverty Line',\n", + " 'Median value ($) of owner-occupied housing units',\n", + " 'Percent individuals age 25 or over with less than high school degree',\n", + " 'Percent enrollment in college or graduate school',\n", + " 'Percent Black or African American alone',\n", + " 'Percent American Indian and Alaska Native alone',\n", + " 'Percent Non-Hispanic White', 'Linguistic isolation (percent)',\n", + " 'Current lack of health insurance among adults aged 18-64 years',\n", + " 'Median household income in the past 12 months',\n", + " 'Percent Hispanic or Latino', 'hbrd_rank',\n", + " 'current_methodology_percent', 'current_summed_methodology',\n", + " 'Urban Heuristic Flag', 'state_name'],\n", + " dtype='object')" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "grouped_stats = combined_df.groupby([\"state_name\", \"Urban Heuristic Flag\"]).agg({\n", + " 'GEOID10_TRACT': 'nunique',\n", + " 'Percent of individuals < 200% Federal Poverty Line': [np.median, 'nunique', np.std],\n", + " 'Median value ($) of owner-occupied housing units': [np.median, 'nunique', np.std],\n", + " 'Percent individuals age 25 or over with less than high school degree': [np.median, 'nunique', np.std],\n", + " 'Percent enrollment in college or graduate 
school': [np.median, 'nunique', np.std],\n", + "# 'Hispanic or Latino': [np.median, 'nunique'. np.std],\n", + " 'Percent Black or African American alone': [np.median, 'nunique', np.std],\n", + " 'Percent American Indian and Alaska Native alone': [np.median, 'nunique', np.std],\n", + " 'Percent Non-Hispanic White': [np.median, 'nunique', np.std], \n", + " 'Linguistic isolation (percent)': [np.median, 'nunique', np.std],\n", + "# 'Percent of tract that is HOLC Grade D': [np.median, 'nunique', np.std],\n", + " 'Current lack of health insurance among adults aged 18-64 years': [np.median, 'nunique', np.std],\n", + " 'Median household income in the past 12 months': [np.median, 'nunique', np.std],\n", + " 'Percent Hispanic or Latino': [np.median, 'nunique', np.std],\n", + " 'hbrd_rank': [np.median, 'nunique', np.std],\n", + " 'current_methodology_percent': [np.median, 'nunique', np.std],\n", + " 'current_summed_methodology': [np.median, 'nunique', np.std, np.sum]\n", + "\n", + "}).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "target_state_groupeds_df = grouped_stats[grouped_stats.state_name.isin(target_states)]" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "target_state_groupeds_df.columns = [' '.join(col).strip() for col in target_state_groupeds_df.columns.values]" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['state_name', 'Urban Heuristic Flag', 'GEOID10_TRACT nunique',\n", + " 'Percent of individuals < 200% Federal Poverty Line median',\n", + " 'Percent of individuals < 200% Federal Poverty Line nunique',\n", + " 'Percent of individuals < 200% Federal Poverty Line std',\n", + " 'Median value ($) of owner-occupied housing units median',\n", + " 'Median value ($) of owner-occupied housing units nunique',\n", + " 'Median value ($) of 
owner-occupied housing units std',\n", + " 'Percent individuals age 25 or over with less than high school degree median',\n", + " 'Percent individuals age 25 or over with less than high school degree nunique',\n", + " 'Percent individuals age 25 or over with less than high school degree std',\n", + " 'Percent enrollment in college or graduate school median',\n", + " 'Percent enrollment in college or graduate school nunique',\n", + " 'Percent enrollment in college or graduate school std',\n", + " 'Percent Black or African American alone median',\n", + " 'Percent Black or African American alone nunique',\n", + " 'Percent Black or African American alone std',\n", + " 'Percent American Indian and Alaska Native alone median',\n", + " 'Percent American Indian and Alaska Native alone nunique',\n", + " 'Percent American Indian and Alaska Native alone std',\n", + " 'Percent Non-Hispanic White median',\n", + " 'Percent Non-Hispanic White nunique', 'Percent Non-Hispanic White std',\n", + " 'Linguistic isolation (percent) median',\n", + " 'Linguistic isolation (percent) nunique',\n", + " 'Linguistic isolation (percent) std',\n", + " 'Current lack of health insurance among adults aged 18-64 years median',\n", + " 'Current lack of health insurance among adults aged 18-64 years nunique',\n", + " 'Current lack of health insurance among adults aged 18-64 years std',\n", + " 'Median household income in the past 12 months median',\n", + " 'Median household income in the past 12 months nunique',\n", + " 'Median household income in the past 12 months std',\n", + " 'Percent Hispanic or Latino median',\n", + " 'Percent Hispanic or Latino nunique', 'Percent Hispanic or Latino std',\n", + " 'hbrd_rank median', 'hbrd_rank nunique', 'hbrd_rank std',\n", + " 'current_methodology_percent median',\n", + " 'current_methodology_percent nunique',\n", + " 'current_methodology_percent std', 'current_summed_methodology median',\n", + " 'current_summed_methodology nunique', 'current_summed_methodology 
std',\n", + " 'current_summed_methodology sum'],\n", + " dtype='object')" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_state_groupeds_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 100, "metadata": {}, "outputs": [ { @@ -3818,324 +3997,157 @@ " vertical-align: top;\n", " }\n", "\n", - " .dataframe thead tr th {\n", - " text-align: left;\n", + " .dataframe thead th {\n", + " text-align: right;\n", " }\n", "\n", "\n", " \n", - " \n", + " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " 
\n", " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", "
GEOID10_TRACT nuniquestate_nameUrban Heuristic FlagGEOID10_TRACTPercent of individuals < 200% Federal Poverty LineMedian value ($) of owner-occupied housing units...hbrd_rankcurrent_methodology_percentcurrent_summed_methodology
mediannuniquestdmediannuniquestdmediannunique...mediannuniquestdmediannuniquestdmediannuniquestdsumPercent Black or African American alone median
6685CA16.037302e+096961.964706e+0756.10086369513.405773411400.0643...94.8026156262.80593454.0457.771820705.0353260.6880045153336.508294
121FL01.205701e+101NaN48.9764591NaN168300.01...91.0391571NaN36.01NaN238.01NaN23822.120285
13350FL11.208600e+103583.394531e+0758.33421435813.690298165750.0332...94.8167673532.89457451.0428.418766780.0260376.35633830259630.896102
44432NY13.604705e+104343.165346e+0755.19718643413.837026496050.0410...94.8379484052.88689756.5367.182586655.0288358.83756930909225.861088
573TX04.820901e+1031.467538e+0857.802414311.034672123300.03...92.22007331.24966935.021.154701358.03190.43721613130.084998
58371TX14.820135e+103871.390878e+0860.64703038713.538182106600.0343...94.6915973732.76264544.0417.813427690.0272342.11203428494616.522423
\n", - "

6 rows × 48 columns

\n", "" ], "text/plain": [ - " state_name Urban Heuristic Flag GEOID10_TRACT \\\n", - " median nunique std \n", - "6 CA 1 6.037302e+09 696 1.964706e+07 \n", - "12 FL 0 1.205701e+10 1 NaN \n", - "13 FL 1 1.208600e+10 358 3.394531e+07 \n", - "44 NY 1 3.604705e+10 434 3.165346e+07 \n", - "57 TX 0 4.820901e+10 3 1.467538e+08 \n", - "58 TX 1 4.820135e+10 387 1.390878e+08 \n", + " GEOID10_TRACT nunique state_name \\\n", + "6 685 CA \n", + "12 1 FL \n", + "13 350 FL \n", + "44 432 NY \n", + "57 3 TX \n", + "58 371 TX \n", "\n", - " Percent of individuals < 200% Federal Poverty Line \\\n", - " median nunique std \n", - "6 56.100863 695 13.405773 \n", - "12 48.976459 1 NaN \n", - "13 58.334214 358 13.690298 \n", - "44 55.197186 434 13.837026 \n", - "57 57.802414 3 11.034672 \n", - "58 60.647030 387 13.538182 \n", - "\n", - " Median value ($) of owner-occupied housing units ... hbrd_rank \\\n", - " median nunique ... median \n", - "6 411400.0 643 ... 94.802615 \n", - "12 168300.0 1 ... 91.039157 \n", - "13 165750.0 332 ... 94.816767 \n", - "44 496050.0 410 ... 94.837948 \n", - "57 123300.0 3 ... 92.220073 \n", - "58 106600.0 343 ... 
94.691597 \n", - "\n", - " current_methodology_percent \\\n", - " nunique std median nunique std \n", - "6 626 2.805934 54.0 45 7.771820 \n", - "12 1 NaN 36.0 1 NaN \n", - "13 353 2.894574 51.0 42 8.418766 \n", - "44 405 2.886897 56.5 36 7.182586 \n", - "57 3 1.249669 35.0 2 1.154701 \n", - "58 373 2.762645 44.0 41 7.813427 \n", - "\n", - " current_summed_methodology \n", - " median nunique std sum \n", - "6 705.0 353 260.688004 515333 \n", - "12 238.0 1 NaN 238 \n", - "13 780.0 260 376.356338 302596 \n", - "44 655.0 288 358.837569 309092 \n", - "57 358.0 3 190.437216 1313 \n", - "58 690.0 272 342.112034 284946 \n", - "\n", - "[6 rows x 48 columns]" + " Percent Black or African American alone median \n", + "6 6.508294 \n", + "12 22.120285 \n", + "13 30.896102 \n", + "44 25.861088 \n", + "57 0.084998 \n", + "58 16.522423 " ] }, - "execution_count": 79, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "grouped_stats[grouped_stats.state_name.isin(target_states)]" + "target_state_groupeds_df[['GEOID10_TRACT nunique','state_name', 'Percent Black or African American alone median']]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ - "target_states" + "target_state_groupeds_df.to_csv(\"top_4_states_12202021.csv\")" ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 78, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "515571" + "506990" ] }, - "execution_count": 80, + "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "515333 + 238" + "# ca\n", + "506752 + 238" ] }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "286259" + "271370" ] }, - "execution_count": 81, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "284946 + 1313" + "# tx\n", + "270057 + 1313" ] }, 
{ "cell_type": "code", - "execution_count": 82, + "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "302834" + "295318" ] }, - "execution_count": 82, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "302596 + 238" + "# fl\n", + "295080 + 238" ] }, {