"CalEnviroScreen for the US" example score (#204)

This commit is contained in:
Lucas Merrill Brown 2021-06-22 17:09:53 -07:00 committed by GitHub
commit f542d0e675
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 365 additions and 1093 deletions

View file

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "54615cef",
"metadata": {},
"outputs": [],
@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "49a63129",
"metadata": {},
"outputs": [],
@ -56,148 +56,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "2b26dccf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>census_block_group_id</th>\n",
" <th>census_block_group_population</th>\n",
" <th>cejst_score</th>\n",
" <th>score_b</th>\n",
" <th>cejst_percentile</th>\n",
" <th>score_b_percentile</th>\n",
" <th>score_a_top_percentile_25</th>\n",
" <th>score_b_top_percentile_25</th>\n",
" <th>cejst_priority_community</th>\n",
" <th>census_tract_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10297</th>\n",
" <td>60014001001</td>\n",
" <td>3115</td>\n",
" <td>0.14</td>\n",
" <td>0.02</td>\n",
" <td>0.10</td>\n",
" <td>0.14</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10298</th>\n",
" <td>60014002001</td>\n",
" <td>1037</td>\n",
" <td>0.09</td>\n",
" <td>0.01</td>\n",
" <td>0.05</td>\n",
" <td>0.07</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10299</th>\n",
" <td>60014002002</td>\n",
" <td>988</td>\n",
" <td>0.15</td>\n",
" <td>0.02</td>\n",
" <td>0.11</td>\n",
" <td>0.12</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10300</th>\n",
" <td>60014003001</td>\n",
" <td>1137</td>\n",
" <td>0.03</td>\n",
" <td>0.00</td>\n",
" <td>0.01</td>\n",
" <td>0.02</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10301</th>\n",
" <td>60014003002</td>\n",
" <td>1404</td>\n",
" <td>0.34</td>\n",
" <td>0.09</td>\n",
" <td>0.31</td>\n",
" <td>0.31</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400300</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" census_block_group_id census_block_group_population cejst_score \\\n",
"10297 60014001001 3115 0.14 \n",
"10298 60014002001 1037 0.09 \n",
"10299 60014002002 988 0.15 \n",
"10300 60014003001 1137 0.03 \n",
"10301 60014003002 1404 0.34 \n",
"\n",
" score_b cejst_percentile score_b_percentile \\\n",
"10297 0.02 0.10 0.14 \n",
"10298 0.01 0.05 0.07 \n",
"10299 0.02 0.11 0.12 \n",
"10300 0.00 0.01 0.02 \n",
"10301 0.09 0.31 0.31 \n",
"\n",
" score_a_top_percentile_25 score_b_top_percentile_25 \\\n",
"10297 False False \n",
"10298 False False \n",
"10299 False False \n",
"10300 False False \n",
"10301 False False \n",
"\n",
" cejst_priority_community census_tract_id \n",
"10297 False 6001400100 \n",
"10298 False 6001400200 \n",
"10299 False 6001400200 \n",
"10300 False 6001400300 \n",
"10301 False 6001400300 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Load CEJST score data\n",
"cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"usa.csv\"\n",
@ -240,19 +102,10 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "ec6b27e3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'justice40-data.s3.amazonaws.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:\n",
"# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip\n",
@ -267,18 +120,10 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "bdf08971",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"C:\\opt\\justice40-tool\\score\\data\\tmp\n"
]
}
],
"outputs": [],
"source": [
"# Extract zip\n",
"print(zip_file_path)\n",
@ -290,231 +135,10 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"id": "29c14b29",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>census_tract_id</th>\n",
" <th>Total Population</th>\n",
" <th>California County</th>\n",
" <th>ZIP</th>\n",
" <th>Nearby City \\r\\n(to help approximate location only)</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>calenviroscreen_score</th>\n",
" <th>calenviroscreen_percentile</th>\n",
" <th>DRAFT CES 4.0\\r\\nPercentile Range</th>\n",
" <th>...</th>\n",
" <th>Poverty</th>\n",
" <th>Poverty Pctl</th>\n",
" <th>Unemployment</th>\n",
" <th>Unemployment Pctl</th>\n",
" <th>Housing Burden</th>\n",
" <th>Housing Burden Pctl</th>\n",
" <th>Pop. Char.</th>\n",
" <th>Pop. Char. Score</th>\n",
" <th>Pop. Char. Pctl</th>\n",
" <th>calenviroscreen_priority_community</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6019001100</td>\n",
" <td>2760</td>\n",
" <td>Fresno</td>\n",
" <td>93706</td>\n",
" <td>Fresno</td>\n",
" <td>-119.78</td>\n",
" <td>36.71</td>\n",
" <td>94.61</td>\n",
" <td>100.00</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>76.60</td>\n",
" <td>98.43</td>\n",
" <td>16.20</td>\n",
" <td>97.15</td>\n",
" <td>30.70</td>\n",
" <td>90.61</td>\n",
" <td>93.73</td>\n",
" <td>9.72</td>\n",
" <td>99.87</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6077000700</td>\n",
" <td>4177</td>\n",
" <td>San Joaquin</td>\n",
" <td>95206</td>\n",
" <td>Stockton</td>\n",
" <td>-121.29</td>\n",
" <td>37.94</td>\n",
" <td>90.83</td>\n",
" <td>99.99</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>70.60</td>\n",
" <td>96.43</td>\n",
" <td>18.50</td>\n",
" <td>98.45</td>\n",
" <td>35.20</td>\n",
" <td>95.61</td>\n",
" <td>93.40</td>\n",
" <td>9.68</td>\n",
" <td>99.84</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6077000100</td>\n",
" <td>4055</td>\n",
" <td>San Joaquin</td>\n",
" <td>95202</td>\n",
" <td>Stockton</td>\n",
" <td>-121.29</td>\n",
" <td>37.95</td>\n",
" <td>85.75</td>\n",
" <td>99.97</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>81.80</td>\n",
" <td>99.50</td>\n",
" <td>17.90</td>\n",
" <td>98.17</td>\n",
" <td>36.40</td>\n",
" <td>96.51</td>\n",
" <td>95.71</td>\n",
" <td>9.92</td>\n",
" <td>99.97</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>6071001600</td>\n",
" <td>5527</td>\n",
" <td>San Bernardino</td>\n",
" <td>91761</td>\n",
" <td>Ontario</td>\n",
" <td>-117.62</td>\n",
" <td>34.06</td>\n",
" <td>83.56</td>\n",
" <td>99.96</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>67.10</td>\n",
" <td>94.82</td>\n",
" <td>6.70</td>\n",
" <td>57.20</td>\n",
" <td>32.10</td>\n",
" <td>92.65</td>\n",
" <td>80.59</td>\n",
" <td>8.36</td>\n",
" <td>93.06</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6037204920</td>\n",
" <td>2639</td>\n",
" <td>Los Angeles</td>\n",
" <td>90023</td>\n",
" <td>Los Angeles</td>\n",
" <td>-118.20</td>\n",
" <td>34.02</td>\n",
" <td>82.90</td>\n",
" <td>99.95</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>64.90</td>\n",
" <td>93.51</td>\n",
" <td>5.60</td>\n",
" <td>43.81</td>\n",
" <td>25.00</td>\n",
" <td>77.95</td>\n",
" <td>83.95</td>\n",
" <td>8.70</td>\n",
" <td>95.78</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 59 columns</p>\n",
"</div>"
],
"text/plain": [
" census_tract_id Total Population California County ZIP \\\n",
"0 6019001100 2760 Fresno 93706 \n",
"1 6077000700 4177 San Joaquin 95206 \n",
"2 6077000100 4055 San Joaquin 95202 \n",
"3 6071001600 5527 San Bernardino 91761 \n",
"4 6037204920 2639 Los Angeles 90023 \n",
"\n",
" Nearby City \\r\\n(to help approximate location only) Longitude Latitude \\\n",
"0 Fresno -119.78 36.71 \n",
"1 Stockton -121.29 37.94 \n",
"2 Stockton -121.29 37.95 \n",
"3 Ontario -117.62 34.06 \n",
"4 Los Angeles -118.20 34.02 \n",
"\n",
" calenviroscreen_score calenviroscreen_percentile \\\n",
"0 94.61 100.00 \n",
"1 90.83 99.99 \n",
"2 85.75 99.97 \n",
"3 83.56 99.96 \n",
"4 82.90 99.95 \n",
"\n",
" DRAFT CES 4.0\\r\\nPercentile Range ... Poverty Poverty Pctl Unemployment \\\n",
"0 95-100% (highest scores) ... 76.60 98.43 16.20 \n",
"1 95-100% (highest scores) ... 70.60 96.43 18.50 \n",
"2 95-100% (highest scores) ... 81.80 99.50 17.90 \n",
"3 95-100% (highest scores) ... 67.10 94.82 6.70 \n",
"4 95-100% (highest scores) ... 64.90 93.51 5.60 \n",
"\n",
" Unemployment Pctl Housing Burden Housing Burden Pctl Pop. Char. \\\n",
"0 97.15 30.70 90.61 93.73 \n",
"1 98.45 35.20 95.61 93.40 \n",
"2 98.17 36.40 96.51 95.71 \n",
"3 57.20 32.10 92.65 80.59 \n",
"4 43.81 25.00 77.95 83.95 \n",
"\n",
" Pop. Char. Score Pop. Char. Pctl calenviroscreen_priority_community \n",
"0 9.72 99.87 True \n",
"1 9.68 99.84 True \n",
"2 9.92 99.97 True \n",
"3 8.36 93.06 True \n",
"4 8.70 95.78 True \n",
"\n",
"[5 rows x 59 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Load comparison index (CalEnviroScreen 4)\n",
"\n",
@ -541,142 +165,10 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "813e5656",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>census_block_group_id</th>\n",
" <th>census_tract_id</th>\n",
" <th>census_block_group_population</th>\n",
" <th>cejst_score</th>\n",
" <th>cejst_percentile</th>\n",
" <th>cejst_priority_community</th>\n",
" <th>calenviroscreen_score</th>\n",
" <th>calenviroscreen_percentile</th>\n",
" <th>calenviroscreen_priority_community</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>60014001001</td>\n",
" <td>6001400100</td>\n",
" <td>3115</td>\n",
" <td>0.14</td>\n",
" <td>0.10</td>\n",
" <td>False</td>\n",
" <td>4.40</td>\n",
" <td>2.38</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>60014002001</td>\n",
" <td>6001400200</td>\n",
" <td>1037</td>\n",
" <td>0.09</td>\n",
" <td>0.05</td>\n",
" <td>False</td>\n",
" <td>5.05</td>\n",
" <td>3.48</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>60014002002</td>\n",
" <td>6001400200</td>\n",
" <td>988</td>\n",
" <td>0.15</td>\n",
" <td>0.11</td>\n",
" <td>False</td>\n",
" <td>5.05</td>\n",
" <td>3.48</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>60014003001</td>\n",
" <td>6001400300</td>\n",
" <td>1137</td>\n",
" <td>0.03</td>\n",
" <td>0.01</td>\n",
" <td>False</td>\n",
" <td>9.92</td>\n",
" <td>13.44</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>60014003002</td>\n",
" <td>6001400300</td>\n",
" <td>1404</td>\n",
" <td>0.34</td>\n",
" <td>0.31</td>\n",
" <td>False</td>\n",
" <td>9.92</td>\n",
" <td>13.44</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" census_block_group_id census_tract_id census_block_group_population \\\n",
"0 60014001001 6001400100 3115 \n",
"1 60014002001 6001400200 1037 \n",
"2 60014002002 6001400200 988 \n",
"3 60014003001 6001400300 1137 \n",
"4 60014003002 6001400300 1404 \n",
"\n",
" cejst_score cejst_percentile cejst_priority_community \\\n",
"0 0.14 0.10 False \n",
"1 0.09 0.05 False \n",
"2 0.15 0.11 False \n",
"3 0.03 0.01 False \n",
"4 0.34 0.31 False \n",
"\n",
" calenviroscreen_score calenviroscreen_percentile \\\n",
"0 4.40 2.38 \n",
"1 5.05 3.48 \n",
"2 5.05 3.48 \n",
"3 9.92 13.44 \n",
"4 9.92 13.44 \n",
"\n",
" calenviroscreen_priority_community \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Join CalEnviroScreen and CEJST data.\n",
"# Note: we're joining on the census *tract*, so there will be multiple CBG entries joined to the same census tract row from CES,\n",
@ -716,58 +208,12 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "939baea4",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" census_tract_id calenviroscreen_score \\\n",
"census_tract_id \n",
"6019001100 6019001100 94.61 \n",
"6077000700 6077000700 90.83 \n",
"6077000100 6077000100 85.75 \n",
"6071001600 6071001600 83.56 \n",
"6037204920 6037204920 82.90 \n",
"\n",
" calenviroscreen_percentile \\\n",
"census_tract_id \n",
"6019001100 100.00 \n",
"6077000700 99.99 \n",
"6077000100 99.97 \n",
"6071001600 99.96 \n",
"6037204920 99.95 \n",
"\n",
" calenviroscreen_priority_community \\\n",
"census_tract_id \n",
"6019001100 True \n",
"6077000700 True \n",
"6077000100 True \n",
"6071001600 True \n",
"6037204920 True \n",
"\n",
" CES Tract has at least one CEJST CBG? \\\n",
"census_tract_id \n",
"6019001100 True \n",
"6077000700 True \n",
"6077000100 True \n",
"6071001600 True \n",
"6037204920 True \n",
"\n",
" CES Tract has 100% CEJST CBGs? \n",
"census_tract_id \n",
"6019001100 True \n",
"6077000700 True \n",
"6077000100 True \n",
"6071001600 False \n",
"6037204920 True \n"
]
}
],
"outputs": [],
"source": [
"# Create analysis\n",
"def calculate_comparison(frame):\n",
@ -826,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"id": "85709225",
"metadata": {
"scrolled": true