diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index 7eb50ce0..a3db0c87 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -87,243 +87,7 @@ "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3169: DtypeWarning: Columns (87,88,90) have mixed types.Specify dtype option on import or set low_memory=False.\n", - " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10Housing burden (percent)Total populationAir toxics cancer riskRespiratory hazard indexDiesel particulate matterParticulate matter (PM2.5)OzoneTraffic proximity and volumeProximity to RMP sites...Score D (top 25th percentile)Score E (percentile)Score E (top 25th percentile)GEOIDState AbbreviationCounty NameState CodeState NameGEOID10_TRACTGEOID10_STATE
00100102010010.1569249.380.790.2810.0040.1291.020.09...False0.35False1001ALAutauga County1.00Alabama0100102010001
10100102010020.15115349.380.790.2810.0040.122.620.07...False0.11False1001ALBaldwin County2.00Alaska0100102010001
20100102020010.25102050.320.810.3010.0740.224.680.08...False0.51False1001ALBarbour County4.00Arizona0100102020001
30100102020020.25115250.320.810.3010.0740.22218.650.09...False0.59False1001ALBibb County5.00Arkansas0100102020001
40100102030010.21255550.770.820.3610.1240.3169.640.08...False0.47False1001ALBlount County6.00California0100102030001
\n", - "

5 rows × 93 columns

\n", - "
" - ], - "text/plain": [ - " GEOID10 Housing burden (percent) Total population \\\n", - "0 010010201001 0.15 692 \n", - "1 010010201002 0.15 1153 \n", - "2 010010202001 0.25 1020 \n", - "3 010010202002 0.25 1152 \n", - "4 010010203001 0.21 2555 \n", - "\n", - " Air toxics cancer risk Respiratory hazard index \\\n", - "0 49.38 0.79 \n", - "1 49.38 0.79 \n", - "2 50.32 0.81 \n", - "3 50.32 0.81 \n", - "4 50.77 0.82 \n", - "\n", - " Diesel particulate matter Particulate matter (PM2.5) Ozone \\\n", - "0 0.28 10.00 40.12 \n", - "1 0.28 10.00 40.12 \n", - "2 0.30 10.07 40.22 \n", - "3 0.30 10.07 40.22 \n", - "4 0.36 10.12 40.31 \n", - "\n", - " Traffic proximity and volume Proximity to RMP sites ... \\\n", - "0 91.02 0.09 ... \n", - "1 2.62 0.07 ... \n", - "2 4.68 0.08 ... \n", - "3 218.65 0.09 ... \n", - "4 69.64 0.08 ... \n", - "\n", - " Score D (top 25th percentile) Score E (percentile) \\\n", - "0 False 0.35 \n", - "1 False 0.11 \n", - "2 False 0.51 \n", - "3 False 0.59 \n", - "4 False 0.47 \n", - "\n", - " Score E (top 25th percentile) GEOID State Abbreviation County Name \\\n", - "0 False 1001 AL Autauga County \n", - "1 False 1001 AL Baldwin County \n", - "2 False 1001 AL Barbour County \n", - "3 False 1001 AL Bibb County \n", - "4 False 1001 AL Blount County \n", - "\n", - " State Code State Name GEOID10_TRACT GEOID10_STATE \n", - "0 1.00 Alabama 01001020100 01 \n", - "1 2.00 Alaska 01001020100 01 \n", - "2 4.00 Arizona 01001020200 01 \n", - "3 5.00 Arkansas 01001020200 01 \n", - "4 6.00 California 01001020300 01 \n", - "\n", - "[5 rows x 93 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Load CEJST score data\n", "cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa.csv\"\n", @@ -350,228 +114,7 @@ "metadata": { "scrolled": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10_TRACTTotal PopulationCalifornia CountyZIPNearby City \\r\\n(to help approximate location only)LongitudeLatitudecalenviroscreen_scorecalenviroscreen_percentileDRAFT CES 4.0\\r\\nPercentile Range...PovertyPoverty PctlUnemploymentUnemployment PctlHousing BurdenHousing Burden PctlPop. Char.Pop. Char. ScorePop. Char. Pctlcalenviroscreen_priority_community
0060190011002760Fresno93706Fresno-119.7836.7194.61100.0095-100% (highest scores)...76.6098.4316.2097.1530.7090.6193.739.7299.87True
1060770007004177San Joaquin95206Stockton-121.2937.9490.8399.9995-100% (highest scores)...70.6096.4318.5098.4535.2095.6193.409.6899.84True
2060770001004055San Joaquin95202Stockton-121.2937.9585.7599.9795-100% (highest scores)...81.8099.5017.9098.1736.4096.5195.719.9299.97True
3060710016005527San Bernardino91761Ontario-117.6234.0683.5699.9695-100% (highest scores)...67.1094.826.7057.2032.1092.6580.598.3693.06True
4060372049202639Los Angeles90023Los Angeles-118.2034.0282.9099.9595-100% (highest scores)...64.9093.515.6043.8125.0077.9583.958.7095.78True
\n", - "

5 rows × 59 columns

\n", - "
" - ], - "text/plain": [ - " GEOID10_TRACT Total Population California County ZIP \\\n", - "0 06019001100 2760 Fresno 93706 \n", - "1 06077000700 4177 San Joaquin 95206 \n", - "2 06077000100 4055 San Joaquin 95202 \n", - "3 06071001600 5527 San Bernardino 91761 \n", - "4 06037204920 2639 Los Angeles 90023 \n", - "\n", - " Nearby City \\r\\n(to help approximate location only) Longitude Latitude \\\n", - "0 Fresno -119.78 36.71 \n", - "1 Stockton -121.29 37.94 \n", - "2 Stockton -121.29 37.95 \n", - "3 Ontario -117.62 34.06 \n", - "4 Los Angeles -118.20 34.02 \n", - "\n", - " calenviroscreen_score calenviroscreen_percentile \\\n", - "0 94.61 100.00 \n", - "1 90.83 99.99 \n", - "2 85.75 99.97 \n", - "3 83.56 99.96 \n", - "4 82.90 99.95 \n", - "\n", - " DRAFT CES 4.0\\r\\nPercentile Range ... Poverty Poverty Pctl Unemployment \\\n", - "0 95-100% (highest scores) ... 76.60 98.43 16.20 \n", - "1 95-100% (highest scores) ... 70.60 96.43 18.50 \n", - "2 95-100% (highest scores) ... 81.80 99.50 17.90 \n", - "3 95-100% (highest scores) ... 67.10 94.82 6.70 \n", - "4 95-100% (highest scores) ... 64.90 93.51 5.60 \n", - "\n", - " Unemployment Pctl Housing Burden Housing Burden Pctl Pop. Char. \\\n", - "0 97.15 30.70 90.61 93.73 \n", - "1 98.45 35.20 95.61 93.40 \n", - "2 98.17 36.40 96.51 95.71 \n", - "3 57.20 32.10 92.65 80.59 \n", - "4 43.81 25.00 77.95 83.95 \n", - "\n", - " Pop. Char. Score Pop. Char. Pctl calenviroscreen_priority_community \n", - "0 9.72 99.87 True \n", - "1 9.68 99.84 True \n", - "2 9.92 99.97 True \n", - "3 8.36 93.06 True \n", - "4 8.70 95.78 True \n", - "\n", - "[5 rows x 59 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Load CalEnviroScreen 4.0\n", "CALENVIROSCREEN_SCORE_FIELD = \"calenviroscreen_score\"\n", @@ -598,168 +141,7 @@ "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
FIDGEOID10_TRACTSTATESTUSABSTATE_NAMECOUNTYCOUNTY_NAMECNTY_FIPSTRACTRCAP_90RCAP_00RCAP_10hud_recap_priority_communitySHAPE_LengthSHAPE_Area
029993010010201001ALAlabama1Autauga1001201000.000.000.00False0.150.00
130627010010202001ALAlabama1Autauga1001202000.000.000.00False0.090.00
229992010010203001ALAlabama1Autauga1001203000.000.000.00False0.100.00
330079010010204001ALAlabama1Autauga1001204000.000.000.00False0.120.00
430078010010205001ALAlabama1Autauga1001205000.000.000.00False0.160.00
\n", - "
" - ], - "text/plain": [ - " FID GEOID10_TRACT STATE STUSAB STATE_NAME COUNTY COUNTY_NAME \\\n", - "0 29993 01001020100 1 AL Alabama 1 Autauga \n", - "1 30627 01001020200 1 AL Alabama 1 Autauga \n", - "2 29992 01001020300 1 AL Alabama 1 Autauga \n", - "3 30079 01001020400 1 AL Alabama 1 Autauga \n", - "4 30078 01001020500 1 AL Alabama 1 Autauga \n", - "\n", - " CNTY_FIPS TRACT RCAP_90 RCAP_00 RCAP_10 hud_recap_priority_community \\\n", - "0 1001 20100 0.00 0.00 0.00 False \n", - "1 1001 20200 0.00 0.00 0.00 False \n", - "2 1001 20300 0.00 0.00 0.00 False \n", - "3 1001 20400 0.00 0.00 0.00 False \n", - "4 1001 20500 0.00 0.00 0.00 False \n", - "\n", - " SHAPE_Length SHAPE_Area \n", - "0 0.15 0.00 \n", - "1 0.09 0.00 \n", - "2 0.10 0.00 \n", - "3 0.12 0.00 \n", - "4 0.16 0.00 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Load HUD data\n", "hud_recap_data_path = DATA_DIR / \"dataset\" / \"hud_recap\" / \"usa.csv\"\n", @@ -775,230 +157,9 @@ "execution_count": null, "id": "a6c85d87", "metadata": { - "scrolled": true + "scrolled": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10_TRACTTotal PopulationCalifornia CountyZIPNearby City \\r\\n(to help approximate location only)LongitudeLatitudecalenviroscreen_scorecalenviroscreen_percentileDRAFT CES 4.0\\r\\nPercentile Range...COUNTYCOUNTY_NAMECNTY_FIPSTRACTRCAP_90RCAP_00RCAP_10hud_recap_priority_communitySHAPE_LengthSHAPE_Area
0060190011002760.00Fresno93706.00Fresno-119.7836.7194.61100.0095-100% (highest scores)...19Fresno601911000.001.001.00True0.090.00
1060770007004177.00San Joaquin95206.00Stockton-121.2937.9490.8399.9995-100% (highest scores)...77San Joaquin60777000.000.000.00True0.070.00
2060770001004055.00San Joaquin95202.00Stockton-121.2937.9585.7599.9795-100% (highest scores)...77San Joaquin60771001.001.001.00True0.060.00
3060710016005527.00San Bernardino91761.00Ontario-117.6234.0683.5699.9695-100% (highest scores)...71San Bernardino607116000.000.000.00True0.250.00
4060372049202639.00Los Angeles90023.00Los Angeles-118.2034.0282.9099.9595-100% (highest scores)...37Los Angeles60372049200.000.000.00False0.040.00
\n", - "

5 rows × 73 columns

\n", - "
" - ], - "text/plain": [ - " GEOID10_TRACT Total Population California County ZIP \\\n", - "0 06019001100 2760.00 Fresno 93706.00 \n", - "1 06077000700 4177.00 San Joaquin 95206.00 \n", - "2 06077000100 4055.00 San Joaquin 95202.00 \n", - "3 06071001600 5527.00 San Bernardino 91761.00 \n", - "4 06037204920 2639.00 Los Angeles 90023.00 \n", - "\n", - " Nearby City \\r\\n(to help approximate location only) Longitude Latitude \\\n", - "0 Fresno -119.78 36.71 \n", - "1 Stockton -121.29 37.94 \n", - "2 Stockton -121.29 37.95 \n", - "3 Ontario -117.62 34.06 \n", - "4 Los Angeles -118.20 34.02 \n", - "\n", - " calenviroscreen_score calenviroscreen_percentile \\\n", - "0 94.61 100.00 \n", - "1 90.83 99.99 \n", - "2 85.75 99.97 \n", - "3 83.56 99.96 \n", - "4 82.90 99.95 \n", - "\n", - " DRAFT CES 4.0\\r\\nPercentile Range ... COUNTY COUNTY_NAME CNTY_FIPS \\\n", - "0 95-100% (highest scores) ... 19 Fresno 6019 \n", - "1 95-100% (highest scores) ... 77 San Joaquin 6077 \n", - "2 95-100% (highest scores) ... 77 San Joaquin 6077 \n", - "3 95-100% (highest scores) ... 71 San Bernardino 6071 \n", - "4 95-100% (highest scores) ... 37 Los Angeles 6037 \n", - "\n", - " TRACT RCAP_90 RCAP_00 RCAP_10 hud_recap_priority_community \\\n", - "0 1100 0.00 1.00 1.00 True \n", - "1 700 0.00 0.00 0.00 True \n", - "2 100 1.00 1.00 1.00 True \n", - "3 1600 0.00 0.00 0.00 True \n", - "4 204920 0.00 0.00 0.00 False \n", - "\n", - " SHAPE_Length SHAPE_Area \n", - "0 0.09 0.00 \n", - "1 0.07 0.00 \n", - "2 0.06 0.00 \n", - "3 0.25 0.00 \n", - "4 0.04 0.00 \n", - "\n", - "[5 rows x 73 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Join all dataframes that use tracts\n", "census_tract_dfs = [calenviroscreen_df, hud_recap_df]\n", @@ -1028,228 +189,7 @@ "metadata": { "scrolled": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10Housing burden (percent)Total populationAir toxics cancer riskRespiratory hazard indexDiesel particulate matterParticulate matter (PM2.5)Ozone_xTraffic proximity and volumeProximity to RMP sites...COUNTYCOUNTY_NAMECNTY_FIPSTRACTRCAP_90RCAP_00RCAP_10hud_recap_priority_communitySHAPE_LengthSHAPE_Area
00100102010010.1569249.380.790.2810.0040.1291.020.09...1.00Autauga1001.0020100.000.000.000.00False0.150.00
10100102010020.15115349.380.790.2810.0040.122.620.07...1.00Autauga1001.0020100.000.000.000.00False0.150.00
20100102020010.25102050.320.810.3010.0740.224.680.08...1.00Autauga1001.0020200.000.000.000.00False0.090.00
30100102020020.25115250.320.810.3010.0740.22218.650.09...1.00Autauga1001.0020200.000.000.000.00False0.090.00
40100102030010.21255550.770.820.3610.1240.3169.640.08...1.00Autauga1001.0020300.000.000.000.00False0.100.00
\n", - "

5 rows × 165 columns

\n", - "
" - ], - "text/plain": [ - " GEOID10 Housing burden (percent) Total population \\\n", - "0 010010201001 0.15 692 \n", - "1 010010201002 0.15 1153 \n", - "2 010010202001 0.25 1020 \n", - "3 010010202002 0.25 1152 \n", - "4 010010203001 0.21 2555 \n", - "\n", - " Air toxics cancer risk Respiratory hazard index \\\n", - "0 49.38 0.79 \n", - "1 49.38 0.79 \n", - "2 50.32 0.81 \n", - "3 50.32 0.81 \n", - "4 50.77 0.82 \n", - "\n", - " Diesel particulate matter Particulate matter (PM2.5) Ozone_x \\\n", - "0 0.28 10.00 40.12 \n", - "1 0.28 10.00 40.12 \n", - "2 0.30 10.07 40.22 \n", - "3 0.30 10.07 40.22 \n", - "4 0.36 10.12 40.31 \n", - "\n", - " Traffic proximity and volume Proximity to RMP sites ... COUNTY \\\n", - "0 91.02 0.09 ... 1.00 \n", - "1 2.62 0.07 ... 1.00 \n", - "2 4.68 0.08 ... 1.00 \n", - "3 218.65 0.09 ... 1.00 \n", - "4 69.64 0.08 ... 1.00 \n", - "\n", - " COUNTY_NAME CNTY_FIPS TRACT RCAP_90 RCAP_00 RCAP_10 \\\n", - "0 Autauga 1001.00 20100.00 0.00 0.00 0.00 \n", - "1 Autauga 1001.00 20100.00 0.00 0.00 0.00 \n", - "2 Autauga 1001.00 20200.00 0.00 0.00 0.00 \n", - "3 Autauga 1001.00 20200.00 0.00 0.00 0.00 \n", - "4 Autauga 1001.00 20300.00 0.00 0.00 0.00 \n", - "\n", - " hud_recap_priority_community SHAPE_Length SHAPE_Area \n", - "0 False 0.15 0.00 \n", - "1 False 0.15 0.00 \n", - "2 False 0.09 0.00 \n", - "3 False 0.09 0.00 \n", - "4 False 0.10 0.00 \n", - "\n", - "[5 rows x 165 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Join tract indices and CEJST data.\n", "# Note: we're joining on the census *tract*, so there will be multiple CBG entries joined to the same census tract row from CES,\n", @@ -1359,383 +299,7 @@ "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Converting calenviroscreen_priority_community to boolean.\n", - "Converting hud_recap_priority_community to boolean.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "223dcb75c0384fd5b93bc2ac3bc07656", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/52 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10_STATEState nameTotal CBGs in stateTotal population in stateScore A (top 25th percentile) (priority population)Score A (top 25th percentile) (total CBGs)Score A (top 25th percentile) (percent CBGs)Score A (top 25th percentile) (percent population)Score B (top 25th percentile) (priority population)Score B (top 25th percentile) (total CBGs)...Score E (top 25th percentile) (percent CBGs)Score E (top 25th percentile) (percent population)calenviroscreen_priority_community (priority population)calenviroscreen_priority_community (total CBGs)calenviroscreen_priority_community (percent CBGs)calenviroscreen_priority_community (percent population)hud_recap_priority_community (priority population)hud_recap_priority_community (total CBGs)hud_recap_priority_community (percent CBGs)hud_recap_priority_community (percent population)
GEOID10_STATE
01001Alabama34384850771154734513260.390.3215564171323...0.230.19000.000.002351172580.080.05
02002Alaska53473856563868570.110.096386857...0.140.12000.000.00653680.010.01
04004Arizona41786809946195605212300.290.2919608561231...0.300.30000.000.005603533780.090.08
05005Arkansas214729779449607998170.380.32975780826...0.200.18000.000.001012001060.050.03
06006California23212389828471261081071020.310.32125568467065...0.400.42961028756900.250.25174876510130.040.04
\n", - "

5 rows × 32 columns

\n", - "" - ], - "text/plain": [ - " GEOID10_STATE State name Total CBGs in state \\\n", - "GEOID10_STATE \n", - "01 0 01 Alabama 3438 \n", - "02 0 02 Alaska 534 \n", - "04 0 04 Arizona 4178 \n", - "05 0 05 Arkansas 2147 \n", - "06 0 06 California 23212 \n", - "\n", - " Total population in state \\\n", - "GEOID10_STATE \n", - "01 0 4850771 \n", - "02 0 738565 \n", - "04 0 6809946 \n", - "05 0 2977944 \n", - "06 0 38982847 \n", - "\n", - " Score A (top 25th percentile) (priority population) \\\n", - "GEOID10_STATE \n", - "01 0 1547345 \n", - "02 0 63868 \n", - "04 0 1956052 \n", - "05 0 960799 \n", - "06 0 12610810 \n", - "\n", - " Score A (top 25th percentile) (total CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 1326 \n", - "02 0 57 \n", - "04 0 1230 \n", - "05 0 817 \n", - "06 0 7102 \n", - "\n", - " Score A (top 25th percentile) (percent CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 0.39 \n", - "02 0 0.11 \n", - "04 0 0.29 \n", - "05 0 0.38 \n", - "06 0 0.31 \n", - "\n", - " Score A (top 25th percentile) (percent population) \\\n", - "GEOID10_STATE \n", - "01 0 0.32 \n", - "02 0 0.09 \n", - "04 0 0.29 \n", - "05 0 0.32 \n", - "06 0 0.32 \n", - "\n", - " Score B (top 25th percentile) (priority population) \\\n", - "GEOID10_STATE \n", - "01 0 1556417 \n", - "02 0 63868 \n", - "04 0 1960856 \n", - "05 0 975780 \n", - "06 0 12556846 \n", - "\n", - " Score B (top 25th percentile) (total CBGs) ... \\\n", - "GEOID10_STATE ... \n", - "01 0 1323 ... \n", - "02 0 57 ... \n", - "04 0 1231 ... \n", - "05 0 826 ... \n", - "06 0 7065 ... \n", - "\n", - " Score E (top 25th percentile) (percent CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 0.23 \n", - "02 0 0.14 \n", - "04 0 0.30 \n", - "05 0 0.20 \n", - "06 0 0.40 \n", - "\n", - " Score E (top 25th percentile) (percent population) \\\n", - "GEOID10_STATE \n", - "01 0 0.19 \n", - "02 0 0.12 \n", - "04 0 0.30 \n", - "05 0 0.18 \n", - "06 0 0.42 \n", - "\n", - " calenviroscreen_priority_community (priority population) \\\n", - "GEOID10_STATE \n", - "01 0 0 \n", - "02 0 0 \n", - "04 0 0 \n", - "05 0 0 \n", - "06 0 9610287 \n", - "\n", - " calenviroscreen_priority_community (total CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 0 \n", - "02 0 0 \n", - "04 0 0 \n", - "05 0 0 \n", - "06 0 5690 \n", - "\n", - " calenviroscreen_priority_community (percent CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 0.00 \n", - "02 0 0.00 \n", - "04 0 0.00 \n", - "05 0 0.00 \n", - "06 0 0.25 \n", - "\n", - " calenviroscreen_priority_community (percent population) \\\n", - "GEOID10_STATE \n", - "01 0 0.00 \n", - "02 0 0.00 \n", - "04 0 0.00 \n", - "05 0 0.00 \n", - "06 0 0.25 \n", - "\n", - " hud_recap_priority_community (priority population) \\\n", - "GEOID10_STATE \n", - "01 0 235117 \n", - "02 0 6536 \n", - "04 0 560353 \n", - "05 0 101200 \n", - "06 0 1748765 \n", - "\n", - " hud_recap_priority_community (total CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 258 \n", - "02 0 8 \n", - "04 0 378 \n", - "05 0 106 \n", - "06 0 1013 \n", - "\n", - " hud_recap_priority_community (percent CBGs) \\\n", - "GEOID10_STATE \n", - "01 0 0.08 \n", - "02 0 0.01 \n", - "04 0 0.09 \n", - "05 0 0.05 \n", - "06 0 0.04 \n", - "\n", - " hud_recap_priority_community (percent population) \n", - "GEOID10_STATE \n", - "01 0 0.05 \n", - "02 0 0.01 \n", - "04 0 0.08 \n", - "05 0 0.03 \n", - "06 0 0.04 \n", - "\n", - "[5 rows x 32 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "def get_state_distributions(\n", " df: pd.DataFrame, priority_communities_fields: typing.List[str]\n", @@ -2565,50 +1129,7 @@ "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running comparisons for Score A against CalEnviroScreen 4.0...\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "531ec4deb2f54c26ad0f5311fdea0e60", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/8057 [00:00