"CalEnviroScreen for the US" example score (#204)

This commit is contained in:
Lucas Merrill Brown 2021-06-22 17:09:53 -07:00 committed by GitHub
parent a2a321d93d
commit f542d0e675
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 365 additions and 1093 deletions

View file

@ -1,3 +1,4 @@
{
"python.pythonPath": "venv\\Scripts\\python.exe"
}
"python.pythonPath": "venv\\Scripts\\python.exe",
"python.dataScience.sendSelectionToInteractiveWindow": false
}

View file

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "20aa3891",
"metadata": {},
"outputs": [],
@ -21,21 +21,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "67a58c24",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'gaftp.epa.gov'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"download = requests.get(\"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\", verify=False)\n",
"download = requests.get(\n",
" \"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\",\n",
" verify=False,\n",
")\n",
"file_contents = download.content\n",
"zip_file_path = data_path / \"tmp\"\n",
"zip_file = open(zip_file_path / \"downloaded.zip\", \"wb\")\n",
@ -45,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "cc3fb9ec",
"metadata": {},
"outputs": [],
@ -57,101 +51,33 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "b25738bb",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df = pd.read_csv(ejscreen_csv, dtype={'ID': 'string'}, low_memory=False)"
"df = pd.read_csv(ejscreen_csv, dtype={\"ID\": \"string\"}, low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6994f2d",
"metadata": {},
"outputs": [],
"source": [
"df = df[[\"ID\", \"ACSTOTPOP\", \"LESSHSPCT\", \"LOWINCPCT\"]]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "9fa2077a",
"metadata": {},
"outputs": [],
"source": [
"# write nationwide csv\n",
"df.to_csv(csv_path / f\"usa.csv\", index = False)"
"df.to_csv(csv_path / f\"usa.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "5e5cc12a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating data01 csv\n",
"Generating data02 csv\n",
"Generating data04 csv\n",
"Generating data05 csv\n",
"Generating data06 csv\n",
"Generating data08 csv\n",
"Generating data09 csv\n",
"Generating data10 csv\n",
"Generating data11 csv\n",
"Generating data12 csv\n",
"Generating data13 csv\n",
"Generating data15 csv\n",
"Generating data16 csv\n",
"Generating data17 csv\n",
"Generating data18 csv\n",
"Generating data19 csv\n",
"Generating data20 csv\n",
"Generating data21 csv\n",
"Generating data22 csv\n",
"Generating data23 csv\n",
"Generating data24 csv\n",
"Generating data25 csv\n",
"Generating data26 csv\n",
"Generating data27 csv\n",
"Generating data28 csv\n",
"Generating data29 csv\n",
"Generating data30 csv\n",
"Generating data31 csv\n",
"Generating data32 csv\n",
"Generating data33 csv\n",
"Generating data34 csv\n",
"Generating data35 csv\n",
"Generating data36 csv\n",
"Generating data37 csv\n",
"Generating data38 csv\n",
"Generating data39 csv\n",
"Generating data40 csv\n",
"Generating data41 csv\n",
"Generating data42 csv\n",
"Generating data44 csv\n",
"Generating data45 csv\n",
"Generating data46 csv\n",
"Generating data47 csv\n",
"Generating data48 csv\n",
"Generating data49 csv\n",
"Generating data50 csv\n",
"Generating data51 csv\n",
"Generating data53 csv\n",
"Generating data54 csv\n",
"Generating data55 csv\n",
"Generating data56 csv\n"
]
}
],
"outputs": [],
"source": [
"# write per state csvs\n",
"with open(fips_csv_path) as csv_file:\n",
@ -166,16 +92,8 @@
" print(f\"Generating data{fips} csv\")\n",
" df1 = df[df.ID.str[:2] == fips]\n",
" # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
" df1.to_csv(csv_path / f\"data{fips}.csv\", index = False)"
" df1.to_csv(csv_path / f\"data{fips}.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2674fb20",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -194,7 +112,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
"version": "3.7.1"
}
},
"nbformat": 4,

View file

@ -0,0 +1,331 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "a664f981",
"metadata": {},
"outputs": [],
"source": [
"# Before running this notebook, you must run the notebook `ejscreen_etl.ipynb`.\n",
"\n",
"import collections\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"import csv\n",
"\n",
"# Define some global parameters\n",
"BUCKET_SOCIOECONOMIC = \"Socioeconomic Factors\"\n",
"BUCKET_SENSITIVE = \"Sensitive populations\"\n",
"BUCKET_ENVIRONMENTAL = \"Environmental effects\"\n",
"BUCKET_EXPOSURES = \"Exposures\"\n",
"BUCKETS = [\n",
" BUCKET_SOCIOECONOMIC,\n",
" BUCKET_SENSITIVE,\n",
" BUCKET_ENVIRONMENTAL,\n",
" BUCKET_EXPOSURES,\n",
"]\n",
"\n",
"# There's another aggregation level (a second level of \"buckets\").\n",
"AGGREGATION_POLLUTION = \"Pollution Burden\"\n",
"AGGREGATION_POPULATION = \"Population Characteristics\"\n",
"\n",
"PERCENTILE_FIELD_SUFFIX = \" (percentile)\"\n",
"\n",
"data_path = Path.cwd().parent / \"data\"\n",
"fips_csv_path = data_path / \"fips_states_2010.csv\"\n",
"score_csv_path = data_path / \"score\" / \"csv\"\n",
"\n",
"# Tell pandas to display all columns\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7df430cb",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Load the nationwide EJSCREEN CSV. `ID` is read as a string rather than a number.\n",
"ejscreen_csv = data_path.joinpath(\"dataset\", \"ejscreen_2020\", \"usa.csv\")\n",
"df = pd.read_csv(ejscreen_csv, dtype={\"ID\": \"string\"}, low_memory=False)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8567900",
"metadata": {},
"outputs": [],
"source": [
"# Each input data set is described by a named tuple: the EJSCREEN column to read,\n",
"# the human-readable column name to give it, and the score bucket it belongs to.\n",
"DataSet = collections.namedtuple(\n",
"    \"DataSet\", [\"input_field\", \"renamed_field\", \"bucket\"]\n",
")\n",
"\n",
"# Rows are (input_field, renamed_field, bucket), in output column order.\n",
"data_sets = [\n",
"    DataSet(*fields)\n",
"    for fields in [\n",
"        # These two have `bucket=None` because they are not used in the score.\n",
"        # `ID` is renamed to `GEOID10` to enable geoplatform.gov's workflow.\n",
"        (\"ID\", \"GEOID10\", None),\n",
"        (\"ACSTOTPOP\", \"Total population\", None),\n",
"        # The remaining data sets have buckets because they are used in the score.\n",
"        (\"CANCER\", \"Air toxics cancer risk\", BUCKET_EXPOSURES),\n",
"        (\"RESP\", \"Respiratory hazard index\", BUCKET_EXPOSURES),\n",
"        (\"DSLPM\", \"Diesel particulate matter\", BUCKET_EXPOSURES),\n",
"        (\"PM25\", \"Particulate matter (PM2.5)\", BUCKET_EXPOSURES),\n",
"        (\"OZONE\", \"Ozone\", BUCKET_EXPOSURES),\n",
"        (\"PTRAF\", \"Traffic proximity and volume\", BUCKET_EXPOSURES),\n",
"        (\"PRMP\", \"Proximity to RMP sites\", BUCKET_ENVIRONMENTAL),\n",
"        (\"PTSDF\", \"Proximity to TSDF sites\", BUCKET_ENVIRONMENTAL),\n",
"        (\"PNPL\", \"Proximity to NPL sites\", BUCKET_ENVIRONMENTAL),\n",
"        (\"PWDIS\", \"Wastewater discharge\", BUCKET_ENVIRONMENTAL),\n",
"        (\n",
"            \"PRE1960PCT\",\n",
"            \"Percent pre-1960s housing (lead paint indicator)\",\n",
"            BUCKET_ENVIRONMENTAL,\n",
"        ),\n",
"        (\"UNDER5PCT\", \"Individuals under 5 years old\", BUCKET_SENSITIVE),\n",
"        (\"OVER64PCT\", \"Individuals over 64 years old\", BUCKET_SENSITIVE),\n",
"        (\n",
"            \"LINGISOPCT\",\n",
"            \"Percent of households in linguistic isolation\",\n",
"            BUCKET_SOCIOECONOMIC,\n",
"        ),\n",
"        (\n",
"            \"LOWINCPCT\",\n",
"            \"Poverty (Less than 200% of federal poverty line)\",\n",
"            BUCKET_SOCIOECONOMIC,\n",
"        ),\n",
"        (\n",
"            \"LESSHSPCT\",\n",
"            \"Percent individuals age 25 or over with less than high school degree\",\n",
"            BUCKET_SOCIOECONOMIC,\n",
"        ),\n",
"    ]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e152a655",
"metadata": {},
"outputs": [],
"source": [
"# Rename the raw EJSCREEN columns to their human-readable names, then keep only\n",
"# those renamed columns, in the order they are listed in `data_sets`.\n",
"renaming_dict = {ds.input_field: ds.renamed_field for ds in data_sets}\n",
"columns_to_keep = [ds.renamed_field for ds in data_sets]\n",
"\n",
"# `errors=\"raise\"` makes a missing input column fail here instead of later.\n",
"df = df.rename(columns=renaming_dict, errors=\"raise\")\n",
"df = df[columns_to_keep]\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27677132",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Calculate a percentile rank column for each input field.\n",
"for data_set in data_sets:\n",
"    # `GEOID10` is a string identifier, not a measurement, so a percentile rank\n",
"    # of it is meaningless; skip it rather than emit a bogus column.\n",
"    if data_set.renamed_field == \"GEOID10\":\n",
"        continue\n",
"\n",
"    # `rank(pct=True)` returns each row's rank as a fraction of the column.\n",
"    df[f\"{data_set.renamed_field}{PERCENTILE_FIELD_SUFFIX}\"] = df[\n",
"        data_set.renamed_field\n",
"    ].rank(pct=True)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f7b864f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Calculate score \"A\" (row-wise mean of the two percentile columns) and\n",
"# score \"B\" (their product).\n",
"poverty_percentile_field = (\n",
"    \"Poverty (Less than 200% of federal poverty line) (percentile)\"\n",
")\n",
"less_than_high_school_percentile_field = (\n",
"    \"Percent individuals age 25 or over with less than high school degree (percentile)\"\n",
")\n",
"\n",
"df[\"Score A\"] = df[\n",
"    [poverty_percentile_field, less_than_high_school_percentile_field]\n",
"].mean(axis=1)\n",
"df[\"Score B\"] = (\n",
"    df[poverty_percentile_field] * df[less_than_high_school_percentile_field]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c107baf",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Calculate the \"CalEnviroScreen for the US\" score (\"Score C\").\n",
"# Step 1: average the percentile columns of each bucket into one column per bucket.\n",
"for bucket in BUCKETS:\n",
"    fields_in_bucket = [\n",
"        data_set.renamed_field + PERCENTILE_FIELD_SUFFIX\n",
"        for data_set in data_sets\n",
"        if data_set.bucket == bucket\n",
"    ]\n",
"    df[bucket] = df[fields_in_bucket].mean(axis=1)\n",
"\n",
"# Step 2: \"Pollution Burden\" is the weighted average\n",
"# (1.0 * Exposures + 0.5 * Environmental effects) / 1.5.\n",
"exposures_score = df[BUCKET_EXPOSURES]\n",
"environmental_score = df[BUCKET_ENVIRONMENTAL]\n",
"df[AGGREGATION_POLLUTION] = (1.0 * exposures_score + 0.5 * environmental_score) / 1.5\n",
"\n",
"# Step 3: \"Population Characteristics\" is the plain average of the\n",
"# Sensitive populations and Socioeconomic factors buckets.\n",
"population_buckets = [BUCKET_SENSITIVE, BUCKET_SOCIOECONOMIC]\n",
"df[AGGREGATION_POPULATION] = df[population_buckets].mean(axis=1)\n",
"\n",
"# Step 4: the cumulative impact score is the product of the two aggregates.\n",
"df[\"Score C\"] = df[AGGREGATION_POLLUTION] * df[AGGREGATION_POPULATION]\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "729aed12",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# For each score, add its percentile rank and a boolean flag that is True\n",
"# when that percentile is at least 0.75.\n",
"for score_field in [\"Score A\", \"Score B\", \"Score C\"]:\n",
"    percentile_field = f\"{score_field}{PERCENTILE_FIELD_SUFFIX}\"\n",
"    df[percentile_field] = df[score_field].rank(pct=True)\n",
"    df[f\"{score_field} (top 25th percentile)\"] = df[percentile_field] >= 0.75\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3a65af4",
"metadata": {},
"outputs": [],
"source": [
"# Write the nationwide scores to a single CSV (without the index column).\n",
"df.to_csv(score_csv_path / \"usa.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58ddd8b3",
"metadata": {},
"outputs": [],
"source": [
"# Write one CSV per state: for every FIPS code listed in the states file, select\n",
"# the rows whose `GEOID10` starts with that two-character code.\n",
"# `newline=\"\"` is what the `csv` module documentation recommends for files\n",
"# handed to `csv.reader`.\n",
"with open(fips_csv_path, newline=\"\") as csv_file:\n",
"    csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
"    # The first row is the header; skip it.\n",
"    next(csv_reader, None)\n",
"\n",
"    for row in csv_reader:\n",
"        # `csv.reader` yields an empty list for a blank line (e.g. a trailing\n",
"        # newline at the end of the file); skip it instead of failing on `row[0]`.\n",
"        if not row:\n",
"            continue\n",
"\n",
"        states_fips = row[0].strip()\n",
"        print(f\"Generating data{states_fips} csv\")\n",
"        df1 = df[df[\"GEOID10\"].str[:2] == states_fips]\n",
"        # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
"        df1.to_csv(score_csv_path / f\"data{states_fips}.csv\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,424 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "a664f981",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import pandas as pd\n",
"import csv\n",
"\n",
"data_path = Path.cwd().parent / \"data\"\n",
"fips_csv_path = data_path / \"fips_states_2010.csv\"\n",
"csv_path = data_path / \"score\" / \"csv\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7df430cb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>ACSTOTPOP</th>\n",
" <th>LESSHSPCT</th>\n",
" <th>LOWINCPCT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>010010201001</td>\n",
" <td>636</td>\n",
" <td>0.208134</td>\n",
" <td>0.385220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>010010201002</td>\n",
" <td>1287</td>\n",
" <td>0.040678</td>\n",
" <td>0.163170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>010010202001</td>\n",
" <td>810</td>\n",
" <td>0.135563</td>\n",
" <td>0.501247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>010010202002</td>\n",
" <td>1218</td>\n",
" <td>0.192000</td>\n",
" <td>0.393701</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>010010203001</td>\n",
" <td>2641</td>\n",
" <td>0.125473</td>\n",
" <td>0.308217</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID ACSTOTPOP LESSHSPCT LOWINCPCT\n",
"0 010010201001 636 0.208134 0.385220\n",
"1 010010201002 1287 0.040678 0.163170\n",
"2 010010202001 810 0.135563 0.501247\n",
"3 010010202002 1218 0.192000 0.393701\n",
"4 010010203001 2641 0.125473 0.308217"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# EJSCreen csv Load\n",
"ejscreen_csv = data_path / \"dataset\" / \"ejscreen_2020\" / \"usa.csv\"\n",
"df = pd.read_csv(ejscreen_csv, dtype={'ID': 'string'}, low_memory=False)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "27677132",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# calculate percentiles\n",
"df['lesshs_percentile'] = df.LESSHSPCT.rank(pct = True)\n",
"df['lowin_percentile'] = df.LOWINCPCT.rank(pct = True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1f7b864f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>ACSTOTPOP</th>\n",
" <th>LESSHSPCT</th>\n",
" <th>LOWINCPCT</th>\n",
" <th>lesshs_percentile</th>\n",
" <th>lowin_percentile</th>\n",
" <th>score_a</th>\n",
" <th>score_b</th>\n",
" <th>score_a_percentile</th>\n",
" <th>score_b_percentile</th>\n",
" <th>score_a_top_percentile_25</th>\n",
" <th>score_b_top_percentile_25</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>010010201001</td>\n",
" <td>636</td>\n",
" <td>0.208134</td>\n",
" <td>0.385220</td>\n",
" <td>0.793292</td>\n",
" <td>0.625015</td>\n",
" <td>0.709154</td>\n",
" <td>0.495820</td>\n",
" <td>0.739540</td>\n",
" <td>0.743311</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>010010201002</td>\n",
" <td>1287</td>\n",
" <td>0.040678</td>\n",
" <td>0.163170</td>\n",
" <td>0.238550</td>\n",
" <td>0.246722</td>\n",
" <td>0.242636</td>\n",
" <td>0.058856</td>\n",
" <td>0.206805</td>\n",
" <td>0.249590</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>010010202001</td>\n",
" <td>810</td>\n",
" <td>0.135563</td>\n",
" <td>0.501247</td>\n",
" <td>0.634390</td>\n",
" <td>0.772002</td>\n",
" <td>0.703196</td>\n",
" <td>0.489750</td>\n",
" <td>0.733009</td>\n",
" <td>0.738859</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>010010202002</td>\n",
" <td>1218</td>\n",
" <td>0.192000</td>\n",
" <td>0.393701</td>\n",
" <td>0.765126</td>\n",
" <td>0.637158</td>\n",
" <td>0.701142</td>\n",
" <td>0.487506</td>\n",
" <td>0.730848</td>\n",
" <td>0.737357</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>010010203001</td>\n",
" <td>2641</td>\n",
" <td>0.125473</td>\n",
" <td>0.308217</td>\n",
" <td>0.603841</td>\n",
" <td>0.504977</td>\n",
" <td>0.554409</td>\n",
" <td>0.304925</td>\n",
" <td>0.568571</td>\n",
" <td>0.586058</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID ACSTOTPOP LESSHSPCT LOWINCPCT lesshs_percentile \\\n",
"0 010010201001 636 0.208134 0.385220 0.793292 \n",
"1 010010201002 1287 0.040678 0.163170 0.238550 \n",
"2 010010202001 810 0.135563 0.501247 0.634390 \n",
"3 010010202002 1218 0.192000 0.393701 0.765126 \n",
"4 010010203001 2641 0.125473 0.308217 0.603841 \n",
"\n",
" lowin_percentile score_a score_b score_a_percentile \\\n",
"0 0.625015 0.709154 0.495820 0.739540 \n",
"1 0.246722 0.242636 0.058856 0.206805 \n",
"2 0.772002 0.703196 0.489750 0.733009 \n",
"3 0.637158 0.701142 0.487506 0.730848 \n",
"4 0.504977 0.554409 0.304925 0.568571 \n",
"\n",
" score_b_percentile score_a_top_percentile_25 score_b_top_percentile_25 \n",
"0 0.743311 False False \n",
"1 0.249590 False False \n",
"2 0.738859 False False \n",
"3 0.737357 False False \n",
"4 0.586058 False False "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# calculate scores\n",
"df[\"score_a\"] = df[[\"lesshs_percentile\", \"lowin_percentile\"]].mean(axis=1)\n",
"df[\"score_b\"] = df.lesshs_percentile * df.lowin_percentile\n",
"\n",
"# Create percentiles for the scores \n",
"df[\"score_a_percentile\"] = df.score_a.rank(pct = True)\n",
"df[\"score_b_percentile\"] = df.score_b.rank(pct = True)\n",
"df[\"score_a_top_percentile_25\"] = df[\"score_a_percentile\"] >= 0.75\n",
"df[\"score_b_top_percentile_25\"] = df[\"score_b_percentile\"] >= 0.75\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "91755bcf",
"metadata": {},
"outputs": [],
"source": [
"# strip calculations\n",
"df = df[[\"ID\", \"ACSTOTPOP\", \"score_a\",\"score_b\", \"score_a_percentile\", \"score_b_percentile\",\"score_a_top_percentile_25\",\"score_b_top_percentile_25\"]]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b3a65af4",
"metadata": {},
"outputs": [],
"source": [
"# write nationwide csv\n",
"df.to_csv(csv_path / f\"usa.csv\", index = False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "58ddd8b3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating data01 csv\n",
"Generating data02 csv\n",
"Generating data04 csv\n",
"Generating data05 csv\n",
"Generating data06 csv\n",
"Generating data08 csv\n",
"Generating data09 csv\n",
"Generating data10 csv\n",
"Generating data11 csv\n",
"Generating data12 csv\n",
"Generating data13 csv\n",
"Generating data15 csv\n",
"Generating data16 csv\n",
"Generating data17 csv\n",
"Generating data18 csv\n",
"Generating data19 csv\n",
"Generating data20 csv\n",
"Generating data21 csv\n",
"Generating data22 csv\n",
"Generating data23 csv\n",
"Generating data24 csv\n",
"Generating data25 csv\n",
"Generating data26 csv\n",
"Generating data27 csv\n",
"Generating data28 csv\n",
"Generating data29 csv\n",
"Generating data30 csv\n",
"Generating data31 csv\n",
"Generating data32 csv\n",
"Generating data33 csv\n",
"Generating data34 csv\n",
"Generating data35 csv\n",
"Generating data36 csv\n",
"Generating data37 csv\n",
"Generating data38 csv\n",
"Generating data39 csv\n",
"Generating data40 csv\n",
"Generating data41 csv\n",
"Generating data42 csv\n",
"Generating data44 csv\n",
"Generating data45 csv\n",
"Generating data46 csv\n",
"Generating data47 csv\n",
"Generating data48 csv\n",
"Generating data49 csv\n",
"Generating data50 csv\n",
"Generating data51 csv\n",
"Generating data53 csv\n",
"Generating data54 csv\n",
"Generating data55 csv\n",
"Generating data56 csv\n"
]
}
],
"source": [
"# write per state csvs\n",
"with open(fips_csv_path) as csv_file:\n",
" csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
" line_count = 0\n",
"\n",
" for row in csv_reader:\n",
" if line_count == 0:\n",
" line_count += 1\n",
" else:\n",
" fips = row[0].strip()\n",
" print(f\"Generating data{fips} csv\")\n",
" df1 = df[df.ID.str[:2] == fips]\n",
" # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
" df1.to_csv(csv_path / f\"data{fips}.csv\", index = False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e545623b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "54615cef",
"metadata": {},
"outputs": [],
@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "49a63129",
"metadata": {},
"outputs": [],
@ -56,148 +56,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "2b26dccf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>census_block_group_id</th>\n",
" <th>census_block_group_population</th>\n",
" <th>cejst_score</th>\n",
" <th>score_b</th>\n",
" <th>cejst_percentile</th>\n",
" <th>score_b_percentile</th>\n",
" <th>score_a_top_percentile_25</th>\n",
" <th>score_b_top_percentile_25</th>\n",
" <th>cejst_priority_community</th>\n",
" <th>census_tract_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10297</th>\n",
" <td>60014001001</td>\n",
" <td>3115</td>\n",
" <td>0.14</td>\n",
" <td>0.02</td>\n",
" <td>0.10</td>\n",
" <td>0.14</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10298</th>\n",
" <td>60014002001</td>\n",
" <td>1037</td>\n",
" <td>0.09</td>\n",
" <td>0.01</td>\n",
" <td>0.05</td>\n",
" <td>0.07</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10299</th>\n",
" <td>60014002002</td>\n",
" <td>988</td>\n",
" <td>0.15</td>\n",
" <td>0.02</td>\n",
" <td>0.11</td>\n",
" <td>0.12</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10300</th>\n",
" <td>60014003001</td>\n",
" <td>1137</td>\n",
" <td>0.03</td>\n",
" <td>0.00</td>\n",
" <td>0.01</td>\n",
" <td>0.02</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10301</th>\n",
" <td>60014003002</td>\n",
" <td>1404</td>\n",
" <td>0.34</td>\n",
" <td>0.09</td>\n",
" <td>0.31</td>\n",
" <td>0.31</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>6001400300</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" census_block_group_id census_block_group_population cejst_score \\\n",
"10297 60014001001 3115 0.14 \n",
"10298 60014002001 1037 0.09 \n",
"10299 60014002002 988 0.15 \n",
"10300 60014003001 1137 0.03 \n",
"10301 60014003002 1404 0.34 \n",
"\n",
" score_b cejst_percentile score_b_percentile \\\n",
"10297 0.02 0.10 0.14 \n",
"10298 0.01 0.05 0.07 \n",
"10299 0.02 0.11 0.12 \n",
"10300 0.00 0.01 0.02 \n",
"10301 0.09 0.31 0.31 \n",
"\n",
" score_a_top_percentile_25 score_b_top_percentile_25 \\\n",
"10297 False False \n",
"10298 False False \n",
"10299 False False \n",
"10300 False False \n",
"10301 False False \n",
"\n",
" cejst_priority_community census_tract_id \n",
"10297 False 6001400100 \n",
"10298 False 6001400200 \n",
"10299 False 6001400200 \n",
"10300 False 6001400300 \n",
"10301 False 6001400300 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Load CEJST score data\n",
"cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"usa.csv\"\n",
@ -240,19 +102,10 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "ec6b27e3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\opt\\justice40-tool\\score\\venv\\lib\\site-packages\\urllib3\\connectionpool.py:1013: InsecureRequestWarning: Unverified HTTPS request is being made to host 'justice40-data.s3.amazonaws.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:\n",
"# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip\n",
@ -267,18 +120,10 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "bdf08971",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"C:\\opt\\justice40-tool\\score\\data\\tmp\n"
]
}
],
"outputs": [],
"source": [
"# Extract zip\n",
"print(zip_file_path)\n",
@ -290,231 +135,10 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"id": "29c14b29",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>census_tract_id</th>\n",
" <th>Total Population</th>\n",
" <th>California County</th>\n",
" <th>ZIP</th>\n",
" <th>Nearby City \\r\\n(to help approximate location only)</th>\n",
" <th>Longitude</th>\n",
" <th>Latitude</th>\n",
" <th>calenviroscreen_score</th>\n",
" <th>calenviroscreen_percentile</th>\n",
" <th>DRAFT CES 4.0\\r\\nPercentile Range</th>\n",
" <th>...</th>\n",
" <th>Poverty</th>\n",
" <th>Poverty Pctl</th>\n",
" <th>Unemployment</th>\n",
" <th>Unemployment Pctl</th>\n",
" <th>Housing Burden</th>\n",
" <th>Housing Burden Pctl</th>\n",
" <th>Pop. Char.</th>\n",
" <th>Pop. Char. Score</th>\n",
" <th>Pop. Char. Pctl</th>\n",
" <th>calenviroscreen_priority_community</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6019001100</td>\n",
" <td>2760</td>\n",
" <td>Fresno</td>\n",
" <td>93706</td>\n",
" <td>Fresno</td>\n",
" <td>-119.78</td>\n",
" <td>36.71</td>\n",
" <td>94.61</td>\n",
" <td>100.00</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>76.60</td>\n",
" <td>98.43</td>\n",
" <td>16.20</td>\n",
" <td>97.15</td>\n",
" <td>30.70</td>\n",
" <td>90.61</td>\n",
" <td>93.73</td>\n",
" <td>9.72</td>\n",
" <td>99.87</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6077000700</td>\n",
" <td>4177</td>\n",
" <td>San Joaquin</td>\n",
" <td>95206</td>\n",
" <td>Stockton</td>\n",
" <td>-121.29</td>\n",
" <td>37.94</td>\n",
" <td>90.83</td>\n",
" <td>99.99</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>70.60</td>\n",
" <td>96.43</td>\n",
" <td>18.50</td>\n",
" <td>98.45</td>\n",
" <td>35.20</td>\n",
" <td>95.61</td>\n",
" <td>93.40</td>\n",
" <td>9.68</td>\n",
" <td>99.84</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6077000100</td>\n",
" <td>4055</td>\n",
" <td>San Joaquin</td>\n",
" <td>95202</td>\n",
" <td>Stockton</td>\n",
" <td>-121.29</td>\n",
" <td>37.95</td>\n",
" <td>85.75</td>\n",
" <td>99.97</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>81.80</td>\n",
" <td>99.50</td>\n",
" <td>17.90</td>\n",
" <td>98.17</td>\n",
" <td>36.40</td>\n",
" <td>96.51</td>\n",
" <td>95.71</td>\n",
" <td>9.92</td>\n",
" <td>99.97</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>6071001600</td>\n",
" <td>5527</td>\n",
" <td>San Bernardino</td>\n",
" <td>91761</td>\n",
" <td>Ontario</td>\n",
" <td>-117.62</td>\n",
" <td>34.06</td>\n",
" <td>83.56</td>\n",
" <td>99.96</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>67.10</td>\n",
" <td>94.82</td>\n",
" <td>6.70</td>\n",
" <td>57.20</td>\n",
" <td>32.10</td>\n",
" <td>92.65</td>\n",
" <td>80.59</td>\n",
" <td>8.36</td>\n",
" <td>93.06</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6037204920</td>\n",
" <td>2639</td>\n",
" <td>Los Angeles</td>\n",
" <td>90023</td>\n",
" <td>Los Angeles</td>\n",
" <td>-118.20</td>\n",
" <td>34.02</td>\n",
" <td>82.90</td>\n",
" <td>99.95</td>\n",
" <td>95-100% (highest scores)</td>\n",
" <td>...</td>\n",
" <td>64.90</td>\n",
" <td>93.51</td>\n",
" <td>5.60</td>\n",
" <td>43.81</td>\n",
" <td>25.00</td>\n",
" <td>77.95</td>\n",
" <td>83.95</td>\n",
" <td>8.70</td>\n",
" <td>95.78</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 59 columns</p>\n",
"</div>"
],
"text/plain": [
" census_tract_id Total Population California County ZIP \\\n",
"0 6019001100 2760 Fresno 93706 \n",
"1 6077000700 4177 San Joaquin 95206 \n",
"2 6077000100 4055 San Joaquin 95202 \n",
"3 6071001600 5527 San Bernardino 91761 \n",
"4 6037204920 2639 Los Angeles 90023 \n",
"\n",
" Nearby City \\r\\n(to help approximate location only) Longitude Latitude \\\n",
"0 Fresno -119.78 36.71 \n",
"1 Stockton -121.29 37.94 \n",
"2 Stockton -121.29 37.95 \n",
"3 Ontario -117.62 34.06 \n",
"4 Los Angeles -118.20 34.02 \n",
"\n",
" calenviroscreen_score calenviroscreen_percentile \\\n",
"0 94.61 100.00 \n",
"1 90.83 99.99 \n",
"2 85.75 99.97 \n",
"3 83.56 99.96 \n",
"4 82.90 99.95 \n",
"\n",
" DRAFT CES 4.0\\r\\nPercentile Range ... Poverty Poverty Pctl Unemployment \\\n",
"0 95-100% (highest scores) ... 76.60 98.43 16.20 \n",
"1 95-100% (highest scores) ... 70.60 96.43 18.50 \n",
"2 95-100% (highest scores) ... 81.80 99.50 17.90 \n",
"3 95-100% (highest scores) ... 67.10 94.82 6.70 \n",
"4 95-100% (highest scores) ... 64.90 93.51 5.60 \n",
"\n",
" Unemployment Pctl Housing Burden Housing Burden Pctl Pop. Char. \\\n",
"0 97.15 30.70 90.61 93.73 \n",
"1 98.45 35.20 95.61 93.40 \n",
"2 98.17 36.40 96.51 95.71 \n",
"3 57.20 32.10 92.65 80.59 \n",
"4 43.81 25.00 77.95 83.95 \n",
"\n",
" Pop. Char. Score Pop. Char. Pctl calenviroscreen_priority_community \n",
"0 9.72 99.87 True \n",
"1 9.68 99.84 True \n",
"2 9.92 99.97 True \n",
"3 8.36 93.06 True \n",
"4 8.70 95.78 True \n",
"\n",
"[5 rows x 59 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Load comparison index (CalEnviroScreen 4)\n",
"\n",
@ -541,142 +165,10 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "813e5656",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>census_block_group_id</th>\n",
" <th>census_tract_id</th>\n",
" <th>census_block_group_population</th>\n",
" <th>cejst_score</th>\n",
" <th>cejst_percentile</th>\n",
" <th>cejst_priority_community</th>\n",
" <th>calenviroscreen_score</th>\n",
" <th>calenviroscreen_percentile</th>\n",
" <th>calenviroscreen_priority_community</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>60014001001</td>\n",
" <td>6001400100</td>\n",
" <td>3115</td>\n",
" <td>0.14</td>\n",
" <td>0.10</td>\n",
" <td>False</td>\n",
" <td>4.40</td>\n",
" <td>2.38</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>60014002001</td>\n",
" <td>6001400200</td>\n",
" <td>1037</td>\n",
" <td>0.09</td>\n",
" <td>0.05</td>\n",
" <td>False</td>\n",
" <td>5.05</td>\n",
" <td>3.48</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>60014002002</td>\n",
" <td>6001400200</td>\n",
" <td>988</td>\n",
" <td>0.15</td>\n",
" <td>0.11</td>\n",
" <td>False</td>\n",
" <td>5.05</td>\n",
" <td>3.48</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>60014003001</td>\n",
" <td>6001400300</td>\n",
" <td>1137</td>\n",
" <td>0.03</td>\n",
" <td>0.01</td>\n",
" <td>False</td>\n",
" <td>9.92</td>\n",
" <td>13.44</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>60014003002</td>\n",
" <td>6001400300</td>\n",
" <td>1404</td>\n",
" <td>0.34</td>\n",
" <td>0.31</td>\n",
" <td>False</td>\n",
" <td>9.92</td>\n",
" <td>13.44</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" census_block_group_id census_tract_id census_block_group_population \\\n",
"0 60014001001 6001400100 3115 \n",
"1 60014002001 6001400200 1037 \n",
"2 60014002002 6001400200 988 \n",
"3 60014003001 6001400300 1137 \n",
"4 60014003002 6001400300 1404 \n",
"\n",
" cejst_score cejst_percentile cejst_priority_community \\\n",
"0 0.14 0.10 False \n",
"1 0.09 0.05 False \n",
"2 0.15 0.11 False \n",
"3 0.03 0.01 False \n",
"4 0.34 0.31 False \n",
"\n",
" calenviroscreen_score calenviroscreen_percentile \\\n",
"0 4.40 2.38 \n",
"1 5.05 3.48 \n",
"2 5.05 3.48 \n",
"3 9.92 13.44 \n",
"4 9.92 13.44 \n",
"\n",
" calenviroscreen_priority_community \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Join CalEnviroScreen and CEJST data.\n",
"# Note: we're joining on the census *tract*, so there will be multiple CBG entries joined to the same census tract row from CES,\n",
@ -716,58 +208,12 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "939baea4",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" census_tract_id calenviroscreen_score \\\n",
"census_tract_id \n",
"6019001100 6019001100 94.61 \n",
"6077000700 6077000700 90.83 \n",
"6077000100 6077000100 85.75 \n",
"6071001600 6071001600 83.56 \n",
"6037204920 6037204920 82.90 \n",
"\n",
" calenviroscreen_percentile \\\n",
"census_tract_id \n",
"6019001100 100.00 \n",
"6077000700 99.99 \n",
"6077000100 99.97 \n",
"6071001600 99.96 \n",
"6037204920 99.95 \n",
"\n",
" calenviroscreen_priority_community \\\n",
"census_tract_id \n",
"6019001100 True \n",
"6077000700 True \n",
"6077000100 True \n",
"6071001600 True \n",
"6037204920 True \n",
"\n",
" CES Tract has at least one CEJST CBG? \\\n",
"census_tract_id \n",
"6019001100 True \n",
"6077000700 True \n",
"6077000100 True \n",
"6071001600 True \n",
"6037204920 True \n",
"\n",
" CES Tract has 100% CEJST CBGs? \n",
"census_tract_id \n",
"6019001100 True \n",
"6077000700 True \n",
"6077000100 True \n",
"6071001600 False \n",
"6037204920 True \n"
]
}
],
"outputs": [],
"source": [
"# Create analysis\n",
"def calculate_comparison(frame):\n",
@ -826,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"id": "85709225",
"metadata": {
"scrolled": true