mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 18:14:19 -08:00
* generate Geo-aware scores for all zoom levels * usa high progress * testing dissolve * checkpoint * changing type * removing breakpoint * validation notebooks * quick update * score validation * fixes for county merge * code completed
949 lines
28 KiB
Text
949 lines
28 KiB
Text
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import geopandas as gpd\n",
|
||
"import math\n",
|
||
"import pathlib\n",
|
||
"import os\n",
|
||
"import sys"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"module_path = os.path.abspath(os.path.join(\"..\"))\n",
|
||
"if module_path not in sys.path:\n",
|
||
" sys.path.append(module_path)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def merge_and_simplify_file(file_name: str, usa_df: pd.DataFrame):\n",
|
||
" state_gdf = gpd.read_file(file_name)\n",
|
||
" state_repr = state_gdf.to_crs(\"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs\")\n",
|
||
" state_merged = state_repr.merge(usa_df, on=\"GEOID10\", how=\"left\")\n",
|
||
" state_merged_simplified = state_merged[\n",
|
||
" [\"GEOID10\", \"Score D (percentile)\", \"geometry\"]\n",
|
||
" ].reset_index(drop=True)\n",
|
||
" state_merged_simplified.rename(\n",
|
||
" columns={\"Score D (percentile)\": \"D_SCORE\"}, inplace=True\n",
|
||
" )\n",
|
||
" return state_merged_simplified\n",
|
||
"\n",
|
||
"\n",
|
||
"def aggregate_to_tracts(block_group_df: pd.DataFrame):\n",
|
||
" # The tract identifier is the first 11 digits of the GEOID\n",
|
||
" block_group_df[\"tract\"] = block_group_df.apply(\n",
|
||
" lambda row: row[\"GEOID10\"][0:11], axis=1\n",
|
||
" )\n",
|
||
" state_tracts = block_group_df.dissolve(by=\"tract\", aggfunc=\"mean\")\n",
|
||
" return state_tracts\n",
|
||
"\n",
|
||
"\n",
|
||
"def create_buckets_from_tracts(state_tracts: pd.DataFrame, num_buckets: int):\n",
|
||
" # assign tracts to buckets by D_SCORE\n",
|
||
" state_tracts.sort_values(\"D_SCORE\", inplace=True)\n",
|
||
" D_SCORE_bucket = []\n",
|
||
" num_buckets = num_buckets\n",
|
||
" bucket_size = math.ceil(len(state_tracts.index) / num_buckets)\n",
|
||
" for i in range(len(state_tracts.index)):\n",
|
||
" D_SCORE_bucket.extend([math.floor(i / bucket_size)])\n",
|
||
" state_tracts[\"D_SCORE_bucket\"] = D_SCORE_bucket\n",
|
||
" return state_tracts\n",
|
||
"\n",
|
||
"\n",
|
||
"def aggregate_buckets(state_tracts: pd.DataFrame, agg_func: str):\n",
|
||
" # dissolve tracts by bucket\n",
|
||
" state_attr = state_tracts[[\"D_SCORE\", \"D_SCORE_bucket\", \"geometry\"]].reset_index(\n",
|
||
" drop=True\n",
|
||
" )\n",
|
||
" state_dissolve = state_attr.dissolve(by=\"D_SCORE_bucket\", aggfunc=agg_func)\n",
|
||
" return state_dissolve\n",
|
||
"\n",
|
||
"\n",
|
||
"def breakup_multipolygons(state_bucketed_df: pd.DataFrame, num_buckets: int):\n",
|
||
" compressed = []\n",
|
||
" for i in range(num_buckets):\n",
|
||
" for j in range(len(state_bucketed_df[\"geometry\"][i].geoms)):\n",
|
||
" compressed.append(\n",
|
||
" [\n",
|
||
" state_bucketed_df[\"D_SCORE\"][i],\n",
|
||
" state_bucketed_df[\"geometry\"][i].geoms[j],\n",
|
||
" ]\n",
|
||
" )\n",
|
||
" return compressed\n",
|
||
"\n",
|
||
"\n",
|
||
"def write_to_file(compressed: pd.DataFrame, file_name: str):\n",
|
||
" gdf_compressed = gpd.GeoDataFrame(\n",
|
||
" compressed, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
|
||
" )\n",
|
||
" gdf_compressed.to_file(CENSUS_GEOJSON_DIR / f\"{file_name}_low.geojson\", driver=\"GeoJSON\")\n",
|
||
"\n",
|
||
"\n",
|
||
"def process_file(file_name: str, usa_df: pd.DataFrame, num_buckets:int):\n",
|
||
" print(f\"Processing file {file_name}...\")\n",
|
||
" state_merged_simplified = merge_and_simplify_file(file_name, usa_df)\n",
|
||
" state_tracts = aggregate_to_tracts(state_merged_simplified)\n",
|
||
" state_tracts = create_buckets_from_tracts(state_tracts, num_buckets)\n",
|
||
" state_bucketed_df = aggregate_buckets(state_tracts, \"mean\")\n",
|
||
" compressed = breakup_multipolygons(state_bucketed_df, num_buckets)\n",
|
||
" write_to_file(compressed, file_name)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"id": "Ia5bqxS2LJqe"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
|
||
"CENSUS_GEOJSON_DIR = DATA_DIR / \"census\" / \"geojson\"\n",
|
||
"CEJST_DATA_PATH = DATA_DIR / \"score\" / \"csv\" / \"tiles\" / \"usa.csv\"\n",
|
||
"score_df = pd.read_csv(CEJST_DATA_PATH, dtype={\"GEOID10\": \"object\"}, low_memory=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"id": "Dtf5qD50JvCw"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"master_df = gpd.GeoDataFrame()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty GeoDataFrame\n",
|
||
"Columns: []\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"master_df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "PNdw8bERJyKk"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"for file_name in CENSUS_GEOJSON_DIR.rglob('*.json'):\n",
|
||
" state_gdf = gpd.read_file(file_name)\n",
|
||
" master_df = pd.concat([master_df, state_gdf])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "B5SS9y2pLwks"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"master_df = master_df.to_crs(\"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "_C6vaR9HQeLa",
|
||
"outputId": "fab3bc7f-e716-431e-bc76-bd26289ea4a4"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"master_df.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "oMoubjqCQiw5",
|
||
"outputId": "6195ffbc-6275-40c6-bb6a-e0a6bd1e71f0"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>STATEFP10</th>\n",
|
||
" <th>COUNTYFP10</th>\n",
|
||
" <th>TRACTCE10</th>\n",
|
||
" <th>BLKGRPCE10</th>\n",
|
||
" <th>GEOID10</th>\n",
|
||
" <th>NAMELSAD10</th>\n",
|
||
" <th>MTFCC10</th>\n",
|
||
" <th>FUNCSTAT10</th>\n",
|
||
" <th>ALAND10</th>\n",
|
||
" <th>AWATER10</th>\n",
|
||
" <th>INTPTLAT10</th>\n",
|
||
" <th>INTPTLON10</th>\n",
|
||
" <th>geometry</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>01</td>\n",
|
||
" <td>005</td>\n",
|
||
" <td>950500</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>010059505002</td>\n",
|
||
" <td>Block Group 2</td>\n",
|
||
" <td>G5030</td>\n",
|
||
" <td>S</td>\n",
|
||
" <td>191306077</td>\n",
|
||
" <td>605058</td>\n",
|
||
" <td>+31.7728221</td>\n",
|
||
" <td>-085.3325011</td>\n",
|
||
" <td>POLYGON ((-85.17240 31.82508, -85.17334 31.824...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>01</td>\n",
|
||
" <td>005</td>\n",
|
||
" <td>950500</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>010059505001</td>\n",
|
||
" <td>Block Group 1</td>\n",
|
||
" <td>G5030</td>\n",
|
||
" <td>S</td>\n",
|
||
" <td>44574612</td>\n",
|
||
" <td>8952734</td>\n",
|
||
" <td>+31.7523221</td>\n",
|
||
" <td>-085.2009470</td>\n",
|
||
" <td>POLYGON ((-85.16283 31.81051, -85.16284 31.813...</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" STATEFP10 ... geometry\n",
|
||
"0 01 ... POLYGON ((-85.17240 31.82508, -85.17334 31.824...\n",
|
||
"1 01 ... POLYGON ((-85.16283 31.81051, -85.16284 31.813...\n",
|
||
"\n",
|
||
"[2 rows x 13 columns]"
|
||
]
|
||
},
|
||
"execution_count": 69,
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"master_df.head(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "bAMmGSgzVml0"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_df = pd.read_csv(CEJST_DATA_PATH, dtype={\"GEOID10\": \"object\"})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "U7M7dExdV2Vh"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_merged = master_df.merge(usa_df, on=\"GEOID10\", how=\"left\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "Sr25DUkxWVhg",
|
||
"outputId": "1e804075-0f7d-4174-82d7-e21b8519c8bf"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>STATEFP10</th>\n",
|
||
" <th>COUNTYFP10</th>\n",
|
||
" <th>TRACTCE10</th>\n",
|
||
" <th>BLKGRPCE10</th>\n",
|
||
" <th>GEOID10</th>\n",
|
||
" <th>NAMELSAD10</th>\n",
|
||
" <th>MTFCC10</th>\n",
|
||
" <th>FUNCSTAT10</th>\n",
|
||
" <th>ALAND10</th>\n",
|
||
" <th>AWATER10</th>\n",
|
||
" <th>INTPTLAT10</th>\n",
|
||
" <th>INTPTLON10</th>\n",
|
||
" <th>geometry</th>\n",
|
||
" <th>Housing burden (percent)</th>\n",
|
||
" <th>Total population</th>\n",
|
||
" <th>Air toxics cancer risk</th>\n",
|
||
" <th>Respiratory hazard index</th>\n",
|
||
" <th>Diesel particulate matter</th>\n",
|
||
" <th>Particulate matter (PM2.5)</th>\n",
|
||
" <th>Ozone</th>\n",
|
||
" <th>Traffic proximity and volume</th>\n",
|
||
" <th>Proximity to RMP sites</th>\n",
|
||
" <th>Proximity to TSDF sites</th>\n",
|
||
" <th>Proximity to NPL sites</th>\n",
|
||
" <th>Wastewater discharge</th>\n",
|
||
" <th>Percent pre-1960s housing (lead paint indicator)</th>\n",
|
||
" <th>Individuals under 5 years old</th>\n",
|
||
" <th>Individuals over 64 years old</th>\n",
|
||
" <th>Linguistic isolation (percent)</th>\n",
|
||
" <th>Percent of households in linguistic isolation</th>\n",
|
||
" <th>Poverty (Less than 200% of federal poverty line)</th>\n",
|
||
" <th>Percent individuals age 25 or over with less than high school degree</th>\n",
|
||
" <th>Unemployed civilians (percent)</th>\n",
|
||
" <th>Housing + Transportation Costs % Income for the Regional Typical Household</th>\n",
|
||
" <th>GEOID10 (percentile)</th>\n",
|
||
" <th>Housing burden (percent) (percentile)</th>\n",
|
||
" <th>Total population (percentile)</th>\n",
|
||
" <th>Air toxics cancer risk (percentile)</th>\n",
|
||
" <th>Respiratory hazard index (percentile)</th>\n",
|
||
" <th>Diesel particulate matter (percentile)</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Air toxics cancer risk (min-max normalized)</th>\n",
|
||
" <th>Respiratory hazard index (min-max normalized)</th>\n",
|
||
" <th>Diesel particulate matter (min-max normalized)</th>\n",
|
||
" <th>Particulate matter (PM2.5) (min-max normalized)</th>\n",
|
||
" <th>Ozone (min-max normalized)</th>\n",
|
||
" <th>Traffic proximity and volume (min-max normalized)</th>\n",
|
||
" <th>Proximity to RMP sites (min-max normalized)</th>\n",
|
||
" <th>Proximity to TSDF sites (min-max normalized)</th>\n",
|
||
" <th>Proximity to NPL sites (min-max normalized)</th>\n",
|
||
" <th>Wastewater discharge (min-max normalized)</th>\n",
|
||
" <th>Percent pre-1960s housing (lead paint indicator) (min-max normalized)</th>\n",
|
||
" <th>Individuals under 5 years old (min-max normalized)</th>\n",
|
||
" <th>Individuals over 64 years old (min-max normalized)</th>\n",
|
||
" <th>Linguistic isolation (percent) (min-max normalized)</th>\n",
|
||
" <th>Percent of households in linguistic isolation (min-max normalized)</th>\n",
|
||
" <th>Poverty (Less than 200% of federal poverty line) (min-max normalized)</th>\n",
|
||
" <th>Percent individuals age 25 or over with less than high school degree (min-max normalized)</th>\n",
|
||
" <th>Unemployed civilians (percent) (min-max normalized)</th>\n",
|
||
" <th>Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized)</th>\n",
|
||
" <th>Score A</th>\n",
|
||
" <th>Score B</th>\n",
|
||
" <th>Socioeconomic Factors</th>\n",
|
||
" <th>Sensitive populations</th>\n",
|
||
" <th>Environmental effects</th>\n",
|
||
" <th>Exposures</th>\n",
|
||
" <th>Pollution Burden</th>\n",
|
||
" <th>Population Characteristics</th>\n",
|
||
" <th>Score C</th>\n",
|
||
" <th>Score D</th>\n",
|
||
" <th>Score E</th>\n",
|
||
" <th>Score A (percentile)</th>\n",
|
||
" <th>Score A (top 25th percentile)</th>\n",
|
||
" <th>Score B (percentile)</th>\n",
|
||
" <th>Score B (top 25th percentile)</th>\n",
|
||
" <th>Score C (percentile)</th>\n",
|
||
" <th>Score C (top 25th percentile)</th>\n",
|
||
" <th>Score D (percentile)</th>\n",
|
||
" <th>Score D (top 25th percentile)</th>\n",
|
||
" <th>Score E (percentile)</th>\n",
|
||
" <th>Score E (top 25th percentile)</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>01</td>\n",
|
||
" <td>005</td>\n",
|
||
" <td>950500</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>010059505002</td>\n",
|
||
" <td>Block Group 2</td>\n",
|
||
" <td>G5030</td>\n",
|
||
" <td>S</td>\n",
|
||
" <td>191306077</td>\n",
|
||
" <td>605058</td>\n",
|
||
" <td>+31.7728221</td>\n",
|
||
" <td>-085.3325011</td>\n",
|
||
" <td>POLYGON ((-85.17240 31.82508, -85.17334 31.824...</td>\n",
|
||
" <td>0.176565</td>\n",
|
||
" <td>923.0</td>\n",
|
||
" <td>44.636463</td>\n",
|
||
" <td>0.784089</td>\n",
|
||
" <td>0.121767</td>\n",
|
||
" <td>9.536056</td>\n",
|
||
" <td>34.660008</td>\n",
|
||
" <td>0.880242</td>\n",
|
||
" <td>0.295180</td>\n",
|
||
" <td>0.023752</td>\n",
|
||
" <td>0.019262</td>\n",
|
||
" <td>0.050677</td>\n",
|
||
" <td>0.20177</td>\n",
|
||
" <td>0.047671</td>\n",
|
||
" <td>0.286024</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.276273</td>\n",
|
||
" <td>0.181102</td>\n",
|
||
" <td>0.159836</td>\n",
|
||
" <td>64.0</td>\n",
|
||
" <td>0.000631</td>\n",
|
||
" <td>0.25485</td>\n",
|
||
" <td>0.272930</td>\n",
|
||
" <td>0.944257</td>\n",
|
||
" <td>0.982043</td>\n",
|
||
" <td>0.082062</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.025691</td>\n",
|
||
" <td>0.181789</td>\n",
|
||
" <td>0.020039</td>\n",
|
||
" <td>0.444097</td>\n",
|
||
" <td>0.190363</td>\n",
|
||
" <td>0.000023</td>\n",
|
||
" <td>0.016043</td>\n",
|
||
" <td>0.000054</td>\n",
|
||
" <td>0.002143</td>\n",
|
||
" <td>1.179715e-07</td>\n",
|
||
" <td>0.20177</td>\n",
|
||
" <td>0.090801</td>\n",
|
||
" <td>0.286024</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.276273</td>\n",
|
||
" <td>0.181102</td>\n",
|
||
" <td>0.159836</td>\n",
|
||
" <td>0.322034</td>\n",
|
||
" <td>0.597295</td>\n",
|
||
" <td>0.335222</td>\n",
|
||
" <td>0.638895</td>\n",
|
||
" <td>0.535636</td>\n",
|
||
" <td>0.381877</td>\n",
|
||
" <td>0.494252</td>\n",
|
||
" <td>0.456794</td>\n",
|
||
" <td>0.587265</td>\n",
|
||
" <td>0.268259</td>\n",
|
||
" <td>0.149124</td>\n",
|
||
" <td>0.529853</td>\n",
|
||
" <td>0.617238</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.61452</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.615988</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.565349</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.576986</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>01</td>\n",
|
||
" <td>005</td>\n",
|
||
" <td>950500</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>010059505001</td>\n",
|
||
" <td>Block Group 1</td>\n",
|
||
" <td>G5030</td>\n",
|
||
" <td>S</td>\n",
|
||
" <td>44574612</td>\n",
|
||
" <td>8952734</td>\n",
|
||
" <td>+31.7523221</td>\n",
|
||
" <td>-085.2009470</td>\n",
|
||
" <td>POLYGON ((-85.16283 31.81051, -85.16284 31.813...</td>\n",
|
||
" <td>0.176565</td>\n",
|
||
" <td>818.0</td>\n",
|
||
" <td>44.636463</td>\n",
|
||
" <td>0.784089</td>\n",
|
||
" <td>0.121767</td>\n",
|
||
" <td>9.536056</td>\n",
|
||
" <td>34.660008</td>\n",
|
||
" <td>60.055410</td>\n",
|
||
" <td>0.232153</td>\n",
|
||
" <td>0.027767</td>\n",
|
||
" <td>0.018079</td>\n",
|
||
" <td>0.007115</td>\n",
|
||
" <td>0.00000</td>\n",
|
||
" <td>0.007335</td>\n",
|
||
" <td>0.264059</td>\n",
|
||
" <td>0.039261</td>\n",
|
||
" <td>0.038369</td>\n",
|
||
" <td>0.391198</td>\n",
|
||
" <td>0.186147</td>\n",
|
||
" <td>0.053125</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>0.000626</td>\n",
|
||
" <td>0.25485</td>\n",
|
||
" <td>0.200764</td>\n",
|
||
" <td>0.944257</td>\n",
|
||
" <td>0.982043</td>\n",
|
||
" <td>0.082062</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.025691</td>\n",
|
||
" <td>0.181789</td>\n",
|
||
" <td>0.020039</td>\n",
|
||
" <td>0.444097</td>\n",
|
||
" <td>0.190363</td>\n",
|
||
" <td>0.001598</td>\n",
|
||
" <td>0.012618</td>\n",
|
||
" <td>0.000063</td>\n",
|
||
" <td>0.002011</td>\n",
|
||
" <td>1.656256e-08</td>\n",
|
||
" <td>0.00000</td>\n",
|
||
" <td>0.013971</td>\n",
|
||
" <td>0.264059</td>\n",
|
||
" <td>0.039261</td>\n",
|
||
" <td>0.038369</td>\n",
|
||
" <td>0.391198</td>\n",
|
||
" <td>0.186147</td>\n",
|
||
" <td>0.053125</td>\n",
|
||
" <td>0.412429</td>\n",
|
||
" <td>0.693861</td>\n",
|
||
" <td>0.477826</td>\n",
|
||
" <td>0.728309</td>\n",
|
||
" <td>0.557538</td>\n",
|
||
" <td>0.264424</td>\n",
|
||
" <td>0.530404</td>\n",
|
||
" <td>0.441744</td>\n",
|
||
" <td>0.642924</td>\n",
|
||
" <td>0.284008</td>\n",
|
||
" <td>0.159628</td>\n",
|
||
" <td>0.589397</td>\n",
|
||
" <td>0.723269</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.73044</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.661758</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.608434</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.670349</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>2 rows × 98 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" STATEFP10 COUNTYFP10 ... Score E (percentile) Score E (top 25th percentile)\n",
|
||
"0 01 005 ... 0.576986 False\n",
|
||
"1 01 005 ... 0.670349 False\n",
|
||
"\n",
|
||
"[2 rows x 98 columns]"
|
||
]
|
||
},
|
||
"execution_count": 72,
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"usa_merged.head(2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "ANMlAB8Qmtu8",
|
||
"outputId": "44934741-90a9-4664-fab5-2c39b348d2be"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_merged_compressed = gpd.GeoDataFrame(usa_merged, crs=\"EPSG:4326\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "PBPD9LQctvPJ"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_merged_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_merged.geojson\", driver=\"GeoJSON\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "qAAEr1z-WZAT"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_simplified = usa_merged[\n",
|
||
" [\"GEOID10\", \"Score D (percentile)\", \"geometry\"]\n",
|
||
" ].reset_index(drop=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "SCNUjEbzWg-o"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_simplified.rename(\n",
|
||
" columns={\"Score D (percentile)\": \"D_SCORE\"}, inplace=True\n",
|
||
" )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "Ej70uX0AmW0J",
|
||
"outputId": "88908f5e-b62d-494f-f0ea-649089b6652a"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_cbg_compressed = gpd.GeoDataFrame(\n",
|
||
" usa_simplified, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
|
||
" )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "UE12dWmame3I"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_cbg_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_cbg_scoreD.geojson\", driver=\"GeoJSON\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "wWFBduQQXGtM"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_tracts = aggregate_to_tracts(usa_simplified)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"metadata": {
|
||
"id": "L-PTnEWOpDtX"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"num_buckets = 10"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "kTJafXcqXC01",
|
||
"outputId": "bd197952-76b7-4f99-edef-983f20d7acfb"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tracts_compressed = gpd.GeoDataFrame(\n",
|
||
" usa_tracts, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
|
||
" )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "E2Nh97IlYhCF"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"tracts_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_tracts_score.geojson\", driver=\"GeoJSON\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "557zPMWFZC8R"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_bucketed = create_buckets_from_tracts(usa_tracts, num_buckets)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "k6RRdKlsaO0a"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"usa_aggregated = aggregate_buckets(usa_bucketed, agg_func=\"mean\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "-cm5eET2pA1Z",
|
||
"outputId": "8d5d2e80-ad62-41d5-f1b0-922345f92d62"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(10, 2)"
|
||
]
|
||
},
|
||
"execution_count": 80,
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"usa_aggregated.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "4ZvJra-RaZ4v"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"compressed = breakup_multipolygons(usa_aggregated, num_buckets)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "RDS7Q2WAb4Rx",
|
||
"outputId": "dcd28a31-083d-482e-b000-b4cd1046d4c2"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"36836"
|
||
]
|
||
},
|
||
"execution_count": 82,
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(compressed)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "VXTv8UuXb-qU"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"gdf_compressed = gpd.GeoDataFrame(\n",
|
||
" compressed, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
|
||
" )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "5v7TyB_rcRgT",
|
||
"outputId": "997625cc-c57a-4335-9b27-a08e4f8ad117"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(36836, 2)"
|
||
]
|
||
},
|
||
"execution_count": 84,
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"gdf_compressed.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "5eAnPL8McJpn"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"gdf_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_low.geojson\", driver=\"GeoJSON\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"name": "Score_Dissolve_Script",
|
||
"provenance": []
|
||
},
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.2"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|