mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 09:41:26 -08:00
Setup Census Block Group download and GeoJSON encoding (#122)
* initial commit * checkpoint * census block group downloader complete * checkpoint * docker command for ogr2ogr * detecting OS and downloaded fiels * generate mbtiles * per state and national csv * removing testing correction * generating uncompressed mvt * completed ticket * final changes from code review
This commit is contained in:
parent
9155326775
commit
6f568b0e20
16 changed files with 412 additions and 3997 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
@ -126,4 +126,8 @@ dmypy.json
|
|||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
cython_debug/
|
||||
|
||||
# temporary census data
|
||||
score/data/census
|
||||
score/data/tiles
|
||||
|
|
38
score/README.md
Normal file
38
score/README.md
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Justice 40 Score generator
|
||||
|
||||
## Setup
|
||||
|
||||
- Start a terminal
|
||||
- Make sure you have Python 3.9 installed: `python -V` or `python3 -V`
|
||||
- Create a `virtualenv` in this folder: `python -m venv venv`
|
||||
- Activate the virtualenv
|
||||
- Windows: `./venv/Scripts/activate`
|
||||
- Mac/Linux: `source venv/bin/activate`
|
||||
- Install packages: `pip install -r requirements.txt`
|
||||
- If you are a Windows user, you might need to install Build Tools for Visual Studio. [Instructions here](https://stackoverflow.com/a/54136652)
|
||||
|
||||
## Running the Jupyter notebook
|
||||
|
||||
- Start a terminal
|
||||
- Change to this directory (i.e. `cd score`)
|
||||
- Activate your virtualenv (see above)
|
||||
- Type `jupyter notebook`. Your browser should open with a Jupyter Notebook tab
|
||||
|
||||
## Downloading Census Block Groups GeoJSON and Generating CBG CSVs
|
||||
|
||||
- Make sure you have Docker running on your machine
|
||||
- Start a terminal
|
||||
- Change to this directory (i.e. `cd score`)
|
||||
- Activate your virtualenv (see above)
|
||||
- Run `python scripts/download_cbg.py`
|
||||
Note: Census files are not kept in the repository and the download directories are ignored by Git
|
||||
|
||||
## Generating mbtiles
|
||||
|
||||
- Change to this directory (i.e. `cd score`)
|
||||
- Activate your virtualenv (see above)
|
||||
- Run the following script: `python scripts/generate_mbtiles.py`
|
||||
|
||||
## Serve the map locally
|
||||
|
||||
- Run: `docker run --rm -it -v ${PWD}/data/tiles:/data -p 8080:80 klokantech/tileserver-gl`
|
0
score/data/census/__init__.py
Normal file
0
score/data/census/__init__.py
Normal file
0
score/data/census/csv/__init__.py
Normal file
0
score/data/census/csv/__init__.py
Normal file
0
score/data/census/geojson/__init__.py
Normal file
0
score/data/census/geojson/__init__.py
Normal file
0
score/data/census/shp/__init__.py
Normal file
0
score/data/census/shp/__init__.py
Normal file
52
score/data/fips_states_2010.csv
Normal file
52
score/data/fips_states_2010.csv
Normal file
|
@ -0,0 +1,52 @@
|
|||
fips,state_name
|
||||
01 ,Alabama
|
||||
02 ,Alaska
|
||||
04 ,Arizona
|
||||
05 ,Arkansas
|
||||
06 ,California
|
||||
08 ,Colorado
|
||||
09 ,Connecticut
|
||||
10 ,Delaware
|
||||
11 ,District of Columbia
|
||||
12 ,Florida
|
||||
13 ,Georgia
|
||||
15 ,Hawaii
|
||||
16 ,Idaho
|
||||
17 ,Illinois
|
||||
18 ,Indiana
|
||||
19 ,Iowa
|
||||
20 ,Kansas
|
||||
21 ,Kentucky
|
||||
22 ,Louisiana
|
||||
23 ,Maine
|
||||
24 ,Maryland
|
||||
25 ,Massachusetts
|
||||
26 ,Michigan
|
||||
27 ,Minnesota
|
||||
28 ,Mississippi
|
||||
29 ,Missouri
|
||||
30 ,Montana
|
||||
31 ,Nebraska
|
||||
32 ,Nevada
|
||||
33 ,New Hampshire
|
||||
34 ,New Jersey
|
||||
35 ,New Mexico
|
||||
36 ,New York
|
||||
37 ,North Carolina
|
||||
38 ,North Dakota
|
||||
39 ,Ohio
|
||||
40 ,Oklahoma
|
||||
41 ,Oregon
|
||||
42 ,Pennsylvania
|
||||
44 ,Rhode Island
|
||||
45 ,South Carolina
|
||||
46 ,South Dakota
|
||||
47 ,Tennessee
|
||||
48 ,Texas
|
||||
49 ,Utah
|
||||
50 ,Vermont
|
||||
51 ,Virginia
|
||||
53 ,Washington
|
||||
54 ,West Virginia
|
||||
55 ,Wisconsin
|
||||
56 ,Wyoming
|
|
0
score/data/tiles/__init__.py
Normal file
0
score/data/tiles/__init__.py
Normal file
133
score/ipython/test.ipynb
Normal file
133
score/ipython/test.ipynb
Normal file
|
@ -0,0 +1,133 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1a4c0c68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "70b3a793",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv('data/fips_states.csv') "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"id": "c514aad8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>fips</th>\n",
|
||||
" <th>state_name</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Alabama</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Alaska</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Arizona</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Arkansas</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>California</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" fips state_name\n",
|
||||
"0 1 Alabama \n",
|
||||
"1 2 Alaska \n",
|
||||
"2 4 Arizona \n",
|
||||
"3 5 Arkansas \n",
|
||||
"4 6 California"
|
||||
]
|
||||
},
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b9ee44d9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
5
score/requirements.txt
Normal file
5
score/requirements.txt
Normal file
|
@ -0,0 +1,5 @@
|
|||
ipython
|
||||
jupyter
|
||||
numpy
|
||||
pandas
|
||||
requests
|
121
score/scripts/download_cbg.py
Normal file
121
score/scripts/download_cbg.py
Normal file
|
@ -0,0 +1,121 @@
|
|||
import csv
import json
import os
import zipfile
from pathlib import Path

import requests

# Downloads the 2010 Census Block Group shapefiles for every state listed in
# data/fips_states_2010.csv, converts each to GeoJSON via ogr2ogr (in Docker),
# and emits per-state and national CSVs of GEOID10 values.
data_path = Path.cwd() / "data"

with requests.Session() as s:
    # the fips_states_2010.csv is generated from data here
    # https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
    fips_csv_path = data_path / "fips_states_2010.csv"
    with open(fips_csv_path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=",")
        for line_count, row in enumerate(csv_reader):
            if line_count == 0:
                # skip the CSV header row
                continue
            # CSV stores fips codes with trailing spaces (e.g. "01 ")
            fips = row[0].strip()
            state_name = row[1]

            # skip the download if the shapefile already exists on disk
            shp_file_path = data_path.joinpath(
                "census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
            )
            if not os.path.isfile(shp_file_path):
                print(f"downloading {state_name}")

                cbg_state_url = (
                    "https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/"
                    f"tl_2010_{fips}_bg10.zip"
                )
                download = s.get(cbg_state_url)
                # fail fast instead of writing an HTTP error body to disk
                download.raise_for_status()
                zip_file_path = data_path.joinpath("census", "downloaded.zip")
                # context manager guarantees the handle is closed even on error
                with open(zip_file_path, "wb") as zip_file:
                    zip_file.write(download.content)

                print(f"extracting {state_name}")

                with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
                    shp_dir_path = data_path.joinpath("census", "shp", fips)
                    zip_ref.extractall(shp_dir_path)

            geojson_dir_path = data_path.joinpath("census", "geojson")
            if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
                # encode the shapefile as GeoJSON with ogr2ogr inside Docker
                print(f"encoding GeoJSON for {state_name}")

                # PWD is expanded differently on Windows vs. Mac/Linux shells
                pwd = "%cd%" if os.name == "nt" else "${PWD}"
                cmd = (
                    f'docker run --rm -it -v "{pwd}"/:/home '
                    "osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON "
                    f"/home/data/census/geojson/{fips}.json "
                    f"/home/data/census/shp/{fips}/tl_2010_{fips}_bg10.shp"
                )
                print(cmd)
                os.system(cmd)

# generate CBG CSV table for pandas
## load all GEOID10 values from the generated GeoJSON files into memory
geojson_dir_path = data_path.joinpath("census", "geojson")
cbg_national_list = []  # in-memory global list
cbg_per_state_list = {}  # in-memory dict per state (keyed by 2-char fips)
for file in os.listdir(geojson_dir_path):
    if file.endswith(".json"):
        print(f"ingesting geoid10 for file {file}")
        with open(geojson_dir_path.joinpath(file)) as f:
            geojson = json.load(f)
            for feature in geojson["features"]:
                geoid10 = feature["properties"]["GEOID10"]
                cbg_national_list.append(str(geoid10))
                # first two characters of a GEOID10 are the state fips code
                geoid10_state_id = geoid10[:2]
                cbg_per_state_list.setdefault(geoid10_state_id, []).append(geoid10)

csv_dir_path = data_path.joinpath("census", "csv")

## write one CSV per state (one geoid10 per row, no header)
for state_id, geoid10_list in cbg_per_state_list.items():
    with open(
        csv_dir_path.joinpath(f"{state_id}.csv"), mode="w", newline=""
    ) as cbg_csv_file:
        cbg_csv_file_writer = csv.writer(
            cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        for geoid10 in geoid10_list:
            cbg_csv_file_writer.writerow([geoid10])

## write the national US csv
with open(csv_dir_path.joinpath("us.csv"), mode="w", newline="") as cbg_csv_file:
    cbg_csv_file_writer = csv.writer(
        cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    for geoid10 in cbg_national_list:
        cbg_csv_file_writer.writerow([geoid10])

print("Census block groups downloading complete")
|
58
score/scripts/generate_mbtiles.py
Normal file
58
score/scripts/generate_mbtiles.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
import os
import shutil
import sys
from pathlib import Path

# Regenerates the mbtiles file and uncompressed mvt tile directory from the
# GeoJSON files produced by scripts/download_cbg.py, using tippecanoe in Docker.
data_path = Path.cwd() / "data"

# remove existing mbtiles file so tippecanoe starts from a clean slate
mb_tiles_path = data_path / "tiles" / "block2010.mbtiles"
if os.path.exists(mb_tiles_path):
    os.remove(mb_tiles_path)

# remove existing mvt directory
mvt_tiles_path = data_path / "tiles" / "mvt"
if os.path.exists(mvt_tiles_path):
    shutil.rmtree(mvt_tiles_path)

# get a list of all json files to plug in the docker commands below
# (workaround since *.json doesn't seem to work)
geojson_path = data_path / "census" / "geojson"
geojson_list = "".join(
    f"/home/data/census/geojson/{file} "
    for file in os.listdir(geojson_path)
    if file.endswith(".json")
)

# exit early rather than invoking tippecanoe with no input files
if geojson_list == "":
    print("No GeoJson files found. Please run download_cbg.py first")
    sys.exit(1)

# PWD is expanded differently on Windows vs. Mac/Linux shells
pwd = "%cd%" if os.name == "nt" else "${PWD}"

# generate the compressed mbtiles archive
cmd = (
    f'docker run --rm -it -v "{pwd}"/:/home klokantech/tippecanoe '
    "tippecanoe -zg -o /home/data/tiles/block2010.mbtiles "
    "--drop-densest-as-needed --extend-zooms-if-still-dropping -l cbg2010 "
    + geojson_list
)
print(cmd)
os.system(cmd)

# generate uncompressed tiles (mvt directory), e.g.:
# docker run --rm -it -v ${PWD}:/data tippecanoe tippecanoe --no-tile-compression -zg -e /data/tiles_custom -l blocks /data/tabblock2010_01_pophu_joined.json
cmd = (
    f'docker run --rm -it -v "{pwd}"/:/home klokantech/tippecanoe '
    "tippecanoe --no-tile-compression -zg -e /home/data/tiles/mvt "
    "/home/data/census/geojson/01.json "
    + geojson_list
)
print(cmd)
os.system(cmd)
|
|
@ -1,15 +0,0 @@
|
|||
# Tile Server
|
||||
|
||||
## What is it?
|
||||
|
||||
A simple tile server using [pg_tileserv](https://github.com/CrunchyData/pg_tileserv), based on pg_tileserv [docker example](https://github.com/CrunchyData/pg_tileserv/tree/master/examples/docker).
|
||||
|
||||
## How to use it?
|
||||
|
||||
1. Edit variables in `docker-compose.yml` if necessary to customize username/pw
|
||||
2. Run `docker-compose up` to start running the server. It will likely stall in enabling extensions (TODO: figure this out))
|
||||
3. Restart the server with ctrl-c. It should load the data from the `data/` directory exactly one time.
|
||||
|
||||
## Using
|
||||
|
||||
- Point your visualization library to the following URL, and select `vector` tiles: `http://localhost:7800/public.maryland/{z}/{x}/{y}.mvt`
|
File diff suppressed because one or more lines are too long
|
@ -1,36 +0,0 @@
|
|||
version: "3.9"
|
||||
|
||||
services:
|
||||
tileserv:
|
||||
image: pramsey/pg_tileserv:20210210
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://map_dev_user:map_pwd@db/map_dev
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- 7800:7800
|
||||
restart: unless-stopped
|
||||
|
||||
db:
|
||||
image: kartoza/postgis:13-3.1
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
- ./data:/work
|
||||
- ./load-data-db.sh:/docker-entrypoint-initdb.d/load_data-db.sh
|
||||
environment:
|
||||
- POSTGRES_USER=map_dev_user
|
||||
- POSTGRES_PASS=map_pwd
|
||||
- POSTGRES_DB=map_dev
|
||||
- ALLOW_IP_RANGE=0.0.0.0/0
|
||||
ports:
|
||||
- 5434:5432
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -h db -U map_dev_user -d map_dev"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
pgdata:
|
|
@ -1,12 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# echo "[SQL INIT SCRIPT] Creating extension..."
|
||||
# psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "gis" <<-EOSQL
|
||||
# CREATE EXTENSION IF NOT EXISTS postgis;
|
||||
# EOSQL
|
||||
|
||||
# Load Maryland geojson
|
||||
echo "[SQL INIT SCRIPT] Loading data from geojson..."
|
||||
ogr2ogr -progress -f PostgreSQL PG:"host=localhost dbname=map_dev user=map_dev_user password=map_pwd" /work/maryland.geojson -nln maryland
|
||||
echo "Data load complete"
|
Loading…
Add table
Reference in a new issue