ACS data baked in for map (#153)

* starting etl for score

* projection fix

* projection flags

* proper ejscreen etl csv generation

* failing CSV merge -- investigating

* checkpoint

* some etl changes

* completed ticket

* small typo
This commit is contained in:
Jorge Escobar 2021-06-17 18:12:39 -04:00 committed by GitHub
commit 78615e9b1a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 321 additions and 356 deletions

View file

View file

@ -5,70 +5,64 @@ import os
import json
from pathlib import Path
from utils import get_state_fips_codes
data_path = Path.cwd() / "data"
with requests.Session() as s:
# the fips_states_2010.csv is generated from data here
# https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
fips_csv_path = data_path / "fips_states_2010.csv"
with open(fips_csv_path) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
line_count = 0
for row in csv_reader:
if line_count == 0:
line_count += 1
state_fips_codes = get_state_fips_codes()
for fips in state_fips_codes:
# check if file exists
shp_file_path = data_path.joinpath(
"census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
)
if not os.path.isfile(shp_file_path):
print(f"downloading {row[1]}")
# 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/
# But using 2010 for now
cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
download = s.get(cbg_state_url)
file_contents = download.content
zip_file_path = data_path / "census" / "downloaded.zip"
zip_file = open(zip_file_path, "wb")
zip_file.write(file_contents)
zip_file.close()
print(f"extracting {row[1]}")
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
shp_dir_path = data_path / "census" / "shp" / fips
zip_ref.extractall(shp_dir_path)
geojson_dir_path = data_path.joinpath(
"census",
"geojson",
)
if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
# ogr2ogr
print(f"encoding GeoJSON for {row[1]}")
# PWD is different for Windows
if os.name == "nt":
pwd = "%cd%"
else:
fips = row[0].strip()
# check if file exists
shp_file_path = data_path.joinpath(
"census", "shp", fips, f"tl_2010_{fips}_bg10.shp"
)
if not os.path.isfile(shp_file_path):
print(f"downloading {row[1]}")
# 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/
# But using 2010 for now
cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
download = s.get(cbg_state_url)
file_contents = download.content
zip_file_path = data_path / "census" / "downloaded.zip"
zip_file = open(zip_file_path, "wb")
zip_file.write(file_contents)
zip_file.close()
print(f"extracting {row[1]}")
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
shp_dir_path = data_path / "census" / "shp" / fips
zip_ref.extractall(shp_dir_path)
geojson_dir_path = data_path.joinpath(
"census",
"geojson",
)
if not os.path.isfile(geojson_dir_path.joinpath(fips + ".json")):
# ogr2ogr
print(f"encoding GeoJSON for {row[1]}")
# PWD is different for Windows
if os.name == "nt":
pwd = "%cd%"
else:
pwd = "${PWD}"
cmd = (
'docker run --rm -it -v "'
+ pwd
+ '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/'
+ fips
+ ".json /home/data/census/shp/"
+ fips
+ "/tl_2010_"
+ fips
+ "_bg10.shp"
)
print(cmd)
os.system(cmd)
pwd = "${PWD}"
cmd = (
'docker run --rm -it -v "'
+ pwd
+ '"/:/home osgeo/gdal:alpine-ultrasmall-latest ogr2ogr -f GeoJSON /home/data/census/geojson/'
+ fips
+ ".json /home/data/census/shp/"
+ fips
+ "/tl_2010_"
+ fips
+ "_bg10.shp"
)
print(cmd)
os.system(cmd)
# generate CBG CSV table for pandas
## load in memory
@ -87,10 +81,7 @@ with requests.Session() as s:
cbg_per_state_list[geoid10_state_id] = []
cbg_per_state_list[geoid10_state_id].append(geoid10)
csv_dir_path = data_path.joinpath(
"census",
"csv",
)
csv_dir_path = data_path / "census" / "csv"
## write to individual state csv
for state_id in cbg_per_state_list:
geoid10_list = cbg_per_state_list[state_id]

View file

@ -2,6 +2,8 @@ import os
from pathlib import Path
import shutil
from utils import get_state_fips_codes
data_path = Path.cwd() / "data"
# remove existing mbtiles file
@ -14,17 +16,41 @@ mvt_tiles_path = data_path / "tiles" / "mvt"
if os.path.exists(mvt_tiles_path):
shutil.rmtree(mvt_tiles_path)
# Merge scores into json
# TODO: for this first pass, just merging ACS EJScreen indicators
# Per https://github.com/usds/justice40-tool/issues/102
if os.name == "nt":
pwd = "%cd%"
else:
pwd = "${PWD}"
state_fips_codes = get_state_fips_codes()
for fips in state_fips_codes:
cmd = (
'docker run --rm -v "'
+ pwd
+ '"/:/home '
+ "osgeo/gdal:alpine-small-latest ogr2ogr -f GeoJSON "
+ f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN '/home/data/dataset/ejscreen_2020/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" "
+ f"/home/data/score/geojson/{fips}.json /home/data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf"
)
print(cmd)
os.system(cmd)
# get a list of all json files to plug in the docker commands below
# (workaround since *.json doesn't seem to work)
geojson_list = ""
geojson_path = data_path / "census" / "geojson"
geojson_path = data_path / "score" / "geojson"
for file in os.listdir(geojson_path):
if file.endswith(".json"):
geojson_list += f"/home/data/census/geojson/{file} "
geojson_list += f"/home/data/score/geojson/{file} "
if geojson_list == "":
print("No GeoJson files found. Please run download_cbg.py first")
print("No GeoJson files found. Please run scripts/download_cbg.py first")
# generate mbtiles file
# PWD is different for Windows
if os.name == "nt":
pwd = "%cd%"
@ -33,7 +59,7 @@ else:
cmd = (
'docker run --rm -it -v "'
+ pwd
+ '"/:/home klokantech/tippecanoe tippecanoe -s_srs EPSG:4269 -t_srs EPSG:4326 --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --drop-densest-as-needed --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
+ '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 '
+ geojson_list
)
print(cmd)

20
score/scripts/utils.py Normal file
View file

@ -0,0 +1,20 @@
# common usage functions
import csv
from pathlib import Path
def get_state_fips_codes():
    """Return the state FIPS codes listed in ``data/fips_states_2010.csv``.

    The CSV is looked up relative to the current working directory
    (``<cwd>/data/fips_states_2010.csv``). The first row is treated as a
    header and skipped; for every remaining non-empty row the first column
    is stripped of surrounding whitespace and collected.

    Returns:
        list[str]: FIPS codes in file order (empty if the file has no data
        rows).

    Raises:
        FileNotFoundError: if the CSV file does not exist.
    """
    fips_csv_path = Path.cwd() / "data" / "fips_states_2010.csv"

    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(fips_csv_path, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=",")
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(csv_reader, None)
        # csv.reader yields [] for blank lines (e.g. a trailing newline at
        # the end of the file); skip those instead of failing on row[0].
        return [row[0].strip() for row in csv_reader if row]