Starting Tribal Boundaries Work (#1736)

* starting tribal pr

* further pipeline work

* bia merge working

* alaska villages and tribal geo generate

* tribal folders

* adding data full run

* tile generation

* tribal tile deploy
This commit is contained in:
Jorge Escobar 2022-07-30 01:13:10 -04:00 committed by GitHub
commit 8149ac31c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 463 additions and 51 deletions

86
.github/workflows/tribal-deploy.yml vendored Normal file
View file

@ -0,0 +1,86 @@
# Manually-triggered workflow that builds the tribal boundaries map layer
# (tribal ETL + tile generation) and publishes the results to the Justice40
# S3 bucket, then invalidates the backend CloudFront distribution.
name: Tribal Layer Deploy
on:
  workflow_dispatch:
    inputs:
      # Free-text confirmation gate; the workflow does not actually check the
      # value — it only forces a deliberate manual dispatch.
      confirm-action:
        description: This will deploy tribal map layer, are you sure you want to proceed? (Y/n)
        default: n
        required: true
env:
  # CloudFront distribution id invalidated after the upload step.
  BE_CDN_ID: E1324VDMNCO97N
jobs:
  deploy_data:
    runs-on: ubuntu-latest
    defaults:
      run:
        # All `run:` steps execute from the data pipeline package root.
        working-directory: data/data-pipeline
    strategy:
      matrix:
        python-version: [3.9]
    steps:
      - name: Checkout source
        uses: actions/checkout@v2
      - name: Print variables to help debug
        uses: hmarr/debug-action@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Load cached Poetry installation
        uses: actions/cache@v2
        id: cached-poetry-dependencies
        with:
          path: ~/.cache/pypoetry/virtualenvs
          # NOTE(review): the key hashes combine-tilefy.yml rather than this
          # workflow file — looks copied from the tile-fy workflow; confirm
          # the intended cache-busting file.
          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/combine-tilefy.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
        run: poetry show -v
      - name: Install dependencies
        # Skipped entirely on a cache hit; s4cmd is added for the S3 upload step.
        run: poetry add s4cmd && poetry install
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
      - name: Install GDAL/ogr2ogr
        # GDAL is required by geopandas/fiona during the tribal ETL.
        run: |
          sudo add-apt-repository ppa:ubuntugis/ppa
          sudo apt-get update
          sudo apt-get -y install gdal-bin
          ogrinfo --version
      - name: Set timezone for tippecanoe
        uses: szenius/set-timezone@v1.0
        with:
          timezoneLinux: "America/Los_Angeles"
      - name: Get tippecanoe
        # Build tippecanoe from source; no apt package is available.
        run: |
          sudo apt-get install -y software-properties-common libsqlite3-dev zlib1g-dev
          sudo apt-add-repository -y ppa:git-core/ppa
          sudo mkdir -p /tmp/tippecanoe-src
          sudo git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
      - name: Make tippecanoe
        working-directory: /tmp/tippecanoe-src
        run: |
          sudo /usr/bin/bash -c make
          mkdir -p /usr/local/bin
          cp tippecanoe /usr/local/bin/tippecanoe
          tippecanoe -v
      - name: Run Scripts
        # Run the tribal ETL, then generate only the tribal tile layer.
        run: |
          poetry run python3 data_pipeline/application.py etl-run --dataset tribal
          poetry run python3 data_pipeline/application.py generate-map-tiles --generate-tribal-layer
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Deploy to Geoplatform AWS
        # Publish both the national geojson and the generated tiles publicly.
        run: |
          poetry run s4cmd put ./data_pipeline/data/tribal/geojson/ s3://justice40-data/data-pipeline/data/tribal/geojson --recursive --force --API-ACL=public-read --num-threads=250
          poetry run s4cmd put ./data_pipeline/data/tribal/tiles/ s3://justice40-data/data-pipeline/data/tribal/tiles --recursive --force --API-ACL=public-read --num-threads=250
      - name: Invalidate cache on AWS CDNs
        uses: chetan/invalidate-cloudfront-action@master
        env:
          DISTRIBUTION: ${{env.BE_CDN_ID}}
          PATHS: "/*"
          AWS_REGION: "us-east-1"
          AWS_ACCESS_KEY_ID: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}

View file

@ -68,7 +68,7 @@
] ]
}, },
{ {
"name": "Generate Map Tiles", "name": "Generate Score Tiles",
"type": "python", "type": "python",
"request": "launch", "request": "launch",
"module": "data_pipeline.application", "module": "data_pipeline.application",
@ -76,6 +76,16 @@
"generate-map-tiles" "generate-map-tiles"
] ]
}, },
{
"name": "Generate Tribal Tiles",
"type": "python",
"request": "launch",
"module": "data_pipeline.application",
"args": [
"generate-map-tiles",
"-t"
]
},
{ {
"name": "ETL Run", "name": "ETL Run",
"type": "python", "type": "python",
@ -85,6 +95,37 @@
"etl-run" "etl-run"
] ]
}, },
{
"name": "ETL Run NRI",
"type": "python",
"request": "launch",
"module": "data_pipeline.application",
"args": [
"etl-run",
"--dataset",
"national_risk_index"
]
},
{
"name": "ETL Run Tribal",
"type": "python",
"request": "launch",
"module": "data_pipeline.application",
"args": [
"etl-run",
"--dataset",
"tribal"
]
},
{
"name": "Data Full Run",
"type": "python",
"request": "launch",
"module": "data_pipeline.application",
"args": [
"data-full-run",
]
},
{ {
"name": "poetry install", "name": "poetry install",
"type": "python", "type": "python",

View file

@ -13,6 +13,9 @@ from data_pipeline.etl.sources.census.etl_utils import (
reset_data_directories as census_reset, reset_data_directories as census_reset,
zip_census_data, zip_census_data,
) )
from data_pipeline.etl.sources.tribal.etl_utils import (
reset_data_directories as tribal_reset,
)
from data_pipeline.tile.generate import generate_tiles from data_pipeline.tile.generate import generate_tiles
from data_pipeline.utils import ( from data_pipeline.utils import (
data_folder_cleanup, data_folder_cleanup,
@ -57,6 +60,7 @@ def data_cleanup():
census_reset(data_path) census_reset(data_path)
data_folder_cleanup() data_folder_cleanup()
tribal_reset(data_path)
score_folder_cleanup() score_folder_cleanup()
temp_folder_cleanup() temp_folder_cleanup()
@ -168,13 +172,21 @@ def geo_score(data_source: str):
@cli.command( @cli.command(
help="Generate map tiles", help="Generate map tiles. Pass -t to generate tribal layer as well.",
) )
def generate_map_tiles(): @click.option(
"-t",
"--generate-tribal-layer",
default=False,
required=False,
is_flag=True,
type=bool,
)
def generate_map_tiles(generate_tribal_layer):
"""CLI command to generate the map tiles""" """CLI command to generate the map tiles"""
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
generate_tiles(data_path) generate_tiles(data_path, generate_tribal_layer)
sys.exit() sys.exit()
@ -271,7 +283,7 @@ def data_full_run(check: bool, data_source: str):
score_geo(data_source) score_geo(data_source)
logger.info("*** Generating Map Tiles") logger.info("*** Generating Map Tiles")
generate_tiles(data_path) generate_tiles(data_path, True)
file = "first_run.txt" file = "first_run.txt"
cmd = f"touch {data_path}/{file}" cmd = f"touch {data_path}/{file}"

View file

@ -125,8 +125,15 @@ DATASET_LIST = [
"class_name": "MarylandEJScreenETL", "class_name": "MarylandEJScreenETL",
}, },
] ]
CENSUS_INFO = { CENSUS_INFO = {
"name": "census", "name": "census",
"module_dir": "census", "module_dir": "census",
"class_name": "CensusETL", "class_name": "CensusETL",
} }
# Registry entry for the tribal boundaries ETL; appended to the dataset
# search list (alongside CENSUS_INFO) when resolving which ETL classes to run.
TRIBAL_INFO = dict(
    name="tribal",
    module_dir="tribal",
    class_name="TribalETL",
)

View file

@ -22,7 +22,9 @@ def _get_datasets_to_run(dataset_to_run: str) -> typing.List[dict]:
None None
""" """
dataset_list = constants.DATASET_LIST dataset_list = constants.DATASET_LIST
etls_to_search = dataset_list + [constants.CENSUS_INFO] etls_to_search = (
dataset_list + [constants.CENSUS_INFO] + [constants.TRIBAL_INFO]
)
if dataset_to_run: if dataset_to_run:
dataset_element = next( dataset_element = next(

View file

@ -3,8 +3,8 @@ import json
import subprocess import subprocess
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
import geopandas as gpd import geopandas as gpd
from data_pipeline.etl.base import ExtractTransformLoad from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger, unzip_file_from_url from data_pipeline.utils import get_module_logger, unzip_file_from_url

View file

@ -0,0 +1,195 @@
class TribalETL(ExtractTransformLoad):
    """ETL for tribal boundary layers.

    Downloads the BIA land-area and Alaska Native Villages geojson archives
    from the Justice40 S3 bucket, normalizes each source's columns to
    ``tribalId`` / ``landAreaName``, and writes a single national GeoJSON
    (WGS84 lon/lat) to ``data/tribal/geojson/usa.json``.
    """

    def __init__(self):
        # Output locations under data/tribal.
        self.GEOJSON_BASE_PATH = self.DATA_PATH / "tribal" / "geojson"
        # NOTE(review): CSV_BASE_PATH is defined but load() only writes
        # GeoJSON — CSV output appears planned, not implemented; confirm.
        self.CSV_BASE_PATH = self.DATA_PATH / "tribal" / "csv"
        self.NATIONAL_TRIBAL_GEOJSON_PATH = self.GEOJSON_BASE_PATH / "usa.json"
        # Accumulates one GeoDataFrame per source; concatenated in load().
        self.USA_TRIBAL_DF_LIST = []

    def extract(self) -> None:
        """Extract the tribal geojson zip files from Justice40 S3 data folder

        Returns:
            None
        """
        logger.info("Downloading Tribal Data")

        bia_geojson_url = "https://justice40-data.s3.amazonaws.com/data-sources/BIA_National_LAR_json.zip"
        alaska_geojson_url = "https://justice40-data.s3.amazonaws.com/data-sources/Alaska_Native_Villages_json.zip"

        # Each archive is unpacked into its own subfolder of data/tribal/geojson.
        unzip_file_from_url(
            bia_geojson_url,
            self.TMP_PATH,
            self.DATA_PATH / "tribal" / "geojson" / "bia_national_lar",
        )
        unzip_file_from_url(
            alaska_geojson_url,
            self.TMP_PATH,
            self.DATA_PATH / "tribal" / "geojson" / "alaska_native_villages",
        )

    def _transform_bia_national_lar(self, tribal_geojson_path: Path) -> None:
        """Transform the Tribal BIA National Lar Geodataframe and appends it to the
        national Tribal Dataframe List

        Args:
            tribal_geojson_path (Path): the Path to the Tribal Geojson

        Returns:
            None
        """
        bia_national_lar_df = gpd.read_file(tribal_geojson_path)

        # Drop bookkeeping columns that are not needed in the map layer.
        bia_national_lar_df.drop(
            ["OBJECTID", "GISAcres", "Shape_Length", "Shape_Area"],
            axis=1,
            inplace=True,
        )

        # NOTE(review): the rename uses TSAID/LARName, identical to the TSA
        # transform below — confirm these are the LAR file's actual columns.
        bia_national_lar_df.rename(
            columns={"TSAID": "tribalId", "LARName": "landAreaName"},
            inplace=True,
        )

        self.USA_TRIBAL_DF_LIST.append(bia_national_lar_df)

    def _transform_bia_aian_supplemental(
        self, tribal_geojson_path: Path
    ) -> None:
        """Transform the Tribal BIA Supplemental Geodataframe and appends it to the
        national Tribal Dataframe List

        Args:
            tribal_geojson_path (Path): the Path to the Tribal Geojson

        Returns:
            None
        """
        bia_aian_supplemental_df = gpd.read_file(tribal_geojson_path)

        bia_aian_supplemental_df.drop(
            ["OBJECTID", "GISAcres", "Source", "Shape_Length", "Shape_Area"],
            axis=1,
            inplace=True,
        )

        # This source carries no id column; only the land-area name is kept.
        bia_aian_supplemental_df.rename(
            columns={"Land_Area_": "landAreaName"},
            inplace=True,
        )

        self.USA_TRIBAL_DF_LIST.append(bia_aian_supplemental_df)

    def _transform_bia_tsa(self, tribal_geojson_path: Path) -> None:
        """Transform the Tribal BIA TSA Geodataframe and appends it to the
        national Tribal Dataframe List

        Args:
            tribal_geojson_path (Path): the Path to the Tribal Geojson

        Returns:
            None
        """
        bia_tsa_df = gpd.read_file(tribal_geojson_path)

        bia_tsa_df.drop(
            ["OBJECTID", "GISAcres", "Shape_Length", "Shape_Area"],
            axis=1,
            inplace=True,
        )

        bia_tsa_df.rename(
            columns={"TSAID": "tribalId", "LARName": "landAreaName"},
            inplace=True,
        )

        self.USA_TRIBAL_DF_LIST.append(bia_tsa_df)

    def _transform_alaska_native_villages(
        self, tribal_geojson_path: Path
    ) -> None:
        """Transform the Alaska Native Villages Geodataframe and appends it to the
        national Tribal Dataframe List

        Args:
            tribal_geojson_path (Path): the Path to the Tribal Geojson

        Returns:
            None
        """
        alaska_native_villages_df = gpd.read_file(tribal_geojson_path)

        alaska_native_villages_df.rename(
            columns={
                "GlobalID": "tribalId",
                "TRIBALOFFICENAME": "landAreaName",
            },
            inplace=True,
        )

        self.USA_TRIBAL_DF_LIST.append(alaska_native_villages_df)

    def transform(self) -> None:
        """Transform the tribal geojson files to generate national CSVs and GeoJSONs

        Returns:
            None
        """
        logger.info("Transforming Tribal Data")

        # Resolve the geojson files unpacked by extract().
        # NOTE(review): bia_national_lar_geojson points at BIA_TSA.json — the
        # same file as bia_tsa_geojson_geojson below, so the TSA data is loaded
        # twice and the actual National LAR file is never read. This looks like
        # a copy-paste bug; confirm the intended filename in the archive.
        bia_national_lar_geojson = (
            self.GEOJSON_BASE_PATH / "bia_national_lar" / "BIA_TSA.json"
        )
        bia_aian_supplemental_geojson = (
            self.GEOJSON_BASE_PATH
            / "bia_national_lar"
            / "BIA_AIAN_Supplemental.json"
        )
        bia_tsa_geojson_geojson = (
            self.GEOJSON_BASE_PATH / "bia_national_lar" / "BIA_TSA.json"
        )
        alaska_native_villages_geojson = (
            self.GEOJSON_BASE_PATH
            / "alaska_native_villages"
            / "AlaskaNativeVillages.gdb.geojson"
        )

        self._transform_bia_national_lar(bia_national_lar_geojson)
        self._transform_bia_aian_supplemental(bia_aian_supplemental_geojson)
        self._transform_bia_tsa(bia_tsa_geojson_geojson)
        self._transform_alaska_native_villages(alaska_native_villages_geojson)

    def load(self) -> None:
        """Concatenate the per-source frames and write the national GeoJSON.

        Returns:
            None
        """
        logger.info("Saving Tribal GeoJson and CSV")

        usa_tribal_df = gpd.GeoDataFrame(
            pd.concat(self.USA_TRIBAL_DF_LIST, ignore_index=True)
        )
        # Reproject everything to plain WGS84 lon/lat for web-map consumption.
        usa_tribal_df = usa_tribal_df.to_crs(
            "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
        )
        logger.info("Writing national geojson file")
        usa_tribal_df.to_file(
            self.NATIONAL_TRIBAL_GEOJSON_PATH, driver="GeoJSON"
        )

View file

@ -0,0 +1,28 @@
from pathlib import Path
from data_pipeline.utils import (
get_module_logger,
remove_all_from_dir,
remove_files_from_dir,
)
logger = get_module_logger(__name__)
def reset_data_directories(
    data_path: Path,
) -> None:
    """Delete all generated tribal artifacts under *data_path*.

    Removes the tribal CSV outputs and empties the tribal geojson folder so
    a fresh ETL run starts from a clean slate.
    """
    tribal_root = data_path / "tribal"

    # Drop only the .csv outputs; unrelated files in the folder are kept.
    remove_files_from_dir(tribal_root / "csv", ".csv")

    # The geojson folder is fully regenerated each run, so wipe it entirely.
    remove_all_from_dir(tribal_root / "geojson")

View file

@ -7,55 +7,96 @@ from data_pipeline.utils import get_module_logger, remove_all_from_dir
logger = get_module_logger(__name__) logger = get_module_logger(__name__)
def generate_tiles(data_path: Path) -> None: def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
"""Generates map tiles from geojson files
score_tiles_path = data_path / "score" / "tiles" Args:
high_tile_path = score_tiles_path / "high" data_path (Path): Path to data folder
low_tile_path = score_tiles_path / "low" generate_tribal_layer (bool): If true, generate the tribal layer of the map
score_geojson_dir = data_path / "score" / "geojson"
USA_HIGH_MIN_ZOOM = 5 Returns:
USA_HIGH_MAX_ZOOM = 11 None
USA_LOW_MIN_ZOOM = 0 """
USA_LOW_MAX_ZOOM = 7
# remove existing mbtiles file def _generate_score_tiles() -> None:
remove_all_from_dir(score_tiles_path) """Generates score map tiles"""
score_tiles_path = data_path / "score" / "tiles"
high_tile_path = score_tiles_path / "high"
low_tile_path = score_tiles_path / "low"
score_geojson_dir = data_path / "score" / "geojson"
# create dirs USA_HIGH_MIN_ZOOM = 5
os.mkdir(high_tile_path) USA_HIGH_MAX_ZOOM = 11
os.mkdir(low_tile_path) USA_LOW_MIN_ZOOM = 0
USA_LOW_MAX_ZOOM = 7
# generate high mbtiles file # remove existing mbtiles file
logger.info("Generating USA High mbtiles file") remove_all_from_dir(score_tiles_path)
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --layer=blocks "
cmd += f"--output={high_tile_path}/usa_high.mbtiles "
cmd += str(score_geojson_dir / "usa-high.json")
call(cmd, shell=True)
# generate high mvts # create dirs
logger.info("Generating USA High mvt folders and files") os.mkdir(high_tile_path)
cmd = "tippecanoe " os.mkdir(low_tile_path)
cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --no-tile-compression "
cmd += "--drop-densest-as-needed "
cmd += f"--output-to-directory={high_tile_path} --layer=blocks "
cmd += str(score_geojson_dir / "usa-high.json")
call(cmd, shell=True)
# generate low mbtiles file # generate high mbtiles file
logger.info("Generating USA Low mbtiles file") logger.info("Generating USA High mbtiles file")
cmd = "tippecanoe " cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --layer=blocks " cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --layer=blocks "
cmd += f"--output={low_tile_path}/usa_low.mbtiles " cmd += f"--output={high_tile_path}/usa_high.mbtiles "
cmd += str(score_geojson_dir / "usa-low.json") cmd += str(score_geojson_dir / "usa-high.json")
call(cmd, shell=True) call(cmd, shell=True)
# generate low mvts # generate high mvts
logger.info("Generating USA Low mvt folders and files") logger.info("Generating USA High mvt folders and files")
cmd = "tippecanoe " cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --no-tile-compression " cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --no-tile-compression "
cmd += "--drop-densest-as-needed " cmd += "--drop-densest-as-needed "
cmd += f"--output-to-directory={low_tile_path} --layer=blocks " cmd += f"--output-to-directory={high_tile_path} --layer=blocks "
cmd += str(score_geojson_dir / "usa-low.json") cmd += str(score_geojson_dir / "usa-high.json")
call(cmd, shell=True) call(cmd, shell=True)
# generate low mbtiles file
logger.info("Generating USA Low mbtiles file")
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --layer=blocks "
cmd += f"--output={low_tile_path}/usa_low.mbtiles "
cmd += str(score_geojson_dir / "usa-low.json")
call(cmd, shell=True)
# generate low mvts
logger.info("Generating USA Low mvt folders and files")
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --no-tile-compression "
cmd += "--drop-densest-as-needed "
cmd += f"--output-to-directory={low_tile_path} --layer=blocks "
cmd += str(score_geojson_dir / "usa-low.json")
call(cmd, shell=True)
    def _generate_tribal_tiles() -> None:
        """Generates tribal layer tiles"""
        # Closes over ``data_path`` from the enclosing generate_tiles().
        tribal_tiles_path = data_path / "tribal" / "tiles"
        tribal_geojson_dir = data_path / "tribal" / "geojson"

        # remove existing mbtiles file
        remove_all_from_dir(tribal_tiles_path)

        # generate mbtiles file
        logger.info("Generating Tribal mbtiles file")
        cmd = "tippecanoe "
        # NOTE(review): layer name "blocks" is reused from the score tiles —
        # confirm the frontend expects the tribal layer under this name too.
        cmd += "--layer=blocks "
        cmd += f"--output={tribal_tiles_path}/usa.mbtiles "
        cmd += str(tribal_geojson_dir / "usa.json")
        call(cmd, shell=True)

        # generate mvts
        logger.info("Generating Tribal mvt folders and files")
        cmd = "tippecanoe "
        cmd += "--no-tile-compression "
        cmd += "--drop-densest-as-needed "
        cmd += f"--output-to-directory={tribal_tiles_path} --layer=blocks "
        cmd += str(tribal_geojson_dir / "usa.json")
        call(cmd, shell=True)

    # Exactly one tile set is produced per invocation: the tribal layer when
    # the flag is set, otherwise the score tiles.
    if generate_tribal_layer:
        _generate_tribal_tiles()
    else:
        _generate_score_tiles()