Fargate Serverless Workers for Census Data Enrichment and Tile Generation (#230)

* add basic infrastructure

* add cloudfront distribution

* WIP checkpoint

* add ecs cluster

* add conditions and route53 dns entry to cloudfront

* WIP check-in

* Added a raw execution mode for demo/testing

* Add pre-defined Task for ogr2ogr

* Tweak Task Definition name

* Mostly working except for logging error

* Add additional logging permissions

* Successfully executed ogr2ogr in Fargate. S3 permissions need to be addressed

* Add multipart permissions

* Add a few more actions

* Put IAM Policy on the correct resource

* Deploy lambda and update events

* fix iam permissions 🤦🏻‍♂️

* Add reference to Tippecanoe container

* Clean up to only use named actions

* Refactor resources to include support for tippecanoe

* Make a more interesting GDAL command

* Pull all ECS variables into environment file; successful test of running tippecanoe container

* Support pre/post commands

* Refactor codebase and enable linting

* Implement many-to-many enrichment between USDS CSV files and Census zipped shapefiles

* Change the GDAL image to one with the built-in drivers

* Add some additional fixes to support the enrichment use case

* Clean up old hello-world example

* Expand the README to include ways to execute the lambdas

* Validate scheduled lambda execution and then comment out

Co-authored-by: Tim Zwolak <timothypage@gmail.com>
This commit is contained in:
Lucas Scharenbroich 2021-06-30 08:29:01 -05:00 committed by GitHub
commit 38fff9cea8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 7271 additions and 0 deletions

View file

@ -0,0 +1,18 @@
{
"action": "enrichment",
"sourceBucketName": "j40-sit-justice40-data-harvester-data",
"sourceBucketPrefix": "usds/custom.csv",
"age": 86400,
"censusBucketName": "j40-sit-justice40-data-harvester-data",
"censusBucketPrefix": "census/tabblock2010_01_pophu.zip",
"pre": [
"wget https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/usds/${source.Key} -O /tmp/custom.csv"
],
"command": [
"--debug", "ON",
"-f", "GeoJSON",
"-sql", "SELECT * FROM ${census.Key:base} LEFT JOIN '/tmp/custom.csv'.custom ON ${census.Key:base}.BLOCKID10 = custom.BLOCKID10",
"/vsis3/j40-sit-justice40-data-harvester-data/joined/${source.Key:base}-${census.Key:base}.json",
"/vsizip//vsicurl/https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/census/${census.Key}"
]
}

View file

@ -0,0 +1,10 @@
{
"action": "gdal",
"command": [
"ogrinfo",
"-al",
"-so",
"-ro",
"/vsizip//vsicurl/https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/census/tabblock2010_01_pophu.zip"
]
}

View file

@ -0,0 +1,10 @@
{
"action": "ogr2ogr",
"command": [
"--debug", "ON",
"-f",
"GeoJSON",
"/vsis3/j40-sit-justice40-data-harvester-data/sources/tabblock2010_01_pophu.json",
"/vsizip//vsicurl/https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/census/tabblock2010_01_pophu.zip"
]
}

View file

@ -0,0 +1,24 @@
{
"action": "tippecanoe",
"pre": [
"curl https://gp-sit-tileservice-tile-cache.s3.amazonaws.com/usds/usa.csv -o /tmp/usa.csv",
"curl https://gp-sit-tileservice-tile-cache.s3.amazonaws.com/usds/tristate.mbtiles -o /tmp/tristate.mbtiles"
],
"post": [
"aws s3 cp /tmp/tl_2010_bg_with_data.mbtiles s3://j40-sit-justice40-data-harvester-data/output/tl_2010_bg_with_data.mbtiles",
"tile-join --force -pk -pC -n tl_2010_bg -e /tmp/tiles /tmp/tl_2010_bg_with_data.mbtiles",
"aws s3 sync /tmp/tiles s3://j40-sit-justice40-data-harvester-data/output/tiles"
],
"command": [
"tile-join",
"--force",
"-pk",
"-n",
"tl_2010_bg",
"-o",
"/tmp/tl_2010_bg_with_data.mbtiles",
"-c",
"/tmp/usa.csv",
"/tmp/tristate.mbtiles"
]
}