# Lambda function definition. Per its description it scans an S3 bucket
# (with prefix) for recently changed items and sends them to ECS tasks.
DetectChangesForWorker:
  # Identity
  name: '${self:provider.stage}-DetectChangesForWorker'
  description: >-
    Scans an S3 bucket (with prefix) for items that have changes recently
    and sends them to ECS Tasks for processing

  # Code
  handler: functions/detect-changes-for-worker/index.handler
  # NOTE(review): nodejs12.x has reached end of support on AWS Lambda. Confirm
  # the handler's dependencies before moving to a currently supported runtime.
  runtime: nodejs12.x

  # Sizing
  memorySize: 512
  timeout: 900

  # Values exposed to the handler as environment variables.
  environment:
    REGION: '${self:provider.region}'
    STAGE: '${self:provider.stage}'
    # Resolved from the ECSCluster resource declared elsewhere in the stack.
    ECS_CLUSTER: !Ref ECSCluster
    # Imported from a stage-specific CloudFormation export.
    VPC_SUBNET_ID:
      Fn::ImportValue: '${self:provider.stage}-PrivateSubnetOne'
    # Task/container definition names for the two worker kinds.
    GDAL_TASK_DEFINITION: '${self:custom.environment.GDAL_TASK_DEFINITION_NAME}'
    GDAL_CONTAINER_DEFINITION: '${self:custom.environment.GDAL_CONTAINER_DEFINITION_NAME}'
    TIPPECANOE_TASK_DEFINITION: '${self:custom.environment.TIPPECANOE_TASK_DEFINITION_NAME}'
    TIPPECANOE_CONTAINER_DEFINITION: '${self:custom.environment.TIPPECANOE_CONTAINER_DEFINITION_NAME}'

  # The ECS Tasks can be kicked off by invoking the lambda on a schedule. This
  # can provide the ability to do nightly refreshes of the data.
  #
  # The sample below is intentionally left disabled. In the Fn::Join entries
  # the "$" is kept apart from the "{...}" placeholder, presumably so that
  # Serverless does not try to resolve "${...}" as one of its own variables
  # (TODO confirm).
  #
  # events:
  #   - schedule:
  #       rate: cron(*/2 * * * ? *) # Fire every 2 minutes
  #       input:
  #         action: "gdal"
  #         command:
  #           - "ogrinfo"
  #           - "-al"
  #           - "-so"
  #           - "-ro"
  #           - "/vsizip//vsicurl/https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/census/tabblock2010_01_pophu.zip"
  #   - schedule:
  #       rate: cron(0 5 * * ? *) # Scan for updated data at Midnight Eastern Time (05:00 UTC)
  #       input:
  #         action: enrichment
  #         sourceBucketName: !Ref DataBucket
  #         sourceBucketPrefix: usds/custom.csv
  #         age: 86400 # Seconds
  #         censusBucketName: j40-sit-justice40-data-harvester-data
  #         censusBucketPrefix: census/tabblock2010_01_pophu.zip
  #         pre:
  #           - Fn::Join: ['', ["wget https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/usds/$", "{source.Key} -O /tmp/custom.csv"]]
  #         command:
  #           - "-f"
  #           - "GeoJSON"
  #           - "-sql"
  #           - Fn::Join: ['', ["SELECT * FROM $", "{census.Key:base} LEFT JOIN '/tmp/custom.csv'.custom ON $", "{census.Key:base}.BLOCKID10 = custom.BLOCKID10"]]
  #           - Fn::Join: ['', ["/vsis3/j40-sit-justice40-data-harvester-data/joined/$", "{source.Key:base}-$", "{census.Key:base}.json"]]
  #           - Fn::Join: ['', ["/vsizip//vsicurl/https://j40-sit-justice40-data-harvester-data.s3.amazonaws.com/census/$", "{census.Key}"]]
  #   - schedule:
  #       rate: cron(0 7 * * ? *) # Run two hours after generating any GeoJSON
  #       input:
  #         action: tippecanoe
  #         pre:
  #           - "curl https://gp-sit-tileservice-tile-cache.s3.amazonaws.com/usds/usa.csv -o /tmp/usa.csv"
  #           - "curl https://gp-sit-tileservice-tile-cache.s3.amazonaws.com/usds/tristate.mbtiles -o /tmp/tristate.mbtiles"
  #         post:
  #           - "aws s3 cp /tmp/tl_2010_bg_with_data.mbtiles s3://j40-sit-justice40-data-harvester-data/output/tl_2010_bg_with_data.mbtiles"
  #           - "tile-join --force -pk -pC -n tl_2010_bg -e /tmp/tiles /tmp/tl_2010_bg_with_data.mbtiles"
  #           - "aws s3 sync /tmp/tiles s3://j40-sit-justice40-data-harvester-data/output/tiles"
  #         command:
  #           - "tile-join"
  #           - "--force"
  #           - "-pk"
  #           - "-n"
  #           - "tl_2010_bg"
  #           - "-o"
  #           - "/tmp/tl_2010_bg_with_data.mbtiles"
  #           - "-c"
  #           - "/tmp/usa.csv"
  #           - "/tmp/tristate.mbtiles"