Combine + Tilefy (#806)

* init

* score-post

* added score csv s3 download; remore poetry cmds from readme

* working census tile fetch

* PR review

* Github Actions Work
This commit is contained in:
Jorge Escobar 2021-11-01 18:05:05 -04:00 committed by GitHub
commit 1b17af84c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 560 additions and 371 deletions

View file

@ -25,6 +25,8 @@ from data_pipeline.utils import (
logger = get_module_logger(__name__)
dataset_cli_help = "Grab the data from either 'local' for local access or 'aws' to retrieve from Justice40 S3 repository"
@click.group()
def cli():
@ -93,7 +95,13 @@ def census_data_download(zip_compress):
@cli.command(
help="Run all ETL processes or a specific one",
)
@click.option("-d", "--dataset", required=False, type=str)
@click.option(
"-d",
"--dataset",
required=False,
type=str,
help=dataset_cli_help,
)
def etl_run(dataset: str):
"""Run a specific or all ETL processes
@ -134,9 +142,16 @@ def score_full_run():
@cli.command(help="Generate Geojson files with scores baked in")
@click.option("-d", "--data-source", default="local", required=False, type=str)
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def geo_score(data_source: str):
"""CLI command to generate the score
"""CLI command to combine score with GeoJSON data and generate low and high files
Args:
data_source (str): Source for the census data (optional)
@ -166,11 +181,29 @@ def generate_map_tiles():
@cli.command(
help="Run etl_score_post to create score csv, tile csv, and downloadable zip",
)
def generate_score_post():
"""CLI command to generate score, tile, and downloadable files"""
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def generate_score_post(data_source: str):
"""CLI command to generate score, tile, and downloadable files
Args:
data_source (str): Source for the census data (optional)
Options:
- local: fetch census and score data from the local data directory
- aws: fetch census and score from AWS S3 J40 data repository
Returns:
None
"""
downloadable_cleanup()
score_post()
score_post(data_source)
sys.exit()
@ -183,11 +216,23 @@ def generate_score_post():
is_flag=True,
help="Check if data run has been run before, and don't run it if so.",
)
def data_full_run(check):
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def data_full_run(check: bool, data_source: str):
"""CLI command to run ETL, score, JSON combine and generate tiles in one command
Args:
check (bool): Run the full data run only if the first run sempahore file is not set (optional)
data_source (str): Source for the census data (optional)
Options:
- local: fetch census and score data from the local data directory
- aws: fetch census and score from AWS S3 J40 data repository
Returns:
None
@ -206,8 +251,9 @@ def data_full_run(check):
score_folder_cleanup()
temp_folder_cleanup()
logger.info("*** Downloading census data")
etl_runner("census")
if data_source == "local":
logger.info("*** Downloading census data")
etl_runner("census")
logger.info("*** Running all ETLs")
etl_runner()
@ -215,8 +261,11 @@ def data_full_run(check):
logger.info("*** Generating Score")
score_generate()
logger.info("*** Running Post Score scripts")
score_post(data_source)
logger.info("*** Combining Score with Census Geojson")
score_geo()
score_geo(data_source)
logger.info("*** Generating Map Tiles")
generate_tiles(data_path)