Run all Census, ETL, Score, Combine and Tilefy in one command (#662)

* Run all Census, ETL, Score, Combine and Tilefy in one command

* docker cmd

* some docker improvements

* feedback updates

* lint
This commit is contained in:
Jorge Escobar 2021-09-14 14:15:34 -04:00 committed by GitHub
commit 5bd63c083b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 13 deletions

View file

@ -1,3 +1,5 @@
from subprocess import call
import sys
import click
from data_pipeline.config import settings
@ -11,6 +13,7 @@ from data_pipeline.utils import (
get_module_logger,
score_folder_cleanup,
temp_folder_cleanup,
check_first_run,
)
logger = get_module_logger(__name__)
@ -34,17 +37,22 @@ def census_cleanup():
census_reset(data_path)
logger.info("Cleaned up all census data files")
sys.exit()
@cli.command(help="Clean up all data folders")
def data_cleanup():
    """CLI command that resets every data folder used by the pipeline.

    Resets the census data under ``<APP_ROOT>/data`` and then delegates to the
    data, score and temp folder cleanup helpers, in that order, before
    terminating the process.
    """
    census_reset(settings.APP_ROOT / "data")

    # The remaining helpers take no arguments; run them in the original order.
    for cleanup_step in (
        data_folder_cleanup,
        score_folder_cleanup,
        temp_folder_cleanup,
    ):
        cleanup_step()

    logger.info("Cleaned up all data folders")
    sys.exit()
@cli.command(
@ -63,6 +71,7 @@ def census_data_download():
etl_runner("census")
logger.info("Completed downloading census data")
sys.exit()
@cli.command(
@ -80,6 +89,7 @@ def etl_run(dataset: str):
"""
etl_runner(dataset)
sys.exit()
@cli.command(
@ -88,7 +98,9 @@ def etl_run(dataset: str):
def score_run():
"""CLI command to generate the score"""
score_folder_cleanup()
score_generate()
sys.exit()
@cli.command(
@ -102,6 +114,7 @@ def score_full_run():
temp_folder_cleanup()
etl_runner()
score_generate()
sys.exit()
@cli.command(help="Generate Geojson files with scores baked in")
@ -109,6 +122,7 @@ def geo_score():
"""CLI command to generate the score"""
score_geo()
sys.exit()
@cli.command(
@ -119,6 +133,62 @@ def generate_map_tiles():
data_path = settings.APP_ROOT / "data"
generate_tiles(data_path)
sys.exit()
@cli.command(
    help="Data Full Run (Census download, ETLs, score, combine and tile generation)",
)
@click.option(
    "-c",
    "--check",
    is_flag=True,
    help="Check if data run has been run before, and don't run it if so.",
)
def data_full_run(check):
    """CLI command to run ETL, score, JSON combine and generate tiles in one command

    Args:
        check (bool): Run the full data run only if the first run semaphore
            file is not set (optional)

    Returns:
        None. The process always terminates through ``sys.exit()``.
    """
    # Imported locally so this command does not depend on a module-level import.
    from pathlib import Path

    data_path = settings.APP_ROOT / "data"

    if check and not check_first_run():
        # check if the data full run has been run before
        logger.info("*** The data full run was already executed")
        sys.exit()

    # census directories
    logger.info("*** Initializing all data folders")
    census_reset(data_path)
    data_folder_cleanup()
    score_folder_cleanup()
    temp_folder_cleanup()

    logger.info("*** Downloading census data")
    etl_runner("census")

    logger.info("*** Running all ETLs")
    etl_runner()

    logger.info("*** Generating Score")
    score_generate()

    logger.info("*** Combining Score with Census Geojson")
    score_geo()

    logger.info("*** Generating Map Tiles")
    generate_tiles(data_path)

    # Create the semaphore file marking that a full run completed. Using
    # pathlib instead of shelling out to `touch` avoids shell quoting problems
    # with unusual paths, works on platforms without a `touch` binary, and
    # raises OSError on failure instead of silently ignoring it.
    Path(data_path, "first_run.txt").touch()

    logger.info("*** Map data ready")
    sys.exit()
if __name__ == "__main__":