User Story 2152 – Clean up logging (#2155)

Update logging messages and message consistency

This update changes the level at which many messages are logged. Rather than logging everything at the info level, it differentiates between debug, info, warning, and error messages. It also changes the default log level to info, which removes much of the noise that was previously in the logs.

It also removes many extra log messages and adds decorative title banners to the log output at the beginning of each pipeline run.
This commit is contained in:
Travis Newby 2023-02-08 13:08:55 -06:00 committed by GitHub
commit 03a6d3c660
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
63 changed files with 307 additions and 339 deletions

View file

@ -28,6 +28,8 @@ logger = get_module_logger(__name__)
dataset_cli_help = "Grab the data from either 'local' for local access or 'aws' to retrieve from Justice40 S3 repository"
LOG_LINE_WIDTH = 60
@click.group()
def cli():
@ -37,23 +39,26 @@ def cli():
@cli.command(help="Clean up all census data folders")
def census_cleanup():
"""CLI command to clean up the census data folder"""
log_title("Clean Up Census Data")
data_path = settings.APP_ROOT / "data"
# census directories
logger.info("Initializing all census data")
log_info("Cleaning up all census data")
census_reset(data_path)
logger.info("Cleaned up all census data files")
log_goodbye()
sys.exit()
@cli.command(help="Clean up all data folders")
def data_cleanup():
"""CLI command to clean up the all the data folders"""
log_title("Clean Up Data ")
data_path = settings.APP_ROOT / "data"
log_info("Cleaning up all data folders")
census_reset(data_path)
data_folder_cleanup()
tribal_reset(data_path)
@ -61,7 +66,7 @@ def data_cleanup():
temp_folder_cleanup()
geo_score_folder_cleanup()
logger.info("Cleaned up all data folders")
log_goodbye()
sys.exit()
@ -77,19 +82,19 @@ def data_cleanup():
def census_data_download(zip_compress):
"""CLI command to download all census shape files from the Census FTP and extract the geojson
to generate national and by state Census Block Group CSVs"""
logger.info("Initializing all census data")
log_title("Download Census Data ")
data_path = settings.APP_ROOT / "data"
census_reset(data_path)
logger.info("Downloading census data")
log_info("Downloading census data")
etl_runner("census")
if zip_compress:
log_info("Zipping census data")
zip_census_data()
logger.info("Completed downloading census data")
log_goodbye()
sys.exit()
@ -103,10 +108,14 @@ def census_data_download(zip_compress):
help=dataset_cli_help,
)
def pull_census_data(data_source: str):
logger.info("Pulling census data from %s", data_source)
log_title("Pull Census Data")
log_info(f"Pulling census data from {data_source}")
data_path = settings.APP_ROOT / "data" / "census"
check_census_data_source(data_path, data_source)
logger.info("Finished pulling census data")
log_goodbye()
sys.exit()
@ -129,8 +138,12 @@ def etl_run(dataset: str):
Returns:
None
"""
log_title("Run ETL")
log_info("Running dataset(s)")
etl_runner(dataset)
log_goodbye()
sys.exit()
@ -139,9 +152,15 @@ def etl_run(dataset: str):
)
def score_run():
"""CLI command to generate the score"""
log_title("Score", "Generate Score")
log_info("Cleaning up data folders")
score_folder_cleanup()
log_info("Generating score")
score_generate()
log_goodbye()
sys.exit()
@ -150,63 +169,25 @@ def score_run():
)
def score_full_run():
"""CLI command to run ETL and generate the score in one command"""
log_title("Score Full Run", "Run ETL and Generate Score (no tiles)")
log_info("Cleaning up data folders")
data_folder_cleanup()
score_folder_cleanup()
temp_folder_cleanup()
log_info("Running all ETLs")
etl_runner()
log_info("Generating score")
score_generate()
sys.exit()
@cli.command(help="Generate Geojson files with scores baked in")
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def geo_score(data_source: str):
"""CLI command to combine score with GeoJSON data and generate low and high files
Args:
data_source (str): Source for the census data (optional)
Options:
- local: fetch census and score data from the local data directory
- aws: fetch census and score from AWS S3 J40 data repository
Returns:
None
"""
geo_score_folder_cleanup()
score_geo(data_source=data_source)
log_goodbye()
sys.exit()
@cli.command(
help="Generate map tiles. Pass -t to generate tribal layer as well.",
)
@click.option(
"-t",
"--generate-tribal-layer",
default=False,
required=False,
is_flag=True,
type=bool,
)
def generate_map_tiles(generate_tribal_layer):
"""CLI command to generate the map tiles"""
data_path = settings.APP_ROOT / "data"
generate_tiles(data_path, generate_tribal_layer)
sys.exit()
@cli.command(
help="Run etl_score_post to create score csv, tile csv, and downloadable zip",
help="Run etl_score_post to create score csv, tile csv, and downloadable zip"
)
@click.option(
"-s",
@ -228,9 +209,74 @@ def generate_score_post(data_source: str):
Returns:
None
"""
log_title(
"Generate Score Post ", "Create Score CSV, Tile CSV, Downloadable ZIP"
)
log_info("Cleaning up downloadable folder")
downloadable_cleanup()
log_info("Running score post activities")
score_post(data_source)
log_goodbye()
sys.exit()
@cli.command(help="Generate GeoJSON files with scores baked in")
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def geo_score(data_source: str):
"""CLI command to combine score with GeoJSON data and generate low and high files
Args:
data_source (str): Source for the census data (optional)
Options:
- local: fetch census and score data from the local data directory
- aws: fetch census and score from AWS S3 J40 data repository
Returns:
None
"""
log_title("Generate GeoJSON", "Combine Score and GeoJSON")
log_info("Cleaning up geo score folder")
geo_score_folder_cleanup()
log_info("Combining score with GeoJSON")
score_geo(data_source=data_source)
log_goodbye()
sys.exit()
@cli.command(
help="Generate map tiles. Pass -t to generate tribal layer as well.",
)
@click.option(
"-t",
"--generate-tribal-layer",
default=False,
required=False,
is_flag=True,
type=bool,
)
def generate_map_tiles(generate_tribal_layer):
"""CLI command to generate the map tiles"""
log_title("Generate Map Tiles")
data_path = settings.APP_ROOT / "data"
log_info("Generating tiles")
generate_tiles(data_path, generate_tribal_layer)
log_goodbye()
sys.exit()
@ -264,49 +310,74 @@ def data_full_run(check: bool, data_source: str):
Returns:
None
"""
log_title("Full Run", "Census DL, ETL, Score, Combine, Generate Tiles")
data_path = settings.APP_ROOT / "data"
if check:
if not check_first_run():
# check if the data full run has been run before
logger.info("*** The data full run was already executed")
log_info("The data full run was already executed")
sys.exit()
else:
# census directories
logger.info("*** Initializing all data folders")
log_info("Cleaning up data folders")
census_reset(data_path)
data_folder_cleanup()
score_folder_cleanup()
temp_folder_cleanup()
if data_source == "local":
logger.info("*** Downloading census data")
log_info("Downloading census data")
etl_runner("census")
logger.info("*** Running all ETLs")
log_info("Running all ETLs")
etl_runner()
logger.info("*** Generating Score")
log_info("Generating score")
score_generate()
logger.info("*** Running Post Score scripts")
log_info("Running post score")
downloadable_cleanup()
score_post(data_source)
logger.info("*** Combining Score with Census Geojson")
log_info("Combining score with census GeoJSON")
score_geo(data_source)
logger.info("*** Generating Map Tiles")
log_info("Generating map tiles")
generate_tiles(data_path, True)
log_info("Completing pipeline")
file = "first_run.txt"
cmd = f"touch {data_path}/{file}"
call(cmd, shell=True)
logger.info("*** Map data ready")
log_goodbye()
sys.exit()
def log_title(title: str, subtitle: str = None):
    """Logs a title in our fancy title format

    The banner is a horizontal rule of LOG_LINE_WIDTH dashes, a blank line,
    the title, the subtitle (when given), a blank line, a closing rule, and
    a trailing blank line. Every line is logged at the info level.

    Args:
        title (str): Main heading of the banner
        subtitle (str): Secondary heading logged directly under the title
                        (optional); skipped when None or empty

    Returns:
        None
    """
    rule = "-" * LOG_LINE_WIDTH

    headings = [title]
    if subtitle:
        headings.append(subtitle)

    # Pass each line as a logger argument rather than pre-formatting it, so
    # formatting is deferred to the logging framework and any '%' inside a
    # title is emitted verbatim.
    for line in (rule, "", *headings, "", rule, ""):
        logger.info("%s", line)
def log_info(info: str):
    """Logs a general informational message

    The message is logged at the info level, prefixed with "- ".

    Args:
        info (str): Text of the message

    Returns:
        None
    """
    # Lazy %-style argument: the message is only formatted if the record is
    # actually emitted, and the text is inserted verbatim.
    logger.info("- %s", info)
def log_goodbye():
    """Logs the closing message that marks the end of a CLI command

    Returns:
        None
    """
    farewell = "- Finished. Bye!"
    logger.info(farewell)
if __name__ == "__main__":
cli()