mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-11 11:44:19 -07:00
ETL Classes for Data Sets (#260)
* first commit * checkpoint * checkpoint * first extract module 🎉 * completed census acs etl class * completed ejscreen etl * completed etl * score generation ready * improving census load and separation * score generation working 🎉 * completed etls * new score generation * PR reviews * run specific etl; starting docstrings * docstrings work * more docstrings * completed docstrings * adding pyenv version * more reasonable poetry req for python * PR comments
This commit is contained in:
parent
69ef32485c
commit
842312f69f
33 changed files with 2628 additions and 2872 deletions
|
@ -1,26 +1,31 @@
|
|||
from config import settings
|
||||
import click
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from config import settings
|
||||
from etl.sources.census.etl_utils import reset_data_directories as census_reset
|
||||
from utils import remove_files_from_dir, remove_all_from_dir, get_module_logger
|
||||
from utils import (
|
||||
get_module_logger,
|
||||
data_folder_cleanup,
|
||||
score_folder_cleanup,
|
||||
temp_folder_cleanup,
|
||||
)
|
||||
from etl.sources.census.etl import download_census_csvs
|
||||
from etl.runner import etl_runner, score_generate
|
||||
|
||||
|
||||
settings.APP_ROOT = Path.cwd()
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
@click.group()
def cli():
    """Defines a click group for the commands below"""
    # NOTE: no help= is passed to click.group(), so click uses the docstring
    # above as the group's --help text; keep its wording stable.
    # The body is intentionally empty: commands attach via @cli.command.
|
||||
|
||||
|
||||
@cli.command(
|
||||
help="Clean up all data folders",
|
||||
help="Clean up all census data folders",
|
||||
)
|
||||
def data_cleanup():
|
||||
def census_cleanup():
|
||||
"""CLI command to clean up the census data folder"""
|
||||
|
||||
data_path = settings.APP_ROOT / "data"
|
||||
|
||||
|
@ -28,32 +33,59 @@ def data_cleanup():
|
|||
logger.info(f"Initializing all census data")
|
||||
census_reset(data_path)
|
||||
|
||||
# dataset directory
|
||||
logger.info(f"Initializing all dataset directoriees")
|
||||
remove_all_from_dir(data_path / "dataset")
|
||||
logger.info("Cleaned up all census data files")
|
||||
|
||||
# score directory
|
||||
logger.info(f"Initializing all score data")
|
||||
remove_files_from_dir(data_path / "score" / "csv", ".csv")
|
||||
remove_files_from_dir(data_path / "score" / "geojson", ".json")
|
||||
|
||||
# cleanup tmp dir
|
||||
logger.info(f"Initializing all temp directoriees")
|
||||
remove_all_from_dir(data_path / "tmp")
|
||||
@cli.command(
    help="Clean up all data folders",
)
def data_cleanup():
    """CLI command to clean up all the data folders.

    Calls the data, score and temp folder cleanup helpers in that order and
    logs a completion message once all of them have returned.
    """

    data_folder_cleanup()
    score_folder_cleanup()
    temp_folder_cleanup()

    # Report completion only after every cleanup helper has run; logging
    # "Cleaned up ..." before the work starts would be misleading if one of
    # the helpers raised.
    logger.info("Cleaned up all data folders")
|
||||
|
||||
|
||||
@cli.command(
    help="Census data download",
)
def census_data_download():
    """CLI command to download the census data.

    Downloads all census shape files from the Census FTP and extracts the
    geojson to generate national and by state Census Block Group CSVs. The
    work is delegated to ``download_census_csvs`` using the ``data`` folder
    under ``settings.APP_ROOT``. The process exits once the download is done.
    """

    logger.info("Downloading census data")
    data_path = settings.APP_ROOT / "data"
    download_census_csvs(data_path)

    logger.info("Completed downloading census data")
    # Use sys.exit() rather than the bare exit() helper: exit() is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. `python -S` or frozen builds).
    sys.exit()
|
||||
|
||||
|
||||
@cli.command(help="Run all ETL processes or a specific one")
@click.option("-d", "--dataset", type=str, required=False)
def etl_run(dataset: str):
    """Hand the requested dataset over to the ETL runner.

    Args:
        dataset (str): Name of the ETL module to be run (optional); the
            value is forwarded unchanged to ``etl_runner``.

    Returns:
        None
    """
    etl_runner(dataset)
|
||||
|
||||
|
||||
@cli.command(help="Generate Score")
def score_run():
    """CLI command that triggers score generation via ``score_generate``."""
    score_generate()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue