mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 09:41:26 -08:00
Improve download retry logic
This commit is contained in:
parent
9e33932600
commit
d4898b8f55
4 changed files with 30 additions and 7 deletions
|
@@ -12,7 +12,8 @@ settings = Dynaconf(
|
||||||
# set root dir
|
# set root dir
|
||||||
settings.APP_ROOT = pathlib.Path(data_pipeline.__file__).resolve().parent
|
settings.APP_ROOT = pathlib.Path(data_pipeline.__file__).resolve().parent
|
||||||
settings.DATA_PATH = settings.APP_ROOT / "data"
|
settings.DATA_PATH = settings.APP_ROOT / "data"
|
||||||
settings.REQUESTS_DEFAULT_TIMOUT = 3600
|
settings.REQUESTS_DEFAULT_TIMOUT = 300
|
||||||
|
settings.REQUESTS_DEFAULT_RETRIES = 3
|
||||||
# To set an environment use:
|
# To set an environment use:
|
||||||
# Linux/OSX: export ENV_FOR_DYNACONF=staging
|
# Linux/OSX: export ENV_FOR_DYNACONF=staging
|
||||||
# Windows: set ENV_FOR_DYNACONF=staging
|
# Windows: set ENV_FOR_DYNACONF=staging
|
||||||
|
|
|
@@ -12,13 +12,26 @@ from tenacity import retry, stop_after_attempt, wait_exponential
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _log_retry_failure(retry_state):
|
||||||
|
logger.warning(
|
||||||
|
f"Failure downloading {retry_state.kwargs['file_url']}. Will retry."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Downloader:
|
class Downloader:
|
||||||
"""A simple class to encapsulate the download capabilities of the application"""
|
"""A simple class to encapsulate the download capabilities of the application"""
|
||||||
|
|
||||||
|
num_retries = (
|
||||||
|
settings.REQUEST_RETRIES
|
||||||
|
if "REQUEST_RETRIES" in settings
|
||||||
|
else settings.REQUESTS_DEFAULT_RETRIES
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(num_retries),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
|
before_sleep=_log_retry_failure,
|
||||||
)
|
)
|
||||||
def download_file_from_url(
|
def download_file_from_url(
|
||||||
cls,
|
cls,
|
||||||
|
@@ -43,9 +56,12 @@ class Downloader:
|
||||||
|
|
||||||
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
||||||
logger.debug(f"Downloading {file_url}")
|
logger.debug(f"Downloading {file_url}")
|
||||||
response = requests.get(
|
timeout = (
|
||||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
settings.REQUEST_TIMEOUT
|
||||||
|
if "REQUEST_TIMEOUT" in settings
|
||||||
|
else settings.REQUESTS_DEFAULT_TIMOUT
|
||||||
)
|
)
|
||||||
|
response = requests.get(file_url, verify=verify, timeout=timeout)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
file_contents = response.content
|
file_contents = response.content
|
||||||
logger.debug("Downloaded.")
|
logger.debug("Downloaded.")
|
||||||
|
@@ -64,8 +80,9 @@ class Downloader:
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@retry(
|
@retry(
|
||||||
stop=stop_after_attempt(3),
|
stop=stop_after_attempt(num_retries),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
|
before_sleep=_log_retry_failure,
|
||||||
)
|
)
|
||||||
def download_zip_file_from_url(
|
def download_zip_file_from_url(
|
||||||
cls,
|
cls,
|
||||||
|
|
|
@@ -147,9 +147,12 @@ def download_file_from_url(
|
||||||
if not os.path.isdir(download_file_name.parent):
|
if not os.path.isdir(download_file_name.parent):
|
||||||
os.mkdir(download_file_name.parent)
|
os.mkdir(download_file_name.parent)
|
||||||
|
|
||||||
response = requests.get(
|
timeout = (
|
||||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
settings.REQUEST_TIMEOUT
|
||||||
|
if "REQUEST_TIMEOUT" in settings
|
||||||
|
else settings.REQUESTS_DEFAULT_TIMOUT
|
||||||
)
|
)
|
||||||
|
response = requests.get(file_url, verify=verify, timeout=timeout)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
file_contents = response.content
|
file_contents = response.content
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -2,6 +2,8 @@
|
||||||
AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"
|
AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"
|
||||||
AWS_JUSTICE40_DATAPIPELINE_URL = "https://justice40-data.s3.amazonaws.com/data-versions/2.0"
|
AWS_JUSTICE40_DATAPIPELINE_URL = "https://justice40-data.s3.amazonaws.com/data-versions/2.0"
|
||||||
DATASOURCE_RETRIEVAL_FROM_AWS = true
|
DATASOURCE_RETRIEVAL_FROM_AWS = true
|
||||||
|
REQUEST_TIMEOUT = 120
|
||||||
|
REQUEST_RETRIES = 2
|
||||||
|
|
||||||
[development]
|
[development]
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue