mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 01:31:25 -08:00
Improve download retry logic
This commit is contained in:
parent
9e33932600
commit
d4898b8f55
4 changed files with 30 additions and 7 deletions
|
@ -12,7 +12,8 @@ settings = Dynaconf(
|
|||
# set root dir
|
||||
settings.APP_ROOT = pathlib.Path(data_pipeline.__file__).resolve().parent
|
||||
settings.DATA_PATH = settings.APP_ROOT / "data"
|
||||
settings.REQUESTS_DEFAULT_TIMOUT = 3600
|
||||
settings.REQUESTS_DEFAULT_TIMOUT = 300
|
||||
settings.REQUESTS_DEFAULT_RETRIES = 3
|
||||
# To set an environment use:
|
||||
# Linux/OSX: export ENV_FOR_DYNACONF=staging
|
||||
# Windows: set ENV_FOR_DYNACONF=staging
|
||||
|
|
|
@ -12,13 +12,26 @@ from tenacity import retry, stop_after_attempt, wait_exponential
|
|||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
def _log_retry_failure(retry_state):
    """Log a warning that a download attempt failed and will be retried.

    Intended for use as the ``before_sleep`` callback of the tenacity
    ``@retry`` decorators in this module.

    Args:
        retry_state: the tenacity retry call state of the failed attempt. Its
            ``args`` / ``kwargs`` hold the arguments the wrapped function was
            called with, and ``fn`` is the wrapped function itself.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import inspect

    # ``kwargs`` only contains arguments that were passed by keyword, so
    # indexing it directly raises KeyError when ``file_url`` was passed
    # positionally. An exception raised here would escape the retry loop and
    # mask the original download error, so this lookup must never raise.
    file_url = retry_state.kwargs.get("file_url")

    wrapped_fn = getattr(retry_state, "fn", None)
    if file_url is None and wrapped_fn is not None:
        # Map positional arguments back to parameter names so the URL can be
        # found regardless of how the caller supplied it.
        try:
            bound = inspect.signature(wrapped_fn).bind_partial(
                *retry_state.args, **retry_state.kwargs
            )
        except (TypeError, ValueError):
            bound = None
        if bound is not None:
            file_url = bound.arguments.get("file_url")

    if file_url is None:
        file_url = "<unknown URL>"

    logger.warning(f"Failure downloading {file_url}. Will retry.")
|
||||
|
||||
|
||||
class Downloader:
|
||||
"""A simple class to encapsulate the download capabilities of the application"""
|
||||
|
||||
num_retries = (
|
||||
settings.REQUEST_RETRIES
|
||||
if "REQUEST_RETRIES" in settings
|
||||
else settings.REQUESTS_DEFAULT_RETRIES
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
stop=stop_after_attempt(num_retries),
|
||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||
before_sleep=_log_retry_failure,
|
||||
)
|
||||
def download_file_from_url(
|
||||
cls,
|
||||
|
@ -43,9 +56,12 @@ class Downloader:
|
|||
|
||||
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
||||
logger.debug(f"Downloading {file_url}")
|
||||
response = requests.get(
|
||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
||||
timeout = (
|
||||
settings.REQUEST_TIMEOUT
|
||||
if "REQUEST_TIMEOUT" in settings
|
||||
else settings.REQUESTS_DEFAULT_TIMOUT
|
||||
)
|
||||
response = requests.get(file_url, verify=verify, timeout=timeout)
|
||||
if response.status_code == 200:
|
||||
file_contents = response.content
|
||||
logger.debug("Downloaded.")
|
||||
|
@ -64,8 +80,9 @@ class Downloader:
|
|||
|
||||
@classmethod
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
stop=stop_after_attempt(num_retries),
|
||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||
before_sleep=_log_retry_failure,
|
||||
)
|
||||
def download_zip_file_from_url(
|
||||
cls,
|
||||
|
|
|
@ -147,9 +147,12 @@ def download_file_from_url(
|
|||
if not os.path.isdir(download_file_name.parent):
|
||||
os.mkdir(download_file_name.parent)
|
||||
|
||||
response = requests.get(
|
||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
||||
timeout = (
|
||||
settings.REQUEST_TIMEOUT
|
||||
if "REQUEST_TIMEOUT" in settings
|
||||
else settings.REQUESTS_DEFAULT_TIMOUT
|
||||
)
|
||||
response = requests.get(file_url, verify=verify, timeout=timeout)
|
||||
if response.status_code == 200:
|
||||
file_contents = response.content
|
||||
else:
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"
|
||||
AWS_JUSTICE40_DATAPIPELINE_URL = "https://justice40-data.s3.amazonaws.com/data-versions/2.0"
|
||||
DATASOURCE_RETRIEVAL_FROM_AWS = true
|
||||
REQUEST_TIMEOUT = 120
|
||||
REQUEST_RETRIES = 2
|
||||
|
||||
[development]
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue