From 7af92f575b99e4c43c481265ad3f2adb930290ec Mon Sep 17 00:00:00 2001 From: ericiwamoto <100735505+ericiwamoto@users.noreply.github.com> Date: Thu, 26 Dec 2024 08:35:22 -0800 Subject: [PATCH] Add retry logic to downloader --- .linkspector.yml | 4 ++++ QUICKSTART.md | 7 +++++-- data/data-pipeline/data_pipeline/etl/downloader.py | 9 +++++++++ data/data-pipeline/pyproject.toml | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 .linkspector.yml diff --git a/.linkspector.yml b/.linkspector.yml new file mode 100644 index 00000000..0202c8c1 --- /dev/null +++ b/.linkspector.yml @@ -0,0 +1,4 @@ +dirs: + - . +ignorePatterns: + - pattern: '^http://localhost.*$' \ No newline at end of file diff --git a/QUICKSTART.md b/QUICKSTART.md index 12cbc154..22f953ac 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -19,7 +19,10 @@ Use `docker compose` to run the application: ```sh $ docker compose up ``` +Docker compose will spin up three containers: A data pipeline container, a data server, and a web server. -> Note: This may take a while – possibly even an hour or two – since it has to build the containers and then download and process all the data. +The data pipeline container can run the entire data pipeline, or any individual step. By default it will simply display the options for the full pipeline run. To have it actually run the pipeline, remove the `, "--help"` from the `[command]` in the `docker-compose.yml` file before launch. Note that it can take an hour or more to run the full pipeline. Furthermore, the data container mounts your local repo directory to read and write files so if you've previously run the pipeline manually on your local system, your score and map tile files will get overwritten. -After it initializes, you should be able to open the application in your browser at [http://localhost:8000](http://localhost:8000). 
+The data server will make the files created by the data pipeline container available to the web server. + +The web server will run the application website. After it initializes, you should be able to open the web server in your browser at [http://localhost:8000](http://localhost:8000). \ No newline at end of file diff --git a/data/data-pipeline/data_pipeline/etl/downloader.py b/data/data-pipeline/data_pipeline/etl/downloader.py index fd0fec50..4cc4f83e 100644 --- a/data/data-pipeline/data_pipeline/etl/downloader.py +++ b/data/data-pipeline/data_pipeline/etl/downloader.py @@ -7,6 +7,7 @@ import shutil from pathlib import Path from data_pipeline.config import settings from data_pipeline.utils import get_module_logger +from tenacity import retry, stop_after_attempt, wait_exponential logger = get_module_logger(__name__) @@ -15,6 +16,10 @@ class Downloader: """A simple class to encapsulate the download capabilities of the application""" @classmethod + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=10), + ) def download_file_from_url( cls, file_url: str, @@ -58,6 +63,10 @@ class Downloader: return download_file_name @classmethod + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=10), + ) def download_zip_file_from_url( cls, file_url: str, diff --git a/data/data-pipeline/pyproject.toml b/data/data-pipeline/pyproject.toml index f7122078..47b676dd 100644 --- a/data/data-pipeline/pyproject.toml +++ b/data/data-pipeline/pyproject.toml @@ -41,6 +41,7 @@ xlsxwriter = "^2.0.0" pydantic = "^1.9.0" Rtree = "^1.0.0" fiona = "~1.8.21" +tenacity = ">=5.0.2" [tool.poetry.group.dev.dependencies] black = "^21"