mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-20 08:41:26 -08:00
Docker update to set pipeline commands as a variable
This commit is contained in:
parent
739c6f9306
commit
be6d532851
4 changed files with 13 additions and 15 deletions
|
@ -14,15 +14,15 @@ Install [`docker`](https://docs.docker.com/get-docker/). See [Install Docker](IN
|
|||
|
||||
> _Important_: To be able to run the entire application, you may need to increase the memory allocated for docker to at least 8096 MB. See [this post](https://stackoverflow.com/a/44533437) for more details.
|
||||
|
||||
Use `docker compose` to run the application:
|
||||
Use [`docker compose`](https://docs.docker.com/compose/) to run the full application:
|
||||
|
||||
```sh
|
||||
$ docker compose up
|
||||
$ PIPELINE_CMD="data_pipeline.application full-run" docker compose up
|
||||
```
|
||||
Docker compose will spin up three containers: A data pipeline container, a data server, and a web server.
|
||||
The above command will build and spin up three containers: A data pipeline container, a data server, and a web server.
|
||||
|
||||
The data pipeline container can run the entire data pipeline, or any individual step. By default it will simply display the options for the full pipeline run. To have it actually run the pipeline, remove the `, "--help"` from the `[command]` in the `docker-compose.yml` file before launch. Note that it can take an hour or more to run the full pipeline. Furthermore, the data container mounts your local repo directory to read and write files so if you've previously run the pipeline manually on your local system, your score and map tile files will get overwritten.
|
||||
The data pipeline container can run the entire data pipeline, or any individual step. Because running the entire pipeline is a time-consuming process, the application command has been turned into a variable so individual parts of the pipeline can be run by docker compose. Once the full-run has been completed, you can change the PIPELINE_CMD environment variable to any other valid parameter for future runs. For example, setting `PIPELINE_CMD="full-run --help"` would show the options for the full-run command. This would be helpful if you didn't want to run the data pipeline but merely wanted to see front-end changes.
|
||||
|
||||
The data server will make the files created by the data pipeline container available to the web server.
|
||||
The data server will make the files created by the data pipeline container available to the web server. The data pipeline container mounts the local repo directories to read and write files. The data server presents the local files to the web server to render the map and downloadables.
|
||||
|
||||
The web server will run the application website. After it initializes, you should be able to open the web server in your browser at [http://localhost:8000](http://localhost:8000).
|
||||
The web server will run the application website. After it initializes, you should be able to open the web server in your browser at [`http://localhost:8000`](http://localhost:8000). If the data pipeline container is set to run the full data pipeline, the website will not pick up the changes until the pipeline completes.
|
|
@ -53,6 +53,6 @@ RUN poetry config virtualenvs.create false \
|
|||
&& poetry install --only main --no-interaction --no-ansi
|
||||
RUN pip install openpyxl
|
||||
|
||||
# Default behavior is to output the options for "full-run". This prevents the entire pipeline from running unintentionally.
|
||||
ENTRYPOINT [ "poetry", "run", "python3", "-m", "data_pipeline.application"]
|
||||
CMD ["full-run", "--help"]
|
||||
# Default behavior is to output the options for the base application. This prevents the entire pipeline from running unintentionally.
|
||||
ENV PIPELINE_CMD="data_pipeline.application --help"
|
||||
CMD ["sh", "-c", "poetry run python3 -m $PIPELINE_CMD"]
|
2
data/data-pipeline/poetry.lock
generated
2
data/data-pipeline/poetry.lock
generated
|
@ -5053,4 +5053,4 @@ test = ["mypy", "pre-commit", "pytest", "pytest-asyncio", "websockets (>=10.0)"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "04639d2eaf33218ba4fef190f76620b00fb2285d86d58458511d85dafd304658"
|
||||
content-hash = "3b59246d964a1dd70669a6b612fbd4d8581a22959e3b6f8e3b542d102e125903"
|
||||
|
|
|
@ -14,9 +14,7 @@ services:
|
|||
ENV_FOR_DYNACONF: development
|
||||
PYTHONUNBUFFERED: 1
|
||||
TZ: America/Los_Angeles
|
||||
# The argument (and options) for the scoring step you want to run. Replace "full-run" with "etl-run" or whatever step you want to run
|
||||
# To add arguments follow this example: command: ["generate-map-tiles", "--arg1", "value1", "--arg2", "value2"]
|
||||
command: ["full-run", "--help"]
|
||||
PIPELINE_CMD: $PIPELINE_CMD
|
||||
|
||||
# The score_server serves the data-pipeline volume as a URL
|
||||
j40_score_server:
|
||||
|
@ -37,8 +35,8 @@ services:
|
|||
container_name: j40_website_1
|
||||
build: client
|
||||
environment:
|
||||
# See the client readme for more info on environment variables:
|
||||
# https://github.com/usds/justice40-tool/blob/main/client/README.md
|
||||
# See the client readme for more info on environment variables:
|
||||
# https://github.com/usds/justice40-tool/blob/main/client/README.md
|
||||
DATA_SOURCE: local
|
||||
TZ: America/Los_Angeles
|
||||
volumes:
|
||||
|
|
Loading…
Add table
Reference in a new issue