mirror of https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-21 09:11:26 -08:00

Fix docker

This commit is contained in:
parent aa88249f37
commit 682b2d34a7

10 changed files with 111 additions and 54 deletions
@@ -14,10 +14,10 @@ Install [`docker`](https://docs.docker.com/get-docker/). See [Install Docker](IN
 > _Important_: To be able to run the entire application, you may need to increase the memory allocated for docker to at least 8096 MB. See [this post](https://stackoverflow.com/a/44533437) for more details.
 
-Use `docker-compose` to run the application:
+Use `docker compose` to run the application:
 
 ```sh
-$ docker-compose up
+$ docker compose up
 ```
 
 > Note: This may take a while – possibly even an hour or two – since it has to build the containers and then download and process all the data.
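This change swaps the standalone `docker-compose` v1 binary for the Compose v2 plugin that ships with current Docker releases; the commands are otherwise interchangeable. A minimal sketch of the v2 workflow, assuming the compose file sits at the repository root:

```sh
# Confirm the Compose v2 plugin is available (v1's `docker-compose` binary may be absent)
docker compose version

# Build the images and start every service defined in docker-compose.yml
docker compose up --build

# Or run detached instead, then follow the logs
docker compose up -d
docker compose logs -f
```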
@@ -1 +1,8 @@
-node_modules
+.git
+.gitignore
+*Dockerfile*
+*docker-compose*
+.cache
+public
+node_modules
+npm-debug.log
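Excluding `.git`, the client's build output (`.cache`, `public`), and `node_modules` keeps the Docker build context small and lets the image rely on its own install step. A quick sanity check (the tag name is illustrative):

```sh
# Build the client image; the context Docker transfers should now omit
# node_modules, .cache, public, and the Docker/compose files themselves
docker build -t j40_website ./client
```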
@@ -4,17 +4,14 @@ FROM node:14
 # this working directory
 WORKDIR /client
 
-# Copy the package.json and package_lock.json files from local to the docker image / container
-COPY package*.json ./
-
-# install all packages as a layer in the docker image / container
-RUN npm install
-
 # copy all local files from the working directory to the docker image/container; however, we must use
 # .dockerignore to ignore node_modules so that the image can use what was just installed in the
 # step above.
 COPY . .
 
+# install all packages as a layer in the docker image / container
+RUN npm ci
+
 ENV PORT=6000
 
 EXPOSE 6000
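`npm ci` installs exactly what `package-lock.json` specifies and fails on drift, which suits image builds better than `npm install`. A hedged sketch of building and smoke-testing the client image (the tag is illustrative; the Dockerfile EXPOSEs port 6000):

```sh
# Build the image from the client directory
docker build -t j40_website ./client

# Run it and publish the exposed port locally
docker run --rm -p 6000:6000 j40_website
```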
data/data-pipeline/.dockerignore (new file, 17 lines)
@@ -0,0 +1,17 @@
+./data_pipeline/data/census/csv/*
+./data_pipeline/data/census/geojson/*
+./data_pipeline/data/census/shp/*
+./data_pipeline/data/dataset/*
+./data_pipeline/data/score/csv/*
+./data_pipeline/data/score/downloadable/*
+./data_pipeline/data/score/geojson/*
+./data_pipeline/data/score/search/*
+./data_pipeline/data/score/shapefile/*
+./data_pipeline/data/score/tiles/*
+./data_pipeline/data/sources/*
+./data_pipeline/data/tmp/*
+./data_pipeline/data/tribal/csv/*
+./data_pipeline/data/tribal/geographic_data/*
+./data_pipeline/data/tribal/geojson/*
+./data_pipeline/data/tribal/tiles/*
@@ -1,4 +1,4 @@
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 
 ENV TZ=America/Los_Angeles
 
@@ -10,13 +10,13 @@ RUN apt-get update && TZ=America/Los_Angeles DEBIAN_FRONTEND=noninteractive apt-
     git \
     unzip \
     wget \
    python3-dev \
    python3-pip \
-    gdal-bin
+    software-properties-common \
+    libsqlite3-dev \
+    zlib1g-dev
 
-# tippeanoe
-RUN apt-get update
-RUN apt-get install -y software-properties-common libsqlite3-dev zlib1g-dev
-RUN apt-add-repository -y ppa:git-core/ppa
+# tippecanoe
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 RUN mkdir -p /tmp/tippecanoe-src && git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
 WORKDIR /tmp/tippecanoe-src
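The Dockerfile builds tippecanoe from source. For reference, the equivalent steps on a bare Ubuntu host look roughly like this (a sketch, assuming build-essential, libsqlite3-dev, and zlib1g-dev are already installed):

```sh
# Fetch the tippecanoe sources, matching the URL used in the Dockerfile
git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
cd /tmp/tippecanoe-src

# Compile and install the tile-generation binaries
make
sudo make install
```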
@@ -24,26 +24,35 @@ RUN /bin/sh -c make && make install
 
 ## gdal
 RUN add-apt-repository ppa:ubuntugis/ppa
-RUN apt-get -y install gdal-bin
+RUN apt-get -y install gdal-bin libgdal-dev
 
+# Install python3.10
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt install -y python3.10-dev
+RUN apt install -y python3-pip
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
+RUN update-alternatives --config python3
+
+# Copy all project files into the container
+COPY . /data-pipeline
+WORKDIR /data-pipeline
+
 # Python package installation using poetry. See:
 # https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
 ENV PYTHONFAULTHANDLER=1 \
     PYTHONUNBUFFERED=1 \
     PYTHONHASHSEED=random \
     PIP_NO_CACHE_DIR=off \
     PIP_DISABLE_PIP_VERSION_CHECK=on \
     PIP_DEFAULT_TIMEOUT=100 \
-    POETRY_VERSION=1.1.12
-
-WORKDIR /data-pipeline
-COPY . /data-pipeline
+    POETRY_VERSION=1.8.4
 
 RUN pip install "poetry==$POETRY_VERSION"
 RUN poetry config virtualenvs.create false \
     && poetry config virtualenvs.in-project false \
-    && poetry install --no-dev --no-interaction --no-ansi
+    && poetry install --only main --no-interaction --no-ansi
+RUN pip install openpyxl
 
-# Copy all project files into the container
-
-CMD python3 -m data_pipeline.application data-full-run --check -s aws
+# Default behavior is to output the options for "full-run". This prevents the entire pipeline from running unintentionally.
+ENTRYPOINT [ "poetry", "run", "python3", "-m", "data_pipeline.application"]
+CMD ["full-run", "--help"]
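Moving the CLI into ENTRYPOINT with a `--help` CMD means a bare `docker run` only prints usage, while any arguments become the subcommand. A sketch of the resulting behavior (the image tag is illustrative):

```sh
# No arguments: CMD supplies ["full-run", "--help"], so this just prints the options
docker run --rm j40_data_pipeline

# Arguments replace CMD but keep the ENTRYPOINT, so a real step can be selected
docker run --rm j40_data_pipeline etl-run --help
```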
@@ -88,10 +88,11 @@ def data_cleanup():
     log_info("Cleaning up all data folders")
     census_reset(data_path)
     data_folder_cleanup()
-    tribal_reset(data_path)
+    downloadable_cleanup()
     score_folder_cleanup()
     temp_folder_cleanup()
+    geo_score_folder_cleanup()
+    tribal_reset(data_path)
 
     log_goodbye()
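If click derives command names from these function names, `data_cleanup` would be exposed as `data-cleanup` (an assumption; the command name is not shown in this diff), letting the cleanup run on its own:

```sh
# Hypothetical invocation: wipe all generated data folders before a fresh run
poetry run python3 -m data_pipeline.application data-cleanup
```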
@@ -304,45 +305,67 @@ def data_full_run(check: bool, data_source: str, use_cache: bool):
     log_title("Full Run", "Census DL, ETL, Score, Combine, Generate Tiles")
 
     data_path = settings.APP_ROOT / "data"
+    first_run = False
 
     if check:
         if not check_first_run():
             # check if the data full run has been run before
-            log_info("The data full run was already executed")
-            sys.exit()
+            first_run = True
+
+    if first_run:
+        log_info("The data full run was already executed")
+        sys.exit()
 
     else:
-        # census directories
+        # Directory cleanup
         log_info("Cleaning up data folders")
         census_reset(data_path)
         data_folder_cleanup()
+        downloadable_cleanup()
         score_folder_cleanup()
+        geo_score_folder_cleanup()
         temp_folder_cleanup()
         tribal_reset(data_path)
 
     if data_source == "local":
         log_info("Downloading census data")
         etl_runner("census", use_cache)
 
         log_info("Running all ETLs")
-        etl_runner(use_cache=use_cache)
+        etl_runner(use_cache=True)
+
+        log_info("Running tribal ETL")
+        etl_runner("tribal", use_cache)
+
+    else:
+        log_info("Downloading census data")
+        etl_runner("census", use_cache=False)
+
+        log_info("Running all ETLs")
+        etl_runner(use_cache=False)
+
+        log_info("Running tribal ETL")
+        etl_runner("tribal", use_cache=False)
 
     log_info("Generating score")
     score_generate()
 
     log_info("Running post score")
+    downloadable_cleanup()
     score_post(data_source)
 
     log_info("Combining score with census GeoJSON")
     score_geo(data_source)
 
     log_info("Generating map tiles")
-    generate_tiles(data_path, True)
+    generate_tiles(data_path, False)
+
+    log_info("Generating tribal map tiles")
+    generate_tiles(data_path, True)
 
     log_info("Completing pipeline")
     file = "first_run.txt"
     cmd = f"touch {data_path}/{file}"
     call(cmd, shell=True)
 
     log_goodbye()
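The old CMD above shows the direct invocation shape, and the same run can still be triggered outside compose. A sketch (only `--check` and `-s` are attested by this diff; treat other flags as unknown):

```sh
# Skip the run if a first_run.txt marker already exists, sourcing data from AWS
poetry run python3 -m data_pipeline.application data-full-run --check -s aws

# Use locally downloaded sources instead (the branch that honors the ETL cache)
poetry run python3 -m data_pipeline.application data-full-run -s local
```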
@@ -427,6 +450,7 @@ def full_post_etl(ctx):
     ctx.invoke(generate_score_post, data_source=None)
     ctx.invoke(geo_score, data_source=None)
     ctx.invoke(generate_map_tiles, generate_tribal_layer=False)
+    ctx.invoke(generate_map_tiles, generate_tribal_layer=True)
 
 
 @cli.command(
@@ -440,6 +464,7 @@ def full_run(ctx, use_cache):
     ctx.invoke(data_cleanup)
     ctx.invoke(census_data_download, zip_compress=False, use_cache=use_cache)
     ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
+    ctx.invoke(etl_run, dataset="tribal", use_cache=use_cache)
     ctx.invoke(full_post_etl)
 
 
@@ -21,5 +21,9 @@ def reset_data_directories(
     )
 
     # geojson
-    geojson_path = tribal_data_path / "geojson"
+    geojson_path = tribal_data_path / "geographic_data"
     remove_all_from_dir(geojson_path)
+
+    # tiles
+    tiles_path = tribal_data_path / "tiles"
+    remove_all_from_dir(tiles_path)
@@ -218,6 +218,7 @@ def score_folder_cleanup() -> None:
     remove_all_from_dir(data_path / "score" / "geojson")
     remove_all_from_dir(data_path / "score" / "tiles")
     remove_all_from_dir(data_path / "score" / "shapefile")
+    remove_all_from_dir(data_path / "score" / "search")
     downloadable_cleanup()
 
 
@@ -1,4 +1,3 @@
-version: "3.4"
 services:
   # The j40_data_pipeline service runs the ETL pipeline to create the score
   score:
@@ -15,6 +14,9 @@ services:
       ENV_FOR_DYNACONF: development
       PYTHONUNBUFFERED: 1
       TZ: America/Los_Angeles
+      # The argument (and options) for the scoring step you want to run. Replace "full-run" with "etl-run" or whatever step you want to run
+      # To add arguments follow this example: command: ["generate-map-tiles", "--arg1", "value1", "--arg2", "value2"]
+      command: ["full-run", "--help"]
 
     # The score_server serves the data-pipeline volume as a URL
     j40_score_server:
@@ -23,26 +25,21 @@ services:
     build: data/data-serve/.
     volumes:
       - ./data/data-pipeline/data_pipeline/data/score:/data/data-pipeline/data_pipeline/data/score
+      - ./data/data-pipeline/data_pipeline/data/tribal:/data/data-pipeline/data_pipeline/data/tribal
     ports:
       - 5000:8080
+    environment:
+      TZ: America/Los_Angeles
 
   #The j40_website service runs the web app / map / site
   j40_website:
     image: j40_website
     container_name: j40_website_1
-    build: ./client
+    build: client
     environment:
       # See the client readme for more info on environment variables:
       # https://github.com/usds/justice40-tool/blob/main/client/README.md
       DATA_SOURCE: local
 
       # If you want the map to render a MapBox base map (as opposed to the
       # open source one from CartoDB), please create your own API TOKEN from
       # your MapBox account and add the token here:
       MAPBOX_STYLES_READ_TOKEN: ""
+      TZ: America/Los_Angeles
     volumes:
       - ./client/src:/client/src
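With the command baked into the compose file, switching pipeline steps no longer requires an image rebuild; following the comment in the service definition, a one-off override looks like this (a sketch):

```sh
# Run a different pipeline step as a one-off container for the score service
docker compose run --rm score etl-run --help

# Or start the whole stack with the default ["full-run", "--help"] command
docker compose up
```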