mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 14:11:17 -07:00
Fix docker
This commit is contained in:
parent
aa88249f37
commit
682b2d34a7
10 changed files with 111 additions and 54 deletions
|
@ -14,10 +14,10 @@ Install [`docker`](https://docs.docker.com/get-docker/). See [Install Docker](IN
|
||||||
|
|
||||||
> _Important_: To be able to run the entire application, you may need to increase the memory allocated for docker to at least 8096 MB. See [this post](https://stackoverflow.com/a/44533437) for more details.
|
> _Important_: To be able to run the entire application, you may need to increase the memory allocated for docker to at least 8096 MB. See [this post](https://stackoverflow.com/a/44533437) for more details.
|
||||||
|
|
||||||
Use `docker-compose` to run the application:
|
Use `docker compose` to run the application:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ docker-compose up
|
$ docker compose up
|
||||||
```
|
```
|
||||||
|
|
||||||
> Note: This may take a while – possibly even an hour or two – since it has to build the containers and then download and process all the data.
|
> Note: This may take a while – possibly even an hour or two – since it has to build the containers and then download and process all the data.
|
||||||
|
|
|
@ -1 +1,8 @@
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
*Dockerfile*
|
||||||
|
*docker-compose*
|
||||||
|
.cache
|
||||||
|
public
|
||||||
node_modules
|
node_modules
|
||||||
|
npm-debug.log
|
|
@ -4,17 +4,14 @@ FROM node:14
|
||||||
# this working directory
|
# this working directory
|
||||||
WORKDIR /client
|
WORKDIR /client
|
||||||
|
|
||||||
# Copy the package.json and package_lock.json files from local to the docker image / container
|
|
||||||
COPY package*.json ./
|
|
||||||
|
|
||||||
# install all packages as a layer in the docker image / container
|
|
||||||
RUN npm install
|
|
||||||
|
|
||||||
# copy all local files from the working directory to the docker image/container however we must use
|
# copy all local files from the working directory to the docker image/container however we must use
|
||||||
# dockerignore to ignore node_modules so that the image can use what was just installed from the above
|
# dockerignore to ignore node_modules so that the image can use what was just installed from the above
|
||||||
# step.
|
# step.
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
# install all packages as a layer in the docker image / container
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
ENV PORT=6000
|
ENV PORT=6000
|
||||||
|
|
||||||
EXPOSE 6000
|
EXPOSE 6000
|
||||||
|
|
17
data/data-pipeline/.dockerignore
Normal file
17
data/data-pipeline/.dockerignore
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
./data_pipeline/data/census/csv/*
|
||||||
|
./data_pipeline/data/census/geojson/*
|
||||||
|
./data_pipeline/data/census/shp/*
|
||||||
|
./data_pipeline/data/dataset/*
|
||||||
|
./data_pipeline/data/score/csv/*
|
||||||
|
./data_pipeline/data/score/downloadable/*
|
||||||
|
./data_pipeline/data/score/geojson/*
|
||||||
|
./data_pipeline/data/score/search/*
|
||||||
|
./data_pipeline/data/score/shapefile/*
|
||||||
|
./data_pipeline/data/score/tiles/*
|
||||||
|
./data_pipeline/data/sources/*
|
||||||
|
./data_pipeline/data/tmp/*
|
||||||
|
./data_pipeline/data/tribal/csv/*
|
||||||
|
./data_pipeline/data/tribal/geographic_data/*
|
||||||
|
./data_pipeline/data/tribal/geojson/*
|
||||||
|
./data_pipeline/data/tribal/tiles/*
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
FROM ubuntu:20.04
|
FROM ubuntu:22.04
|
||||||
|
|
||||||
ENV TZ=America/Los_Angeles
|
ENV TZ=America/Los_Angeles
|
||||||
|
|
||||||
|
@ -10,13 +10,13 @@ RUN apt-get update && TZ=America/Los_Angeles DEBIAN_FRONTEND=noninteractive apt-
|
||||||
git \
|
git \
|
||||||
unzip \
|
unzip \
|
||||||
wget \
|
wget \
|
||||||
python3-dev \
|
software-properties-common \
|
||||||
python3-pip \
|
libsqlite3-dev \
|
||||||
gdal-bin
|
zlib1g-dev
|
||||||
|
|
||||||
# tippeanoe
|
# tippecanoe
|
||||||
|
RUN apt-get update
|
||||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||||
RUN apt-get install -y software-properties-common libsqlite3-dev zlib1g-dev
|
|
||||||
RUN apt-add-repository -y ppa:git-core/ppa
|
RUN apt-add-repository -y ppa:git-core/ppa
|
||||||
RUN mkdir -p /tmp/tippecanoe-src && git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
|
RUN mkdir -p /tmp/tippecanoe-src && git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
|
||||||
WORKDIR /tmp/tippecanoe-src
|
WORKDIR /tmp/tippecanoe-src
|
||||||
|
@ -24,26 +24,35 @@ RUN /bin/sh -c make && make install
|
||||||
|
|
||||||
## gdal
|
## gdal
|
||||||
RUN add-apt-repository ppa:ubuntugis/ppa
|
RUN add-apt-repository ppa:ubuntugis/ppa
|
||||||
RUN apt-get -y install gdal-bin
|
RUN apt-get -y install gdal-bin libgdal-dev
|
||||||
|
|
||||||
|
# Install python3.10
|
||||||
|
RUN add-apt-repository ppa:deadsnakes/ppa
|
||||||
|
RUN apt install -y python3.10-dev
|
||||||
|
RUN apt install -y python3-pip
|
||||||
|
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
|
||||||
|
RUN update-alternatives --config python3
|
||||||
|
|
||||||
|
# Copy all project files into the container
|
||||||
|
COPY . /data-pipeline
|
||||||
|
WORKDIR /data-pipeline
|
||||||
|
|
||||||
# Python package installation using poetry. See:
|
# Python package installation using poetry. See:
|
||||||
# https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
|
# https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
|
||||||
ENV PYTHONFAULTHANDLER=1 \
|
ENV PYTHONFAULTHANDLER=1 \
|
||||||
PYTHONUNBUFFERED=1 \
|
PYTHONUNBUFFERED=1 \
|
||||||
PYTHONHASHSEED=random \
|
PYTHONHASHSEED=random \
|
||||||
PIP_NO_CACHE_DIR=off \
|
PIP_NO_CACHE_DIR=off \
|
||||||
PIP_DISABLE_PIP_VERSION_CHECK=on \
|
PIP_DISABLE_PIP_VERSION_CHECK=on \
|
||||||
PIP_DEFAULT_TIMEOUT=100 \
|
PIP_DEFAULT_TIMEOUT=100 \
|
||||||
POETRY_VERSION=1.1.12
|
POETRY_VERSION=1.8.4
|
||||||
|
|
||||||
WORKDIR /data-pipeline
|
|
||||||
COPY . /data-pipeline
|
|
||||||
|
|
||||||
RUN pip install "poetry==$POETRY_VERSION"
|
RUN pip install "poetry==$POETRY_VERSION"
|
||||||
RUN poetry config virtualenvs.create false \
|
RUN poetry config virtualenvs.create false \
|
||||||
&& poetry config virtualenvs.in-project false \
|
&& poetry config virtualenvs.in-project false \
|
||||||
&& poetry install --no-dev --no-interaction --no-ansi
|
&& poetry install --only main --no-interaction --no-ansi
|
||||||
|
RUN pip install openpyxl
|
||||||
|
|
||||||
# Copy all project files into the container
|
# Default behavior is to output the options for "full-run". This prevents the entire pipeline from running unintentionally.
|
||||||
|
ENTRYPOINT [ "poetry", "run", "python3", "-m", "data_pipeline.application"]
|
||||||
CMD python3 -m data_pipeline.application data-full-run --check -s aws
|
CMD ["full-run", "--help"]
|
|
@ -88,10 +88,11 @@ def data_cleanup():
|
||||||
log_info("Cleaning up all data folders")
|
log_info("Cleaning up all data folders")
|
||||||
census_reset(data_path)
|
census_reset(data_path)
|
||||||
data_folder_cleanup()
|
data_folder_cleanup()
|
||||||
tribal_reset(data_path)
|
downloadable_cleanup()
|
||||||
score_folder_cleanup()
|
score_folder_cleanup()
|
||||||
temp_folder_cleanup()
|
temp_folder_cleanup()
|
||||||
geo_score_folder_cleanup()
|
geo_score_folder_cleanup()
|
||||||
|
tribal_reset(data_path)
|
||||||
|
|
||||||
log_goodbye()
|
log_goodbye()
|
||||||
|
|
||||||
|
@ -304,45 +305,67 @@ def data_full_run(check: bool, data_source: str, use_cache: bool):
|
||||||
log_title("Full Run", "Census DL, ETL, Score, Combine, Generate Tiles")
|
log_title("Full Run", "Census DL, ETL, Score, Combine, Generate Tiles")
|
||||||
|
|
||||||
data_path = settings.APP_ROOT / "data"
|
data_path = settings.APP_ROOT / "data"
|
||||||
|
first_run = False
|
||||||
|
|
||||||
if check:
|
if check:
|
||||||
if not check_first_run():
|
if not check_first_run():
|
||||||
# check if the data full run has been run before
|
# check if the data full run has been run before
|
||||||
log_info("The data full run was already executed")
|
first_run = True
|
||||||
sys.exit()
|
|
||||||
|
if first_run:
|
||||||
|
log_info("The data full run was already executed")
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# census directories
|
# Directory cleanup
|
||||||
log_info("Cleaning up data folders")
|
log_info("Cleaning up data folders")
|
||||||
census_reset(data_path)
|
census_reset(data_path)
|
||||||
data_folder_cleanup()
|
data_folder_cleanup()
|
||||||
|
downloadable_cleanup()
|
||||||
score_folder_cleanup()
|
score_folder_cleanup()
|
||||||
|
geo_score_folder_cleanup()
|
||||||
temp_folder_cleanup()
|
temp_folder_cleanup()
|
||||||
|
tribal_reset(data_path)
|
||||||
|
|
||||||
if data_source == "local":
|
if data_source == "local":
|
||||||
log_info("Downloading census data")
|
log_info("Downloading census data")
|
||||||
etl_runner("census", use_cache)
|
etl_runner("census", use_cache)
|
||||||
|
|
||||||
log_info("Running all ETLs")
|
log_info("Running all ETLs")
|
||||||
etl_runner(use_cache=use_cache)
|
etl_runner(use_cache=True)
|
||||||
|
|
||||||
|
log_info("Running tribal ETL")
|
||||||
|
etl_runner("tribal", use_cache)
|
||||||
|
|
||||||
|
else:
|
||||||
|
log_info("Downloading census data")
|
||||||
|
etl_runner("census", use_cache=False)
|
||||||
|
|
||||||
|
log_info("Running all ETLs")
|
||||||
|
etl_runner(use_cache=False)
|
||||||
|
|
||||||
|
log_info("Running tribal ETL")
|
||||||
|
etl_runner("tribal", use_cache=False)
|
||||||
|
|
||||||
log_info("Generating score")
|
log_info("Generating score")
|
||||||
score_generate()
|
score_generate()
|
||||||
|
|
||||||
log_info("Running post score")
|
log_info("Running post score")
|
||||||
downloadable_cleanup()
|
|
||||||
score_post(data_source)
|
score_post(data_source)
|
||||||
|
|
||||||
log_info("Combining score with census GeoJSON")
|
log_info("Combining score with census GeoJSON")
|
||||||
score_geo(data_source)
|
score_geo(data_source)
|
||||||
|
|
||||||
log_info("Generating map tiles")
|
log_info("Generating map tiles")
|
||||||
generate_tiles(data_path, True)
|
generate_tiles(data_path, False)
|
||||||
|
|
||||||
log_info("Completing pipeline")
|
log_info("Generating tribal map tiles")
|
||||||
file = "first_run.txt"
|
generate_tiles(data_path, True)
|
||||||
cmd = f"touch {data_path}/{file}"
|
|
||||||
call(cmd, shell=True)
|
log_info("Completing pipeline")
|
||||||
|
file = "first_run.txt"
|
||||||
|
cmd = f"touch {data_path}/{file}"
|
||||||
|
call(cmd, shell=True)
|
||||||
|
|
||||||
log_goodbye()
|
log_goodbye()
|
||||||
|
|
||||||
|
@ -427,6 +450,7 @@ def full_post_etl(ctx):
|
||||||
ctx.invoke(generate_score_post, data_source=None)
|
ctx.invoke(generate_score_post, data_source=None)
|
||||||
ctx.invoke(geo_score, data_source=None)
|
ctx.invoke(geo_score, data_source=None)
|
||||||
ctx.invoke(generate_map_tiles, generate_tribal_layer=False)
|
ctx.invoke(generate_map_tiles, generate_tribal_layer=False)
|
||||||
|
ctx.invoke(generate_map_tiles, generate_tribal_layer=True)
|
||||||
|
|
||||||
|
|
||||||
@cli.command(
|
@cli.command(
|
||||||
|
@ -440,6 +464,7 @@ def full_run(ctx, use_cache):
|
||||||
ctx.invoke(data_cleanup)
|
ctx.invoke(data_cleanup)
|
||||||
ctx.invoke(census_data_download, zip_compress=False, use_cache=use_cache)
|
ctx.invoke(census_data_download, zip_compress=False, use_cache=use_cache)
|
||||||
ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
|
ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
|
||||||
|
ctx.invoke(etl_run, dataset="tribal", use_cache=use_cache)
|
||||||
ctx.invoke(full_post_etl)
|
ctx.invoke(full_post_etl)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,5 +21,9 @@ def reset_data_directories(
|
||||||
)
|
)
|
||||||
|
|
||||||
# geojson
|
# geojson
|
||||||
geojson_path = tribal_data_path / "geojson"
|
geojson_path = tribal_data_path / "geographic_data"
|
||||||
remove_all_from_dir(geojson_path)
|
remove_all_from_dir(geojson_path)
|
||||||
|
|
||||||
|
# tiles
|
||||||
|
tiles_path = tribal_data_path / "tiles"
|
||||||
|
remove_all_from_dir(tiles_path)
|
||||||
|
|
|
@ -218,6 +218,7 @@ def score_folder_cleanup() -> None:
|
||||||
remove_all_from_dir(data_path / "score" / "geojson")
|
remove_all_from_dir(data_path / "score" / "geojson")
|
||||||
remove_all_from_dir(data_path / "score" / "tiles")
|
remove_all_from_dir(data_path / "score" / "tiles")
|
||||||
remove_all_from_dir(data_path / "score" / "shapefile")
|
remove_all_from_dir(data_path / "score" / "shapefile")
|
||||||
|
remove_all_from_dir(data_path / "score" / "search")
|
||||||
downloadable_cleanup()
|
downloadable_cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
version: "3.4"
|
|
||||||
services:
|
services:
|
||||||
# The j40_data_pipeline service runs the ETL pipeline to create the score
|
# The j40_data_pipeline service runs the ETL pipeline to create the score
|
||||||
score:
|
score:
|
||||||
|
@ -15,6 +14,9 @@ services:
|
||||||
ENV_FOR_DYNACONF: development
|
ENV_FOR_DYNACONF: development
|
||||||
PYTHONUNBUFFERED: 1
|
PYTHONUNBUFFERED: 1
|
||||||
TZ: America/Los_Angeles
|
TZ: America/Los_Angeles
|
||||||
|
# The argument (and options) for the scoring step you want to run. Replace "full-run" with "etl-run" or whatever step you want to run
|
||||||
|
# To add arguments follow this example: command: ["generate-map-tiles", "--arg1", "value1", "--arg2", "value2"]
|
||||||
|
command: ["full-run", "--help"]
|
||||||
|
|
||||||
# The score_server serves the data-pipeline volume as a URL
|
# The score_server serves the data-pipeline volume as a URL
|
||||||
j40_score_server:
|
j40_score_server:
|
||||||
|
@ -23,26 +25,21 @@ services:
|
||||||
build: data/data-serve/.
|
build: data/data-serve/.
|
||||||
volumes:
|
volumes:
|
||||||
- ./data/data-pipeline/data_pipeline/data/score:/data/data-pipeline/data_pipeline/data/score
|
- ./data/data-pipeline/data_pipeline/data/score:/data/data-pipeline/data_pipeline/data/score
|
||||||
|
- ./data/data-pipeline/data_pipeline/data/tribal:/data/data-pipeline/data_pipeline/data/tribal
|
||||||
ports:
|
ports:
|
||||||
- 5000:8080
|
- 5000:8080
|
||||||
environment:
|
environment:
|
||||||
TZ: America/Los_Angeles
|
TZ: America/Los_Angeles
|
||||||
|
|
||||||
|
|
||||||
# The j40_website service runs the web app / map / site
|
# The j40_website service runs the web app / map / site
|
||||||
j40_website:
|
j40_website:
|
||||||
image: j40_website
|
image: j40_website
|
||||||
container_name: j40_website_1
|
container_name: j40_website_1
|
||||||
build: ./client
|
build: client
|
||||||
environment:
|
environment:
|
||||||
# See the client readme for more info on environment variables:
|
# See the client readme for more info on environment variables:
|
||||||
# https://github.com/usds/justice40-tool/blob/main/client/README.md
|
# https://github.com/usds/justice40-tool/blob/main/client/README.md
|
||||||
DATA_SOURCE: local
|
DATA_SOURCE: local
|
||||||
|
|
||||||
# If you want the map to render a MapBox base map (as opposed to the
|
|
||||||
# open source one from CartoDB), please create your own API TOKEN from
|
|
||||||
# your MapBox account and add the token here:
|
|
||||||
MAPBOX_STYLES_READ_TOKEN: ""
|
|
||||||
TZ: America/Los_Angeles
|
TZ: America/Los_Angeles
|
||||||
volumes:
|
volumes:
|
||||||
- ./client/src:/client/src
|
- ./client/src:/client/src
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue