Use docker compose V2 to cache downloaded packages #4034

Draft pull request: wants to merge 2 commits into master.
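This PR replaces the usual download-then-delete pattern in the test Dockerfile (apt-get install followed by rm -rf /var/lib/apt/lists/*, and pip install --no-cache-dir) with BuildKit cache mounts, so apt, pip, and wget downloads persist across rebuilds instead of being fetched again. A minimal sketch of the pattern the diff applies throughout; the syntax directive and the curl package are illustrative, not taken from the PR:

    # syntax=docker/dockerfile:1
    ARG UBUNTU_VERSION=20.04
    FROM ubuntu:${UBUNTU_VERSION}

    # The stock Ubuntu image ships an apt hook that deletes downloaded .deb
    # files after every install; removing it lets the cache mount keep them.
    RUN rm -f /etc/apt/apt.conf.d/docker-clean

    # Cache mounts persist across builds but are not baked into the image.
    # sharing=private gives each concurrent build its own instance of the
    # cache instead of letting two builders write to the same directory.
    RUN --mount=type=cache,target=/var/cache/apt,sharing=private \
        --mount=type=cache,target=/var/lib/apt,sharing=private \
        apt-get update -qq && apt-get install -y --no-install-recommends curl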
139 changes: 87 additions & 52 deletions Dockerfile.test.cpu
@@ -1,3 +1,5 @@
# This requires docker compose v2 and above

ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION}

@@ -28,17 +30,21 @@ ENV DEBIAN_FRONTEND=noninteractive
# Set default shell to /bin/bash
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

# Make Ubuntu not delete downloaded packages
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# Log given ARGs (and all other environment vars)
RUN env | sort

# Prepare to install specific g++ versions
RUN apt-get update -qq && apt-get install -y --no-install-recommends software-properties-common && \
rm -rf /var/lib/apt/lists/*

RUN add-apt-repository ppa:ubuntu-toolchain-r/test
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
apt-get update -qq && apt-get install -y --no-install-recommends software-properties-common
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
add-apt-repository ppa:ubuntu-toolchain-r/test

# Install essential packages.
RUN apt-get update -qq && apt-get install -y --no-install-recommends \
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
apt-get update -qq && apt-get install -y --no-install-recommends \
wget \
ca-certificates \
cmake \
@@ -47,21 +53,23 @@ RUN apt-get update -qq && apt-get install -y --no-install-recommends \
git \
build-essential \
g++-${GPP_VERSION} \
moreutils && \
rm -rf /var/lib/apt/lists/*
moreutils

# setup ssh service
RUN ssh-keygen -f /root/.ssh/id_rsa -q -N ''
RUN cp -v /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys

# Install Python.
RUN apt-get update -qq && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-distutils && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
apt-get update -qq && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-distutils
RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python
RUN ln -s -f /usr/bin/python${PYTHON_VERSION} /usr/bin/python${PYTHON_VERSION/%.*/}
RUN wget --progress=dot:mega https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py
# do not cache get-pip.py wget download
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
wget --progress=dot:mega https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py

RUN pip install --no-cache-dir -U --force requests pytest mock pytest-forked parameterized
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
pip install -U --force requests pytest mock pytest-forked parameterized

# Add launch helper scripts
RUN echo "env SPARK_HOME=/spark SPARK_DRIVER_MEM=512m PYSPARK_PYTHON=/usr/bin/python${PYTHON_VERSION} PYSPARK_DRIVER_PYTHON=/usr/bin/python${PYTHON_VERSION} \"\$@\"" > /spark_env.sh
@@ -72,43 +80,53 @@ RUN chmod a+x /pytest.sh
RUN chmod a+x /pytest_standalone.sh

# Install Spark stand-alone cluster.
RUN if [[ -n ${SPARK_PACKAGE} ]]; then \
wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC /tmp; \
RUN --mount=type=cache,target=/wget,sharing=private \
if [[ -n ${SPARK_PACKAGE} ]]; then \
mkdir -p /wget/$(dirname "${SPARK_PACKAGE}"); \
wget -c --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O /wget/${SPARK_PACKAGE}; \
tar -xzC /tmp -f /wget/${SPARK_PACKAGE}; \
archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v /tmp/\${archive/%.tgz/} /spark"; \
fi

# Install PySpark.
RUN apt-get update -qq && apt install -y openjdk-8-jdk-headless
RUN if [[ ${SPARK_PACKAGE} != *"-preview"* ]]; then \
pip install --no-cache-dir ${PYSPARK_PACKAGE}; \
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
apt-get update -qq && apt install -y openjdk-8-jdk-headless
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
--mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${SPARK_PACKAGE} != *"-preview"* ]]; then \
pip install ${PYSPARK_PACKAGE}; \
else \
apt-get update -qq && apt-get install pandoc; \
pip install --no-cache-dir pypandoc; \
(cd /spark/python && python setup.py sdist && pip install --no-cache-dir dist/pyspark-*.tar.gz && rm dist/pyspark-*); \
pip install pypandoc; \
(cd /spark/python && python setup.py sdist && pip install dist/pyspark-*.tar.gz && rm dist/pyspark-*); \
fi

# Pin cloudpickle to 1.3.0
# Dill breaks cloudpickle > 1.3.0 when using Spark2
# https://github.com/cloudpipe/cloudpickle/issues/393
RUN if [[ ${PYSPARK_PACKAGE} == "pyspark==2."* ]]; then \
pip install --no-cache-dir cloudpickle==1.3.0; \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${PYSPARK_PACKAGE} == "pyspark==2."* ]]; then \
pip install cloudpickle==1.3.0; \
fi

# Install Ray.
RUN pip install --no-cache-dir ray
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
pip install ray

# Install MPI.
RUN if [[ ${MPI_KIND} == "OpenMPI" ]]; then \
wget --progress=dot:mega -O /tmp/openmpi-4.1.4-bin.tar.gz https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.4.tar.gz && \
cd /tmp && tar -zxf /tmp/openmpi-4.1.4-bin.tar.gz && \
RUN --mount=type=cache,target=/var/cache/apt,sharing=private --mount=type=cache,target=/var/lib/apt,sharing=private \
--mount=type=cache,target=/wget \
if [[ ${MPI_KIND} == "OpenMPI" ]]; then \
wget -c --progress=dot:mega -O /wget/openmpi-4.1.4-bin.tar.gz https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.4.tar.gz && \
cd /tmp && tar -zxf /wget/openmpi-4.1.4-bin.tar.gz && \
mkdir openmpi-4.1.4/build && cd openmpi-4.1.4/build && ../configure --prefix=/usr/local && \
make -j all && make install && ldconfig && \
echo "mpirun -allow-run-as-root -np 2 -H localhost:2 -bind-to none -map-by slot -mca mpi_abort_print_stack 1 -tag-output" > /mpirun_command; \
elif [[ ${MPI_KIND} == "ONECCL" ]]; then \
wget --progress=dot:mega -O /tmp/oneccl.tar.gz https://github.com/oneapi-src/oneCCL/archive/${CCL_PACKAGE}.tar.gz && \
cd /tmp && tar -zxf oneccl.tar.gz && \
wget --progress=dot:mega -O /wget/${CCL_PACKAGE}.tar.gz https://github.com/oneapi-src/oneCCL/archive/${CCL_PACKAGE}.tar.gz && \
cd /tmp && tar -zxf /wget/${CCL_PACKAGE}.tar.gz && \
mkdir oneCCL-${CCL_PACKAGE}/build && cd oneCCL-${CCL_PACKAGE}/build && cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local/oneccl -DCMAKE_BUILD_TYPE=Release && make -j install && \
rm /tmp/oneccl.tar.gz && rm -Rf /tmp/oneCCL-${CCL_PACKAGE} && \
rm -Rf /tmp/oneCCL-${CCL_PACKAGE} && \
sed -i 's/if \[ -z \"\${I_MPI_ROOT}\" \]/if [ -z \"${I_MPI_ROOT:-}\" ]/g' /usr/local/oneccl/env/setvars.sh && \
sed -i 's/ \$1/ \${1:-}/g' /usr/local/oneccl/env/setvars.sh && \
echo ". /usr/local/oneccl/env/setvars.sh" > /oneccl_env && \
@@ -135,62 +153,75 @@ RUN if [[ ${MPI_KIND} == "OpenMPI" ]]; then \
# Install mpi4py.
# This requires SETUPTOOLS_USE_DISTUTILS=stdlib as with setuptools>=60.1.0 installing mpi4py broke
# https://github.com/mpi4py/mpi4py/issues/157#issuecomment-1001022274
RUN if [[ ${MPI_KIND} != "None" ]]; then \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${MPI_KIND} != "None" ]]; then \
if [[ ${MPI_KIND} == "ONECCL" ]]; then \
export I_MPI_ROOT=/usr/local/oneccl; \
export MPICC=/usr/local/oneccl/bin/mpicc; \
fi; \
SETUPTOOLS_USE_DISTUTILS=stdlib pip install --no-cache-dir mpi4py; \
SETUPTOOLS_USE_DISTUTILS=stdlib pip install mpi4py; \
fi

# Install TensorFlow and Keras (releases).
# Pin scipy!=1.4.0: https://github.com/scipy/scipy/issues/11237
# Pin protobuf~=3.20 for tensorflow<2.6.5: https://github.com/tensorflow/tensorflow/issues/56077
RUN if [[ ${TENSORFLOW_PACKAGE} != "tf-nightly" ]]; then \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${TENSORFLOW_PACKAGE} != "tf-nightly" ]]; then \
PROTOBUF_PACKAGE=""; \
if [[ ${TENSORFLOW_PACKAGE} == tensorflow*==1.15.* ]] || \
[[ ${TENSORFLOW_PACKAGE} == tensorflow-cpu==2.[012345].* ]]; then \
PROTOBUF_PACKAGE="protobuf~=3.20"; \
fi; \
pip install --no-cache-dir ${TENSORFLOW_PACKAGE} ${PROTOBUF_PACKAGE}; \
pip install ${TENSORFLOW_PACKAGE} ${PROTOBUF_PACKAGE}; \
if [[ ${KERAS_PACKAGE} != "None" ]]; then \
pip uninstall -y keras; \
pip install --no-cache-dir ${KERAS_PACKAGE} "scipy!=1.4.0" "pandas<1.1.0" "numpy<1.24.0"; \
pip install ${KERAS_PACKAGE} "scipy!=1.4.0" "pandas<1.1.0" "numpy<1.24.0"; \
fi; \
mkdir -p ~/.keras; \
python -c "import tensorflow as tf; tf.keras.datasets.mnist.load_data()"; \
fi

# Pin h5py < 3 for tensorflow: https://github.com/tensorflow/tensorflow/issues/44467
RUN pip install 'h5py<3.0' 'numpy<1.24.0' --force-reinstall
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
pip install 'h5py<3.0' 'numpy<1.24.0' --force-reinstall

# Install PyTorch (releases).
# Pin Pillow<7.0 for torchvision < 0.5.0: https://github.com/pytorch/vision/issues/1718
# Pin Pillow!=8.3.0 for torchvision: https://github.com/pytorch/vision/issues/4146
RUN if [[ ${PYTORCH_PACKAGE} != "torch-nightly" ]]; then \
pip install --no-cache-dir ${PYTORCH_PACKAGE} ${TORCHVISION_PACKAGE} -f https://download.pytorch.org/whl/torch_stable.html; \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${PYTORCH_PACKAGE} != "torch-nightly" ]]; then \
pip install ${PYTORCH_PACKAGE} ${TORCHVISION_PACKAGE} -f https://download.pytorch.org/whl/torch_stable.html; \
if [[ "${TORCHVISION_PACKAGE/%+*/}" == torchvision==0.[1234].* ]]; then \
pip install --no-cache-dir "Pillow<7.0" --no-deps; \
pip install "Pillow<7.0" --no-deps; \
else \
pip install --no-cache-dir "Pillow!=8.3.0" --no-deps; \
pip install "Pillow!=8.3.0" --no-deps; \
fi; \
pip install ${PYTORCH_LIGHTNING_PACKAGE}; \
fi


# Install MXNet (releases).
RUN if [[ ${MXNET_PACKAGE} != "mxnet-nightly" ]]; then \
pip install --no-cache-dir ${MXNET_PACKAGE} ; \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${MXNET_PACKAGE} != "mxnet-nightly" ]]; then \
pip install ${MXNET_PACKAGE} ; \
fi

# Prefetch Spark MNIST dataset.
RUN mkdir -p /work /data && wget --progress=dot:mega https://horovod-datasets.s3.amazonaws.com/mnist.bz2 -O /data/mnist.bz2
RUN --mount=type=cache,target=/wget,sharing=private \
mkdir -p /work /data; \
wget -c --progress=dot:mega https://horovod-datasets.s3.amazonaws.com/mnist.bz2 -O /wget/mnist.bz2; \
cp /wget/mnist.bz2 /data/

# Prefetch Spark Rossmann dataset.
RUN mkdir -p /work /data && wget --progress=dot:mega https://horovod-datasets.s3.amazonaws.com/rossmann.tgz -O - | tar -xzC /data
RUN --mount=type=cache,target=/wget,sharing=private \
mkdir -p /work /data; \
wget -c --progress=dot:mega https://horovod-datasets.s3.amazonaws.com/rossmann.tgz -O /wget/rossmann.tgz; \
tar -xzC /data -f /wget/rossmann.tgz

# Prefetch PyTorch datasets.
RUN wget --progress=dot:mega https://horovod-datasets.s3.amazonaws.com/pytorch_datasets.tgz -O - | tar -xzC /data
RUN --mount=type=cache,target=/wget,sharing=private \
wget -c --progress=dot:mega https://horovod-datasets.s3.amazonaws.com/pytorch_datasets.tgz -O /wget/pytorch_datasets.tgz; \
tar -xzC /data -f /wget/pytorch_datasets.tgz

### END OF CACHE ###
COPY . /horovod
@@ -199,31 +230,35 @@ COPY . /horovod

# Install TensorFlow and Keras (nightly).
# Pin scipy!=1.4.0: https://github.com/scipy/scipy/issues/11237
RUN if [[ ${TENSORFLOW_PACKAGE} == "tf-nightly" ]]; then \
pip install --no-cache-dir ${TENSORFLOW_PACKAGE}; \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${TENSORFLOW_PACKAGE} == "tf-nightly" ]]; then \
pip install ${TENSORFLOW_PACKAGE}; \
if [[ ${KERAS_PACKAGE} != "None" ]]; then \
pip uninstall -y keras-nightly; \
pip install --no-cache-dir ${KERAS_PACKAGE} "scipy!=1.4.0" "pandas<1.1.0" "numpy<1.24.0"; \
pip install ${KERAS_PACKAGE} "scipy!=1.4.0" "pandas<1.1.0" "numpy<1.24.0"; \
fi; \
mkdir -p ~/.keras; \
python -c "import tensorflow as tf; tf.keras.datasets.mnist.load_data()"; \
fi

# Install PyTorch (nightly).
# Pin Pillow!=8.3.0 for torchvision: https://github.com/pytorch/vision/issues/4146
RUN if [[ ${PYTORCH_PACKAGE} == "torch-nightly" ]]; then \
pip install --no-cache-dir --pre torch ${TORCHVISION_PACKAGE} -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; \
pip install --no-cache-dir "Pillow!=8.3.0" --no-deps; \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${PYTORCH_PACKAGE} == "torch-nightly" ]]; then \
pip install --pre torch ${TORCHVISION_PACKAGE} -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; \
pip install "Pillow!=8.3.0" --no-deps; \
pip install ${PYTORCH_LIGHTNING_PACKAGE}; \
fi

# Install MXNet (nightly).
RUN if [[ ${MXNET_PACKAGE} == "mxnet-nightly" ]]; then \
pip install --no-cache-dir --pre mxnet -f https://dist.mxnet.io/python/all; \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${MXNET_PACKAGE} == "mxnet-nightly" ]]; then \
pip install --pre mxnet -f https://dist.mxnet.io/python/all; \
fi

# Install Horovod.
RUN if [[ ${MPI_KIND} == "ONECCL" ]]; then \
RUN --mount=type=cache,target=/root/.cache/pip,sharing=private \
if [[ ${MPI_KIND} == "ONECCL" ]]; then \
if [ -z "${LD_LIBRARY_PATH:-}" ]; then \
export LD_LIBRARY_PATH=""; \
fi; \
@@ -236,7 +271,7 @@ fi; \
fi; \
cd /horovod && \
python setup.py sdist && \
bash -c "${HOROVOD_BUILD_FLAGS} HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_MXNET=1 pip install --no-cache-dir -v $(ls /horovod/dist/horovod-*.tar.gz)[spark,ray]"
bash -c "${HOROVOD_BUILD_FLAGS} HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_MXNET=1 pip install -v $(ls /horovod/dist/horovod-*.tar.gz)[spark,ray]"

# Show the effective python package version to easily spot version differences
RUN pip freeze | sort
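The cache mounts above are a BuildKit feature, which is what the new first line of the Dockerfile alludes to: Compose V2 builds with BuildKit by default, while the legacy builder rejects RUN --mount. A build would be invoked along these lines (the exact invocation is an assumption; test-cpu-base is a service name taken from docker-compose.test.yml below):

    # Compose V2 uses BuildKit by default, so the RUN --mount cache
    # directives in Dockerfile.test.cpu take effect. The classic builder
    # would fail on them unless DOCKER_BUILDKIT=1 is set.
    docker compose -f docker-compose.test.yml build test-cpu-base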
1 change: 0 additions & 1 deletion docker-compose.test.yml
@@ -1,4 +1,3 @@
version: '2.3'
services:
test-cpu-base:
build:
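Compose V2 treats the top-level version attribute as obsolete, infers the file format on its own, and warns when the key is present, so dropping version: '2.3' here lines up with the V2 requirement stated at the top of the Dockerfile.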