Skip to content

Commit

Permalink
Updated Containerfile for OSU+OpenMPI with explicit git refs
Browse files Browse the repository at this point in the history
  • Loading branch information
Madeeks committed Feb 14, 2025
1 parent 58346bf commit e4d0b51
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 17 deletions.
38 changes: 22 additions & 16 deletions examples/container/osu-ompi/Containerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
FROM docker.io/nvidia/cuda:12.6.3-devel-ubuntu24.04

ARG OMPI_VER=5.0.6
ARG GDRCOPY_VER=2.3
FROM docker.io/nvidia/cuda:12.8.0-devel-ubuntu24.04

RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive \
Expand Down Expand Up @@ -33,29 +30,35 @@ RUN apt-get update \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/*

RUN git clone --depth 1 -b v${GDRCOPY_VER} https://github.com/NVIDIA/gdrcopy.git && \
cd gdrcopy && \
export CUDA_PATH=${CUDA_HOME:-$(echo $(which nvcc) | grep -o '.*cuda')} && \
make CC=gcc CUDA=$CUDA_PATH lib && \
make lib_install && \
cd ../ && rm -rf gdrcopy
# Build and install the GDRCopy library from source.
# GDRCOPY_VER selects the upstream branch or tag named "v<GDRCOPY_VER>", which is
# cloned shallowly (depth 1).
# CUDA_PATH is taken from CUDA_HOME when that is set and non-empty; otherwise it is
# derived from the path of nvcc, keeping everything up to the last occurrence of
# the string "cuda". It is exported and also passed to make as CUDA=.
# The source tree is removed in the same RUN so it does not remain in the layer.
ARG GDRCOPY_VER=2.4.4
RUN git clone --depth 1 --branch v${GDRCOPY_VER} https://github.com/NVIDIA/gdrcopy.git \
&& cd gdrcopy \
&& export CUDA_PATH=${CUDA_HOME:-$(echo $(which nvcc) | grep -o '.*cuda')} \
&& make CC=gcc CUDA=$CUDA_PATH lib \
&& make lib_install \
&& cd ../ && rm -rf gdrcopy

RUN git clone --depth 1 https://github.com/HewlettPackard/shs-cassini-headers.git \
# Install the HPE Cassini headers at a pinned commit.
# cassini_headers_ref is the git ref checked out after cloning; the clone is not
# shallow (presumably so that an arbitrary commit hash can be checked out -- confirm).
# The repository's include/ and share/ trees are copied into /usr/include and
# /usr/share, then the clone is removed in the same RUN.
ARG cassini_headers_ref=9a8a738a879f007849fbc69be8e3487a4abf0952
RUN git clone https://github.com/HewlettPackard/shs-cassini-headers.git \
&& cd shs-cassini-headers \
&& git checkout ${cassini_headers_ref} \
&& cp -r include/* /usr/include/ \
&& cp -r share/* /usr/share/ \
&& cd .. \
&& rm -r shs-cassini-headers

RUN git clone --depth 1 https://github.com/HewlettPackard/shs-cxi-driver.git \
# Install the HPE CXI driver headers at a pinned commit.
# cxi_driver_ref is the git ref checked out after cloning; override it with
# --build-arg cxi_driver_ref=<ref> to build against a different revision.
# The checkout must expand exactly the ARG declared below: a misspelled name
# expands to nothing, and a bare "git checkout" succeeds without switching
# revisions, silently dropping the pin. The expansion is quoted so that an
# empty ref fails the build instead of being ignored.
# Only the repository's include/ tree is copied (into /usr/include); the clone
# is removed in the same RUN so it does not remain in the layer.
ARG cxi_driver_ref=caa8bf41a25817111f137bb7e8be1e45c4e6758f
RUN git clone https://github.com/HewlettPackard/shs-cxi-driver.git \
&& cd shs-cxi-driver \
&& git checkout "${cxi_driver_ref}" \
&& cp -r include/* /usr/include/ \
&& cd .. \
&& rm -r shs-cxi-driver

RUN git clone --depth 1 https://github.com/HewlettPackard/shs-libcxi.git \
ARG libcxi_ref=31a183b521d1da670574e2a1bf59a91cb579b105
RUN git clone https://github.com/HewlettPackard/shs-libcxi.git \
&& cd shs-libcxi \
&& git checkout release/shs-12.0 \
&& git checkout ${libcxi_ref} \
&& ./autogen.sh \
&& ./configure --prefix=/usr --with-cuda=/usr/local/cuda \
&& make -j$(nproc) \
Expand All @@ -65,10 +68,12 @@ RUN git clone --depth 1 https://github.com/HewlettPackard/shs-libcxi.git \
&& rm -r shs-libcxi

# Install libfabric
ARG libfabric_version=2.0.0
ARG libfabric_ref=36b974dc27c67fd0268e0ab575fd01592f756992
# Libfabric 2.0.0 seems to have a bug preventing compilation of the CXI provider, using main branch for now
ARG libfabric_version=main
RUN git clone --depth 1 --branch ${libfabric_version} https://github.com/ofiwg/libfabric.git \
RUN git clone --branch main --single-branch https://github.com/ofiwg/libfabric.git \
&& cd libfabric \
&& git checkout ${libfabric_ref} \
&& ./autogen.sh \
&& ./configure --prefix=/usr --with-cuda=/usr/local/cuda --enable-cuda-dlopen --enable-gdrcopy-dlopen --enable-cxi --enable-lnx --enable-efa \
&& make -j$(nproc) \
Expand All @@ -77,6 +82,7 @@ RUN git clone --depth 1 --branch ${libfabric_version} https://github.com/ofiwg/l
&& cd .. \
&& rm -rf libfabric

ARG OMPI_VER=5.0.6
RUN wget -q https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-${OMPI_VER}.tar.gz \
&& tar xf openmpi-${OMPI_VER}.tar.gz \
&& cd openmpi-${OMPI_VER} \
Expand Down
2 changes: 1 addition & 1 deletion examples/container/osu-ompi/omb-cxi-ompi.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
image="/capstor/scratch/cscs/amadonna/localhost+omb-cxi.sqsh"
image="/capstor/scratch/cscs/amadonna/localhost+omb-cxi-ompi.sqsh"
writable=true
entrypoint=false

Expand Down

0 comments on commit e4d0b51

Please sign in to comment.