diff --git a/examples/container/osu-ompi/Containerfile b/examples/container/osu-ompi/Containerfile
index 9f96fe8..db0abd2 100644
--- a/examples/container/osu-ompi/Containerfile
+++ b/examples/container/osu-ompi/Containerfile
@@ -1,7 +1,4 @@
-FROM docker.io/nvidia/cuda:12.6.3-devel-ubuntu24.04
-
-ARG OMPI_VER=5.0.6
-ARG GDRCOPY_VER=2.3
+FROM docker.io/nvidia/cuda:12.8.0-devel-ubuntu24.04
 
 RUN apt-get update \
     && DEBIAN_FRONTEND=noninteractive \
@@ -33,29 +30,35 @@ RUN apt-get update \
         --no-install-recommends \
     && rm -rf /var/lib/apt/lists/*
 
-RUN git clone --depth 1 -b v${GDRCOPY_VER} https://github.com/NVIDIA/gdrcopy.git && \
-    cd gdrcopy && \
-    export CUDA_PATH=${CUDA_HOME:-$(echo $(which nvcc) | grep -o '.*cuda')} && \
-    make CC=gcc CUDA=$CUDA_PATH lib && \
-    make lib_install && \
-    cd ../ && rm -rf gdrcopy
+ARG GDRCOPY_VER=2.4.4
+RUN git clone --depth 1 --branch v${GDRCOPY_VER} https://github.com/NVIDIA/gdrcopy.git \
+    && cd gdrcopy \
+    && export CUDA_PATH=${CUDA_HOME:-$(echo $(which nvcc) | grep -o '.*cuda')} \
+    && make CC=gcc CUDA=$CUDA_PATH lib \
+    && make lib_install \
+    && cd ../ && rm -rf gdrcopy
 
-RUN git clone --depth 1 https://github.com/HewlettPackard/shs-cassini-headers.git \
+ARG cassini_headers_ref=9a8a738a879f007849fbc69be8e3487a4abf0952
+RUN git clone https://github.com/HewlettPackard/shs-cassini-headers.git \
     && cd shs-cassini-headers \
+    && git checkout ${cassini_headers_ref} \
     && cp -r include/* /usr/include/ \
     && cp -r share/* /usr/share/ \
     && cd .. \
     && rm -r shs-cassini-headers
 
-RUN git clone --depth 1 https://github.com/HewlettPackard/shs-cxi-driver.git \
+ARG cxi_driver_ref=caa8bf41a25817111f137bb7e8be1e45c4e6758f
+RUN git clone https://github.com/HewlettPackard/shs-cxi-driver.git \
     && cd shs-cxi-driver \
+    && git checkout ${cxi_driver_ref} \
     && cp -r include/* /usr/include/ \
     && cd .. \
     && rm -r shs-cxi-driver
 
-RUN git clone --depth 1 https://github.com/HewlettPackard/shs-libcxi.git \
+ARG libcxi_ref=31a183b521d1da670574e2a1bf59a91cb579b105
+RUN git clone https://github.com/HewlettPackard/shs-libcxi.git \
     && cd shs-libcxi \
-    && git checkout release/shs-12.0 \
+    && git checkout ${libcxi_ref} \
     && ./autogen.sh \
     && ./configure --prefix=/usr --with-cuda=/usr/local/cuda \
     && make -j$(nproc) \
@@ -65,10 +68,12 @@ RUN git clone --depth 1 https://github.com/HewlettPackard/shs-libcxi.git \
     && rm -r shs-libcxi
 
 # Install libfabric
+ARG libfabric_version=2.0.0
+ARG libfabric_ref=36b974dc27c67fd0268e0ab575fd01592f756992
 # Libfabric 2.0.0 seems to have a bug preventing compilation of the CXI provider, using main branch for now
-ARG libfabric_version=main
-RUN git clone --depth 1 --branch ${libfabric_version} https://github.com/ofiwg/libfabric.git \
+RUN git clone --branch main --single-branch https://github.com/ofiwg/libfabric.git \
     && cd libfabric \
+    && git checkout ${libfabric_ref} \
     && ./autogen.sh \
     && ./configure --prefix=/usr --with-cuda=/usr/local/cuda --enable-cuda-dlopen --enable-gdrcopy-dlopen --enable-cxi --enable-lnx --enable-efa \
     && make -j$(nproc) \
@@ -77,6 +82,7 @@ RUN git clone --depth 1 --branch ${libfabric_version} https://github.com/ofiwg/l
     && cd .. \
     && rm -rf libfabric
 
+ARG OMPI_VER=5.0.6
 RUN wget -q https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-${OMPI_VER}.tar.gz \
     && tar xf openmpi-${OMPI_VER}.tar.gz \
     && cd openmpi-${OMPI_VER} \
diff --git a/examples/container/osu-ompi/omb-cxi-ompi.toml b/examples/container/osu-ompi/omb-cxi-ompi.toml
index f4deedd..83c4245 100644
--- a/examples/container/osu-ompi/omb-cxi-ompi.toml
+++ b/examples/container/osu-ompi/omb-cxi-ompi.toml
@@ -1,4 +1,4 @@
-image="/capstor/scratch/cscs/amadonna/localhost+omb-cxi.sqsh"
+image="/capstor/scratch/cscs/amadonna/localhost+omb-cxi-ompi.sqsh"
 
 writable=true
 entrypoint=false