From ab33990dea3ce9c90dd6356d9d10e0f9c5d61243 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Wed, 9 Oct 2024 00:23:26 -0600 Subject: [PATCH 01/29] WIP: Trilinos mpi build --- CMakeLists.txt | 3 + scripts/build-matar.sh | 8 ++ scripts/matar-install.sh | 12 ++- scripts/setup-env.sh | 4 + scripts/trilinos-install.sh | 169 ++++++++++++++++++++++++++++++++++++ 5 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 scripts/trilinos-install.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c8a0bfb..1ecbdd7c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,9 @@ if(Matar_ENABLE_KOKKOS) if("${Matar_KOKKOS_PACKAGE}" STREQUAL "Trilinos") find_package(Trilinos REQUIRED) add_definitions(-DTRILINOS_INTERFACE=1) + elif(Matar_ENABLE_TRILINOS) + find_package(Trilinos REQUIRED) + add_definitions(-DTRILINOS_INTERFACE=1) else() find_package(Kokkos REQUIRED) endif() diff --git a/scripts/build-matar.sh b/scripts/build-matar.sh index cac2b4f9..12553008 100755 --- a/scripts/build-matar.sh +++ b/scripts/build-matar.sh @@ -162,6 +162,12 @@ if [ "$machine" = "mac" ] && [ $build_cores -ne 1 ]; then # Nothing to do, default is already 1 fi +if [ "$trilinos" = "enabled" ] && [ "$kokkos_build_type" = "none" ]; then + echo "Error: Kokkos none cannot be requested with Trilinos" + show_help + return 1 +fi + echo "Building based on these argument options:" echo "Build action - ${build_action}" @@ -185,6 +191,8 @@ if [ "$build_action" = "full-app" ]; then if [ "$trilinos" = "disabled" ]; then source kokkos-install.sh ${kokkos_build_type} + elif [ "$trilinos" = "enabled" ]; then + source trilinos-install.sh ${kokkos_build_type} fi source matar-install.sh ${kokkos_build_type} ${trilinos} source cmake_build_${execution}.sh ${kokkos_build_type} ${trilinos} diff --git a/scripts/matar-install.sh b/scripts/matar-install.sh index c664b060..0b421c75 100644 --- a/scripts/matar-install.sh +++ b/scripts/matar-install.sh @@ -8,16 +8,26 @@ mkdir -p ${MATAR_BUILD_DIR} cmake_options=( -D CMAKE_INSTALL_PREFIX="${MATAR_INSTALL_DIR}" - -D CMAKE_PREFIX_PATH="${KOKKOS_INSTALL_DIR}" ) if [ "$kokkos_build_type" = "none" ]; then cmake_options+=( -D Matar_ENABLE_KOKKOS=OFF ) +elif [ "$trilinos" = "enabled" ]; then + if [ ! -d "${TRILINOS_INSTALL_DIR}/lib" ]; then + Trilinos_DIR=${TRILINOS_INSTALL_DIR}/lib64/cmake/Trilinos + else + Trilinos_DIR=${TRILINOS_INSTALL_DIR}/lib/cmake/Trilinos + fi + cmake_options=( + -D Trilinos_DIR="$Trilinos_DIR" + -D Matar_ENABLE_TRILINOS=ON + ) else cmake_options+=( -D Matar_ENABLE_KOKKOS=ON + -D CMAKE_PREFIX_PATH="${KOKKOS_INSTALL_DIR}" ) fi diff --git a/scripts/setup-env.sh b/scripts/setup-env.sh index fa97861a..e6fb36dd 100644 --- a/scripts/setup-env.sh +++ b/scripts/setup-env.sh @@ -31,6 +31,10 @@ export KOKKOS_SOURCE_DIR=${basedir}/src/Kokkos/kokkos export KOKKOS_BUILD_DIR=${builddir}/kokkos export KOKKOS_INSTALL_DIR=${installdir}/kokkos +export TRILINOS_SOURCE_DIR=${basedir}/lib/Trilinos +export TRILINOS_BUILD_DIR=${TRILINOS_SOURCE_DIR}/build-${kokkos_build_type} +export TRILINOS_INSTALL_DIR=${TRILINOS_BUILD_DIR} + export MATAR_SOURCE_DIR=${basedir} export MATAR_BUILD_DIR=${builddir}/matar export MATAR_INSTALL_DIR=${installdir}/matar diff --git a/scripts/trilinos-install.sh b/scripts/trilinos-install.sh new file mode 100644 index 00000000..b6f4f3dd --- /dev/null +++ b/scripts/trilinos-install.sh @@ -0,0 +1,169 @@ +#!/bin/bash -e + +kokkos_build_type="${1}" + +# If all arguments are valid, you can use them in your script as needed +echo "Trilinos Kokkos Build Type: $kokkos_build_type" + +#check if Trilinos directory exists, git clone Trilinos if it doesn't +[ -d "${TRILINOS_SOURCE_DIR}" ] && echo "Directory Trilinos exists, skipping Trilinos download" + +if [ ! -d "${TRILINOS_SOURCE_DIR}" ] +then + echo "Directory Trilinos does not exist, downloading Trilinos...." + git clone --depth 1 https://github.com/trilinos/Trilinos.git ${TRILINOS_SOURCE_DIR} +fi + +#check if Trilinos build directory exists, create Trilinos/build if it doesn't +[ -d "${TRILINOS_BUILD_DIR}" ] && echo "Directory ${TRILINOS_BUILD_DIR} exists, moving on" + +if [ ! -d "${TRILINOS_BUILD_DIR}" ] +then + echo "Directory ${TRILINOS_BUILD_DIR} does not exist, creating it...." + rm -rf ${TRILINOS_BUILD_DIR} ${TRILINOS_INSTALL_DIR} + mkdir -p ${TRILINOS_BUILD_DIR} +fi + +#check if Trilinos library files were installed, install them otherwise. +[ -d "${TRILINOS_BUILD_DIR}/lib" ] && echo "Directory ${TRILINOS_BUILD_DIR}/lib exists, assuming successful installation; delete build folder and run build script again if there was an environment error that has been corrected." + +[ -d "${TRILINOS_BUILD_DIR}/lib64" ] && echo "Directory ${TRILINOS_BUILD_DIR}/lib64 exists, assuming successful installation; delete build folder and run build script again if there was an environment error that has been corrected." + +if [ ! -d "${TRILINOS_BUILD_DIR}/lib" ] && [ ! -d "${TRILINOS_BUILD_DIR}/lib64" ] +then + echo "Directory Trilinos/build/lib does not exist, compiling Trilinos (this might take a while)...." + +CUDA_ADDITIONS=( +-D TPL_ENABLE_CUDA=ON +-D TPL_ENABLE_CUBLAS=ON +-D TPL_ENABLE_CUSPARSE=ON +-D Kokkos_ENABLE_CUDA=ON +-D Kokkos_ENABLE_CUDA_LAMBDA=ON +-D Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON +-D Kokkos_ENABLE_DEPRECATED_CODE=OFF +-D Kokkos_ENABLE_CUDA_UVM=OFF +-D Trilinos_ENABLE_KokkosKernels=ON +-D KokkosKernels_ENABLE_TPL_CUBLAS=ON +-D KokkosKernels_ENABLE_TPL_CUSPARSE=ON +-D Tpetra_ENABLE_CUDA=ON +-D Xpetra_ENABLE_Kokkos_Refactor=ON +-D MueLu_ENABLE_Kokkos_Refactor=ON +) + +# Kokkos flags for Hip +HIP_ADDITIONS=( +export OMPI_CXX=hipcc +-D Kokkos_ENABLE_HIP=ON +-D Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE=ON +-D Kokkos_ENABLE_DEPRECATED_CODE=OFF +-D Kokkos_ARCH_VEGA90A=ON +-D Trilinos_ENABLE_KokkosKernels=ON +-D KokkosKernels_ENABLE_TPL_CUBLAS=OFF +-D KokkosKernels_ENABLE_TPL_CUSPARSE=OFF +-D Tpetra_INST_HIP=ON +-D Xpetra_ENABLE_Kokkos_Refactor=ON +) + +# Kokkos flags for OpenMP +OPENMP_ADDITIONS=( +-D Trilinos_ENABLE_OpenMP=ON +) + +# Flags for building with MKL, which is supported at MSU HPCC +MSU_ADDITIONS=( +-D BLAS_LIBRARY_NAMES="libmkl_rt.so" +-D BLAS_LIBRARY_DIRS="/apps/spack-managed/gcc-11.3.1/intel-oneapi-mkl-2022.2.1-7l7jlsd56x2kljiskrcvsoenmq4y3cu7/mkl/2022.2.1/lib/intel64" +-D LAPACK_LIBRARY_NAMES="libmkl_rt.so" +-D LAPACK_LIBRARY_DIRS="/apps/spack-managed/gcc-11.3.1/intel-oneapi-mkl-2022.2.1-7l7jlsd56x2kljiskrcvsoenmq4y3cu7/mkl/2022.2.1/lib/intel64" +-D TPL_ENABLE_MKL:BOOL=ON +-D MKL_LIBRARY_DIRS:FILEPATH="/apps/spack-managed/gcc-11.3.1/intel-oneapi-mkl-2022.2.1-7l7jlsd56x2kljiskrcvsoenmq4y3cu7/mkl/2022.2.1/lib/intel64" +-D MKL_LIBRARY_NAMES:STRING="mkl_rt" +-D MKL_INCLUDE_DIRS:FILEPATH="/apps/spack-managed/gcc-11.3.1/intel-oneapi-mkl-2022.2.1-7l7jlsd56x2kljiskrcvsoenmq4y3cu7/mkl/2022.2.1/include" +) + +# Configure kokkos using CMake +cmake_options=( +-D CMAKE_BUILD_TYPE=Release +-D Trilinos_MUST_FIND_ALL_TPL_LIBS=TRUE +-D CMAKE_CXX_STANDARD=17 +-D TPL_ENABLE_MPI=ON +) + +echo "**** Machine = ${machine} ****" +if [ "$machine" = "msu" ]; then + echo "**** WARNING: Verify MKL path in trilinos-install.sh ****" + cmake_options+=( + ${MSU_ADDITIONS[@]} + ) +fi + +cmake_options+=( +-D Trilinos_ENABLE_Kokkos=ON +${ADDITIONS[@]} +-D Trilinos_ENABLE_Amesos2=OFF +-D Trilinos_ENABLE_Belos=OFF +-D Trilinos_ENABLE_MueLu=OFF +-D Trilinos_ENABLE_ROL=OFF +-D Trilinos_ENABLE_Ifpack2=OFF +-D Trilinos_ENABLE_Zoltan2=ON +-D Trilinos_ENABLE_Anasazi=OFF +-D MueLu_ENABLE_TESTS=OFF +-D Trilinos_ENABLE_ALL_PACKAGES=OFF +-D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES=OFF +-D Trilinos_ENABLE_TESTS=OFF +-D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR} +) + +# Flags for building with Intel MKL library +#INTEL_MKL_ADDITIONS=( +#-D TPL_ENABLE_MKL=ON +#-D BLAS_LIBRARY_NAMES="libmkl_rt.so" +#-D BLAS_LIBRARY_DIRS="$MKLROOT/lib/intel64" +#-D LAPACK_LIBRARY_NAMES="libmkl_rt.so" +#-D LAPACK_LIBRARY_DIRS="$MKLROOT/lib/intel64" +#-D MKL_LIBRARY_DIRS="$MKLROOT/lib/intel64" +#-D MKL_LIBRARY_NAMES="mkl_rt" +#-D MKL_INCLUDE_DIRS="$MKLROOT/include" +#) + +#echo "**** Intel MKL = ${intel_mkl} ****" +#if [ "$intel_mkl" = "enabled" ]; then + #echo "**** assuming MKL installation at $MKLROOT ****" + #cmake_options+=( + #${INTEL_MKL_ADDITIONS[@]} + #) +#fi + +if [ "$kokkos_build_type" = "openmp" ]; then + cmake_options+=( + ${OPENMP_ADDITIONS[@]} + ) +elif [ "$kokkos_build_type" = "cuda" ]; then + export OMPI_CXX=${TRILINOS_SOURCE_DIR}/packages/kokkos/bin/nvcc_wrapper + export CUDA_LAUNCH_BLOCKING=1 + cmake_options+=( + ${CUDA_ADDITIONS[@]} + ) +elif [ "$kokkos_build_type" = "hip" ]; then + export OMPI_CXX=hipcc + cmake_options+=( + ${HIP_ADDITIONS[@]} + ) +fi + +# Print CMake options for reference +echo "CMake Options: ${cmake_options[@]}" + +# Configure Trilinos +cmake "${cmake_options[@]}" -B "${TRILINOS_BUILD_DIR}" -S "${TRILINOS_SOURCE_DIR}" + +# Build Trilinos +echo "Building Trilinos..." +make -C "${TRILINOS_BUILD_DIR}" -j${FIERRO_BUILD_CORES} + +# Install Trilinos +echo "Installing Trilinos..." +make -C "${TRILINOS_BUILD_DIR}" install all + +echo "Trilinos installation complete." +fi From d6405f061cdd5a430e158d208b7f561724728646 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Wed, 9 Oct 2024 20:27:19 -0600 Subject: [PATCH 02/29] ENH: Trilinos build logic for mpi wrappers --- CMakeLists.txt | 8 +++- examples/CMakeLists.txt | 45 +++++++++++++++++++++-- examples/gArrayofgArrays/CMakeLists.txt | 4 +- examples/halfspace_cooling/CMakeLists.txt | 2 +- examples/laplace/CMakeLists.txt | 2 +- examples/laplaceMPI/CMakeLists.txt | 2 +- examples/matar_fortran/CMakeLists.txt | 4 +- examples/phaseFieldMPI/CMakeLists.txt | 4 +- examples/sparsetests/CMakeLists.txt | 2 +- examples/test_rocm/CMakeLists.txt | 4 +- examples/virtualFcnKokkos/CMakeLists.txt | 4 +- examples/virtualFcnMATAR/CMakeLists.txt | 2 +- examples/watt-graph/CMakeLists.txt | 2 +- scripts/cmake_build_examples.sh | 16 +++++++- scripts/matar-install.sh | 5 ++- scripts/trilinos-install.sh | 12 +++--- 16 files changed, 86 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ecbdd7c..4406c1b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ if(Matar_ENABLE_KOKKOS) if("${Matar_KOKKOS_PACKAGE}" STREQUAL "Trilinos") find_package(Trilinos REQUIRED) add_definitions(-DTRILINOS_INTERFACE=1) - elif(Matar_ENABLE_TRILINOS) + elseif(Matar_ENABLE_TRILINOS) find_package(Trilinos REQUIRED) add_definitions(-DTRILINOS_INTERFACE=1) else() @@ -61,7 +61,11 @@ if(Matar_ENABLE_KOKKOS) if (Matar_ENABLE_MPI) find_package(MPI REQUIRED) add_definitions(-DHAVE_MPI=1) - target_link_libraries(matar INTERFACE Kokkos::kokkos MPI::MPI_CXX) + if(Matar_ENABLE_TRILINOS) + target_link_libraries(matar INTERFACE Trilinos::all_selected_libs MPI::MPI_CXX) + else() + target_link_libraries(matar INTERFACE Kokkos::kokkos MPI::MPI_CXX) + endif() else() target_link_libraries(matar INTERFACE Kokkos::kokkos) endif() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0f3e6aab..d25d5d40 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,9 +10,9 @@ if (NOT TARGET distclean) INCLUDE(../cmake/Modules/TargetDistclean.cmake OPTIONAL) endif (NOT TARGET distclean) +find_package(Matar REQUIRED) set(LINKING_LIBRARIES matar) -find_package(Matar REQUIRED) if (MPI) find_package(MPI REQUIRED) add_definitions(-DHAVE_MPI=1) @@ -36,9 +36,43 @@ if (NOT KOKKOS) endif() if (KOKKOS) - find_package(Kokkos REQUIRED) #new + if (Matar_ENABLE_TRILINOS) + find_package(Trilinos REQUIRED) #new + # Assume if the CXX compiler exists, the rest do too. + if (EXISTS ${Trilinos_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER ${Trilinos_CXX_COMPILER}) + set(CMAKE_C_COMPILER ${Trilinos_C_COMPILER}) + set(CMAKE_Fortran_COMPILER ${Trilinos_Fortran_COMPILER}) + endif() + if(NOT DISTRIBUTION) + # Make sure to use same compilers and flags as Trilinos + set(CMAKE_CXX_FLAGS "${Trilinos_CXX_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "${Trilinos_C_COMPILER_FLAGS} ${CMAKE_C_FLAGS}") + set(CMAKE_Fortran_FLAGS "${Trilinos_Fortran_COMPILER_FLAGS} ${CMAKE_Fortran_FLAGS}") + endif() + + message("\nFound Trilinos! Here are the details: ") + message(" Trilinos_DIR = ${Trilinos_DIR}") + message(" Trilinos_VERSION = ${Trilinos_VERSION}") + message(" Trilinos_PACKAGE_LIST = ${Trilinos_PACKAGE_LIST}") + message(" Trilinos_LIBRARIES = ${Trilinos_LIBRARIES}") + message(" Trilinos_INCLUDE_DIRS = ${Trilinos_INCLUDE_DIRS}") + message(" Trilinos_LIBRARY_DIRS = ${Trilinos_LIBRARY_DIRS}") + message(" Trilinos_TPL_LIST = ${Trilinos_TPL_LIST}") + message(" Trilinos_TPL_INCLUDE_DIRS = ${Trilinos_TPL_INCLUDE_DIRS}") + message(" Trilinos_TPL_LIBRARIES = ${Trilinos_TPL_LIBRARIES}") + message(" Trilinos_TPL_LIBRARY_DIRS = ${Trilinos_TPL_LIBRARY_DIRS}") + message(" Trilinos_BUILD_SHARED_LIBS = ${Trilinos_BUILD_SHARED_LIBS}") + message("End of Trilinos details\n") + + include_directories(${Trilinos_INCLUDE_DIRS} ${Trilinos_TPL_INCLUDE_DIRS}) + list(APPEND LINKING_LIBRARIES Trilinos::all_selected_libs) + add_definitions(-DTRILINOS_INTERFACE=1) + else() + find_package(Kokkos REQUIRED) #new + list(APPEND LINKING_LIBRARIES Kokkos::kokkos) + endif() - list(APPEND LINKING_LIBRARIES Kokkos::kokkos) add_definitions(-DHAVE_KOKKOS=1) @@ -76,6 +110,11 @@ if (KOKKOS) add_executable(annkokkos ann_kokkos.cpp) target_link_libraries(annkokkos ${LINKING_LIBRARIES}) + if (Matar_ENABLE_TRILINOS) + #add_executable(anndistributed ann_distributed.cpp) + #target_link_libraries(anndistributed ${LINKING_LIBRARIES}) + endif() + if (OPENMP) add_executable(parallel_hello_world parallel_hello_world.cpp) target_link_libraries(parallel_hello_world ${LINKING_LIBRARIES}) diff --git a/examples/gArrayofgArrays/CMakeLists.txt b/examples/gArrayofgArrays/CMakeLists.txt index acbfb6fa..33a5fa97 100644 --- a/examples/gArrayofgArrays/CMakeLists.txt +++ b/examples/gArrayofgArrays/CMakeLists.txt @@ -4,7 +4,7 @@ find_package(Matar REQUIRED) if (KOKKOS) add_definitions(-DHAVE_KOKKOS=1) - find_package(Kokkos REQUIRED) + #find_package(Kokkos REQUIRED) add_definitions(-DHAVE_KOKKOS=1) if (CUDA) @@ -18,6 +18,6 @@ if (KOKKOS) endif() add_executable(viewofview main.cpp) - target_link_libraries(viewofview matar Kokkos::kokkos) + target_link_libraries(viewofview ${LINKING_LIBRARIES}) endif() diff --git a/examples/halfspace_cooling/CMakeLists.txt b/examples/halfspace_cooling/CMakeLists.txt index 5714039a..dbcaa6f9 100644 --- a/examples/halfspace_cooling/CMakeLists.txt +++ b/examples/halfspace_cooling/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.1.3) find_package(Matar REQUIRED) if (KOKKOS) - find_package(Kokkos REQUIRED) #new + #find_package(Kokkos REQUIRED) #new add_executable(halfspace_cooling halfspace_cooling.cpp) diff --git a/examples/laplace/CMakeLists.txt b/examples/laplace/CMakeLists.txt index 45d5fbd4..acbd4a1f 100644 --- a/examples/laplace/CMakeLists.txt +++ b/examples/laplace/CMakeLists.txt @@ -15,7 +15,7 @@ if (NOT KOKKOS) endif() if (KOKKOS) - find_package(Kokkos REQUIRED) #new + #find_package(Kokkos REQUIRED) #new add_executable(kokkosview main_kokkosview.cpp) add_executable(carraykokkos_default_indexing main_carraykokkos_default_indexing.cpp) diff --git a/examples/laplaceMPI/CMakeLists.txt b/examples/laplaceMPI/CMakeLists.txt index cedc01b2..d9d4ec6c 100644 --- a/examples/laplaceMPI/CMakeLists.txt +++ b/examples/laplaceMPI/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.1.3) if (KOKKOS) - find_package(Kokkos REQUIRED) #new + #find_package(Kokkos REQUIRED) #new find_package(MPI REQUIRED) #add_executable(laplace_mpi laplace_mpi.cpp) diff --git a/examples/matar_fortran/CMakeLists.txt b/examples/matar_fortran/CMakeLists.txt index f99bc326..37f6ce39 100644 --- a/examples/matar_fortran/CMakeLists.txt +++ b/examples/matar_fortran/CMakeLists.txt @@ -1,10 +1,10 @@ cmake_minimum_required(VERSION 3.18) # specify languages that project will use here -project(matar-fortran LANGUAGES CXX Fortran) +#project(matar-fortran LANGUAGES CXX Fortran) if (KOKKOS) - find_package(Kokkos REQUIRED) #new + #find_package(Kokkos REQUIRED) #new add_definitions(-DHAVE_KOKKOS=1) if (CUDA) diff --git a/examples/phaseFieldMPI/CMakeLists.txt b/examples/phaseFieldMPI/CMakeLists.txt index efa63e45..3650430a 100644 --- a/examples/phaseFieldMPI/CMakeLists.txt +++ b/examples/phaseFieldMPI/CMakeLists.txt @@ -1,13 +1,13 @@ cmake_minimum_required(VERSION 3.1.3) -project (phasefield_mpi) +#project (phasefield_mpi) if (KOKKOS) add_definitions(-DHAVE_KOKKOS=1) - find_package(Kokkos REQUIRED) + #find_package(Kokkos REQUIRED) find_package(MPI REQUIRED) # heffte compilation flags diff --git a/examples/sparsetests/CMakeLists.txt b/examples/sparsetests/CMakeLists.txt index f17d7f5f..b8e3164d 100644 --- a/examples/sparsetests/CMakeLists.txt +++ b/examples/sparsetests/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.1.3) if (KOKKOS) - find_package(Kokkos REQUIRED) + #find_package(Kokkos REQUIRED) add_definitions(-DHAVE_KOKKOS=1) add_executable(matVec matVec.cpp) diff --git a/examples/test_rocm/CMakeLists.txt b/examples/test_rocm/CMakeLists.txt index 39d9e61d..31c4c2e2 100644 --- a/examples/test_rocm/CMakeLists.txt +++ b/examples/test_rocm/CMakeLists.txt @@ -1,9 +1,9 @@ cmake_minimum_required(VERSION 3.1.3) -project (test_rocm) +#project (test_rocm) if (KOKKOS) - find_package(Kokkos REQUIRED) #new + #find_package(Kokkos REQUIRED) #new add_executable(test_rocm SomeClass.cpp main.cpp) diff --git a/examples/virtualFcnKokkos/CMakeLists.txt b/examples/virtualFcnKokkos/CMakeLists.txt index 0af21ba3..b0673270 100644 --- a/examples/virtualFcnKokkos/CMakeLists.txt +++ b/examples/virtualFcnKokkos/CMakeLists.txt @@ -1,11 +1,11 @@ cmake_minimum_required(VERSION 3.1.3) -project (virttestkokkos) +#project (virttestkokkos) find_package(Matar REQUIRED) if (KOKKOS) - find_package(Kokkos REQUIRED) #new + #find_package(Kokkos REQUIRED) #new add_executable(virttestkokkos child.cpp child.hpp inherited_inits.cpp inherited_inits.hpp kokkos_alias.h main_kokkos_vfcn.cpp parents.h) diff --git a/examples/virtualFcnMATAR/CMakeLists.txt b/examples/virtualFcnMATAR/CMakeLists.txt index 27a32df7..4e232051 100644 --- a/examples/virtualFcnMATAR/CMakeLists.txt +++ b/examples/virtualFcnMATAR/CMakeLists.txt @@ -4,7 +4,7 @@ find_package(Matar REQUIRED) if (KOKKOS) add_definitions(-DHAVE_KOKKOS=1) - find_package(Kokkos REQUIRED) + #find_package(Kokkos REQUIRED) add_definitions(-DHAVE_KOKKOS=1) if (CUDA) diff --git a/examples/watt-graph/CMakeLists.txt b/examples/watt-graph/CMakeLists.txt index 9491b38d..9db93716 100644 --- a/examples/watt-graph/CMakeLists.txt +++ b/examples/watt-graph/CMakeLists.txt @@ -7,7 +7,7 @@ if (NOT KOKKOS) endif() if (KOKKOS) - find_package(Kokkos REQUIRED) + #find_package(Kokkos REQUIRED) add_definitions(-DHAVE_KOKKOS=1) add_executable(test_kokkos_floyd kokkos_floyd.cpp) diff --git a/scripts/cmake_build_examples.sh b/scripts/cmake_build_examples.sh index cca1ceeb..b2254b75 100644 --- a/scripts/cmake_build_examples.sh +++ b/scripts/cmake_build_examples.sh @@ -10,7 +10,6 @@ then fi cmake_options=( - -D CMAKE_PREFIX_PATH="${MATAR_INSTALL_DIR};${KOKKOS_INSTALL_DIR}" -D CMAKE_BUILD_TYPE=Release #-D CMAKE_BUILD_TYPE=Debug ) @@ -19,13 +18,26 @@ if [ "$kokkos_build_type" = "none" ]; then cmake_options+=( -D KOKKOS=OFF ) +elif [ "$trilinos" = "enabled" ]; then + if [ ! -d "${TRILINOS_INSTALL_DIR}/lib" ]; then + Trilinos_DIR=${TRILINOS_INSTALL_DIR}/lib64/cmake/Trilinos + else + Trilinos_DIR=${TRILINOS_INSTALL_DIR}/lib/cmake/Trilinos + fi + cmake_options+=( + -D CMAKE_PREFIX_PATH="${MATAR_INSTALL_DIR}" + -D Trilinos_DIR="$Trilinos_DIR" + -D Matar_ENABLE_TRILINOS=ON + -D KOKKOS=ON + ) else cmake_options+=( + -D CMAKE_PREFIX_PATH="${MATAR_INSTALL_DIR};${KOKKOS_INSTALL_DIR}" -D KOKKOS=ON ) fi -if [[ "$kokkos_build_type" = *"mpi"* ]]; then +if [[ "$kokkos_build_type" = *"mpi"* ]] || [ "$trilinos" = "enabled" ]; then cmake_options+=( -D MPI=ON ) diff --git a/scripts/matar-install.sh b/scripts/matar-install.sh index 0b421c75..ccea03d7 100644 --- a/scripts/matar-install.sh +++ b/scripts/matar-install.sh @@ -20,9 +20,10 @@ elif [ "$trilinos" = "enabled" ]; then else Trilinos_DIR=${TRILINOS_INSTALL_DIR}/lib/cmake/Trilinos fi - cmake_options=( + cmake_options+=( -D Trilinos_DIR="$Trilinos_DIR" -D Matar_ENABLE_TRILINOS=ON + -D Matar_ENABLE_KOKKOS=ON ) else cmake_options+=( @@ -31,7 +32,7 @@ else ) fi -if [[ "$kokkos_build_type" = *"mpi"* ]]; then +if [[ "$kokkos_build_type" = *"mpi"* ]] || [ "$trilinos" = "enabled" ]; then cmake_options+=( -D Matar_ENABLE_MPI=ON ) diff --git a/scripts/trilinos-install.sh b/scripts/trilinos-install.sh index b6f4f3dd..be5ac2f7 100644 --- a/scripts/trilinos-install.sh +++ b/scripts/trilinos-install.sh @@ -46,8 +46,7 @@ CUDA_ADDITIONS=( -D KokkosKernels_ENABLE_TPL_CUBLAS=ON -D KokkosKernels_ENABLE_TPL_CUSPARSE=ON -D Tpetra_ENABLE_CUDA=ON --D Xpetra_ENABLE_Kokkos_Refactor=ON --D MueLu_ENABLE_Kokkos_Refactor=ON +-D MueLu_ENABLE_Kokkos_Refactor=OFF ) # Kokkos flags for Hip @@ -61,7 +60,6 @@ export OMPI_CXX=hipcc -D KokkosKernels_ENABLE_TPL_CUBLAS=OFF -D KokkosKernels_ENABLE_TPL_CUSPARSE=OFF -D Tpetra_INST_HIP=ON --D Xpetra_ENABLE_Kokkos_Refactor=ON ) # Kokkos flags for OpenMP @@ -134,17 +132,17 @@ ${ADDITIONS[@]} #) #fi -if [ "$kokkos_build_type" = "openmp" ]; then +if [ "$kokkos_build_type" = "openmp" ] || [ "$kokkos_build_type" = "openmp_mpi" ]; then cmake_options+=( ${OPENMP_ADDITIONS[@]} ) -elif [ "$kokkos_build_type" = "cuda" ]; then +elif [ "$kokkos_build_type" = "cuda" ] || [ "$kokkos_build_type" = "cuda_mpi" ]; then export OMPI_CXX=${TRILINOS_SOURCE_DIR}/packages/kokkos/bin/nvcc_wrapper export CUDA_LAUNCH_BLOCKING=1 cmake_options+=( ${CUDA_ADDITIONS[@]} ) -elif [ "$kokkos_build_type" = "hip" ]; then +elif [ "$kokkos_build_type" = *"hip"* ] || [ "$kokkos_build_type" = *"hip_mpi"* ]; then export OMPI_CXX=hipcc cmake_options+=( ${HIP_ADDITIONS[@]} @@ -159,7 +157,7 @@ cmake "${cmake_options[@]}" -B "${TRILINOS_BUILD_DIR}" -S "${TRILINOS_SOURCE_DIR # Build Trilinos echo "Building Trilinos..." -make -C "${TRILINOS_BUILD_DIR}" -j${FIERRO_BUILD_CORES} +make -C "${TRILINOS_BUILD_DIR}" -j${MATAR_BUILD_CORES} # Install Trilinos echo "Installing Trilinos..." From 8038c93b7a5795f3beff84a74cd83b3bde4289a0 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Thu, 10 Oct 2024 18:13:01 -0600 Subject: [PATCH 03/29] WIP: tpetra wrapper example --- examples/CMakeLists.txt | 4 +- examples/ann_distributed.cpp | 347 +++++++++++++++++++++++++++++ src/include/tpetra_wrapper_types.h | 96 ++++---- 3 files changed, 390 insertions(+), 57 deletions(-) create mode 100644 examples/ann_distributed.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index d25d5d40..1705442e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -111,8 +111,8 @@ if (KOKKOS) target_link_libraries(annkokkos ${LINKING_LIBRARIES}) if (Matar_ENABLE_TRILINOS) - #add_executable(anndistributed ann_distributed.cpp) - #target_link_libraries(anndistributed ${LINKING_LIBRARIES}) + add_executable(anndistributed ann_distributed.cpp) + target_link_libraries(anndistributed ${LINKING_LIBRARIES}) endif() if (OPENMP) diff --git a/examples/ann_distributed.cpp b/examples/ann_distributed.cpp new file mode 100644 index 00000000..3d8b8caa --- /dev/null +++ b/examples/ann_distributed.cpp @@ -0,0 +1,347 @@ +/********************************************************************************************** + � 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#include +#include +#include +#include +#include + +#include "matar.h" + +using namespace mtr; // matar namespace + + + +// ================================================================= +// Artificial Neural Network (ANN) +// +// For a single layer, we have x_i inputs with weights_{ij}, +// creating y_j outputs. We have +// y_j = Fcn(b_j) = Fcn( Sum_i {x_i w_{ij}} ) +// where the activation function Fcn is applied to b_j, creating +// outputs y_j. For multiple layers, we have +// b_j^l = Sum_i (x_i^{l-1} w_{ij}^l) +// where l is a layer, and as before, an activation function is +// applied to b_j^l, creating outputs y_j^l. +// +// ================================================================= + + +// ================================================================= +// +// Number of nodes in each layer including inputs and outputs +// +// ================================================================= +std::vector num_nodes_in_layer = {64000, 32000, 16000, 8000, 4000, 100, 25, 6} ; +// {9, 50, 100, 300, 200, 100, 20, 6} + + + +// ================================================================= +// +// data types and classes +// +// ================================================================= + +// array of ANN structs +struct ANNLayer_t{ + + DCArrayKokkos outputs; // dims = [layer] + DFArrayKokkos weights; // dims = [layer-1, layer] + DCArrayKokkos biases; // dims = [layer] + TpetraMVArray distributed_outputs; + TpetraPartitionMap input_vector_map, output_vector_map; + TpetraMVArray distributed_weights; + +}; // end struct + + + +// ================================================================= +// +// functions +// +// ================================================================= +void vec_mat_multiply(DCArrayKokkos &inputs, + DCArrayKokkos &outputs, + DFArrayKokkos &matrix){ + + const size_t num_i = inputs.size(); + const size_t num_j = outputs.size(); + + using team_t = typename Kokkos::TeamPolicy<>::member_type; + Kokkos::parallel_for ("MatVec", Kokkos::TeamPolicy<> (num_j, Kokkos::AUTO), + KOKKOS_LAMBDA (const team_t& team_h) { + + float sum = 0; + int j = team_h.league_rank(); + Kokkos::parallel_reduce (Kokkos::TeamThreadRange (team_h, num_i), + [&] (int i, float& lsum) { + lsum += inputs(i)*matrix(i,j); + }, sum); // end parallel reduce + + outputs(j) = sum; + + }); // end parallel for + + + FOR_ALL(j,0,num_j, { + if(fabs(outputs(j) - num_i)>= 1e-15){ + printf("error in vec mat multiply test \n"); + } + }); + + return; + +}; // end function + +KOKKOS_INLINE_FUNCTION +float sigmoid(const float value){ + return 1.0/(1.0 + exp(-value)); // exp2f doesn't work with CUDA +}; // end function + + +KOKKOS_INLINE_FUNCTION +float sigmoid_derivative(const float value){ + float sigval = sigmoid(value); + return sigval*(1.0 - sigval); // exp2f doesn't work with CUDA +}; // end function + + + + +void forward_propagate_layer(DCArrayKokkos &inputs, + DCArrayKokkos &outputs, + DFArrayKokkos &weights, + const DCArrayKokkos &biases){ + + const size_t num_i = inputs.size(); + const size_t num_j = outputs.size(); + + +/* + FOR_ALL(j, 0, num_j,{ + + //printf("thread = %d \n", omp_get_thread_num()); + + float value = 0.0; + for(int i=0; i::member_type; + Kokkos::parallel_for ("MatVec", Kokkos::TeamPolicy<> (num_j, Kokkos::AUTO), + KOKKOS_LAMBDA (const team_t& team_h) { + + float sum = 0; + int j = team_h.league_rank(); + Kokkos::parallel_reduce (Kokkos::TeamThreadRange (team_h, num_i), + [&] (int i, float& lsum) { + lsum += inputs(i)*weights(i,j) + biases(j); + }, sum); // end parallel reduce + + outputs(j) = 1.0/(1.0 + exp(-sum)); + + }); // end parallel for + + + + return; + +}; // end function + + +void set_biases(const DCArrayKokkos &biases){ + const size_t num_j = biases.size(); + + FOR_ALL(j,0,num_j, { + biases(j) = 0.0; + }); // end parallel for + +}; // end function + + +void set_weights(const DFArrayKokkos &weights){ + + const size_t num_i = weights.dims(0); + const size_t num_j = weights.dims(1); + + FOR_ALL(i,0,num_i, + j,0,num_j, { + + weights(i,j) = 1.0; + }); // end parallel for + +}; // end function + + +// ================================================================= +// +// Main function +// +// ================================================================= +int main(int argc, char* argv[]) +{ + Kokkos::initialize(argc, argv); + { + + // ================================================================= + // allocate arrays + // ================================================================= + + // note: the num_nodes_in_layer has the inputs into the ANN, so subtract 1 for the layers + size_t num_layers = num_nodes_in_layer.size()-1; + + CMatrix ANNLayers(num_layers); // starts at 1 and goes to num_layers + + // input and ouput values to ANN + DCArrayKokkos inputs(num_nodes_in_layer[0]); + + + // set the strides + // layer 0 are the inputs to the ANN + // layer n-1 are the outputs from the ANN + for (size_t layer=1; layer<=num_layers; layer++){ + + // dimensions + size_t num_i = num_nodes_in_layer[layer-1]; + size_t num_j = num_nodes_in_layer[layer]; + + // allocate the weights in this layer + ANNLayers(layer).weights = DFArrayKokkos (num_i, num_j); + ANNLayers(layer).outputs = DCArrayKokkos (num_j); + ANNLayers(layer).biases = DCArrayKokkos (num_j); + + } // end for + + + // ================================================================= + // set weights, biases, and inputs + // ================================================================= + + // inputs to ANN + for (size_t i=0; i