Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add google benchmark #86

Merged
merged 7 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
install-*
benchmark/benchmark*
build-matar-*
install/*
54 changes: 54 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
cmake_minimum_required(VERSION 3.1.3)

project (matarbenchmark)

# Google Benchmark is expected to have been built in benchmark/build next to
# this file. Anchor the path to this directory so the lookup does not depend on
# the directory cmake happens to resolve relative paths against, and let a
# value supplied by the caller (-Dbenchmark_DIR=...) take precedence.
if (NOT benchmark_DIR)
    set(benchmark_DIR "${CMAKE_CURRENT_SOURCE_DIR}/benchmark/build")
endif()

find_package(benchmark REQUIRED)
find_package(Matar REQUIRED)

# Host-only configuration: just the CArray benchmark, without Kokkos.
# The target is named BM_CArray in both configurations so the executable name
# does not depend on whether Kokkos is enabled.
if (NOT KOKKOS)
    add_executable(BM_CArray src/CArray_benchmark.cpp)
    target_link_libraries(BM_CArray matar benchmark::benchmark)
endif()

# Kokkos configuration: the host benchmark plus the device-array benchmark.
if (KOKKOS)
    find_package(Kokkos REQUIRED) #new

    # Lets the sources detect at compile time that Kokkos is available.
    add_definitions(-DHAVE_KOKKOS=1)

    add_executable(BM_CArray src/CArray_benchmark.cpp)
    target_link_libraries(BM_CArray matar Kokkos::kokkos benchmark::benchmark)

    add_executable(BM_CArrayDevice src/CArrayDevice_benchmark.cpp)
    target_link_libraries(BM_CArrayDevice matar Kokkos::kokkos benchmark::benchmark)

    # Advertise the selected parallel back end to the sources (first match wins).
    if (CUDA)
        add_definitions(-DHAVE_CUDA=1)
    elseif (HIP)
        add_definitions(-DHAVE_HIP=1)
    elseif (OPENMP)
        add_definitions(-DHAVE_OPENMP=1)
    elseif (THREADS)
        add_definitions(-DHAVE_THREADS=1)
    endif()
endif()

# find_package(Kokkos REQUIRED) #new

# set(This matar_benchmark)

# set(Sources
# src/serial_types_benchmark.cpp
# )




# if (KOKKOS)
# target_link_libraries(${This} matar Kokkos::kokkos benchmark::benchmark)
# endif ()
112 changes: 112 additions & 0 deletions benchmark/src/CArrayDevice_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <assert.h>
#include <memory> // for shared_ptr
#include <benchmark/benchmark.h>
#include "matar.h"

using namespace mtr; // matar namespace

// ------- vector vector multiply ------------- //
/// Benchmarks the element-wise product C(i) = A(i) * B(i) of two 1D
/// CArrayDevice<double> arrays.
///
/// @param state  Google Benchmark state; state.range(0) is the array length.
static void BM_CArrayDevice_1d_multiply(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArrayDevice<double> A(size);
    CArrayDevice<double> B(size);
    CArrayDevice<double> C(size);

    // Untimed setup: fill both operands.
    FOR_ALL(i, 0, size, {
        A(i) = (double)i+1.0;
        B(i) = (double)i+2.0;
    });
    // Make sure the setup kernel has finished so it cannot spill into the
    // first timed iteration.
    Kokkos::fence();

    // Begin benchmarked section
    for (auto _ : state) {
        FOR_ALL(i, 0, size, {
            C(i) = A(i)*B(i);
        });
        // Kokkos parallel dispatch may return before the kernel has completed;
        // wait here so the timing covers execution and not only the launch.
        Kokkos::fence();
    } // end benchmarked section
}
BENCHMARK(BM_CArrayDevice_1d_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark Multiplying 2 1D CArrayDevice of size ")
    ->RangeMultiplier(2)->Range(1<<12, 1<<20);

// ------- vector vector dot product ------------- //
/// Benchmarks the dot product of two 1D CArrayDevice<double> arrays computed
/// with REDUCE_SUM.
///
/// @param state  Google Benchmark state; state.range(0) is the array length.
static void BM_CArrayDevice_vec_vec_dot(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArrayDevice<double> A(size);
    CArrayDevice<double> B(size);

    // Untimed setup: fill both operands.
    FOR_ALL(i, 0, size, {
        A(i) = (double)i+1.0;
        B(i) = (double)i+2.0;
    });
    // Make sure the setup kernel has finished before timing starts.
    Kokkos::fence();

    // Begin benchmarked section
    for (auto _ : state) {
        double loc_sum = 0;
        double C = 0;   // receives the reduced value
        REDUCE_SUM(i, 0, size,
                   loc_sum, {
            loc_sum += A(i)*B(i);
        }, C);

        // The result is otherwise unused; mark it as observed so the
        // reduction cannot be discarded by the optimizer.
        benchmark::DoNotOptimize(C);
    } // end benchmarked section
}
BENCHMARK(BM_CArrayDevice_vec_vec_dot)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark dot product of 2 1D CArrayDevice of size ")
    ->RangeMultiplier(2)->Range(1<<12, 1<<20);


// ------- matrix matrix multiply ------------- //
/// Benchmarks a dense matrix-matrix multiply C += A * B on square
/// CArrayDevice<double> matrices.
///
/// C is zeroed once during setup and keeps accumulating across timed
/// iterations; only the multiply kernel is timed.
///
/// @param state  Google Benchmark state; state.range(0) is the matrix
///               dimension (matrices are size x size).
static void BM_CArrayDevice_mat_mat_multiply(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArrayDevice<double> A(size, size);
    CArrayDevice<double> B(size, size);
    CArrayDevice<double> C(size, size);

    // Untimed setup: fill the operands and clear the accumulator.
    FOR_ALL(i, 0, size,
            j, 0, size, {
        A(i,j) = (double)i+(double)j+1.0;
        B(i,j) = (double)i+(double)j+2.0;
        C(i,j) = 0.0;
    });
    // Make sure the setup kernel has finished before timing starts.
    Kokkos::fence();

    // Begin benchmarked section
    for (auto _ : state) {
        // Parallelise over the OUTPUT element (i,k) and keep the contraction
        // index j sequential. Parallelising over (i,j) would let every j of a
        // given row update the same C(i,k) concurrently, which is a data race.
        FOR_ALL(i, 0, size,
                k, 0, size, {
            for (int j = 0; j < size; j++) {
                C(i,k) += A(i,j)*B(j,k);
            }
        });
        // Kokkos parallel dispatch may return before the kernel has completed;
        // wait here so the timing covers execution and not only the launch.
        Kokkos::fence();
    } // end benchmarked section
}
BENCHMARK(BM_CArrayDevice_mat_mat_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark matrix-matrix multiply of CArrayDevice of size ")
    ->RangeMultiplier(2)->Range(1<<3, 1<<10);


// Run Benchmarks
/// Entry point: brings up Kokkos, runs the registered benchmarks and tears
/// both libraries down again.
///
/// @return 0 on success, 1 if the command line holds arguments that neither
///         Kokkos nor Google Benchmark recognised.
int main(int argc, char** argv)
{
    // Kokkos goes first: it consumes its own command-line flags so that
    // Google Benchmark does not report them as unrecognised below.
    Kokkos::initialize(argc, argv);
    ::benchmark::Initialize(&argc, argv);

    int status = 0;
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
        status = 1;   // a mistyped flag should fail loudly, not be ignored
    } else {
        ::benchmark::RunSpecifiedBenchmarks();
    }

    ::benchmark::Shutdown();
    Kokkos::finalize();
    return status;
}
106 changes: 106 additions & 0 deletions benchmark/src/CArray_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <assert.h>
#include <memory> // for shared_ptr
#include <benchmark/benchmark.h>
#include "matar.h"

using namespace mtr; // matar namespace

// ------- vector vector multiply ------------- //
/// Times the element-wise product of two 1D CArray<double> operands.
/// The operand length comes from state.range(0); the operands are filled
/// once, outside the timed loop.
static void BM_CArray_1d_multiply(benchmark::State& state)
{
    const int length = static_cast<int>(state.range(0));

    CArray<double> lhs(length);
    CArray<double> rhs(length);
    CArray<double> product(length);

    // Untimed setup.
    int fill = 0;
    while (fill < length) {
        lhs(fill) = (double)fill + 1.0;
        rhs(fill) = (double)fill + 2.0;
        ++fill;
    }

    // Timed region.
    for (auto _ : state) {
        int idx = 0;
        while (idx < length) {
            product(idx) = lhs(idx) * rhs(idx);
            ++idx;
        }
    }
}
BENCHMARK(BM_CArray_1d_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark Multiplying 2 1D CArrays of size ")
    ->RangeMultiplier(2)
    ->Range(1<<12, 1<<20);


// ------- vector vector dot product ------------- //
/// Benchmarks the dot product of two 1D CArray<double> arrays.
///
/// Allocation and initialisation happen once, outside the timed loop, so that
/// only the dot product itself is measured.
///
/// @param state  Google Benchmark state; state.range(0) is the array length.
static void BM_Carray_vec_vec_dot(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArray<double> A(size);
    CArray<double> B(size);

    // Untimed setup: fill both operands.
    for (int i = 0; i < size; i++) {
        A(i) = (double)i+1.0;
        B(i) = (double)i+2.0;
    }

    // Begin benchmarked section
    for (auto _ : state) {
        double C = 0.0;   // fresh accumulator for every timed iteration

        for (int i = 0; i < size; i++) {
            C += A(i)*B(i);
        }

        // The sum is otherwise unused; mark it as observed so the loop
        // cannot be removed by the optimizer.
        benchmark::DoNotOptimize(C);
    } // end benchmarked section
}
// Register the dot-product function itself (not the 1D multiply) under the
// dot-product label.
BENCHMARK(BM_Carray_vec_vec_dot)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark dot product of 2 1D CArrays of size ")
    ->RangeMultiplier(2)->Range(1<<12, 1<<20);

// ------- matrix matrix multiply ------------- //
/// Benchmarks a dense matrix-matrix multiply C += A * B on square
/// CArray<double> matrices.
///
/// Allocation and initialisation happen once, outside the timed loop, so that
/// only the multiply is measured. C is zeroed during setup and keeps
/// accumulating across timed iterations.
///
/// @param state  Google Benchmark state; state.range(0) is the matrix
///               dimension (matrices are size x size).
static void BM_CArray_mat_mat_multiply(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArray<double> A(size, size);
    CArray<double> B(size, size);
    CArray<double> C(size, size);

    // Untimed setup: fill the operands and clear the accumulator.
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            A(i,j) = (double)i+(double)j+1.0;
            B(i,j) = (double)i+(double)j+2.0;
            C(i,j) = 0.0;
        }
    }

    // Begin benchmarked section
    for (auto _ : state) {
        for (int i = 0; i < size; i++) {
            for (int j = 0; j < size; j++) {
                for (int k = 0; k < size; k++) {
                    C(i,k) += A(i,j)*B(j,k);
                }
            }
        }
    } // end benchmarked section
}
BENCHMARK(BM_CArray_mat_mat_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark matrix-matrix multiply of CArrays of size ")
    ->RangeMultiplier(2)->Range(1<<3, 1<<10);

// Run benchmarks
/// Entry point: runs the registered host-array benchmarks.
///
/// Kokkos is brought up and torn down only when the build defines
/// HAVE_KOKKOS, so this file also compiles in a configuration without Kokkos.
///
/// @return 0 on success, 1 if the command line holds arguments that were not
///         recognised.
int main(int argc, char** argv)
{
#ifdef HAVE_KOKKOS
    // Kokkos goes first: it consumes its own command-line flags so that
    // Google Benchmark does not report them as unrecognised below.
    Kokkos::initialize(argc, argv);
#endif
    ::benchmark::Initialize(&argc, argv);

    int status = 0;
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
        status = 1;   // a mistyped flag should fail loudly, not be ignored
    } else {
        ::benchmark::RunSpecifiedBenchmarks();
    }

    ::benchmark::Shutdown();
#ifdef HAVE_KOKKOS
    Kokkos::finalize();
#endif
    return status;
}
3 changes: 2 additions & 1 deletion scripts/build-matar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ show_help() {
echo " "
echo " examples builds examples"
echo " test builds tests"
echo " benchmark builds benchmarks for MATAR"
echo " "
echo " --kokkos_build_type The desired kokkos parallel backend to use. The default is 'serial'"
echo " "
Expand Down Expand Up @@ -52,7 +53,7 @@ build_cores="1"

# Define arrays of valid options
valid_build_action=("full-app" "set-env" "install-matar" "install-kokkos" "matar")
valid_execution=("examples" "test")
valid_execution=("examples" "test" "benchmark")
valid_kokkos_build_types=("none" "serial" "openmp" "pthreads" "cuda" "hip")
valid_machines=("darwin" "chicoma" "linux" "mac")

Expand Down
42 changes: 42 additions & 0 deletions scripts/cmake_build_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash -e

# Configures and builds the MATAR benchmarks.
#
# Usage: cmake_build_benchmark.sh <kokkos_build_type>
#   <kokkos_build_type>  "none" configures with KOKKOS=OFF; any other value
#                        configures with KOKKOS=ON.
#
# Reads from the environment: BENCHMARK_SOURCE_DIR, BENCHMARK_BUILD_DIR,
# BENCHMARK_INSTALL_DIR, MATAR_INSTALL_DIR, KOKKOS_INSTALL_DIR,
# MATAR_BUILD_CORES and basedir.

kokkos_build_type="${1}"

# Parallel job count; fall back to a single job when the variable is unset so
# that "-j" is never passed without a number.
build_jobs="${MATAR_BUILD_CORES:-1}"

# Fetch and build Google Benchmark the first time the script runs.
if [ ! -d "${BENCHMARK_SOURCE_DIR}/benchmark" ]
then
    echo "Missing googlebenchmark for benchmarking, downloading and installing...."
    git clone https://github.com/google/benchmark.git "${BENCHMARK_SOURCE_DIR}/benchmark"
    cd "${BENCHMARK_SOURCE_DIR}/benchmark"
    cmake -E make_directory "build"
    cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../
    cmake --build "build" --config Release -j"${build_jobs}"
    # Test install
    cmake -E chdir "build" ctest --build-config Release
fi

cmake_options=(
    -D CMAKE_PREFIX_PATH="${MATAR_INSTALL_DIR};${KOKKOS_INSTALL_DIR};${BENCHMARK_INSTALL_DIR}"
    -D BENCHMARK_DOWNLOAD_DEPENDENCIES=on
    -DCMAKE_BUILD_TYPE=Release
)

if [ "$kokkos_build_type" = "none" ]; then
    cmake_options+=(
        -D KOKKOS=OFF
    )
else
    cmake_options+=(
        -D KOKKOS=ON
    )
fi

# Print CMake options for reference
echo "CMake Options: ${cmake_options[*]}"

cmake "${cmake_options[@]}" -B "${BENCHMARK_BUILD_DIR}" -S "${BENCHMARK_SOURCE_DIR}"

# Build benchmark
make -C "${BENCHMARK_BUILD_DIR}" -j"${build_jobs}"

# Return to the project root when it is known; skip the cd otherwise rather
# than silently changing to $HOME.
if [ -n "${basedir:-}" ]; then
    cd "${basedir}"
fi
4 changes: 4 additions & 0 deletions scripts/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ export EXAMPLE_BUILD_DIR=${builddir}
export TEST_SOURCE_DIR=${basedir}/test
export TEST_BUILD_DIR=${builddir}

export BENCHMARK_SOURCE_DIR=${basedir}/benchmark
export BENCHMARK_INSTALL_DIR=${basedir}/benchmark/build
export BENCHMARK_BUILD_DIR=${builddir}

export KOKKOS_SOURCE_DIR=${basedir}/src/Kokkos/kokkos
export KOKKOS_BUILD_DIR=${builddir}/kokkos
export KOKKOS_INSTALL_DIR=${installdir}/kokkos
Expand Down
4 changes: 2 additions & 2 deletions src/kokkos_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -7523,7 +7523,7 @@ DynamicRaggedRightArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::DynamicRaggedRig
stride_ = SArray1D(strides_tag_string, dim1_);
#ifdef HAVE_CLASS_LAMBDA
Kokkos::parallel_for("StridesInit", dim1_, KOKKOS_CLASS_LAMBDA(const int i) {
strides_((i) = 0;
strides_(i) = 0;
});
#else
set_strides_functor execution_functor(0, stride_);
Expand Down Expand Up @@ -7700,7 +7700,7 @@ DynamicRaggedDownArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::DynamicRaggedDown
stride_ = SArray1D(strides_tag_string, dim2_);
#ifdef HAVE_CLASS_LAMBDA
Kokkos::parallel_for("StridesInit", dim2_, KOKKOS_CLASS_LAMBDA(const int i) {
strides_((i) = 0;
strides_(i) = 0;
});
#else
set_strides_functor execution_functor(0, stride_);
Expand Down
Loading