Skip to content

Commit

Permalink
Merge pull request #86 from lanl/benchmark
Browse files Browse the repository at this point in the history
Add google benchmark
  • Loading branch information
jacob-moore22 authored May 20, 2024
2 parents 6c8abc3 + 9763ae3 commit 513f587
Show file tree
Hide file tree
Showing 8 changed files with 325 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
install-*
benchmark/benchmark*
build-matar-*
install/*
54 changes: 54 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Build configuration for the MATAR Google Benchmark executables.
#
# Options read from the cache / command line:
#   KOKKOS                        - build the Kokkos-enabled benchmarks
#   CUDA | HIP | OPENMP | THREADS - select which HAVE_<backend> define is added
cmake_minimum_required(VERSION 3.1.3)

project (matarbenchmark)

# Google Benchmark is cloned into ./benchmark and built in ./benchmark/build by
# scripts/cmake_build_benchmark.sh. Anchor the hint to this directory so the
# lookup does not rely on how a relative <Package>_DIR value is resolved.
set(benchmark_DIR "${CMAKE_CURRENT_SOURCE_DIR}/benchmark/build")

find_package(benchmark REQUIRED)
find_package(Matar REQUIRED)

if (NOT KOKKOS)
    # Host-only build: same target name as the Kokkos build so that callers
    # find the executable under one name regardless of configuration.
    add_executable(BM_CArray src/CArray_benchmark.cpp)
    target_link_libraries(BM_CArray matar benchmark::benchmark)
endif()

if (KOKKOS)
    find_package(Kokkos REQUIRED)

    add_definitions(-DHAVE_KOKKOS=1)

    add_executable(BM_CArray src/CArray_benchmark.cpp)
    target_link_libraries(BM_CArray matar Kokkos::kokkos benchmark::benchmark)

    add_executable(BM_CArrayDevice src/CArrayDevice_benchmark.cpp)
    target_link_libraries(BM_CArrayDevice matar Kokkos::kokkos benchmark::benchmark)

    # At most one backend define is added; the first enabled option wins.
    if (CUDA)
        add_definitions(-DHAVE_CUDA=1)
    elseif (HIP)
        add_definitions(-DHAVE_HIP=1)
    elseif (OPENMP)
        add_definitions(-DHAVE_OPENMP=1)
    elseif (THREADS)
        add_definitions(-DHAVE_THREADS=1)
    endif()
endif()
112 changes: 112 additions & 0 deletions benchmark/src/CArrayDevice_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <assert.h>
#include <memory> // for shared_ptr
#include <benchmark/benchmark.h>
#include "matar.h"

using namespace mtr; // matar namespace

// ------- vector vector multiply ------------- //
/// Benchmarks the element-wise product C(i) = A(i) * B(i) of two 1D
/// CArrayDevice<double> arrays.
///
/// @param state  Google Benchmark state; state.range(0) is the array length.
static void BM_CArrayDevice_1d_multiply(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArrayDevice<double> A(size);
    CArrayDevice<double> B(size);
    CArrayDevice<double> C(size);

    FOR_ALL(i, 0, size, {
        A(i) = (double)i+1.0;
        B(i) = (double)i+2.0;
    });
    // NOTE(review): FOR_ALL presumably dispatches a Kokkos parallel_for, which
    // may return before the kernel has finished on device backends. Fence so
    // the setup kernel is not charged to the first timed iteration.
    Kokkos::fence();

    // Begin benchmarked section
    for (auto _ : state){
        FOR_ALL(i, 0, size, {
            C(i) = A(i)*B(i);
        });
        // Wait for the kernel so the timer covers its execution and not only
        // its launch.
        Kokkos::fence();
    } // end benchmarked section
}
BENCHMARK(BM_CArrayDevice_1d_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark Multiplying 2 1D CArrayDevice of size ")
    ->RangeMultiplier(2)->Range(1<<12, 1<<20);

// ------- vector vector dot product ------------- //
/// Benchmarks the dot product of two 1D CArrayDevice<double> arrays using
/// REDUCE_SUM.
///
/// @param state  Google Benchmark state; state.range(0) is the array length.
static void BM_CArrayDevice_vec_vec_dot(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArrayDevice<double> A(size);
    CArrayDevice<double> B(size);

    FOR_ALL(i, 0, size, {
        A(i) = (double)i+1.0;
        B(i) = (double)i+2.0;
    });
    // Make sure the setup kernel has completed before timing starts.
    Kokkos::fence();

    // Begin benchmarked section
    for (auto _ : state){

        double loc_sum = 0;
        double C = 0;
        REDUCE_SUM(i, 0, size,
                   loc_sum, {
            loc_sum += A(i)*B(i);
        }, C);

        // Consume the result so the reduction cannot be treated as dead code.
        benchmark::DoNotOptimize(C);
    } // end benchmarked section
}
BENCHMARK(BM_CArrayDevice_vec_vec_dot)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark dot product of 2 1D CArrayDevice of size ")
    ->RangeMultiplier(2)->Range(1<<12, 1<<20);


// ------- matrix matrix multiply ------------- //
/// Benchmarks the dense product C = A * B of two square
/// CArrayDevice<double> matrices.
///
/// @param state  Google Benchmark state; state.range(0) is the number of rows
///               (and columns) of each matrix.
static void BM_CArrayDevice_mat_mat_multiply(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArrayDevice<double> A(size, size);
    CArrayDevice<double> B(size, size);
    CArrayDevice<double> C(size, size);

    FOR_ALL(i, 0, size,
            j, 0, size, {
        A(i,j) = (double)i+(double)j+1.0;
        B(i,j) = (double)i+(double)j+2.0;
        C(i,j) = 0.0;
    });
    // Make sure the setup kernel has completed before timing starts.
    Kokkos::fence();

    // Begin benchmarked section
    for (auto _ : state){

        // Parallelise over the output entries (i,k) and reduce over j inside
        // the work item: each work item owns exactly one C(i,k), so no two
        // work items write the same location, and C holds A*B after every
        // iteration instead of accumulating across iterations.
        FOR_ALL(i, 0, size,
                k, 0, size, {
            double sum = 0.0;
            for(int j = 0; j < size; j++){
                sum += A(i,j)*B(j,k);
            }
            C(i,k) = sum;
        });
        // Wait for the kernel so the timer covers its execution and not only
        // its launch.
        Kokkos::fence();
    } // end benchmarked section
}
BENCHMARK(BM_CArrayDevice_mat_mat_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark matrix-matrix multiply of CArrayDevice of size ")
    ->RangeMultiplier(2)->Range(1<<3, 1<<10);


// Run Benchmarks
/// Entry point for the CArrayDevice benchmarks.
///
/// Kokkos is initialised before any benchmark runs and finalised afterwards,
/// so every CArrayDevice created inside a benchmark lives within that window.
///
/// @return 0 on success, 1 if the command line contains arguments that
///         neither Kokkos nor Google Benchmark recognised.
int main(int argc, char** argv)
{
    // Forward the command line so Kokkos runtime flags take effect; Kokkos
    // removes the flags it consumes from argv.
    Kokkos::initialize(argc, argv);
    ::benchmark::Initialize(&argc, argv);

    int status = 0;
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
        // Refuse to run with a mistyped flag rather than silently ignoring it.
        status = 1;
    } else {
        ::benchmark::RunSpecifiedBenchmarks();
    }

    Kokkos::finalize();
    return status;
}
106 changes: 106 additions & 0 deletions benchmark/src/CArray_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <assert.h>
#include <memory> // for shared_ptr
#include <benchmark/benchmark.h>
#include "matar.h"

using namespace mtr; // matar namespace

// ------- vector vector multiply ------------- //
// Times the element-wise product C(i) = A(i) * B(i) of two 1D CArrays on the
// host. The array length comes from state.range(0); filling A and B happens
// once, before the timed loop.
static void BM_CArray_1d_multiply(benchmark::State& state)
{
    int length = state.range(0);

    CArray<double> A(length);
    CArray<double> B(length);
    CArray<double> C(length);

    // Fill the operands: A = 1, 2, 3, ... and B = 2, 3, 4, ...
    int idx = 0;
    while (idx < length) {
        A(idx) = static_cast<double>(idx) + 1.0;
        B(idx) = static_cast<double>(idx) + 2.0;
        idx++;
    }

    // Timed region: one full pass over the arrays per benchmark iteration
    for (auto _ : state) {
        for (int elem = 0; elem < length; elem++) {
            C(elem) = A(elem) * B(elem);
        }
    }
}
BENCHMARK(BM_CArray_1d_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark Multiplying 2 1D CArrays of size ")
    ->RangeMultiplier(2)
    ->Range(1<<12, 1<<20);


// ------- vector vector dot product ------------- //
/// Benchmarks the serial dot product of two 1D CArray<double> arrays.
///
/// Allocation and initialisation are done once, outside the timed loop, so
/// only the reduction itself is measured.
///
/// @param state  Google Benchmark state; state.range(0) is the array length.
static void BM_Carray_vec_vec_dot(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArray<double> A(size);
    CArray<double> B(size);

    for(int i = 0; i < size; i++){
        A(i) = (double)i+1.0;
        B(i) = (double)i+2.0;
    }

    // Begin benchmarked section
    for (auto _ : state){
        double C = 0.0;

        for(int i = 0; i < size; i++){
            C += A(i)*B(i);
        }

        // Consume the result so the reduction loop cannot be optimised away.
        benchmark::DoNotOptimize(C);
    } // end benchmarked section
}
// Register the dot-product function itself under the dot-product name.
BENCHMARK(BM_Carray_vec_vec_dot)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark dot product of 2 1D CArrays of size ")
    ->RangeMultiplier(2)->Range(1<<12, 1<<20);

// ------- matrix matrix multiply ------------- //
/// Benchmarks the serial dense product C = A * B of two square
/// CArray<double> matrices.
///
/// Allocation and initialisation are done once, outside the timed loop, so
/// the measurement covers the multiplication only.
///
/// @param state  Google Benchmark state; state.range(0) is the number of rows
///               (and columns) of each matrix.
static void BM_CArray_mat_mat_multiply(benchmark::State& state)
{
    const int size = static_cast<int>(state.range(0));

    CArray<double> A(size, size);
    CArray<double> B(size, size);
    CArray<double> C(size, size);

    for(int i = 0; i < size; i++){
        for(int j = 0; j < size; j++){
            A(i,j) = (double)i+(double)j+1.0;
            B(i,j) = (double)i+(double)j+2.0;
            C(i,j) = 0.0;
        }
    }

    // Begin benchmarked section
    for (auto _ : state){
        for(int i = 0; i < size; i++){
            // Reset the output row so every iteration computes A*B from
            // scratch instead of accumulating onto the previous result.
            for(int k = 0; k < size; k++){
                C(i,k) = 0.0;
            }
            for(int j = 0; j < size; j++){
                const double a_ij = A(i,j);
                for(int k = 0; k < size; k++){
                    C(i,k) += a_ij*B(j,k);
                }
            }
        }
    } // end benchmarked section
}
BENCHMARK(BM_CArray_mat_mat_multiply)
    ->Unit(benchmark::kMillisecond)
    ->Name("Benchmark matrix-matrix multiply of CArrays of size ")
    ->RangeMultiplier(2)->Range(1<<3, 1<<10);

// Run benchmarks
/// Entry point for the host CArray benchmarks.
///
/// The benchmark CMake script builds this file both with and without Kokkos
/// and defines HAVE_KOKKOS only in the Kokkos configuration, so the Kokkos
/// runtime calls are compiled only when that macro is present.
///
/// @return 0 on success, 1 if the command line contains arguments that were
///         not recognised.
int main(int argc, char** argv)
{
#ifdef HAVE_KOKKOS
    // Forward the command line so Kokkos runtime flags take effect; Kokkos
    // removes the flags it consumes from argv.
    Kokkos::initialize(argc, argv);
#endif
    ::benchmark::Initialize(&argc, argv);

    int status = 0;
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) {
        // Refuse to run with a mistyped flag rather than silently ignoring it.
        status = 1;
    } else {
        ::benchmark::RunSpecifiedBenchmarks();
    }

#ifdef HAVE_KOKKOS
    Kokkos::finalize();
#endif
    return status;
}
3 changes: 2 additions & 1 deletion scripts/build-matar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ show_help() {
echo " "
echo " examples builds examples"
echo " test builds tests"
echo " benchmark builds benchmarks for MATAR"
echo " "
echo " --kokkos_build_type The desired kokkos parallel backend to use. The default is 'serial'"
echo " "
Expand Down Expand Up @@ -52,7 +53,7 @@ build_cores="1"

# Define arrays of valid options
valid_build_action=("full-app" "set-env" "install-matar" "install-kokkos" "matar")
valid_execution=("examples" "test")
valid_execution=("examples" "test" "benchmark")
valid_kokkos_build_types=("none" "serial" "openmp" "pthreads" "cuda" "hip")
valid_machines=("darwin" "chicoma" "linux" "mac")

Expand Down
42 changes: 42 additions & 0 deletions scripts/cmake_build_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash -e
#
# Configures and builds the MATAR benchmark executables.
#
# Usage: cmake_build_benchmark.sh <kokkos_build_type>
#   kokkos_build_type  "none" configures with KOKKOS=OFF; any other value
#                      configures with KOKKOS=ON.
#
# Reads from the environment: BENCHMARK_SOURCE_DIR, BENCHMARK_BUILD_DIR,
# BENCHMARK_INSTALL_DIR, MATAR_INSTALL_DIR, KOKKOS_INSTALL_DIR,
# MATAR_BUILD_CORES and basedir.

kokkos_build_type="${1}"

# Fetch, build and test Google Benchmark the first time it is needed.
# All path expansions are quoted so directories containing spaces work.
if [ ! -d "${BENCHMARK_SOURCE_DIR}/benchmark" ]
then
    echo "Missing googlebenchmark for benchmarking, downloading and installing...."
    git clone https://github.com/google/benchmark.git "${BENCHMARK_SOURCE_DIR}/benchmark"
    cd "${BENCHMARK_SOURCE_DIR}/benchmark"
    cmake -E make_directory "build"
    cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../
    cmake --build "build" --config Release -j"${MATAR_BUILD_CORES}"
    # Test install
    cmake -E chdir "build" ctest --build-config Release
fi

cmake_options=(
    -D CMAKE_PREFIX_PATH="${MATAR_INSTALL_DIR};${KOKKOS_INSTALL_DIR};${BENCHMARK_INSTALL_DIR}"
    -D BENCHMARK_DOWNLOAD_DEPENDENCIES=on
    -DCMAKE_BUILD_TYPE=Release
)

# Only the literal value "none" disables the Kokkos benchmarks.
if [ "$kokkos_build_type" = "none" ]; then
    cmake_options+=(
        -D KOKKOS=OFF
    )
else
    cmake_options+=(
        -D KOKKOS=ON
    )
fi

# Print CMake options for reference
echo "CMake Options: ${cmake_options[*]}"

cmake "${cmake_options[@]}" -B "${BENCHMARK_BUILD_DIR}" -S "${BENCHMARK_SOURCE_DIR}"

# Build benchmark
make -C "${BENCHMARK_BUILD_DIR}" -j"${MATAR_BUILD_CORES}"

cd "${basedir}"
4 changes: 4 additions & 0 deletions scripts/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ export EXAMPLE_BUILD_DIR=${builddir}
export TEST_SOURCE_DIR=${basedir}/test
export TEST_BUILD_DIR=${builddir}

export BENCHMARK_SOURCE_DIR=${basedir}/benchmark
export BENCHMARK_INSTALL_DIR=${basedir}/benchmark/build
export BENCHMARK_BUILD_DIR=${builddir}

export KOKKOS_SOURCE_DIR=${basedir}/src/Kokkos/kokkos
export KOKKOS_BUILD_DIR=${builddir}/kokkos
export KOKKOS_INSTALL_DIR=${installdir}/kokkos
Expand Down
4 changes: 2 additions & 2 deletions src/kokkos_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -7523,7 +7523,7 @@ DynamicRaggedRightArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::DynamicRaggedRig
stride_ = SArray1D(strides_tag_string, dim1_);
#ifdef HAVE_CLASS_LAMBDA
Kokkos::parallel_for("StridesInit", dim1_, KOKKOS_CLASS_LAMBDA(const int i) {
strides_((i) = 0;
strides_(i) = 0;
});
#else
set_strides_functor execution_functor(0, stride_);
Expand Down Expand Up @@ -7700,7 +7700,7 @@ DynamicRaggedDownArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::DynamicRaggedDown
stride_ = SArray1D(strides_tag_string, dim2_);
#ifdef HAVE_CLASS_LAMBDA
Kokkos::parallel_for("StridesInit", dim2_, KOKKOS_CLASS_LAMBDA(const int i) {
strides_((i) = 0;
strides_(i) = 0;
});
#else
set_strides_functor execution_functor(0, stride_);
Expand Down

0 comments on commit 513f587

Please sign in to comment.