Add dpct raw conversion of OneapiBackEnd #210

Draft · wants to merge 7 commits into base: main
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -19,6 +19,7 @@ include(pkgs/JSONSupport)
include(pkgs/MPISupport)
include(pkgs/OpenMPSupport)
include(pkgs/CUDASupport)
include(pkgs/OneApiSupport)

# Create gz_read executable
add_executable(gz_read standard-suite/binary-traces/gz_read.cc)
5 changes: 5 additions & 0 deletions cmake/pkgs/OneApiSupport.cmake
@@ -0,0 +1,5 @@
option(USE_ONEAPI "Enable support for OneApi")

if (USE_ONEAPI)
add_definitions(-DUSE_ONEAPI)
endif()
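
With this module included from the top-level CMakeLists.txt, configuring with the option enabled (e.g. cmake -DUSE_ONEAPI=ON) adds the USE_ONEAPI compile definition that gates all of the OneApi-specific code below; left unset, the option defaults to OFF, matching the existing CUDASupport pattern.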
6 changes: 6 additions & 0 deletions src/Spatter/CMakeLists.txt
@@ -6,8 +6,13 @@ if (USE_CUDA)
set(CUDA_INCLUDE_FILES CudaBackend.hh)
endif()

if (USE_ONEAPI)
set(ONEAPI_INCLUDE_FILES OneApiBackend.hh)
endif()

set(SPATTER_INCLUDE_FILES
${CUDA_INCLUDE_FILES}
${ONEAPI_INCLUDE_FILES}
Configuration.hh
Input.hh
JSONParser.hh
@@ -17,6 +22,7 @@ set(SPATTER_INCLUDE_FILES
Timer.hh
)

message(SPATTER_INCLUDE_FILES="${SPATTER_INCLUDE_FILES}")
add_library(Spatter STATIC
${SPATTER_INCLUDE_FILES}
Configuration.cc
205 changes: 205 additions & 0 deletions src/Spatter/Configuration.cc
@@ -922,4 +922,209 @@ void Configuration<Spatter::CUDA>::setup() {
}
#endif

#ifdef USE_ONEAPI
Configuration<Spatter::OneApi>::Configuration(const size_t id,
const std::string name, const std::string kernel,
const aligned_vector<size_t> &pattern,
const aligned_vector<size_t> &pattern_gather,
const aligned_vector<size_t> &pattern_scatter,
aligned_vector<double> &sparse, size_t &sparse_size,
aligned_vector<double> &sparse_gather, size_t &sparse_gather_size,
aligned_vector<double> &sparse_scatter, size_t &sparse_scatter_size,
aligned_vector<double> &dense, size_t &dense_size,
aligned_vector<aligned_vector<double>> &dense_perthread, const size_t delta,
const size_t delta_gather, const size_t delta_scatter, const long int seed,
const size_t wrap, const size_t count, const size_t shared_mem,
const size_t local_work_size, const unsigned long nruns,
const bool aggregate, const bool atomic, const unsigned long verbosity)
: ConfigurationBase(id, name, kernel, pattern, pattern_gather,
pattern_scatter, sparse, sparse_size, sparse_gather,
sparse_gather_size, sparse_scatter, sparse_scatter_size, dense,
dense_size, dense_perthread, delta, delta_gather, delta_scatter, seed,
wrap, count, shared_mem, local_work_size, 1, nruns, aggregate, atomic,
verbosity) {
setup();
}

Configuration<Spatter::OneApi>::~Configuration() {
cudaFree(dev_pattern);
cudaFree(dev_pattern_gather);
cudaFree(dev_pattern_scatter);

cudaFree(dev_sparse);
cudaFree(dev_sparse_gather);
cudaFree(dev_sparse_scatter);

cudaFree(dev_dense);
}
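
Note that this raw dpct conversion still releases device memory through the CUDA runtime. For orientation only, a hypothetical fully migrated destructor, assuming a sycl::queue member q (not present in this diff) that owns the USM allocations; dpct conventionally rewrites cudaFree(p) as sycl::free(p, q):

Configuration<Spatter::OneApi>::~Configuration() {
  // Hypothetical SYCL form; q is an assumed sycl::queue member.
  sycl::free(dev_pattern, q);
  sycl::free(dev_pattern_gather, q);
  sycl::free(dev_pattern_scatter, q);

  sycl::free(dev_sparse, q);
  sycl::free(dev_sparse_gather, q);
  sycl::free(dev_sparse_scatter, q);

  sycl::free(dev_dense, q);
}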

int Configuration<Spatter::OneApi>::run(bool timed, unsigned long run_id) {
return ConfigurationBase::run(timed, run_id);
}

void Configuration<Spatter::OneApi>::gather(bool timed, unsigned long run_id) {
size_t pattern_length = pattern.size();

#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

float time_ms = oneapi_gather_wrapper(
dev_pattern, dev_sparse, dev_dense, pattern_length, delta, wrap, count);

cudaDeviceSynchronize();

if (timed)
time_seconds[run_id] = ((double)time_ms / 1000.0);
}
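
The oneapi_*_wrapper functions called here are presumably declared in OneApiBackend.hh, which this view does not expand. As a sketch only, a minimal SYCL gather wrapper consistent with this call site, assuming host-side chrono timing, a locally constructed queue, and the serial backend's gather indexing (dense[j + pattern_length * (i % wrap)] = sparse[pattern[j] + delta * i]); everything beyond the signature is illustrative:

#include <chrono>
#include <sycl/sycl.hpp>

float oneapi_gather_wrapper(const size_t *pattern, const double *sparse,
    double *dense, size_t pattern_length, size_t delta, size_t wrap,
    size_t count) {
  sycl::queue q; // illustrative; a real backend would likely share one queue
  auto start = std::chrono::steady_clock::now();
  // One work-item per (repetition, pattern element) pair.
  q.parallel_for(sycl::range<1>(count * pattern_length),
      [=](sycl::id<1> gid) {
        const size_t i = gid[0] / pattern_length; // which repetition
        const size_t j = gid[0] % pattern_length; // offset within pattern
        dense[j + pattern_length * (i % wrap)] =
            sparse[pattern[j] + delta * i];
      }).wait();
  auto stop = std::chrono::steady_clock::now();
  return std::chrono::duration<float, std::milli>(stop - start).count();
}

With a pure SYCL queue, the wait() on the kernel event already provides the synchronization that the cudaDeviceSynchronize() call below stands in for.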

void Configuration<Spatter::OneApi>::scatter(bool timed, unsigned long run_id) {
size_t pattern_length = pattern.size();

#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

float time_ms = 0.0;

if (atomic)
time_ms = oneapi_scatter_atomic_wrapper(
dev_pattern, dev_sparse, dev_dense, pattern_length, delta, wrap, count);
else
time_ms = oneapi_scatter_wrapper(
dev_pattern, dev_sparse, dev_dense, pattern_length, delta, wrap, count);

cudaDeviceSynchronize();

if (timed)
time_seconds[run_id] = ((double)time_ms / 1000.0);
}

void Configuration<Spatter::OneApi>::scatter_gather(
bool timed, unsigned long run_id) {
assert(pattern_scatter.size() == pattern_gather.size());
int pattern_length = static_cast<int>(pattern_scatter.size());

#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

float time_ms = 0.0;

if (atomic)
time_ms = oneapi_scatter_gather_atomic_wrapper(dev_pattern_scatter,
dev_sparse_scatter, dev_pattern_gather, dev_sparse_gather,
pattern_length, delta_scatter, delta_gather, wrap, count);
else
time_ms = oneapi_scatter_gather_wrapper(dev_pattern_scatter,
dev_sparse_scatter, dev_pattern_gather, dev_sparse_gather,
pattern_length, delta_scatter, delta_gather, wrap, count);

cudaDeviceSynchronize();

if (timed)
time_seconds[run_id] = ((double)time_ms / 1000.0);
}

void Configuration<Spatter::OneApi>::multi_gather(
bool timed, unsigned long run_id) {
int pattern_length = static_cast<int>(pattern_gather.size());

#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

float time_ms = oneapi_multi_gather_wrapper(dev_pattern, dev_pattern_gather,
dev_sparse, dev_dense, pattern_length, delta, wrap, count);

cudaDeviceSynchronize();

if (timed)
time_seconds[run_id] = ((double)time_ms / 1000.0);
}

void Configuration<Spatter::OneApi>::multi_scatter(
bool timed, unsigned long run_id) {
int pattern_length = static_cast<int>(pattern_scatter.size());

#ifdef USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

float time_ms = 0.0;

if (atomic)
time_ms =
oneapi_multi_scatter_atomic_wrapper(dev_pattern, dev_pattern_scatter,
dev_sparse, dev_dense, pattern_length, delta, wrap, count);
else
time_ms = oneapi_multi_scatter_wrapper(dev_pattern, dev_pattern_scatter,
dev_sparse, dev_dense, pattern_length, delta, wrap, count);

cudaDeviceSynchronize();

if (timed)
time_seconds[run_id] = ((double)time_ms / 1000.0);
}

void Configuration<Spatter::OneApi>::setup() {
ConfigurationBase::setup();

if (sparse.size() < sparse_size) {
sparse.resize(sparse_size);

for (size_t i = 0; i < sparse.size(); ++i)
sparse[i] = rand();
}

if (sparse_gather.size() < sparse_gather_size) {
sparse_gather.resize(sparse_gather_size);

for (size_t i = 0; i < sparse_gather.size(); ++i)
sparse_gather[i] = rand();
}

if (sparse_scatter.size() < sparse_scatter_size) {
sparse_scatter.resize(sparse_scatter_size);

for (size_t i = 0; i < sparse_scatter.size(); ++i)
sparse_scatter[i] = rand();
}

if (dense.size() < dense_size) {
dense.resize(dense_size);

for (size_t i = 0; i < dense.size(); ++i)
dense[i] = rand();
}

cudaMalloc((void **)&dev_pattern, sizeof(size_t) * pattern.size());
cudaMalloc((void **)&dev_pattern_gather, sizeof(size_t) * pattern_gather.size());
cudaMalloc((void **)&dev_pattern_scatter, sizeof(size_t) * pattern_scatter.size());

cudaMalloc((void **)&dev_sparse, sizeof(double) * sparse.size());
cudaMalloc((void **)&dev_sparse_gather, sizeof(double) * sparse_gather.size());
cudaMalloc((void **)&dev_sparse_scatter, sizeof(double) * sparse_scatter.size());
cudaMalloc((void **)&dev_dense, sizeof(double) * dense.size());

cudaMemcpy(dev_pattern, pattern.data(),
sizeof(size_t) * pattern.size(), cudaMemcpyHostToDevice);
cudaMemcpy(dev_pattern_gather, pattern_gather.data(),
sizeof(size_t) * pattern_gather.size(), cudaMemcpyHostToDevice);
cudaMemcpy(dev_pattern_scatter, pattern_scatter.data(),
sizeof(size_t) * pattern_scatter.size(), cudaMemcpyHostToDevice);

cudaMemcpy(dev_sparse, sparse.data(),
sizeof(double) * sparse.size(), cudaMemcpyHostToDevice);
cudaMemcpy(dev_sparse_gather, sparse_gather.data(),
sizeof(double) * sparse_gather.size(), cudaMemcpyHostToDevice);
cudaMemcpy(dev_sparse_scatter, sparse_scatter.data(),
sizeof(double) * sparse_scatter.size(), cudaMemcpyHostToDevice);
cudaMemcpy(dev_dense, dense.data(),
sizeof(double) * dense.size(), cudaMemcpyHostToDevice);

cudaDeviceSynchronize();
}
#endif

} // namespace Spatter
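
The allocation and transfer block in setup() above likewise still goes through cudaMalloc, cudaMemcpy, and cudaDeviceSynchronize. A minimal sketch of the USM form a completed dpct migration typically produces, again assuming a sycl::queue member q; only three of the seven buffers are shown, the rest follow the same shape:

// Hypothetical SYCL USM replacements for the CUDA calls in setup().
dev_pattern = sycl::malloc_device<size_t>(pattern.size(), q);
dev_sparse = sycl::malloc_device<double>(sparse.size(), q);
dev_dense = sycl::malloc_device<double>(dense.size(), q);

q.memcpy(dev_pattern, pattern.data(), sizeof(size_t) * pattern.size());
q.memcpy(dev_sparse, sparse.data(), sizeof(double) * sparse.size());
q.memcpy(dev_dense, dense.data(), sizeof(double) * dense.size());
q.wait(); // stands in for cudaDeviceSynchronize()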
44 changes: 44 additions & 0 deletions src/Spatter/Configuration.hh
@@ -42,6 +42,10 @@ inline void gpuAssert(
}
#endif

#ifdef USE_ONEAPI
#include "OneApiBackend.hh"
#endif

#include "AlignedAllocator.hh"
#include "SpatterTypes.hh"
#include "Timer.hh"
@@ -230,6 +234,46 @@ public:
};
#endif

#ifdef USE_ONEAPI
template <> class Configuration<Spatter::OneApi> : public ConfigurationBase {
public:
Configuration(const size_t id, const std::string name,
const std::string kernel, const aligned_vector<size_t> &pattern,
const aligned_vector<size_t> &pattern_gather,
const aligned_vector<size_t> &pattern_scatter,
aligned_vector<double> &sparse, size_t &sparse_size,
aligned_vector<double> &sparse_gather, size_t &sparse_gather_size,
aligned_vector<double> &sparse_scatter, size_t &sparse_scatter_size,
aligned_vector<double> &dense, size_t &dense_size,
aligned_vector<aligned_vector<double>> &dense_perthread,
const size_t delta, const size_t delta_gather, const size_t delta_scatter,
const long int seed, const size_t wrap, const size_t count,
const size_t shared_mem, const size_t local_work_size,
const unsigned long nruns, const bool aggregate, const bool atomic,
const unsigned long verbosity);

~Configuration();

int run(bool timed, unsigned long run_id);
void gather(bool timed, unsigned long run_id);
void scatter(bool timed, unsigned long run_id);
void scatter_gather(bool timed, unsigned long run_id);
void multi_gather(bool timed, unsigned long run_id);
void multi_scatter(bool timed, unsigned long run_id);
void setup();

public:
size_t *dev_pattern;
size_t *dev_pattern_gather;
size_t *dev_pattern_scatter;

double *dev_sparse;
double *dev_sparse_gather;
double *dev_sparse_scatter;

double *dev_dense;
};
#endif
} // namespace Spatter

#endif
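
The dev_* members above mirror the CUDA specialization's device pointers one-for-one; a completed SYCL port would presumably add a sycl::queue member alongside them, as assumed in the sketches for Configuration.cc.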
27 changes: 24 additions & 3 deletions src/Spatter/Input.hh
@@ -321,8 +321,10 @@ int parse_input(const int argc, char **argv, ClArgs &cl) {
[](unsigned char c) { return std::tolower(c); });

if ((backend.compare("serial") != 0) &&
(backend.compare("openmp") != 0) && (backend.compare("cuda") != 0)) {
std::cerr << "Valid Backends are: serial, openmp, cuda" << std::endl;
(backend.compare("openmp") != 0) &&
(backend.compare("cuda") != 0) &&
(backend.compare("oneapi") != 0)) {
std::cerr << "Valid Backends are: serial, openmp, cuda, oneapi" << std::endl;
return -1;
}
if (backend.compare("openmp") == 0) {
@@ -335,6 +337,12 @@ int parse_input(const int argc, char **argv, ClArgs &cl) {
#ifndef USE_CUDA
std::cerr << "FAIL - CUDA Backend is not Enabled" << std::endl;
return -1;
#endif
}
if (backend.compare("oneapi") == 0) {
#ifndef USE_ONEAPI
std::cerr << "FAIL - OneApi Backend is not Enabled" << std::endl;
return -1;
#endif
}
break;
@@ -485,12 +493,15 @@ int parse_input(const int argc, char **argv, ClArgs &cl) {
// Set default backend if one was not specified
if (backend.compare("") == 0) {
backend = "serial";
-  // Assume only one of USE_CUDA and USE_OPENMP can be true at once
+  // Assume only one of USE_OPENMP, USE_CUDA and USE_ONEAPI can be true at once
#ifdef USE_OPENMP
backend = "openmp";
#endif
#ifdef USE_CUDA
backend = "cuda";
#endif
#ifdef USE_ONEAPI
backend = "oneapi";
#endif
}
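
With this fallback, a build configured with USE_ONEAPI defaults to the oneapi backend whenever the command line does not name one explicitly.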

@@ -626,6 +637,16 @@ int parse_input(const int argc, char **argv, ClArgs &cl) {
cl.dense_perthread, delta, delta_gather, delta_scatter, seed, wrap,
count, shared_mem, local_work_size, nruns, aggregate, atomic,
verbosity);
#endif
#ifdef USE_ONEAPI
else if (backend.compare("oneapi") == 0)
c = std::make_unique<Spatter::Configuration<Spatter::OneApi>>(0,
config_name, kernel, pattern, pattern_gather, pattern_scatter,
cl.sparse, cl.sparse_size, cl.sparse_gather, cl.sparse_gather_size,
cl.sparse_scatter, cl.sparse_scatter_size, cl.dense, cl.dense_size,
cl.dense_perthread, delta, delta_gather, delta_scatter, seed, wrap,
count, shared_mem, local_work_size, nruns, aggregate, atomic,
verbosity);
#endif
else {
std::cerr << "Invalid Backend " << backend << std::endl;
11 changes: 11 additions & 0 deletions src/Spatter/JSONParser.cc
@@ -207,6 +207,17 @@ std::unique_ptr<Spatter::ConfigurationBase> JSONParser::operator[](
data_[index]["wrap"], data_[index]["count"], shared_mem_,
data_[index]["local-work-size"], data_[index]["nruns"], aggregate_,
atomic_, verbosity_);
#endif
#ifdef USE_ONEAPI
else if (backend_.compare("oneapi") == 0)
c = std::make_unique<Spatter::Configuration<Spatter::OneApi>>(index,
data_[index]["name"], data_[index]["kernel"], pattern, pattern_gather,
pattern_scatter, sparse, sparse_size, sparse_gather, sparse_gather_size,
sparse_scatter, sparse_scatter_size, dense, dense_size, dense_perthread,
delta, delta_gather, delta_scatter, data_[index]["seed"],
data_[index]["wrap"], data_[index]["count"], shared_mem_,
data_[index]["local-work-size"], data_[index]["nruns"], aggregate_,
atomic_, verbosity_);
#endif
else {
std::cerr << "Invalid Backend " << backend_ << std::endl;