You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Describe the bug
Consider a graph with more than MAX_OPGRAPH_OPS nodes, for example in this code
#include"cudnn-frontend/include/cudnn_frontend.h"namespacefe= cudnn_frontend;
intmain()
{
cudnnHandle_t handle;
assert(cudnnCreate(&handle) == CUDNN_STATUS_SUCCESS);
auto graph = std::make_shared<fe::graph::Graph>();
auto x = graph->tensor(
fe::graph::Tensor_attributes()
.set_name("x")
.set_dim({1, 1, 1})
.set_stride({1, 1, 1})
.set_data_type(fe::DataType_t::FLOAT));
auto y = graph->tensor(
fe::graph::Tensor_attributes()
.set_name("y")
.set_dim({1, 1, 1})
.set_stride({1, 1, 1})
.set_data_type(fe::DataType_t::FLOAT));
auto inX = x;
auto inY = y;
for (int i = 0; i < 60; ++i) {
auto sum = graph->pointwise(
x,
y,
fe::graph::Pointwise_attributes()
.set_mode(fe::PointwiseMode_t::ADD)
.set_compute_data_type(fe::DataType_t::FLOAT));
sum->set_data_type(fe::DataType_t::FLOAT);
x = y;
y = sum;
}
y->set_output(true);
assert(graph->validate().is_good());
auto r = graph->build_operation_graph(handle);
if (!r.is_good()) {
std::cerr << r.get_message() << std::endl;
return0;
}
assert(graph->create_execution_plans({fe::HeurMode_t::A}).is_good());
assert(graph->build_plans(handle, fe::BuildPlanPolicy_t::ALL).is_good());
float One = 1;
void* inXPtr;
assert(cudaMalloc(&inXPtr, sizeof(float)) == cudaSuccess);
assert(cudaMemcpy(inXPtr, &One, sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
void* inYPtr;
assert(cudaMalloc(&inYPtr, sizeof(float)) == cudaSuccess);
assert(cudaMemcpy(inYPtr, &One, sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
void* outPtr;
assert(cudaMalloc(&outPtr, sizeof(float)) == cudaSuccess);
void* workspacePtr;
assert(cudaMalloc(&workspacePtr, graph->get_workspace_size()) == cudaSuccess);
std::unordered_map<std::shared_ptr<fe::graph::Tensor_attributes>, void*> tensorMap;
tensorMap[inX] = inXPtr;
tensorMap[inY] = inYPtr;
tensorMap[y] = outPtr;
r = graph->execute(handle, tensorMap, workspacePtr);
if (!r.is_good()) {
std::cerr << r.get_message() << std::endl;
return0;
}
assert(cudaDeviceSynchronize() == cudaSuccess);
float outData;
assert(cudaMemcpy(&outData, outPtr, sizeof(float), cudaMemcpyDeviceToHost) == cudaSuccess);
std::cout << outData << std::endl;
assert(cudaFree(inXPtr) == cudaSuccess);
assert(cudaFree(inYPtr) == cudaSuccess);
assert(cudaFree(outPtr) == cudaSuccess);
assert(cudaFree(workspacePtr) == cudaSuccess);
assert(cudnnDestroy(handle) == CUDNN_STATUS_SUCCESS);
}
Instead of giving an error during compilation it crashes with the following trace because of the out-of-bounds access to m_operationGraph.ops.
(gdb) bt
#0 0x00005555555a56d5 in __gnu_cxx::__exchange_and_add (__val=-1, __mem=0xf8) at /usr/include/c++/11/ext/atomicity.h:66
#1 __gnu_cxx::__exchange_and_add_dispatch (__val=-1, __mem=0xf8) at /usr/include/c++/11/ext/atomicity.h:101
#2 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (this=0xf0) at /usr/include/c++/11/bits/shared_ptr_base.h:165
#3 0x0000555555596de3 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=0x7fffffffcfe8,
__in_chrg=<optimized out>) at /usr/include/c++/11/bits/shared_ptr_base.h:705
#4 0x000055555556decc in std::__shared_ptr<cudnn_frontend::OpaqueBackendPointer, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (
this=0x7fffffffcfe0, __in_chrg=<optimized out>) at /usr/include/c++/11/bits/shared_ptr_base.h:1154
#5 0x00005555555a75e6 in std::__shared_ptr<cudnn_frontend::OpaqueBackendPointer, (__gnu_cxx::_Lock_policy)2>::operator= (
this=0x7fffffffd4e8, __r=...) at /usr/include/c++/11/bits/shared_ptr_base.h:1250
#6 0x000055555559883e in std::shared_ptr<cudnn_frontend::OpaqueBackendPointer>::operator= (this=0x7fffffffd4e8, __r=...)
at /usr/include/c++/11/bits/shared_ptr.h:385
#7 0x0000555555578ef1 in cudnn_frontend::OperationGraphBuilder_v8::setOperationGraph (this=0x7fffffffd170, numOps_=60,
ops_=0x555555ab2f80) at cudnn-frontend/include/cudnn_frontend_OperationGraph.h:157
#8 0x0000555555586552 in cudnn_frontend::ICudnn::create_cudnn_operation_graph (this=0x5555562310f8, handle=0x5555557379c0)
at cudnn-frontend/include/cudnn_frontend/node/../cudnn_interface.h:123
#9 0x0000555555592b85 in cudnn_frontend::graph::Graph::build_operation_graph (this=0x5555562310f0, handle=0x5555557379c0)
at cudnn-frontend/include/cudnn_frontend/graph_interface.h:265
#10 0x00005555555681d4 in main () at example.cpp:43
Expected behavior
I expect to get an error in this case, so my code can fallback to another computational engine.
Also, I expect this restriction to be mentioned in the documentation.
The text was updated successfully, but these errors were encountered:
Describe the bug
Consider a graph with more than MAX_OPGRAPH_OPS nodes, for example in this code
Instead of giving an error during compilation it crashes with the following trace because of the out-of-bounds access to m_operationGraph.ops.
Expected behavior
I expect to get an error in this case, so my code can fallback to another computational engine.
Also, I expect this restriction to be mentioned in the documentation.
The text was updated successfully, but these errors were encountered: