diff --git a/source/lib/src/NNPInter.cc b/source/lib/src/NNPInter.cc index ae72db8487..5faee38d34 100644 --- a/source/lib/src/NNPInter.cc +++ b/source/lib/src/NNPInter.cc @@ -3,7 +3,7 @@ #include "SimulationRegion.h" #include -#define MAGIC_NUMBER 256 +#define MAGIC_NUMBER 1024 typedef double compute_t; #ifdef USE_CUDA_TOOLKIT diff --git a/source/op/cuda/descrpt_se_a.cu b/source/op/cuda/descrpt_se_a.cu index 8b6b3ee575..2ceeeaca9a 100644 --- a/source/op/cuda/descrpt_se_a.cu +++ b/source/op/cuda/descrpt_se_a.cu @@ -18,7 +18,7 @@ limitations under the License. #include #include -#define MAGIC_NUMBER 256 +#define MAGIC_NUMBER 1024 #ifdef HIGH_PREC typedef double VALUETYPE; @@ -339,8 +339,8 @@ void DescrptSeALauncher(const VALUETYPE* coord, key, i_idx ); - const int ITEMS_PER_THREAD = 4; - const int BLOCK_THREADS = 64; + const int ITEMS_PER_THREAD = 8; + const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD; // BlockSortKernel<<>> ( BlockSortKernel <<>> (key, key + nloc * MAGIC_NUMBER); diff --git a/source/op/cuda/descrpt_se_r.cu b/source/op/cuda/descrpt_se_r.cu index 2a4a126166..59de009e63 100644 --- a/source/op/cuda/descrpt_se_r.cu +++ b/source/op/cuda/descrpt_se_r.cu @@ -19,7 +19,7 @@ limitations under the License. #include #include -#define MAGIC_NUMBER 256 +#define MAGIC_NUMBER 1024 #ifdef HIGH_PREC typedef double VALUETYPE; @@ -311,8 +311,8 @@ void DescrptSeRLauncher(const VALUETYPE* coord, key, i_idx ); - const int ITEMS_PER_THREAD = 4; - const int BLOCK_THREADS = 64; + const int ITEMS_PER_THREAD = 8; + const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD; BlockSortKernel <<>> (key, key + nloc * MAGIC_NUMBER); format_nlist_fill_b_se_r<<>> ( nlist, diff --git a/source/op/descrpt_se_a_gpu.cc b/source/op/descrpt_se_a_gpu.cc index 93c83016fb..9b66c9532c 100644 --- a/source/op/descrpt_se_a_gpu.cc +++ b/source/op/descrpt_se_a_gpu.cc @@ -7,7 +7,7 @@ #include "tensorflow/core/framework/shape_inference.h" using namespace tensorflow; // NOLINT(build/namespaces) -#define MAGIC_NUMBER 256 +#define MAGIC_NUMBER 1024 #ifdef HIGH_PREC typedef double VALUETYPE ; @@ -159,6 +159,7 @@ class DescrptSeAOp : public OpKernel { OP_REQUIRES (context, (ntypes == int(sel_a.size())), errors::InvalidArgument ("number of types should match the length of sel array")); OP_REQUIRES (context, (ntypes == int(sel_r.size())), errors::InvalidArgument ("number of types should match the length of sel array")); + OP_REQUIRES (context, (nnei <= 1024), errors::InvalidArgument ("Assert failed, max neighbor size of atom(nnei) " + std::to_string(nnei) + " is larger than 1024, which currently is not supported by deepmd-kit.")); // Create output tensors TensorShape descrpt_shape ; diff --git a/source/op/descrpt_se_r_gpu.cc b/source/op/descrpt_se_r_gpu.cc index 65e2682ef0..14deea74c0 100644 --- a/source/op/descrpt_se_r_gpu.cc +++ b/source/op/descrpt_se_r_gpu.cc @@ -148,6 +148,7 @@ class DescrptSeROp : public OpKernel { OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of box should be 9")); OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of avg should be ndescrpt")); OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of std should be ndescrpt")); + OP_REQUIRES (context, (nnei <= 1024), errors::InvalidArgument ("Assert failed, max neighbor size of atom(nnei) " + std::to_string(nnei) + " is larger than 1024, which currently is not supported by deepmd-kit.")); // Create output tensors TensorShape descrpt_shape ;