diff --git a/source/lib/src/NNPInter.cc b/source/lib/src/NNPInter.cc
index ae72db8487..5faee38d34 100644
--- a/source/lib/src/NNPInter.cc
+++ b/source/lib/src/NNPInter.cc
@@ -3,7 +3,7 @@
 #include "SimulationRegion.h"
 #include <stdexcept>	
 
-#define MAGIC_NUMBER 256
+#define MAGIC_NUMBER 1024
 typedef double compute_t;
 
 #ifdef  USE_CUDA_TOOLKIT
diff --git a/source/op/cuda/descrpt_se_a.cu b/source/op/cuda/descrpt_se_a.cu
index 8b6b3ee575..2ceeeaca9a 100644
--- a/source/op/cuda/descrpt_se_a.cu
+++ b/source/op/cuda/descrpt_se_a.cu
@@ -18,7 +18,7 @@ limitations under the License.
 #include <cub/block/block_radix_sort.cuh>
 #include <cuda_runtime.h>
 
-#define MAGIC_NUMBER 256
+#define MAGIC_NUMBER 1024
 
 #ifdef HIGH_PREC
     typedef double  VALUETYPE;
@@ -339,8 +339,8 @@ void DescrptSeALauncher(const VALUETYPE* coord,
                             key,
                             i_idx
         );
-        const int ITEMS_PER_THREAD = 4;
-        const int BLOCK_THREADS = 64;
+        const int ITEMS_PER_THREAD = 8;
+        const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
         // BlockSortKernel<NeighborInfo, BLOCK_THREADS, ITEMS_PER_THREAD><<<g_grid_size, BLOCK_THREADS>>> (
         BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
         
diff --git a/source/op/cuda/descrpt_se_r.cu b/source/op/cuda/descrpt_se_r.cu
index 2a4a126166..59de009e63 100644
--- a/source/op/cuda/descrpt_se_r.cu
+++ b/source/op/cuda/descrpt_se_r.cu
@@ -19,7 +19,7 @@ limitations under the License.
 #include <cuda_runtime.h>
 #include <fstream>
 
-#define MAGIC_NUMBER 256
+#define MAGIC_NUMBER 1024
 
 #ifdef HIGH_PREC
     typedef double  VALUETYPE;
@@ -311,8 +311,8 @@ void DescrptSeRLauncher(const VALUETYPE* coord,
                             key,
                             i_idx
         );
-        const int ITEMS_PER_THREAD = 4;
-        const int BLOCK_THREADS = 64;
+        const int ITEMS_PER_THREAD = 8;
+        const int BLOCK_THREADS = MAGIC_NUMBER / ITEMS_PER_THREAD;
         BlockSortKernel<int_64, BLOCK_THREADS, ITEMS_PER_THREAD> <<<nloc, BLOCK_THREADS>>> (key, key + nloc * MAGIC_NUMBER);
         format_nlist_fill_b_se_r<<<nblock, LEN>>> (
                             nlist,
diff --git a/source/op/descrpt_se_a_gpu.cc b/source/op/descrpt_se_a_gpu.cc
index 93c83016fb..9b66c9532c 100644
--- a/source/op/descrpt_se_a_gpu.cc
+++ b/source/op/descrpt_se_a_gpu.cc
@@ -7,7 +7,7 @@
 #include "tensorflow/core/framework/shape_inference.h"
 
 using namespace tensorflow;  // NOLINT(build/namespaces)
-#define MAGIC_NUMBER 256
+#define MAGIC_NUMBER 1024
 
 #ifdef HIGH_PREC
     typedef double VALUETYPE ;
@@ -159,6 +159,7 @@ class DescrptSeAOp : public OpKernel {
         
         OP_REQUIRES (context, (ntypes == int(sel_a.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
         OP_REQUIRES (context, (ntypes == int(sel_r.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
+        OP_REQUIRES (context, (nnei <= 1024),	                errors::InvalidArgument ("Assert failed, max neighbor size of atom(nnei) " + std::to_string(nnei) + " is larger than 1024, which currently is not supported by deepmd-kit."));
         
         // Create output tensors
         TensorShape descrpt_shape ;
diff --git a/source/op/descrpt_se_r_gpu.cc b/source/op/descrpt_se_r_gpu.cc
index 65e2682ef0..14deea74c0 100644
--- a/source/op/descrpt_se_r_gpu.cc
+++ b/source/op/descrpt_se_r_gpu.cc
@@ -148,6 +148,7 @@ class DescrptSeROp : public OpKernel {
         OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		    errors::InvalidArgument ("number of box should be 9"));
         OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of avg should be ndescrpt"));
         OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of std should be ndescrpt"));   
+        OP_REQUIRES (context, (nnei <= 1024),	                errors::InvalidArgument ("Assert failed, max neighbor size of atom(nnei) " + std::to_string(nnei) + " is larger than 1024, which currently is not supported by deepmd-kit."));
         
         // Create output tensors
         TensorShape descrpt_shape ;