
Merge pull request #139 from denghuilu/devel-up
Fix prod_force GPU kernels producing wrong output
amcadmus authored Dec 16, 2019
2 parents 324c527 + bf9ba83 commit ab355d0
Showing 4 changed files with 30 additions and 11 deletions.
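Why the kernels produced wrong output: the old launchers passed ndescrpt directly as the thread-block size, as in deriv_wrt_center_atom_se_a<<<dim3(nloc, 3), ndescrpt>>>(...). CUDA caps a block at 1024 threads, so once the descriptor dimension grows past that (for se_a it scales with the neighbor count), the launch is invalid; with no error check after the launch, the force buffer presumably keeps its cudaMemset value and the result is silently wrong. The fix tiles ndescrpt over blocks of 256 x 3 threads (768 per block, within the limit) and guards the partial last block with an idy >= ndescrpt check. A runnable sketch of the fixed pattern follows the prod_force_se_a.cu diff below.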
5 changes: 5 additions & 0 deletions source/CMakeLists.txt
@@ -170,6 +170,11 @@ include_directories(${TensorFlow_INCLUDE_DIRS})
 if (BUILD_CPP_IF)
   set (LIB_DEEPMD "deepmd")
   set (LIB_DEEPMD_OP "deepmd_op")
+  if (USE_CUDA_TOOLKIT)
+    set (LIB_DEEPMD_OP_CUDA "deepmd_op_cuda")
+  else()
+    set (LIB_DEEPMD_OP_CUDA "")
+  endif()
   if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 4.9)
     set (LIB_DEEPMD_NATIVE "deepmd_native_md")
     set (LIB_DEEPMD_IPI "deepmd_ipi")
2 changes: 1 addition & 1 deletion source/lmp/env.sh.in
@@ -8,4 +8,4 @@ TF_RPATH=`echo $TENSORFLOW_LIBRARY_PATH | sed "s/;/ -Wl,-rpath=/g"`

 NNP_INC=" -std=c++11 @PREC_DEF@ @TTM_DEF@ -I$TF_INCLUDE_DIRS -I$DEEPMD_ROOT/include/deepmd "
 NNP_PATH=" -L$TF_LIBRARY_PATH -L$DEEPMD_ROOT/lib"
-NNP_LIB=" -Wl,--no-as-needed -l@LIB_DEEPMD_OP@ -l@LIB_DEEPMD@ -ldeepmd_op_cuda -ltensorflow_cc -ltensorflow_framework -Wl,-rpath=$TF_RPATH -Wl,-rpath=$DEEPMD_ROOT/lib"
+NNP_LIB=" -Wl,--no-as-needed -l@LIB_DEEPMD_OP@ -l@LIB_DEEPMD_OP_CUDA@ -l@LIB_DEEPMD@ -ltensorflow_cc -ltensorflow_framework -Wl,-rpath=$TF_RPATH -Wl,-rpath=$DEEPMD_ROOT/lib"
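Together with the CMakeLists.txt change above, the link line is now configurable: @LIB_DEEPMD_OP_CUDA@ expands to deepmd_op_cuda only when USE_CUDA_TOOLKIT is set and to the empty string otherwise, so CPU-only builds no longer hard-code -ldeepmd_op_cuda into the LAMMPS environment.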
17 changes: 12 additions & 5 deletions source/op/cuda/prod_force_se_a.cu
@@ -37,9 +37,13 @@ __global__ void deriv_wrt_center_atom_se_a(VALUETYPE * force,
                                            const VALUETYPE * in_deriv,
                                            const int ndescrpt)
 {
-    const unsigned int idx = blockIdx.x;
-    const unsigned int idy = threadIdx.x;
-    const unsigned int idz = blockIdx.y;
+    const unsigned int idx = blockIdx.y;
+    const unsigned int idy = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned int idz = threadIdx.y;
+
+    if (idy >= ndescrpt) {
+        return;
+    }
 
     atomicAdd(force + idx * 3 + idz, -1.0 * net_deriv[idx * ndescrpt + idy] * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
 }
@@ -84,8 +88,11 @@ void ProdForceSeALauncher(VALUETYPE * force,
 {
     // std::cout << "I'm here!" << std::endl;
     cudaErrcheck(cudaMemset(force, 0.0, sizeof(VALUETYPE) * nall * 3));
-    dim3 grid(nloc, 3);
-    deriv_wrt_center_atom_se_a<<<grid, ndescrpt>>>(force, net_deriv, in_deriv, ndescrpt);
+    const int LEN1 = 256;
+    const int nblock1 = (ndescrpt + LEN1 -1) / LEN1;
+    dim3 grid(nblock1, nloc);
+    dim3 thread(LEN1, 3);
+    deriv_wrt_center_atom_se_a<<<grid, thread>>>(force, net_deriv, in_deriv, ndescrpt);
 
     const int LEN = 64;
     int nblock = (nloc + LEN -1) / LEN;
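Below is a minimal, self-contained sketch of the fixed launch pattern, not DeePMD-kit source: single precision, made-up sizes and data, and a generic kernel name, with the GPU reduction checked against a plain CPU loop. It deliberately uses ndescrpt = 1500 > 1024, the case the old one-block-per-atom launch could not cover.

// prod_force_check.cu -- hedged sketch, not repository code.
// Build: nvcc -o prod_force_check prod_force_check.cu
#include <cstdio>
#include <cmath>
#include <vector>
#include <cuda_runtime.h>

__global__ void deriv_wrt_center_atom(float * force,
                                      const float * net_deriv,
                                      const float * in_deriv,
                                      const int ndescrpt)
{
    const unsigned int idx = blockIdx.y;                             // atom index
    const unsigned int idy = blockIdx.x * blockDim.x + threadIdx.x;  // descriptor index
    const unsigned int idz = threadIdx.y;                            // force component

    if (idy >= ndescrpt) {   // guard the partial last block
        return;
    }

    atomicAdd(force + idx * 3 + idz,
              -1.0f * net_deriv[idx * ndescrpt + idy]
                    * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
}

int main()
{
    const int nloc = 4;
    const int ndescrpt = 1500;   // > 1024: too large for a single thread block
    const int n = nloc * ndescrpt;

    std::vector<float> net(n), in(n * 3), out(nloc * 3);
    for (int i = 0; i < n; ++i)     net[i] = 0.001f * (i % 97);
    for (int i = 0; i < n * 3; ++i) in[i]  = 0.002f * (i % 89);

    float *d_force, *d_net, *d_in;
    cudaMalloc(&d_force, sizeof(float) * nloc * 3);
    cudaMalloc(&d_net,   sizeof(float) * n);
    cudaMalloc(&d_in,    sizeof(float) * n * 3);
    cudaMemset(d_force, 0, sizeof(float) * nloc * 3);
    cudaMemcpy(d_net, net.data(), sizeof(float) * n,     cudaMemcpyHostToDevice);
    cudaMemcpy(d_in,  in.data(),  sizeof(float) * n * 3, cudaMemcpyHostToDevice);

    // The fixed configuration: tile ndescrpt over blocks of 256 threads,
    // with the three force components in threadIdx.y (256 * 3 = 768 <= 1024).
    const int LEN1 = 256;
    const int nblock1 = (ndescrpt + LEN1 - 1) / LEN1;   // ceiling division
    dim3 grid(nblock1, nloc);
    dim3 thread(LEN1, 3);
    deriv_wrt_center_atom<<<grid, thread>>>(d_force, d_net, d_in, ndescrpt);

    cudaMemcpy(out.data(), d_force, sizeof(float) * nloc * 3, cudaMemcpyDeviceToHost);

    // CPU reference: the same reduction in plain loops.
    double max_err = 0.0;
    for (int a = 0; a < nloc; ++a)
        for (int c = 0; c < 3; ++c) {
            double ref = 0.0;
            for (int d = 0; d < ndescrpt; ++d)
                ref -= (double)net[a * ndescrpt + d]
                     * (double)in[a * ndescrpt * 3 + d * 3 + c];
            if (std::fabs(ref - out[a * 3 + c]) > max_err)
                max_err = std::fabs(ref - out[a * 3 + c]);
        }
    printf("max |GPU - CPU| = %g (small float round-off expected)\n", max_err);

    cudaFree(d_force); cudaFree(d_net); cudaFree(d_in);
    return 0;
}

Switching the launch back to <<<dim3(nloc, 3), ndescrpt>>> with ndescrpt = 1500 should instead leave the forces at zero, and cudaGetLastError() after the launch should report an invalid-configuration error.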
17 changes: 12 additions & 5 deletions source/op/cuda/prod_force_se_r.cu
@@ -36,10 +36,14 @@ __global__ void deriv_wrt_center_atom_se_r(VALUETYPE * force,
                                            const VALUETYPE * in_deriv,
                                            const int ndescrpt)
 {
-    const unsigned int idx = blockIdx.x;
-    const unsigned int idy = threadIdx.x;
-    const unsigned int idz = blockIdx.y;
+    const unsigned int idx = blockIdx.y;
+    const unsigned int idy = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned int idz = threadIdx.y;
+
+    if (idy >= ndescrpt) {
+        return;
+    }
 
     atomicAdd(force + idx * 3 + idz, -1.0 * net_deriv[idx * ndescrpt + idy] * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
 }
 
@@ -81,8 +85,11 @@ void ProdForceSeRLauncher(VALUETYPE * force,
                          const int n_a_shift)
 {
     cudaErrcheck(cudaMemset(force, 0.0, sizeof(VALUETYPE) * nall * 3));
-    dim3 grid(nloc, 3);
-    deriv_wrt_center_atom_se_r<<<grid, ndescrpt>>>(force, net_deriv, in_deriv, ndescrpt);
+    const int LEN1 = 256;
+    const int nblock1 = (ndescrpt + LEN1 -1) / LEN1;
+    dim3 grid(nblock1, nloc);
+    dim3 thread(LEN1, 3);
+    deriv_wrt_center_atom_se_r<<<grid, thread>>>(force, net_deriv, in_deriv, ndescrpt);
 
     const int LEN = 64;
     int nblock = (nloc + LEN -1) / LEN;
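The prod_force_se_r.cu change is the same fix applied to the se_r variant: identical tiled launch, identical bounds guard; only the kernel and launcher names differ, so the sketch above covers both.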
