From 65dcf1096a751fce3785cd3cb8e842d6ea923655 Mon Sep 17 00:00:00 2001 From: Xiaojie Wu Date: Tue, 18 Feb 2025 20:51:11 -0800 Subject: [PATCH] copy instead of casting --- gpu4pyscf/lib/gdft/libxc.cu | 75 ++++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/gpu4pyscf/lib/gdft/libxc.cu b/gpu4pyscf/lib/gdft/libxc.cu index eaf954fa..82edb0bd 100644 --- a/gpu4pyscf/lib/gdft/libxc.cu +++ b/gpu4pyscf/lib/gdft/libxc.cu @@ -68,6 +68,47 @@ static void _add_out(double *out, const double *buf, double coef, int np, int di extern "C" { +__host__ +void copy_gga2lda(xc_gga_out_params *gga, xc_lda_out_params *lda){ + lda->zk = gga->zk; + lda->vrho = gga->vrho; + lda->v2rho2 = gga->v2rho2; + lda->v3rho3 = gga->v3rho3; + lda->v4rho4 = gga->v4rho4; +} + +__host__ +void copy_mgga2lda(xc_mgga_out_params *mgga, xc_lda_out_params *lda){ + lda->zk = mgga->zk; + lda->vrho = mgga->vrho; + lda->v2rho2 = mgga->v2rho2; + lda->v3rho3 = mgga->v3rho3; + lda->v4rho4 = mgga->v4rho4; +} + +__host__ +void copy_mgga2gga(xc_mgga_out_params *mgga, xc_gga_out_params *gga){ + gga->zk = mgga->zk; + + gga->vrho = mgga->vrho; + gga->vsigma = mgga->vsigma; + + gga->v2rho2 = mgga->v2rho2; + gga->v2rhosigma = mgga->v2rhosigma; + gga->v2sigma2 = mgga->v2sigma2; + + gga->v3rho3 = mgga->v3rho3; + gga->v3rho2sigma = mgga->v3rho2sigma; + gga->v3rhosigma2 = mgga->v3rhosigma2; + gga->v3sigma3 = mgga->v3sigma3; + + gga->v4rho4 = mgga->v4rho4; + gga->v4rho3sigma = mgga->v4rho3sigma; + gga->v4rho2sigma2 = mgga->v4rho2sigma2; + gga->v4rhosigma3 = mgga->v4rhosigma3; + gga->v4sigma4 = mgga->v4sigma4; +} + __host__ void _memset_lda(xc_lda_out_params *out, int order, int np, const xc_dimensions *dim){ if(order >= 0) cudaMemset(out->zk, 0, sizeof(double)*np*dim->zk); @@ -225,12 +266,12 @@ int _xc_lda(const xc_func_type *func, int np, int order, const double *rho, __host__ int _xc_gga(const xc_func_type *func, int np, int order, const double *rho, const double *sigma, xc_gga_out_params *out){ - + if(func->info->gga == NULL){ fprintf(stderr, "Nested xc functional is not supported\n"); return 1; } - + //xc_dimensions* dim = (xc_dimensions *) malloc(sizeof(xc_dimensions)); //memcpy(dim, &(func->dim), sizeof(xc_dimensions)); //DEVICE_INIT(xc_dimensions, dim, &(func->dim), 1); @@ -238,7 +279,7 @@ int _xc_gga(const xc_func_type *func, int np, int order, const double *rho, cons const xc_dimensions *dim = &(func->dim); _memset_gga(out, order, np, dim); //FREE(dim); - + cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf(stderr, "CUDA Error of memset_gga: %s\n", cudaGetErrorString(err)); @@ -271,7 +312,7 @@ int _xc_mgga(const xc_func_type *func, int np, int order, const double *rho, con fprintf(stderr, "Nested xc functional is not supported\n"); return 1; } - + //xc_dimensions* dim = (xc_dimensions *) malloc(sizeof(xc_dimensions)); //memcpy(dim, &(func->dim), sizeof(xc_dimensions)); //DEVICE_INIT(xc_dimensions, dim, &(func->dim), 1); @@ -310,7 +351,7 @@ int GDFT_xc_lda(cudaStream_t stream, xc_lda_out_params *out, xc_lda_out_params *buf) { int ierr = 0; - + int order = -1; if(out->zk != NULL) order = 0; if(out->vrho != NULL) order = 1; @@ -338,11 +379,11 @@ int GDFT_xc_lda(cudaStream_t stream, dim3 threads(THREADS); dim3 blocks((np+THREADS-1)/THREADS); - + for (int ii=0; ii< n_func_aux; ii++){ xc_func_type *aux = func->func_aux[ii]; double coef = func->mix_coef[ii]; - + /* Evaluate the functional */ switch(aux->info->family){ case XC_FAMILY_LDA:{ @@ -396,13 +437,15 @@ int GDFT_xc_gga(cudaStream_t stream, /* Evaluate the functional */ switch(aux->info->family){ case XC_FAMILY_LDA:{ - xc_lda_out_params *out_lda = (xc_lda_out_params *)(buf); + xc_lda_out_params *out_lda = (xc_lda_out_params *)malloc(sizeof(xc_lda_out_params)); + copy_gga2lda(buf, out_lda); ierr = _xc_lda(aux, np, order, rho, out_lda); ADD_LDA; + free(out_lda); break; } case XC_FAMILY_GGA:{ - xc_gga_out_params *out_gga = (xc_gga_out_params *)(buf); + xc_gga_out_params *out_gga = buf; ierr = _xc_gga(aux, np, order, rho, sigma, out_gga); ADD_GGA; break; @@ -447,27 +490,31 @@ int GDFT_xc_mgga(cudaStream_t stream, dim3 threads(THREADS); dim3 blocks((np+THREADS-1)/THREADS); - + for (int ii=0; ii< n_func_aux; ii++){ xc_func_type *aux = func->func_aux[ii]; double coef = func->mix_coef[ii]; - + /* Evaluate the functional */ switch(aux->info->family){ case XC_FAMILY_LDA:{ - xc_lda_out_params *out_lda = (xc_lda_out_params *)(buf); + xc_lda_out_params *out_lda = (xc_lda_out_params *)malloc(sizeof(xc_lda_out_params)); + copy_mgga2lda(buf, out_lda); ierr = _xc_lda(aux, np, order, rho, out_lda); ADD_LDA; + free(out_lda); break; } case XC_FAMILY_GGA:{ - xc_gga_out_params *out_gga = (xc_gga_out_params *)(buf); + xc_gga_out_params *out_gga = (xc_gga_out_params *) malloc(sizeof(xc_gga_out_params)); + copy_mgga2gga(buf, out_gga); ierr = _xc_gga(aux, np, order, rho, sigma, out_gga); ADD_GGA; + free(out_gga); break; } case XC_FAMILY_MGGA:{ - xc_mgga_out_params *out_mgga = (xc_mgga_out_params *)(buf); + xc_mgga_out_params *out_mgga = buf; ierr = _xc_mgga(aux, np, order, rho, sigma, lapl, tau, out_mgga); ADD_MGGA; break;