Skip to content

Commit

Permalink
Fix the repeated initial guess and use diagonal precondition in LR::H…
Browse files Browse the repository at this point in the history
…Solver; Fix a segfault and non-hermitian in multi-k op_lr_exx (#5468)

* fix the initial guess

* diag-precondition

* fix segfault and non-hermitian in op_lr_exx
  • Loading branch information
maki49 authored Nov 13, 2024
1 parent 9718c41 commit 4f1be57
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 30 deletions.
2 changes: 1 addition & 1 deletion source/module_lr/dm_trans/dm_trans_parallel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const std::complex<double>* X_
// &beta, dm_trans[isk].data<std::complex<double>>(), &i1, &i1, pmat.desc);

// ============== [C_virt * X * C_occ^\dagger]^T=============
// ============== = [C_occ^* * X^T * C_virt^T]^T=============
// ============== = [C_occ^* * X^T * C_virt^T]=============
// 1. X*C_occ^\dagger
char transa = 'N';
char transb = 'C';
Expand Down
17 changes: 14 additions & 3 deletions source/module_lr/esolver_lrtd_lcao.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "module_base/scalapack_connector.h"
#include "module_parameter/parameter.h"
#include "module_lr/ri_benchmark/ri_benchmark.h"
#include "module_lr/operator_casida/operator_lr_diag.h" // for precondition

#ifdef __EXX
template<>
Expand Down Expand Up @@ -421,20 +422,29 @@ void LR::ESolver_LR<T, TR>::runner(int istep, UnitCell& cell)
if (GlobalV::MY_RANK == 0) { assert(nst == LR_Util::write_value(efile(label), prec, e, nst)); }
assert(nst * dim == LR_Util::write_value(vfile(label), prec, v, nst, dim));
};
std::vector<double> precondition(this->input.lr_solver == "lapack" ? 0 : nloc_per_band, 1.0);
// allocate and initialize A matrix and density matrix
if (openshell)
{
for (int is : {0, 1})
{
const int offset_is = is * this->paraX_[0].get_local_size();
OperatorLRDiag<double> pre_op(this->eig_ks.c + is * nk * (nocc[0] + nvirt[0]), this->paraX_[is], this->nk, this->nocc[is], this->nvirt[is]);
if (input.lr_solver != "lapack") { pre_op.act(1, offset_is, 1, precondition.data() + offset_is, precondition.data() + offset_is); }
}
std::cout << "Solving spin-conserving excitation for open-shell system." << std::endl;
HamiltULR<T> hulr(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, *this->psi_ks, this->eig_ks,
#ifdef __EXX
this->exx_lri, this->exx_info.info_global.hybrid_alpha,
#endif
this->gint_, this->pot, this->kv, this->paraX_, this->paraC_, this->paraMat_);
LR::HSolver::solve(hulr, this->X[0].template data<T>(), nloc_per_band, nstates, this->pelec->ekb.c, this->input.lr_solver, this->input.lr_thr);
LR::HSolver::solve(hulr, this->X[0].template data<T>(), nloc_per_band, nstates, this->pelec->ekb.c, this->input.lr_solver, this->input.lr_thr, precondition);
if (input.out_wfc_lr) { write_states("openshell", this->pelec->ekb.c, this->X[0].template data<T>(), nloc_per_band, nstates); }
}
else
{
OperatorLRDiag<double> pre_op(this->eig_ks.c, this->paraX_[0], this->nk, this->nocc[0], this->nvirt[0]);
if (input.lr_solver != "lapack") { pre_op.act(1, nloc_per_band, 1, precondition.data(), precondition.data()); }
auto spin_types = std::vector<std::string>({ "singlet", "triplet" });
for (int is = 0;is < nspin;++is)
{
Expand All @@ -447,7 +457,7 @@ void LR::ESolver_LR<T, TR>::runner(int istep, UnitCell& cell)
spin_types[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector<int>({})));
// solve the Casida equation
LR::HSolver::solve(hlr, this->X[is].template data<T>(), nloc_per_band, nstates,
this->pelec->ekb.c + is * nstates, this->input.lr_solver, this->input.lr_thr/*,
this->pelec->ekb.c + is * nstates, this->input.lr_solver, this->input.lr_thr, precondition/*,
!std::set<std::string>({ "hf", "hse" }).count(this->xc_kernel)*/); //whether the kernel is Hermitian
if (input.out_wfc_lr) { write_states(spin_types[is], this->pelec->ekb.c + is * nstates, this->X[is].template data<T>(), nloc_per_band, nstates); }
}
Expand Down Expand Up @@ -565,8 +575,9 @@ void LR::ESolver_LR<T, TR>::set_X_initial_guess()
const int is_in_x = openshell ? 0 : is; // if openshell, spin-up and spin-down are put together
if (px.in_this_processor(virt_global, occ_global))
{
const int xstart_pair = ik * px.get_local_size();
const int ipair_loc = px.global2local_col(occ_global) * px.get_row_size() + px.global2local_row(virt_global);
X[is_in_x].data<T>()[xstart_bs + ipair_loc] = (static_cast<T>(1.0) / static_cast<T>(nk));
X[is_in_x].data<T>()[xstart_bs + xstart_pair + ipair_loc] = (static_cast<T>(1.0) / static_cast<T>(nk));
}
}
}
Expand Down
11 changes: 5 additions & 6 deletions source/module_lr/hsolver_lrtd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ namespace LR
double* eig,
const std::string method,
const Real<T>& diag_ethr, ///< threshold for diagonalization
const std::vector<Real<T>>& precondition,
const bool hermitian = true)
{
ModuleBase::TITLE("HSolverLR", "solve");
const std::vector<std::string> spin_types = { "singlet", "triplet" };
// note: if not TDA, the eigenvalues will be complex
// then we will need a new constructor of DiagoDavid

// 1. allocate precondition and eigenvalue
std::vector<Real<T>> precondition(dim);
// 1. allocate eigenvalue
std::vector<Real<T>> eigenvalue(nband); //nstates
// 2. select the method
#ifdef __MPI
Expand All @@ -67,9 +67,7 @@ namespace LR
}
else
{
// 3. set precondition and diagethr
for (int i = 0; i < dim; ++i) { precondition[i] = static_cast<Real<T>>(1.0); }

// 3. set maxiter and funcs
const int maxiter = hsolver::DiagoIterAssist<T>::PW_DIAG_NMAX;

auto hpsi_func = [&hm](T* psi_in, T* hpsi, const int ld_psi, const int nvec) {hm.hPsi(psi_in, hpsi, ld_psi, nvec);};
Expand Down Expand Up @@ -139,7 +137,8 @@ namespace LR

auto psi_tensor = ct::TensorMap(psi, ct::DataTypeToEnum<T>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ nband, dim }));
auto eigen_tensor = ct::TensorMap(eigenvalue.data(), ct::DataTypeToEnum<Real<T>>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ nband }));
auto precon_tensor = ct::TensorMap(precondition.data(), ct::DataTypeToEnum<Real<T>>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ dim }));
std::vector<Real<T>> precondition_(precondition); //since TensorMap does not support const pointer
auto precon_tensor = ct::TensorMap(precondition_.data(), ct::DataTypeToEnum<Real<T>>::value, ct::DeviceType::CpuDevice, ct::TensorShape({ dim }));
auto hpsi_func = [&hm](const ct::Tensor& psi_in, ct::Tensor& hpsi) {hm.hPsi(psi_in.data<T>(), hpsi.data<T>(), psi_in.shape().dim_size(0) /*nbasis_local*/, 1/*band-by-band*/);};
auto spsi_func = [&hm](const ct::Tensor& psi_in, ct::Tensor& spsi)
{ std::memcpy(spsi.data<T>(), psi_in.data<T>(), sizeof(T) * psi_in.NumElements()); };
Expand Down
29 changes: 20 additions & 9 deletions source/module_lr/operator_casida/operator_lr_exx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ namespace LR
void OperatorLREXX<double>::cal_DM_onebase(const int io, const int iv, const int ik) const
{
ModuleBase::TITLE("OperatorLREXX", "cal_DM_onebase");
// NOTICE: DM_onebase will be passed into `cal_energy` interface and conjugated by "zdotc".
// So the formula should be the same as RHS. instead of LHS of the A-matrix,
// i.e. c1v · conj(c2o) · e^{-ik(R2-R1)}
assert(ik == 0);
for (auto cell : this->BvK_cells)
{
Expand All @@ -42,8 +45,12 @@ namespace LR
const int nw2 = aims_nbasis.empty() ? ucell.atoms[it2].nw : aims_nbasis[it2];
for (int iw1 = 0;iw1 < nw1;++iw1)
for (int iw2 = 0;iw2 < nw2;++iw2)
if (this->pmat.in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2)))
D2d(iw1, iw2) = this->psi_ks_full(ik, io, ucell.itiaiw2iwt(it1, ia1, iw1)) * this->psi_ks_full(ik, nocc + iv, ucell.itiaiw2iwt(it2, ia2, iw2));
{
const int iwt1 = ucell.itiaiw2iwt(it1, ia1, iw1);
const int iwt2 = ucell.itiaiw2iwt(it2, ia2, iw2);
if (this->pmat.in_this_processor(iwt1, iwt2))
D2d(iw1, iw2) = this->psi_ks_full(ik, io, iwt1) * this->psi_ks_full(ik, nocc + iv, iwt2);
}
}
}
}
Expand All @@ -52,6 +59,9 @@ namespace LR
void OperatorLREXX<std::complex<double>>::cal_DM_onebase(const int io, const int iv, const int ik) const
{
ModuleBase::TITLE("OperatorLREXX", "cal_DM_onebase");
// NOTICE: DM_onebase will be passed into `cal_energy` interface and conjugated by "zdotc".
// So the formula should be the same as RHS. instead of LHS of the A-matrix,
// i.e. c1v · conj(c2o) · e^{-ik(R2-R1)}
for (auto cell : this->BvK_cells)
{
std::complex<double> frac = RI::Global_Func::convert<std::complex<double>>(std::exp(
Expand All @@ -68,8 +78,12 @@ namespace LR
const int nw2 = aims_nbasis.empty() ? ucell.atoms[it2].nw : aims_nbasis[it2];
for (int iw1 = 0;iw1 < nw1;++iw1)
for (int iw2 = 0;iw2 < nw2;++iw2)
if (this->pmat.in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2)))
D2d(iw1, iw2) = frac * std::conj(this->psi_ks_full(ik, io, ucell.itiaiw2iwt(it1, ia1, iw1))) * this->psi_ks_full(ik, nocc + iv, ucell.itiaiw2iwt(it2, ia2, iw2));
{
const int iwt1 = ucell.itiaiw2iwt(it1, ia1, iw1);
const int iwt2 = ucell.itiaiw2iwt(it2, ia2, iw2);
if (this->pmat.in_this_processor(iwt1, iwt2))
D2d(iw1, iw2) = frac * std::conj(this->psi_ks_full(ik, io, iwt2)) * this->psi_ks_full(ik, nocc + iv, iwt1);
}
}
}
}
Expand All @@ -78,9 +92,6 @@ namespace LR
void OperatorLREXX<T>::act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik, const bool is_first_node)const
{
ModuleBase::TITLE("OperatorLREXX", "act");

const int& nk = this->kv.get_nks() / this->nspin;

// convert parallel info to LibRI interfaces
std::vector<std::tuple<std::set<TA>, std::set<TA>>> judge = RI_2D_Comm::get_2D_judge(this->pmat);

Expand Down Expand Up @@ -120,12 +131,12 @@ namespace LR
{
const int xstart_bk = ik * pX.get_local_size();
this->cal_DM_onebase(io, iv, ik); //set Ds_onebase for all e-h pairs (not only on this processor)
// LR_Util::print_CV(Ds_onebase[is], "Ds_onebase of occ " + std::to_string(io) + ", virtual " + std::to_string(iv) + " in OperatorLREXX", 1e-10);
// LR_Util::print_CV(Ds_onebase, "Ds_onebase of occ " + std::to_string(io) + ", virtual " + std::to_string(iv) + " in OperatorLREXX", 1e-10);
const T& ene = 2 * alpha * //minus for exchange(but here plus is right, why?), 2 for Hartree to Ry
lri->exx_lri.post_2D.cal_energy(this->Ds_onebase, lri->Hexxs[0]);
if (this->pX.in_this_processor(iv, io))
{
hpsi[xstart_bk + ik * pX.get_local_size() + this->pX.global2local_col(io) * this->pX.get_row_size() + this->pX.global2local_row(iv)] += ene;
hpsi[xstart_bk + this->pX.global2local_col(io) * this->pX.get_row_size() + this->pX.global2local_row(iv)] += ene;
}
}
}
Expand Down
20 changes: 12 additions & 8 deletions source/module_lr/operator_casida/operator_lr_exx.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace LR
const Parallel_Orbitals& pmat_in,
const double& alpha = 1.0,
const std::vector<int>& aims_nbasis = {})
: nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt),
: nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin),
psi_ks(psi_ks_in), DM_trans(DM_trans_in), exx_lri(exx_lri_in), kv(kv_in),
pX(pX_in), pc(pc_in), pmat(pmat_in), ucell(ucell_in), alpha(alpha),
aims_nbasis(aims_nbasis)
Expand All @@ -42,8 +42,11 @@ namespace LR
this->is_first_node = false;

// reduce psi_ks for later use
this->psi_ks_full.resize(this->kv.get_nks(), nocc + nvirt, this->naos);
LR_Util::gather_2d_to_full(this->pc, this->psi_ks.get_pointer(), this->psi_ks_full.get_pointer(), false, this->naos, nocc + nvirt);
this->psi_ks_full.resize(this->nk, nocc + nvirt, this->naos);
for (int ik = 0;ik < nk;++ik)
{
LR_Util::gather_2d_to_full(this->pc, &this->psi_ks(ik, 0, 0), &this->psi_ks_full(ik, 0, 0), false, this->naos, nocc + nvirt);
}

// get cells in BvK supercell
const TC period = RI_Util::get_Born_vonKarmen_period(kv_in);
Expand All @@ -63,12 +66,13 @@ namespace LR
const int ngk_ik = 0,
const bool is_first_node = false) const override;

private:
private:
//global sizes
const int& nspin;
const int& naos;
const int& nocc;
const int& nvirt;
const int nspin = 1;
const int naos = 1;
const int nocc = 1;
const int nvirt = 1;
const int nk = 1; ///< number of k-points
const double alpha = 1.0; //(allow non-ref constant)
const bool cal_dm_trans = false;
const bool tdm_sym = false; ///< whether transition density matrix is symmetric
Expand Down
2 changes: 1 addition & 1 deletion tests/integrate/291_NO_KP_LR/result.ref
Original file line number Diff line number Diff line change
@@ -1 +1 @@
totexcitationenergyref 0.784471
totexcitationenergyref 0.784274
2 changes: 1 addition & 1 deletion tests/integrate/391_NO_GO_LR/result.ref
Original file line number Diff line number Diff line change
@@ -1 +1 @@
totexcitationenergyref 2.641295
totexcitationenergyref 2.641190
2 changes: 1 addition & 1 deletion tests/integrate/392_NO_GO_LR_HF/result.ref
Original file line number Diff line number Diff line change
@@ -1 +1 @@
totexcitationenergyref 3.067979
totexcitationenergyref 3.067058

0 comments on commit 4f1be57

Please sign in to comment.