44 std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
46 chi_i.Checkerboard() = psi_i.Checkerboard();
52 assert(phi.Checkerboard() == psi.Checkerboard());
54 auto pdiag = &this->
d_diag[0];
55 auto pupper = &this->
d_upper[0];
56 auto plower = &this->
d_lower[0];
68 for(
int s=0; s<
Ls; s++){
70 uint64_t idx_u = ss+((s+1)%
Ls);
71 uint64_t idx_l = ss+((s+
Ls-1)%
Ls);
74 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
82 std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
84 chi_i.Checkerboard() = psi_i.Checkerboard();
91 assert(phi.Checkerboard() == psi.Checkerboard());
93 auto pdiag = &this->
d_diag[0];
94 auto pupper = &this->
d_upper[0];
95 auto plower = &this->
d_lower[0];
107 for(
int s=0; s<
Ls; s++){
109 uint64_t idx_u = ss+((s+1)%
Ls);
110 uint64_t idx_l = ss+((s+
Ls-1)%
Ls);
113 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
122 chi_i.Checkerboard() = psi_i.Checkerboard();
128 auto plee = & this->d_lee [0];
129 auto pdee = & this->d_dee [0];
130 auto puee = & this->d_uee [0];
131 auto pleem = & this->d_leem[0];
132 auto pueem = & this->d_ueem[0];
140 uint64_t nloop=grid->
oSites()/Ls;
144 spinor tmp,
acc, res;
153 for(
int s=1;s<Ls-1;s++){
155 res -= plee[s-1]*tmp;
161 res = psi(ss+Ls-1) - plee[Ls-2]*tmp -
acc;
164 acc = (1.0/pdee[Ls ])*res;
165 tmp = (1.0/pdee[Ls-1])*res;
169 for (
int s=Ls-2;s>=0;s--){
170 res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*
acc;
180 chi_i.Checkerboard() = psi_i.Checkerboard();
186 auto plee = & this->lee[0];
187 auto pdee = & this->dee[0];
188 auto puee = & this->uee[0];
190 auto pleem = & this->leem[0];
191 auto pueem = & this->ueem[0];
193 assert(psi.Checkerboard() == psi.Checkerboard());
195 auto nloop = grid->
oSites()/Ls;
199 spinor tmp,
acc, res;
208 for(
int s=1;s<Ls-1;s++){
224 for (
int s=Ls-2;s>=0;s--){
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
#define acc(v, a, off, step, n)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline void spProj5m(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProj5p(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
deviceVector< Coeff_t > d_upper
deviceVector< Coeff_t > d_diag
deviceVector< Coeff_t > d_lower
virtual void MooeeInv(const FermionField &in, FermionField &out)
virtual void M5D(const FermionField &psi, FermionField &chi)
virtual void M5Ddag(const FermionField &psi, FermionField &chi)
virtual void MooeeInvDag(const FermionField &in, FermionField &out)