44 const FermionField &phi_i,
46 std::vector<Coeff_t> &lower,
47 std::vector<Coeff_t> &diag,
48 std::vector<Coeff_t> &upper)
51 chi_i.Checkerboard()=psi_i.Checkerboard();
56 assert(phi.Checkerboard() == psi.Checkerboard());
70 uint64_t nloop = grid->
oSites();
76 uint64_t idx_u = ss+((s+1)%
Ls);
77 uint64_t idx_l = ss+((s+
Ls-1)%
Ls);
80 coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
87 const FermionField &phi_i,
89 std::vector<Coeff_t> &lower,
90 std::vector<Coeff_t> &diag,
91 std::vector<Coeff_t> &upper)
93 chi_i.Checkerboard()=psi_i.Checkerboard();
98 assert(phi.Checkerboard() == psi.Checkerboard());
111 uint64_t nloop = grid->
oSites();
117 uint64_t idx_u = ss+((s+1)%
Ls);
118 uint64_t idx_l = ss+((s+
Ls-1)%
Ls);
121 coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
129 chi_i.Checkerboard()=psi_i.Checkerboard();
143 auto plee = & d_lee [0];
144 auto pdee = & d_dee [0];
145 auto puee = & d_uee [0];
146 auto pleem = & d_leem[0];
147 auto pueem = & d_ueem[0];
149 uint64_t nloop = grid->
oSites()/Ls;
153 spinor tmp,
acc, res;
164 for(
int s=1;s<Ls-1;s++){
166 res -= plee[s-1]*tmp;
172 res = psi(ss+Ls-1) - plee[Ls-2]*tmp -
acc;
175 res = (1.0/pdee[Ls-1])*res;
179 for (
int s=Ls-2;s>=0;s--){
180 res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*
acc;
192 chi_i.Checkerboard()=psi_i.Checkerboard();
205 auto plee = & d_lee [0];
206 auto pdee = & d_dee [0];
207 auto puee = & d_uee [0];
208 auto pleem = & d_leem[0];
209 auto pueem = & d_ueem[0];
211 assert(psi.Checkerboard() == psi.Checkerboard());
213 uint64_t nloop = grid->
oSites()/Ls;
217 spinor tmp,
acc, res;
228 for(
int s=1;s<Ls-1;s++){
243 for (
int s=Ls-2;s>=0;s--){
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
#define acc(v, a, off, step, n)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline void spProj5m(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProj5p(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
virtual void MooeeInvDag(const FermionField &in, FermionField &out)
deviceVector< Coeff_t > d_upper
deviceVector< Coeff_t > d_diag
virtual void M5Ddag(const FermionField &psi, FermionField &chi)
virtual void M5D(const FermionField &psi, FermionField &chi)
deviceVector< Coeff_t > d_lower
virtual void MooeeInv(const FermionField &in, FermionField &out)