Grid 0.7.0
DomainWallEOFAFermionCache.h
Go to the documentation of this file.
1/*************************************************************************************
2
3Grid physics library, www.github.com/paboyle/Grid
4
5Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc
6
7Copyright (C) 2017
8
9Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
12Author: paboyle <paboyle@ph.ed.ac.uk>
13Author: David Murphy <dmurphy@phys.columbia.edu>
14Author: Gianluca Filaci <g.filaci@ed.ac.uk>
15
16This program is free software; you can redistribute it and/or modify
17it under the terms of the GNU General Public License as published by
18the Free Software Foundation; either version 2 of the License, or
19(at your option) any later version.
20
21This program is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24GNU General Public License for more details.
25
26You should have received a copy of the GNU General Public License along
27with this program; if not, write to the Free Software Foundation, Inc.,
2851 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29
30See the full license in the file "LICENSE" in the top level distribution directory
31*************************************************************************************/
32 /* END LEGAL */
33
36
38
39// FIXME -- make a version of these routines with site loop outermost for cache reuse.
40// Pminus fowards
41// Pplus backwards..
42template<class Impl>
43void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
44 std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
45{
46 chi_i.Checkerboard() = psi_i.Checkerboard();
47 int Ls = this->Ls;
48 GridBase* grid = psi_i.Grid();
49 autoView( phi , phi_i, AcceleratorRead);
50 autoView( psi , psi_i, AcceleratorRead);
51 autoView( chi , chi_i, AcceleratorWrite);
52 assert(phi.Checkerboard() == psi.Checkerboard());
53
54 auto pdiag = &this->d_diag[0];
55 auto pupper = &this->d_upper[0];
56 auto plower = &this->d_lower[0];
57
58 acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
59 acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
60 acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
61
62 // Flops = 6.0*(Nc*Ns) *Ls*vol
63
64 auto nloop=grid->oSites()/Ls;
65 accelerator_for(sss,nloop,Simd::Nsimd(),{
66 auto ss=sss*Ls;
67 typedef decltype(coalescedRead(psi[0])) spinor;
68 for(int s=0; s<Ls; s++){
69 spinor tmp1, tmp2;
70 uint64_t idx_u = ss+((s+1)%Ls);
71 uint64_t idx_l = ss+((s+Ls-1)%Ls);
72 spProj5m(tmp1, psi(idx_u));
73 spProj5p(tmp2, psi(idx_l));
74 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
75 }
76 });
77
78}
79
80template<class Impl>
81void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
82 std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
83{
84 chi_i.Checkerboard() = psi_i.Checkerboard();
85 GridBase* grid = psi_i.Grid();
86 int Ls = this->Ls;
87
88 autoView( psi , psi_i, AcceleratorRead);
89 autoView( phi , phi_i, AcceleratorRead);
90 autoView( chi , chi_i, AcceleratorWrite);
91 assert(phi.Checkerboard() == psi.Checkerboard());
92
93 auto pdiag = &this->d_diag[0];
94 auto pupper = &this->d_upper[0];
95 auto plower = &this->d_lower[0];
96
97 acceleratorCopyToDevice(&diag[0] ,&pdiag[0],Ls*sizeof(Coeff_t));
98 acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
99 acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
100
101 // Flops = 6.0*(Nc*Ns) *Ls*vol
102
103 auto nloop=grid->oSites()/Ls;
104 accelerator_for(sss,nloop,Simd::Nsimd(),{
105 typedef decltype(coalescedRead(psi[0])) spinor;
106 auto ss=sss*Ls;
107 for(int s=0; s<Ls; s++){
108 spinor tmp1, tmp2;
109 uint64_t idx_u = ss+((s+1)%Ls);
110 uint64_t idx_l = ss+((s+Ls-1)%Ls);
111 spProj5p(tmp1, psi(idx_u));
112 spProj5m(tmp2, psi(idx_l));
113 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
114 }
115 });
116
117}
118
119template<class Impl>
120void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
121{
122 chi_i.Checkerboard() = psi_i.Checkerboard();
123 GridBase* grid = psi_i.Grid();
124 autoView( psi, psi_i, AcceleratorRead);
125 autoView( chi, chi_i, AcceleratorWrite);
126 int Ls = this->Ls;
127
128 auto plee = & this->d_lee [0];
129 auto pdee = & this->d_dee [0];
130 auto puee = & this->d_uee [0];
131 auto pleem = & this->d_leem[0];
132 auto pueem = & this->d_ueem[0];
133
134 acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
135 acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
136 acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
137 acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
138 acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
139
140 uint64_t nloop=grid->oSites()/Ls;
141 accelerator_for(sss,nloop,Simd::Nsimd(),{
142 uint64_t ss=sss*Ls;
143 typedef decltype(coalescedRead(psi[0])) spinor;
144 spinor tmp, acc, res;
145
146 // Apply (L^{\prime})^{-1} L_m^{-1}
147 res = psi(ss);
148 spProj5m(tmp,res);
149 acc = pleem[0]*tmp;
150 spProj5p(tmp,res);
151 coalescedWrite(chi[ss],res);
152
153 for(int s=1;s<Ls-1;s++){
154 res = psi(ss+s);
155 res -= plee[s-1]*tmp;
156 spProj5m(tmp,res);
157 acc += pleem[s]*tmp;
158 spProj5p(tmp,res);
159 coalescedWrite(chi[ss+s],res);
160 }
161 res = psi(ss+Ls-1) - plee[Ls-2]*tmp - acc;
162
163 // Apply U_m^{-1} D^{-1} U^{-1}
164 acc = (1.0/pdee[Ls ])*res;
165 tmp = (1.0/pdee[Ls-1])*res;
167 spProj5m(tmp,tmp);
168 coalescedWrite(chi[ss+Ls-1], acc + tmp);
169 for (int s=Ls-2;s>=0;s--){
170 res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
171 spProj5m(tmp,res);
172 coalescedWrite(chi[ss+s],res);
173 }
174 });
175}
176
177template<class Impl>
178void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
179{
180 chi_i.Checkerboard() = psi_i.Checkerboard();
181 GridBase* grid = psi_i.Grid();
182 autoView( psi, psi_i, AcceleratorRead);
183 autoView( chi, chi_i, AcceleratorWrite);
184 int Ls = this->Ls;
185
186 auto plee = & this->lee[0];
187 auto pdee = & this->dee[0];
188 auto puee = & this->uee[0];
189
190 auto pleem = & this->leem[0];
191 auto pueem = & this->ueem[0];
192
193 assert(psi.Checkerboard() == psi.Checkerboard());
194
195 auto nloop = grid->oSites()/Ls;
196 accelerator_for(sss,nloop,Simd::Nsimd(),{
197 uint64_t ss=sss*Ls;
198 typedef decltype(coalescedRead(psi[0])) spinor;
199 spinor tmp, acc, res;
200
201 // Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
202 res = psi(ss);
203 spProj5p(tmp,res);
204 acc = conjugate(pueem[0])*tmp;
205 spProj5m(tmp,res);
206 coalescedWrite(chi[ss],res);
207
208 for(int s=1;s<Ls-1;s++){
209 res = psi(ss+s);
210 res -= conjugate(puee[s-1])*tmp;
211 spProj5p(tmp,res);
212 acc += conjugate(pueem[s])*tmp;
213 spProj5m(tmp,res);
214 coalescedWrite(chi[ss+s],res);
215 }
216 res = psi(ss+Ls-1) - conjugate(puee[Ls-2])*tmp - acc;
217
218 // Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
219 acc = conjugate(1.0/pdee[Ls-1])*res;
220 tmp = conjugate(1.0/pdee[Ls ])*res;
222 spProj5p(tmp,tmp);
223 coalescedWrite(chi[ss+Ls-1], acc + tmp);
224 for (int s=Ls-2;s>=0;s--){
225 res = conjugate(1.0/pdee[s])*chi(ss+s) - conjugate(plee[s])*tmp - conjugate(pleem[s])*acc;
226 spProj5p(tmp,res);
227 coalescedWrite(chi[ss+s],res);
228 }
229 });
230
231}
232
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
#define acc(v, a, off, step, n)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline void spProj5m(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:146
accelerator_inline void spProj5p(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:140
deviceVector< Coeff_t > d_upper
deviceVector< Coeff_t > d_diag
deviceVector< Coeff_t > d_lower
virtual void MooeeInv(const FermionField &in, FermionField &out)
virtual void M5D(const FermionField &psi, FermionField &chi)
virtual void M5Ddag(const FermionField &psi, FermionField &chi)
virtual void MooeeInvDag(const FermionField &in, FermionField &out)
int oSites(void) const