Grid 0.7.0
CayleyFermion5Dcache.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
12Author: paboyle <paboyle@ph.ed.ac.uk>
13Author: Gianluca Filaci <g.filaci@ed.ac.uk>
14
15 This program is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; either version 2 of the License, or
18 (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License along
26 with this program; if not, write to the Free Software Foundation, Inc.,
27 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28
29 See the full license in the file "LICENSE" in the top level distribution directory
30*************************************************************************************/
31/* END LEGAL */
32
35
36
38
39// Pminus fowards
40// Pplus backwards..
41template<class Impl>
42void
43CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
44 const FermionField &phi_i,
45 FermionField &chi_i,
46 std::vector<Coeff_t> &lower,
47 std::vector<Coeff_t> &diag,
48 std::vector<Coeff_t> &upper)
49{
50
51 chi_i.Checkerboard()=psi_i.Checkerboard();
52 GridBase *grid=psi_i.Grid();
53 autoView(psi , psi_i,AcceleratorRead);
54 autoView(phi , phi_i,AcceleratorRead);
55 autoView(chi , chi_i,AcceleratorWrite);
56 assert(phi.Checkerboard() == psi.Checkerboard());
57
58 int Ls =this->Ls;
59
60 acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t));
61 acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t));
62 acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t));
63
64 auto pdiag = &d_diag[0];
65 auto pupper = &d_upper[0];
66 auto plower = &d_lower[0];
67
68 // 10 = 3 complex mult + 2 complex add
69 // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
70 uint64_t nloop = grid->oSites();
71 accelerator_for(sss,nloop,Simd::Nsimd(),{
72 uint64_t s = sss%Ls;
73 uint64_t ss= sss-s;
74 typedef decltype(coalescedRead(psi[0])) spinor;
75 spinor tmp1, tmp2;
76 uint64_t idx_u = ss+((s+1)%Ls);
77 uint64_t idx_l = ss+((s+Ls-1)%Ls);
78 spProj5m(tmp1,psi(idx_u));
79 spProj5p(tmp2,psi(idx_l));
80 coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
81 });
82}
83
84template<class Impl>
85void
86CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
87 const FermionField &phi_i,
88 FermionField &chi_i,
89 std::vector<Coeff_t> &lower,
90 std::vector<Coeff_t> &diag,
91 std::vector<Coeff_t> &upper)
92{
93 chi_i.Checkerboard()=psi_i.Checkerboard();
94 GridBase *grid=psi_i.Grid();
95 autoView(psi , psi_i,AcceleratorRead);
96 autoView(phi , phi_i,AcceleratorRead);
98 assert(phi.Checkerboard() == psi.Checkerboard());
99
100 int Ls=this->Ls;
101
102 acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t));
103 acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t));
104 acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t));
105
106 auto pdiag = &d_diag[0];
107 auto pupper = &d_upper[0];
108 auto plower = &d_lower[0];
109
110 // Flops = 6.0*(Nc*Ns) *Ls*vol
111 uint64_t nloop = grid->oSites();
112 accelerator_for(sss,nloop,Simd::Nsimd(),{
113 uint64_t s = sss%Ls;
114 uint64_t ss= sss-s;
115 typedef decltype(coalescedRead(psi[0])) spinor;
116 spinor tmp1,tmp2;
117 uint64_t idx_u = ss+((s+1)%Ls);
118 uint64_t idx_l = ss+((s+Ls-1)%Ls);
119 spProj5p(tmp1,psi(idx_u));
120 spProj5m(tmp2,psi(idx_l));
121 coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
122 });
123}
124
125template<class Impl>
126void
127CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
128{
129 chi_i.Checkerboard()=psi_i.Checkerboard();
130 GridBase *grid=psi_i.Grid();
131
132 autoView(psi , psi_i,AcceleratorRead);
133 autoView(chi , chi_i,AcceleratorWrite);
134
135 int Ls=this->Ls;
136
137 acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
138 acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
139 acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
140 acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
141 acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
142
143 auto plee = & d_lee [0];
144 auto pdee = & d_dee [0];
145 auto puee = & d_uee [0];
146 auto pleem = & d_leem[0];
147 auto pueem = & d_ueem[0];
148
149 uint64_t nloop = grid->oSites()/Ls;
150 accelerator_for(sss,nloop,Simd::Nsimd(),{
151 uint64_t ss=sss*Ls;
152 typedef decltype(coalescedRead(psi[0])) spinor;
153 spinor tmp, acc, res;
154
155 // X = Nc*Ns
156 // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
157 // Apply (L^{\prime})^{-1} L_m^{-1}
158 res = psi(ss);
159 spProj5m(tmp,res);
160 acc = pleem[0]*tmp;
161 spProj5p(tmp,res);
162 coalescedWrite(chi[ss],res);
163
164 for(int s=1;s<Ls-1;s++){
165 res = psi(ss+s);
166 res -= plee[s-1]*tmp;
167 spProj5m(tmp,res);
168 acc += pleem[s]*tmp;
169 spProj5p(tmp,res);
170 coalescedWrite(chi[ss+s],res);
171 }
172 res = psi(ss+Ls-1) - plee[Ls-2]*tmp - acc;
173
174 // Apply U_m^{-1} D^{-1} U^{-1}
175 res = (1.0/pdee[Ls-1])*res;
176 coalescedWrite(chi[ss+Ls-1],res);
177 spProj5p(acc,res);
178 spProj5m(tmp,res);
179 for (int s=Ls-2;s>=0;s--){
180 res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
181 spProj5m(tmp,res);
182 coalescedWrite(chi[ss+s],res);
183 }
184 });
185
186}
187
188template<class Impl>
189void
190CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
191{
192 chi_i.Checkerboard()=psi_i.Checkerboard();
193 GridBase *grid=psi_i.Grid();
194 int Ls=this->Ls;
195
196 autoView(psi , psi_i,AcceleratorRead);
197 autoView(chi , chi_i,AcceleratorWrite);
198
199 acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
200 acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
201 acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
202 acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
203 acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
204
205 auto plee = & d_lee [0];
206 auto pdee = & d_dee [0];
207 auto puee = & d_uee [0];
208 auto pleem = & d_leem[0];
209 auto pueem = & d_ueem[0];
210
211 assert(psi.Checkerboard() == psi.Checkerboard());
212
213 uint64_t nloop = grid->oSites()/Ls;
214 accelerator_for(sss,nloop,Simd::Nsimd(),{
215 uint64_t ss=sss*Ls;
216 typedef decltype(coalescedRead(psi[0])) spinor;
217 spinor tmp, acc, res;
218
219 // X = Nc*Ns
220 // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
221 // Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
222 res = psi(ss);
223 spProj5p(tmp,res);
224 acc = conjugate(pueem[0])*tmp;
225 spProj5m(tmp,res);
226 coalescedWrite(chi[ss],res);
227
228 for(int s=1;s<Ls-1;s++){
229 res = psi(ss+s);
230 res -= conjugate(puee[s-1])*tmp;
231 spProj5p(tmp,res);
232 acc += conjugate(pueem[s])*tmp;
233 spProj5m(tmp,res);
234 coalescedWrite(chi[ss+s],res);
235 }
236 res = psi(ss+Ls-1) - conjugate(puee[Ls-2])*tmp - acc;
237
238 // Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
239 res = conjugate(1.0/pdee[Ls-1])*res;
240 coalescedWrite(chi[ss+Ls-1],res);
241 spProj5m(acc,res);
242 spProj5p(tmp,res);
243 for (int s=Ls-2;s>=0;s--){
244 res = conjugate(1.0/pdee[s])*chi(ss+s) - conjugate(plee[s])*tmp - conjugate(pleem[s])*acc;
245 spProj5p(tmp,res);
246 coalescedWrite(chi[ss+s],res);
247 }
248 });
249
250}
251
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
#define acc(v, a, off, step, n)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline void spProj5m(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:146
accelerator_inline void spProj5p(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:140
virtual void MooeeInvDag(const FermionField &in, FermionField &out)
deviceVector< Coeff_t > d_upper
deviceVector< Coeff_t > d_diag
virtual void M5Ddag(const FermionField &psi, FermionField &chi)
virtual void M5D(const FermionField &psi, FermionField &chi)
deviceVector< Coeff_t > d_lower
virtual void MooeeInv(const FermionField &in, FermionField &out)
int oSites(void) const