Grid 0.7.0
MobiusEOFAFermionCache.h
Go to the documentation of this file.
1/*************************************************************************************
2
3Grid physics library, www.github.com/paboyle/Grid
4
5Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc
6
7Copyright (C) 2017
8
9Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
12Author: paboyle <paboyle@ph.ed.ac.uk>
13Author: David Murphy <dmurphy@phys.columbia.edu>
14Author: Gianluca Filaci <g.filaci@ed.ac.uk>
15
16This program is free software; you can redistribute it and/or modify
17it under the terms of the GNU General Public License as published by
18the Free Software Foundation; either version 2 of the License, or
19(at your option) any later version.
20
21This program is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24GNU General Public License for more details.
25
26You should have received a copy of the GNU General Public License along
27with this program; if not, write to the Free Software Foundation, Inc.,
2851 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29
30See the full license in the file "LICENSE" in the top level distribution directory
31*************************************************************************************/
32 /* END LEGAL */
33
36
38
39
40template<class Impl>
41void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
42 std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
43{
44 chi_i.Checkerboard() = psi_i.Checkerboard();
45 GridBase *grid = psi_i.Grid();
46 int Ls = this->Ls;
47 autoView(psi , psi_i, AcceleratorRead);
48 autoView(phi , phi_i, AcceleratorRead);
49 autoView(chi , chi_i, AcceleratorWrite);
50
51 assert(phi.Checkerboard() == psi.Checkerboard());
52
53 auto pdiag = &this->d_diag[0];
54 auto pupper = &this->d_upper[0];
55 auto plower = &this->d_lower[0];
56
57 acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
58 acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
59 acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
60
61 // Flops = 6.0*(Nc*Ns) *Ls*vol
62 int nloop = grid->oSites()/Ls;
63 accelerator_for(sss,nloop,Simd::Nsimd(),{
64 uint64_t ss = sss*Ls;
65 typedef decltype(coalescedRead(psi[0])) spinor;
66 spinor tmp1;
67 spinor tmp2;
68 for(int s=0; s<Ls; s++){
69 uint64_t idx_u = ss+((s+1)%Ls);
70 uint64_t idx_l = ss+((s+Ls-1)%Ls);
71 spProj5m(tmp1, psi(idx_u));
72 spProj5p(tmp2, psi(idx_l));
73 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
74 }
75 });
76
77}
78
79template<class Impl>
80void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
81 std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
82 std::vector<Coeff_t> &shift_coeffs)
83{
84 chi_i.Checkerboard() = psi_i.Checkerboard();
85 GridBase *grid = psi_i.Grid();
86 int Ls = this->Ls;
87 autoView(psi , psi_i, AcceleratorRead);
88 autoView(phi , phi_i, AcceleratorRead);
89 autoView(chi , chi_i, AcceleratorWrite);
90
91 auto pm = this->pm;
92 int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
93
94 assert(phi.Checkerboard() == psi.Checkerboard());
95
96 auto pdiag = &this->d_diag[0];
97 auto pupper = &this->d_upper[0];
98 auto plower = &this->d_lower[0];
99 auto pshift_coeffs = &this->d_shift_coefficients[0];
100
101 acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
102 acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
103 acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
104 acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t));
105
106 // Flops = 6.0*(Nc*Ns) *Ls*vol
107 int nloop = grid->oSites()/Ls;
108 accelerator_for(sss,nloop,Simd::Nsimd(),{
109 uint64_t ss = sss*Ls;
110 typedef decltype(coalescedRead(psi[0])) spinor;
111 spinor tmp1;
112 spinor tmp2;
113 spinor tmp;
114 for(int s=0; s<Ls; s++){
115 uint64_t idx_u = ss+((s+1)%Ls);
116 uint64_t idx_l = ss+((s+Ls-1)%Ls);
117 spProj5m(tmp1, psi(idx_u));
118 spProj5p(tmp2, psi(idx_l));
119
120 if(pm == 1){ spProj5p(tmp, psi(ss+shift_s)); }
121 else { spProj5m(tmp, psi(ss+shift_s)); }
122
123 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 +plower[s]*tmp2 + pshift_coeffs[s]*tmp);
124 }
125 });
126
127}
128
129template<class Impl>
130void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
131 std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
132{
133 chi_i.Checkerboard() = psi_i.Checkerboard();
134 GridBase *grid = psi_i.Grid();
135 int Ls = this->Ls;
136 autoView(psi , psi_i, AcceleratorRead);
137 autoView(phi , phi_i, AcceleratorRead);
138 autoView(chi , chi_i, AcceleratorWrite);
139
140 assert(phi.Checkerboard() == psi.Checkerboard());
141
142 auto pdiag = &this->d_diag[0];
143 auto pupper = &this->d_upper[0];
144 auto plower = &this->d_lower[0];
145
146 acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
147 acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
148 acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
149
150 // Flops = 6.0*(Nc*Ns) *Ls*vol
151 int nloop = grid->oSites()/Ls;
152 accelerator_for(sss,nloop,Simd::Nsimd(), {
153 uint64_t ss = sss*Ls;
154
155 typedef decltype(coalescedRead(psi[0])) spinor;
156 spinor tmp1, tmp2;
157
158 for(int s=0; s<Ls; s++){
159 uint64_t idx_u = ss+((s+1)%Ls);
160 uint64_t idx_l = ss+((s+Ls-1)%Ls);
161 spProj5p(tmp1, psi(idx_u));
162 spProj5m(tmp2, psi(idx_l));
163 coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
164 }
165 });
166}
167
168template<class Impl>
169void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
170 std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
171 std::vector<Coeff_t> &shift_coeffs)
172{
173 chi_i.Checkerboard() = psi_i.Checkerboard();
174 GridBase *grid = psi_i.Grid();
175 int Ls = this->Ls;
176 int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
177 autoView(psi , psi_i, AcceleratorRead);
178 autoView(phi , phi_i, AcceleratorRead);
179 autoView(chi , chi_i, AcceleratorWrite);
180
181 assert(phi.Checkerboard() == psi.Checkerboard());
182
183 auto pdiag = &this->d_diag[0];
184 auto pupper = &this->d_upper[0];
185 auto plower = &this->d_lower[0];
186 auto pshift_coeffs = &this->d_shift_coefficients[0];
187
188 acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
189 acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
190 acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
191 acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t));
192
193 // Flops = 6.0*(Nc*Ns) *Ls*vol
194 auto pm = this->pm;
195
196 int nloop = grid->oSites()/Ls;
197 accelerator_for(sss,nloop,Simd::Nsimd(),{
198 uint64_t ss = sss*Ls;
199
200 typedef decltype(coalescedRead(psi[0])) spinor;
201 spinor tmp1, tmp2, tmp;
202 tmp1=Zero();
203 coalescedWrite(chi[ss+Ls-1],tmp1);
204
205 for(int s=0; s<Ls; s++){
206
207 uint64_t idx_u = ss+((s+1)%Ls);
208 uint64_t idx_l = ss+((s+Ls-1)%Ls);
209
210 spProj5p(tmp1, psi(idx_u));
211 spProj5m(tmp2, psi(idx_l));
212
213 if(s==(Ls-1)) coalescedWrite(chi[ss+s], chi(ss+s)+ pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
214 else coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
215 if(pm == 1){ spProj5p(tmp, psi(ss+s)); }
216 else { spProj5m(tmp, psi(ss+s)); }
217
218 coalescedWrite(chi[ss+shift_s],chi(ss+shift_s)+pshift_coeffs[s]*tmp);
219 }
220 });
221
222}
223
224template<class Impl>
225void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
226{
227 chi_i.Checkerboard() = psi_i.Checkerboard();
228 GridBase *grid = psi_i.Grid();
229 int Ls = this->Ls;
230 autoView(psi , psi_i, AcceleratorRead);
231 autoView(chi , chi_i, AcceleratorWrite);
232
233 auto plee = & this->d_lee [0];
234 auto pdee = & this->d_dee [0];
235 auto puee = & this->d_uee [0];
236 auto pleem = & this->d_leem[0];
237 auto pueem = & this->d_ueem[0];
238
239 acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
240 acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
241 acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
242 acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
243 acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
244
245 if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
246
247 int nloop = grid->oSites()/Ls;
248 accelerator_for(sss,nloop,Simd::Nsimd(),{
249 uint64_t ss=sss*Ls;
250 typedef decltype(coalescedRead(psi[0])) spinor;
251 spinor tmp, acc, res;
252
253 // X = Nc*Ns
254 // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
255 // Apply (L^{\prime})^{-1} L_m^{-1}
256 res = psi(ss);
257 spProj5m(tmp,res);
258 acc = pleem[0]*tmp;
259 spProj5p(tmp,res);
260 coalescedWrite(chi[ss],res);
261
262 for(int s=1;s<Ls-1;s++){
263 res = psi(ss+s);
264 res -= plee[s-1]*tmp;
265 spProj5m(tmp,res);
266 acc += pleem[s]*tmp;
267 spProj5p(tmp,res);
268 coalescedWrite(chi[ss+s],res);
269 }
270 res = psi(ss+Ls-1) - plee[Ls-2]*tmp - acc;
271
272 // Apply U_m^{-1} D^{-1} U^{-1}
273 res = (1.0/pdee[Ls-1])*res;
274 coalescedWrite(chi[ss+Ls-1],res);
275 spProj5p(acc,res);
276 spProj5m(tmp,res);
277 for (int s=Ls-2;s>=0;s--){
278 res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
279 spProj5m(tmp,res);
280 coalescedWrite(chi[ss+s],res);
281 }
282 });
283
284}
285
286template<class Impl>
287void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
288{
289 chi_i.Checkerboard() = psi_i.Checkerboard();
290 GridBase *grid = psi_i.Grid();
291 int Ls = this->Ls;
292 autoView(psi , psi_i, AcceleratorRead);
293 autoView(chi , chi_i, AcceleratorWrite);
294
295 // Move into object and constructor
296 auto pm = this->pm;
297 auto plee = & this->d_lee [0];
298 auto pdee = & this->d_dee [0];
299 auto puee = & this->d_uee [0];
300 auto pleem = & this->d_leem[0];
301 auto pueem = & this->d_ueem[0];
302 auto pMooeeInv_shift_lc = &this->d_MooeeInv_shift_lc[0];
303 auto pMooeeInv_shift_norm = &this->d_MooeeInv_shift_norm[0];
304
305 acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
306 acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
307 acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
308 acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
309 acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
310 acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&pMooeeInv_shift_lc[0],Ls*sizeof(Coeff_t));
311 acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&pMooeeInv_shift_norm[0],Ls*sizeof(Coeff_t));
312
313 int nloop = grid->oSites()/Ls;
314 accelerator_for(sss,nloop,Simd::Nsimd(),{
315 uint64_t ss=sss*Ls;
316 typedef decltype(coalescedRead(psi[0])) spinor;
317 spinor tmp, acc, res, tmp_spProj;
318
319 // Apply (L^{\prime})^{-1} L_m^{-1}
320 res = psi(ss);
321 spProj5m(tmp,res);
322 acc = pleem[0]*tmp;
323 spProj5p(tmp,res);
324 coalescedWrite(chi[ss],res);
325 tmp_spProj = pMooeeInv_shift_lc[0]*res;
326
327 for(int s=1;s<Ls-1;s++){
328 res = psi(ss+s);
329 tmp_spProj += pMooeeInv_shift_lc[s]*res;
330 res -= plee[s-1]*tmp;
331 spProj5m(tmp,res);
332 acc += pleem[s]*tmp;
333 spProj5p(tmp,res);
334 coalescedWrite(chi[ss+s],res);
335 }
336 res = psi(ss+Ls-1);
337
338 tmp_spProj += pMooeeInv_shift_lc[Ls-1]*res;
339 if(pm == 1){ spProj5p(tmp_spProj, tmp_spProj);}
340 else { spProj5m(tmp_spProj, tmp_spProj); }
341
342 res = res - plee[Ls-2]*tmp - acc;
343
344 // Apply U_m^{-1} D^{-1} U^{-1}
345 res = (1.0/pdee[Ls-1])*res;
346 spProj5p(acc,res);
347 spProj5m(tmp,res);
348 coalescedWrite(chi[ss+Ls-1], res + pMooeeInv_shift_norm[Ls-1]*tmp_spProj);
349 for (int s=Ls-2;s>=0;s--){
350 res = (1.0/pdee[s])*chi(ss+s) - puee[s]*tmp - pueem[s]*acc;
351 spProj5m(tmp,res);
352 coalescedWrite(chi[ss+s], res + pMooeeInv_shift_norm[s]*tmp_spProj);
353 }
354 });
355
356}
357
358template<class Impl>
359void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
360{
361 if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
362
363 chi_i.Checkerboard() = psi_i.Checkerboard();
364 GridBase *grid = psi_i.Grid();
365 int Ls = this->Ls;
366 autoView(psi , psi_i, AcceleratorRead);
367 autoView(chi , chi_i, AcceleratorWrite);
368
369 auto plee = &this->d_lee [0];
370 auto pdee = &this->d_dee [0];
371 auto puee = &this->d_uee [0];
372 auto pleem = &this->d_leem[0];
373 auto pueem = &this->d_ueem[0];
374
375 acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
376 acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
377 acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
378 acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
379 acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
380
381 int nloop = grid->oSites()/Ls;
382 accelerator_for(sss,nloop,Simd::Nsimd(),{
383 uint64_t ss=sss*Ls;
384 typedef decltype(coalescedRead(psi[0])) spinor;
385 spinor tmp, acc, res;
386
387 // X = Nc*Ns
388 // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops
389 // Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
390 res = psi(ss);
391 spProj5p(tmp,res);
392 acc = pueem[0]*tmp;
393 spProj5m(tmp,res);
394 coalescedWrite(chi[ss],res);
395
396 for(int s=1;s<Ls-1;s++){
397 res = psi(ss+s);
398 res -= puee[s-1]*tmp;
399 spProj5p(tmp,res);
400 acc += pueem[s]*tmp;
401 spProj5m(tmp,res);
402 coalescedWrite(chi[ss+s],res);
403 }
404 res = psi(ss+Ls-1) - puee[Ls-2]*tmp - acc;
405
406 // Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
407 res = (1.0/pdee[Ls-1])*res;
408 coalescedWrite(chi[ss+Ls-1],res);
409 spProj5m(acc,res);
410 spProj5p(tmp,res);
411 for (int s=Ls-2;s>=0;s--){
412 res = (1.0/pdee[s])*chi(ss+s) - plee[s]*tmp - pleem[s]*acc;
413 spProj5p(tmp,res);
414 coalescedWrite(chi[ss+s],res);
415 }
416 });
417}
418
419template<class Impl>
420void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
421{
422 chi_i.Checkerboard() = psi_i.Checkerboard();
423 GridBase *grid = psi_i.Grid();
424 autoView(psi , psi_i, AcceleratorRead);
425 autoView(chi , chi_i, AcceleratorWrite);
426 int Ls = this->Ls;
427
428 auto pm = this->pm;
429 auto plee = & this->d_lee [0];
430 auto pdee = & this->d_dee [0];
431 auto puee = & this->d_uee [0];
432 auto pleem = & this->d_leem[0];
433 auto pueem = & this->d_ueem[0];
434
435 auto pMooeeInvDag_shift_lc = &this->d_MooeeInv_shift_lc[0];
436 auto pMooeeInvDag_shift_norm = &this->d_MooeeInv_shift_norm[0];
437
438 acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
439 acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
440 acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
441 acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
442 acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
443 acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&pMooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t));
444 acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&pMooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t));
445
446 // auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
447 // auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
448
449 int nloop = grid->oSites()/Ls;
450 accelerator_for(sss,nloop,Simd::Nsimd(),{
451 uint64_t ss=sss*Ls;
452 typedef decltype(coalescedRead(psi[0])) spinor;
453 spinor tmp, acc, res, tmp_spProj;
454
455 // Apply (U^{\prime})^{-dagger} U_m^{-\dagger}
456 res = psi(ss);
457 spProj5p(tmp,res);
458 acc = pueem[0]*tmp;
459 spProj5m(tmp,res);
460 coalescedWrite(chi[ss],res);
461 tmp_spProj = pMooeeInvDag_shift_lc[0]*res;
462
463 for(int s=1;s<Ls-1;s++){
464 res = psi(ss+s);
465 tmp_spProj += pMooeeInvDag_shift_lc[s]*res;
466 res -= puee[s-1]*tmp;
467 spProj5p(tmp,res);
468 acc += pueem[s]*tmp;
469 spProj5m(tmp,res);
470 coalescedWrite(chi[ss+s],res);
471 }
472 res = psi(ss+Ls-1);
473
474 tmp_spProj += pMooeeInvDag_shift_lc[Ls-1]*res;
475 if(pm == 1){ spProj5p(tmp_spProj, tmp_spProj); }
476 else { spProj5m(tmp_spProj, tmp_spProj); }
477
478 res = res - puee[Ls-2]*tmp - acc;
479
480 // Apply L_m^{-\dagger} D^{-dagger} L^{-dagger}
481 res = (1.0/pdee[Ls-1])*res;
482 spProj5m(acc,res);
483 spProj5p(tmp,res);
484 coalescedWrite(chi[ss+Ls-1], res + pMooeeInvDag_shift_norm[Ls-1]*tmp_spProj);
485 for (int s=Ls-2;s>=0;s--){
486 res = (1.0/pdee[s])*chi(ss+s) - plee[s]*tmp - pleem[s]*acc;
487 spProj5p(tmp,res);
488 coalescedWrite(chi[ss+s], res + pMooeeInvDag_shift_norm[s]*tmp_spProj);
489 }
490 });
491
492}
493
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
#define acc(v, a, off, step, n)
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline void spProj5m(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:146
accelerator_inline void spProj5p(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:140
deviceVector< Coeff_t > d_upper
deviceVector< Coeff_t > d_diag
deviceVector< Coeff_t > d_lower
int oSites(void) const
void M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, std::vector< Coeff_t > &lower, std::vector< Coeff_t > &diag, std::vector< Coeff_t > &upper, std::vector< Coeff_t > &shift_coeffs)
virtual void MooeeInv(const FermionField &in, FermionField &out)
virtual void MooeeInvDag_shift(const FermionField &in, FermionField &out)
virtual void M5D(const FermionField &psi, FermionField &chi)
virtual void MooeeInv_shift(const FermionField &in, FermionField &out)
virtual void M5Ddag(const FermionField &psi, FermionField &chi)
virtual void MooeeInvDag(const FermionField &in, FermionField &out)
void M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, std::vector< Coeff_t > &lower, std::vector< Coeff_t > &diag, std::vector< Coeff_t > &upper, std::vector< Coeff_t > &shift_coeffs)
Definition Simd.h:194