Grid 0.7.0
WilsonKernelsImplementation.h
Go to the documentation of this file.
1/*************************************************************************************
2
3Grid physics library, www.github.com/paboyle/Grid
4
5Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
6
7Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
11Author: paboyle <paboyle@ph.ed.ac.uk>
12
13This program is free software; you can redistribute it and/or modify
14it under the terms of the GNU General Public License as published by
15the Free Software Foundation; either version 2 of the License, or
16(at your option) any later version.
17
18This program is distributed in the hope that it will be useful,
19but WITHOUT ANY WARRANTY; without even the implied warranty of
20MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21GNU General Public License for more details.
22
23You should have received a copy of the GNU General Public License along
24with this program; if not, write to the Free Software Foundation, Inc.,
2551 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26
27See the full license in the file "LICENSE" in the top level distribution
28directory
29*************************************************************************************/
30/* END LEGAL */
31#pragma once
32
34
36
37
39// Generic implementation; move to different file?
41
42/*
43accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
44{
45#ifdef GRID_SIMT
46 static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size");
47 uint4 * mem_pun = (uint4 *)mem; // force 128 bit loads
48 uint4 * chip_pun = (uint4 *)&chip;
49 * chip_pun = * mem_pun;
50#else
51 chip = *mem;
52#endif
53 return;
54}
55*/
// GENERIC_STENCIL_LEG(Dir,spProj,Recon): one stencil leg, handling both the
// local and the non-local neighbour in a single pass.
// The macro declares nothing except the temporaries `perm` and `tmp`; the
// expansion scope must already provide st, ptype, sF, sU, in, buf, U, lane,
// SE, chi, Uchi and result.
//  - Looks up the stencil entry for direction Dir at site sF.
//  - If the entry is flagged _is_local: reads in[SE->_offset] through
//    coalescedReadPermute and applies spProj to fill chi.
//  - Otherwise: chi is read straight from buf[SE->_offset]; spProj is not
//    applied on this path.
//  - After acceleratorSynchronise(), Impl::multLink is applied with U[sU] and
//    the product Uchi is handed to Recon together with result.
57#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \
58 SE = st.GetEntry(ptype, Dir, sF); \
59 if (SE->_is_local) { \
60 int perm= SE->_permute; \
61 auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
62 spProj(chi,tmp); \
63 } else { \
64 chi = coalescedRead(buf[SE->_offset],lane); \
65 } \
66 acceleratorSynchronise(); \
67 Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
68 Recon(result, Uchi);
69
// GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon): stencil leg restricted to local
// neighbours.
// Same scope requirements as GENERIC_STENCIL_LEG (st, ptype, sF, sU, in, U,
// lane, SE, chi, Uchi, result), except that buf is never read here.
// When the entry is flagged _is_local the leg is projected, link-multiplied
// and passed to Recon; when it is not, nothing is contributed to result.
// acceleratorSynchronise() is issued on both paths, after the conditional.
70#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \
71 SE = st.GetEntry(ptype, Dir, sF); \
72 if (SE->_is_local) { \
73 int perm= SE->_permute; \
74 auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
75 spProj(chi,tmp); \
76 Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
77 Recon(result, Uchi); \
78 } \
79 acceleratorSynchronise();
80
// GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon): stencil leg restricted to
// non-local neighbours.
// Acts only when the entry is NOT flagged _is_local: chi is declared locally
// (via auto) from buf[SE->_offset], link-multiplied, and passed to Recon.
// The scope-level counter nmu is incremented once per contributing leg, so the
// caller can tell whether result received any contribution.
// spProj is accepted for signature symmetry with the other leg macros but is
// not referenced in the body.
// The expansion scope must provide st, ptype, sF, sU, buf, U, lane, SE, Uchi,
// result and nmu.
81#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
82 SE = st.GetEntry(ptype, Dir, sF); \
83 if (!SE->_is_local ) { \
84 auto chi = coalescedRead(buf[SE->_offset],lane); \
85 Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
86 Recon(result, Uchi); \
87 nmu++; \
88 } \
89 acceleratorSynchronise();
90
// GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon): body of a single-direction hop.
// Unlike the GENERIC_STENCIL_LEG* macros this one does not call st.GetEntry;
// SE (and ptype) must already have been set by the expansion scope.
// NOTE: Impl::multLink is passed the lower-case `dir` taken from the expansion
// scope, not the macro parameter `Dir`; `Dir` is not referenced anywhere in
// this body. Only spProj and Recon select the behaviour.
// The expansion scope must provide SE, ptype, in, buf, U, sU, lane, dir, st,
// chi, Uchi and result.
91#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
92 if (SE->_is_local ) { \
93 int perm= SE->_permute; \
94 auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
95 spProj(chi,tmp); \
96 } else { \
97 chi = coalescedRead(buf[SE->_offset],lane); \
98 } \
99 acceleratorSynchronise(); \
100 Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
101 Recon(result, Uchi);
102
// GENERIC_DHOPDIR_LEG(Dir,spProj,Recon): runs GENERIC_DHOPDIR_LEG_BODY only
// when the scope-level variable `gamma` equals Dir; otherwise expands to a
// no-op branch.
103#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
104 if (gamma == Dir) { \
105 GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
106 }
107
108
110 // All legs kernels ; comms then compute
// GenericDhopSiteDag: per-site kernel ("all legs" variant, daggered).
// Declares the working temporaries (chi, Uchi, result, SE, ptype) and the SIMT
// lane, then writes `result` to out[sF] for that lane.
//   st  : stencil view used to look up neighbour entries
//   U   : doubled gauge field view, indexed by sU
//   buf : half-spinor buffer for neighbours that are not local
//   sF  : fermion site index (output written to out[sF])
//   sU  : gauge site index
//   in  : input fermion field view
//   out : output fermion field view
112template <class Impl> accelerator_inline
113void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
114 SiteHalfSpinor *buf, int sF,
115 int sU, const FermionFieldView &in, FermionFieldView &out)
116{
117 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
118 typedef decltype(coalescedRead(in[0])) calcSpinor;
119 calcHalfSpinor chi;
120 // calcHalfSpinor *chi_p;
121 calcHalfSpinor Uchi;
122 calcSpinor result;
123 StencilEntry *SE;
124 int ptype;
125 const int Nsimd = SiteHalfSpinor::Nsimd();
126 const int lane=acceleratorSIMTlane(Nsimd);
// NOTE(review): this listing jumps from original line 126 to 135. The
// statements that fill `result` are not visible here -- presumably one
// GENERIC_STENCIL_LEG per direction; confirm against the repository.
// No result=Zero() is visible in this variant (the Int/Ext variants have one).
135 coalescedWrite(out[sF],result,lane);
136};
137
// GenericDhopSite: per-site kernel ("all legs" variant, non-daggered).
// Same parameter list and temporaries as GenericDhopSiteDag; the final
// statement stores `result` into out[sF] for the current SIMT lane.
138template <class Impl> accelerator_inline
139void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
140 SiteHalfSpinor *buf, int sF,
141 int sU, const FermionFieldView &in, FermionFieldView &out)
142{
143 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
144 typedef decltype(coalescedRead(in[0])) calcSpinor;
145 calcHalfSpinor chi;
146 // calcHalfSpinor *chi_p;
147 calcHalfSpinor Uchi;
148 calcSpinor result;
149 StencilEntry *SE;
150 int ptype;
151
152 const int Nsimd = SiteHalfSpinor::Nsimd();
153 const int lane=acceleratorSIMTlane(Nsimd);
// NOTE(review): original lines 154-161 are absent from this listing; the
// statements that compute `result` (presumably the GENERIC_STENCIL_LEG
// expansions) cannot be seen here -- confirm against the repository.
162 coalescedWrite(out[sF], result,lane);
163};
164
165 // Interior kernels
// GenericDhopSiteDagInt: per-site "interior" kernel (daggered).
// Zeroes `result` up front and stores it unconditionally to out[sF] for the
// current SIMT lane.
167template <class Impl> accelerator_inline
168void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
169 SiteHalfSpinor *buf, int sF,
170 int sU, const FermionFieldView &in, FermionFieldView &out)
171{
172 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
173 typedef decltype(coalescedRead(in[0])) calcSpinor;
174 calcHalfSpinor chi;
175 // calcHalfSpinor *chi_p;
176 calcHalfSpinor Uchi;
177 calcSpinor result;
178 StencilEntry *SE;
179 int ptype;
180 const int Nsimd = SiteHalfSpinor::Nsimd();
181 const int lane=acceleratorSIMTlane(Nsimd);
182
183 result=Zero();
// NOTE(review): original lines 184-191 are absent from this listing --
// presumably the GENERIC_STENCIL_LEG_INT expansions that accumulate the
// local legs into `result`; confirm against the repository.
192 coalescedWrite(out[sF], result,lane);
193};
194
// GenericDhopSiteInt: per-site "interior" kernel (non-daggered).
// Zeroes `result` up front and stores it unconditionally to out[sF] for the
// current SIMT lane.
195template <class Impl> accelerator_inline
196void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
197 SiteHalfSpinor *buf, int sF,
198 int sU, const FermionFieldView &in, FermionFieldView &out)
199{
200 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
201 typedef decltype(coalescedRead(in[0])) calcSpinor;
202 const int Nsimd = SiteHalfSpinor::Nsimd();
203 const int lane=acceleratorSIMTlane(Nsimd);
204
205 calcHalfSpinor chi;
206 // calcHalfSpinor *chi_p;
207 calcHalfSpinor Uchi;
208 calcSpinor result;
209 StencilEntry *SE;
210 int ptype;
211 result=Zero();
// NOTE(review): original lines 212-219 are absent from this listing --
// presumably the GENERIC_STENCIL_LEG_INT expansions; confirm against the
// repository.
220 coalescedWrite(out[sF], result,lane);
221};
222
223// Exterior kernels
// GenericDhopSiteDagExt: per-site "exterior" kernel (daggered).
// Starts from result = 0 and nmu = 0. If nmu ends up non-zero, the existing
// value of out[sF] is read, `result` is added to it, and the sum is written
// back; if nmu stays zero, out[sF] is left untouched.
// No `chi` is declared in this function; GENERIC_STENCIL_LEG_EXT declares its
// own.
225template <class Impl> accelerator_inline
226void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
227 SiteHalfSpinor *buf, int sF,
228 int sU, const FermionFieldView &in, FermionFieldView &out)
229{
230 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
231 typedef decltype(coalescedRead(in[0])) calcSpinor;
232 // calcHalfSpinor *chi_p;
233 calcHalfSpinor Uchi;
234 calcSpinor result;
235 StencilEntry *SE;
236 int ptype;
237 int nmu=0;
238 const int Nsimd = SiteHalfSpinor::Nsimd();
239 const int lane=acceleratorSIMTlane(Nsimd);
240 result=Zero();
// NOTE(review): original lines 241-248 are absent from this listing --
// presumably the GENERIC_STENCIL_LEG_EXT expansions that bump nmu; confirm
// against the repository.
249 if ( nmu ) {
// Read-modify-write: out[sF] is assumed to already hold a partial result
// (presumably from the matching interior kernel -- verify against the caller).
250 auto out_t = coalescedRead(out[sF],lane);
251 out_t = out_t + result;
252 coalescedWrite(out[sF],out_t,lane);
253 }
254};
255
// GenericDhopSiteExt: per-site "exterior" kernel (non-daggered).
// Starts from result = 0 and nmu = 0. If nmu ends up non-zero, `result` is
// added onto the value already stored in out[sF]; otherwise out[sF] is left
// untouched.
256template <class Impl> accelerator_inline
257void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
258 SiteHalfSpinor *buf, int sF,
259 int sU, const FermionFieldView &in, FermionFieldView &out)
260{
261 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
262 typedef decltype(coalescedRead(in[0])) calcSpinor;
263 // calcHalfSpinor *chi_p;
264 calcHalfSpinor Uchi;
265 calcSpinor result;
266 StencilEntry *SE;
267 int ptype;
268 int nmu=0;
269 const int Nsimd = SiteHalfSpinor::Nsimd();
270 const int lane=acceleratorSIMTlane(Nsimd);
271 result=Zero();
// NOTE(review): original lines 272-279 are absent from this listing --
// presumably the GENERIC_STENCIL_LEG_EXT expansions that bump nmu; confirm
// against the repository.
280 if ( nmu ) {
// Read-modify-write: out[sF] is assumed to already hold a partial result
// (presumably from the matching interior kernel -- verify against the caller).
281 auto out_t = coalescedRead(out[sF],lane);
282 out_t = out_t + result;
283 coalescedWrite(out[sF],out_t,lane);
284 }
285};
286
// DhopDirMacro(Dir,spProj,spRecon): generates the definition of
// WilsonKernels<Impl>::DhopDir<Dir>, a single-direction hop for one site.
// The generated function looks up the stencil entry for the runtime argument
// `dir` (not for the token Dir), runs GENERIC_DHOPDIR_LEG_BODY with the given
// projector/reconstructor, and writes `result` to out[sF] for the current
// SIMT lane.
// NOTE(review): original lines 307-314 following this macro are absent from
// this listing -- presumably the eight DhopDirMacro(...) instantiations;
// confirm against the repository.
287#define DhopDirMacro(Dir,spProj,spRecon) \
288 template <class Impl> accelerator_inline \
289 void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
290 int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
291 { \
292 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; \
293 typedef decltype(coalescedRead(in[0])) calcSpinor; \
294 calcHalfSpinor chi; \
295 calcSpinor result; \
296 calcHalfSpinor Uchi; \
297 StencilEntry *SE; \
298 int ptype; \
299 const int Nsimd = SiteHalfSpinor::Nsimd(); \
300 const int lane=acceleratorSIMTlane(Nsimd); \
301 \
302 SE = st.GetEntry(ptype, dir, sF); \
303 GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
304 coalescedWrite(out[sF], result,lane); \
305 }
306
315
// DhopDirK: single-direction hop for one site, with the spin structure chosen
// at run time through `gamma`.
//   dir   : direction used for the stencil lookup
//   gamma : selector compared against direction constants by
//           GENERIC_DHOPDIR_LEG
// The stencil entry is fetched once for `dir`; `result` is then written to
// out[sF] for the current SIMT lane.
316template <class Impl> accelerator_inline
317void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
318 int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
319{
320 typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
321 typedef decltype(coalescedRead(in[0])) calcSpinor;
322 calcHalfSpinor chi;
323 calcSpinor result;
324 calcHalfSpinor Uchi;
325 StencilEntry *SE;
326 int ptype;
327 const int Nsimd = SiteHalfSpinor::Nsimd();
328 const int lane=acceleratorSIMTlane(Nsimd);
329
330 SE = st.GetEntry(ptype, dir, sF);
// NOTE(review): original lines 331-338 are absent from this listing --
// presumably one GENERIC_DHOPDIR_LEG per direction, of which at most one
// matches `gamma`; confirm against the repository.
339 coalescedWrite(out[sF], result,lane);
340}
341
// DhopDirAll: applies all eight single-direction hops in one accelerator loop.
//   out : must hold at least 8 fields; out[0..3] receive the Xm,Ym,Zm,Tm hops
//         (dir 0..3) and out[4..7] the Xp,Yp,Zp,Tp hops (dir 4..7).
//   Ls, Nsite : the loop runs over Nsite*Ls sites with sF = sss, sU = sss/Ls.
// The `buf` parameter is not used in the visible body; the comms buffer
// obtained from st.CommBuf() is passed to the per-direction kernels instead.
342template <class Impl>
343void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
344 int Nsite, const FermionField &in, std::vector<FermionField> &out)
345{
// NOTE(review): original line 346 is absent from this listing. U_v is used
// below but not visibly declared -- presumably an autoView over U lives on the
// missing line; confirm against the repository.
347 autoView(in_v ,in,AcceleratorRead);
348 autoView(st_v ,st,AcceleratorRead);
349
350 autoView(out_Xm,out[0],AcceleratorWrite);
351 autoView(out_Ym,out[1],AcceleratorWrite);
352 autoView(out_Zm,out[2],AcceleratorWrite);
353 autoView(out_Tm,out[3],AcceleratorWrite);
354 autoView(out_Xp,out[4],AcceleratorWrite);
355 autoView(out_Yp,out[5],AcceleratorWrite);
356 autoView(out_Zp,out[6],AcceleratorWrite);
357 autoView(out_Tp,out[7],AcceleratorWrite);
358 auto CBp=st.CommBuf();
359 accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{
360 int sU=sss/Ls;
361 int sF =sss;
362 DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0);
363 DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1);
364 DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2);
365 DhopDirTm(st_v,U_v,CBp,sF,sU,in_v,out_Tm,3);
366 DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,4);
367 DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,5);
368 DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,6);
369 DhopDirTp(st_v,U_v,CBp,sF,sU,in_v,out_Tp,7);
370 });
371}
372
373
// DhopDirKernel: applies one single-direction hop over the whole field.
//   dirdisp : direction passed to the per-site kernel; asserted to lie in
//             [0,7]
//   gamma   : selects which DhopDir<Dir> kernel is launched (switch below);
//             any value other than the eight direction constants hits
//             assert(0)
//   Ls, Nsite : the accelerator loop runs over Nsite sites, with an inner
//             serial loop over s in [0,Ls); sF = s + Ls*sU.
// The `buf` parameter is not used in the visible body; st.CommBuf() is passed
// to the per-site kernels instead.
374template <class Impl>
375void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
376 int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
377{
378 assert(dirdisp<=7);
379 assert(dirdisp>=0);
380
// NOTE(review): original line 381 is absent from this listing. U_v is used in
// LoopBody but not visibly declared -- presumably an autoView over U lives on
// the missing line; confirm against the repository.
382 autoView(in_v ,in ,AcceleratorRead);
383 autoView(out_v,out,AcceleratorWrite);
384 autoView(st_v ,st ,AcceleratorRead);
385 auto CBp=st.CommBuf();
// LoopBody(Dir): one `case Dir:` arm that launches DhopDir<Dir> over all
// sites; it is #undef'd again at the end of this function.
386#define LoopBody(Dir) \
387 case Dir : \
388 accelerator_for(ss,Nsite,Simd::Nsimd(),{ \
389 for(int s=0;s<Ls;s++){ \
390 int sU=ss; \
391 int sF = s+Ls*sU; \
392 DhopDir##Dir(st_v,U_v,CBp,sF,sU,in_v,out_v,dirdisp);\
393 } \
394 }); \
395 break;
396
397 switch(gamma){
398 LoopBody(Xp);
399 LoopBody(Yp);
400 LoopBody(Zp);
401 LoopBody(Tp);
402
403 LoopBody(Xm);
404 LoopBody(Ym);
405 LoopBody(Zm);
406 LoopBody(Tm);
407 default:
408 assert(0);
409 break;
410 }
411#undef LoopBody
412}
413
// MAKE_ID(A): identifier recorded per SIMT lane by KERNEL_CALL_ID.
//  - With GRID_SYCL and GRID_SIMT both defined it combines three Intel
//    builtins: (eu id << 16) | (slice id << 8) | subslice id.
//  - In every other configuration it is the constant 0.
// The argument A is ignored in all variants.
// The SIMT expansion is wrapped in an outer pair of parentheses so that it
// stays a single operand wherever it is used (e.g. `MAKE_ID() + 1` or
// `MAKE_ID() == x` would otherwise bind to the last term of the bitwise-or
// chain only).
#ifdef GRID_SYCL
// Intel device builtins; only declared here, resolved by the SYCL device
// compiler.
extern "C" {
  ulong SYCL_EXTERNAL __attribute__((overloadable)) intel_get_cycle_counter( void );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_active_channel_mask( void );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_grf_register( uint reg );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_flag_register( uint flag );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_control_register( uint reg );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_hw_thread_id( void );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_slice_id( void );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_subslice_id( void );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_eu_id( void );
  uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_eu_thread_id( void );
  void SYCL_EXTERNAL __attribute__((overloadable)) intel_eu_thread_pause( uint value );
}
#ifdef GRID_SIMT
#define MAKE_ID(A) ((intel_get_eu_id()<<16)|(intel_get_slice_id()<<8)|(intel_get_subslice_id()))
#else
#define MAKE_ID(A) (0)
#endif

#else

#define MAKE_ID(A) (0)

#endif
439
440
// KERNEL_CALL_ID(A): launches per-site kernel WilsonKernels<Impl>::A over all
// Nsite*Ls sites (sF = ss, sU = ss/Ls) and, for every site and SIMT lane,
// stores MAKE_ID() into ids[sF*Nsimd+lane]. Ends with accelerator_barrier().
// The expansion scope must provide Nsite, Ls, st_v, U_v, buf, in_v, out_v and
// ids; it also introduces a local named NN, so the macro can be expanded at
// most once per scope.
// NOTE(review): ids presumably needs room for Nsite*Ls*Nsimd entries -- verify
// against the caller. Nsite*Ls is formed from the operands' own type before
// being stored in the uint64_t NN.
441#define KERNEL_CALL_ID(A) \
442 const uint64_t NN = Nsite*Ls; \
443 accelerator_forNB( ss, NN, Simd::Nsimd(), { \
444 int sF = ss; \
445 int sU = ss/Ls; \
446 WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
447 const int Nsimd = SiteHalfSpinor::Nsimd(); \
448 const int lane=acceleratorSIMTlane(Nsimd); \
449 int idx=sF*Nsimd+lane; \
450 uint64_t id = MAKE_ID(); \
451 ids[idx]=id; \
452 }); \
453 accelerator_barrier();
454
// KERNEL_CALLNB(A): launches per-site kernel WilsonKernels<Impl>::A over all
// Nsite*Ls sites via accelerator_forNB (sF = ss, sU = ss/Ls) and does NOT
// issue accelerator_barrier() afterwards.
// The expansion scope must provide Nsite, Ls, st_v, U_v, buf, in_v and out_v;
// a local named NN is introduced, so expand at most once per scope.
455#define KERNEL_CALLNB(A) \
456 const uint64_t NN = Nsite*Ls; \
457 accelerator_forNB( ss, NN, Simd::Nsimd(), { \
458 int sF = ss; \
459 int sU = ss/Ls; \
460 WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
461 });
462
// KERNEL_CALL(A): KERNEL_CALLNB(A) followed by accelerator_barrier().
// Expands to more than one statement, so it must be used inside a braced
// block (an unbraced `if (...) KERNEL_CALL(X);` would guard only the first
// statement).
463#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
464
// KERNEL_CALL_EXT(A): launches per-site kernel WilsonKernels<Impl>::A only on
// the sites listed in st.surface_list. Each list entry is used as sF, with
// sU = sF/Ls. Ends with accelerator_barrier().
// The expansion scope must provide st, Ls, st_v, U_v, buf, in_v and out_v; it
// introduces locals named sz and ptr.
// NOTE(review): ptr is taken as &st.surface_list[0] even when the list is
// empty -- confirm that the container tolerates indexing element 0 in that
// case.
465#define KERNEL_CALL_EXT(A) \
466 const uint64_t sz = st.surface_list.size(); \
467 auto ptr = &st.surface_list[0]; \
468 accelerator_forNB( ss, sz, Simd::Nsimd(), { \
469 int sF = ptr[ss]; \
470 int sU = sF/Ls; \
471 WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
472 }); \
473 accelerator_barrier();
474
// ASM_CALL(A): runs WilsonKernels<Impl>::A once per gauge site inside a
// thread_for over Nsite, with sU = ss and sF = ss*Ls. The kernel is given Ls
// and the literal 1 as two extra arguments between sU and in_v, so each call
// presumably covers the Ls fermion sites of a single gauge site -- confirm
// against the Asm* kernel definitions.
// The expansion scope must provide Nsite, Ls, st_v, U_v, buf, in_v and out_v.
475#define ASM_CALL(A) \
476 thread_for( sss, Nsite, { \
477 int ss = sss; /*st.lo->Reorder(sss);*/ \
478 int sU = ss; \
479 int sF = ss*Ls; \
480 WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
481 });
// ASM_CALL_SLICE(A): same per-site call as ASM_CALL, but the sites are visited
// slice by slice: a serial loop over t in [0,nt) wraps a thread_for over
// nxyz = Nsite/nt sites, with ss = t*nxyz + sss.
// nt is read from element [4] of in.Grid()->LocalDimensions(), so `in` must be
// in scope and its grid must expose at least five local dimensions.
// NOTE(review): Nsite/nt is an integer division; sites are skipped if Nsite is
// not a multiple of nt -- presumably guaranteed by the grid layout, confirm.
482#define ASM_CALL_SLICE(A) \
483 auto grid = in.Grid() ; \
484 int nt = grid->LocalDimensions()[4]; \
485 int nxyz = Nsite/nt ; \
486 for(int t=0;t<nt;t++){ \
487 thread_for( sss, nxyz, { \
488 int ss = t*nxyz+sss; \
489 int sU = ss; \
490 int sF = ss*Ls; \
491 WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
492 });}
493
494
495
// DhopKernel (interior/exterior overload): host-side dispatcher for the
// non-daggered hopping term.
//   Opt                : kernel optimisation selector (not referenced in the
//                        visible lines)
//   interior, exterior : select one of three branches -- both set, interior
//                        only, exterior only
// Opens accelerator views on in (read), out (write) and st (read). If control
// reaches the end of the function, the final assert fires with
// " Kernel optimisation case not covered ".
// NOTE(review): this listing omits most of the body (original lines 501,
// 507-509, 511, 514-515, 517, 521-523 and 525 are absent). Presumably those
// lines open a view on U and, per branch, launch a kernel and return; confirm
// against the repository before relying on this description.
496template <class Impl>
497void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
498 int Ls, int Nsite, const FermionField &in, FermionField &out,
499 int interior,int exterior)
500{
502 autoView(in_v , in,AcceleratorRead);
503 autoView(out_v,out,AcceleratorWrite);
504 autoView(st_v , st,AcceleratorRead);
505
506 if( interior && exterior ) {
510#ifndef GRID_CUDA
512#endif
513 } else if( interior ) {
516#ifndef GRID_CUDA
518#endif
519 } else if( exterior ) {
520 // // dependent on result of merge
524#ifndef GRID_CUDA
526#endif
527 }
528 assert(0 && " Kernel optimisation case not covered ");
529 }
530
// DhopKernel (ids overload): variant of the non-daggered dispatcher that takes
// an `ids` output array instead of the interior/exterior flags.
// Opens accelerator views on in (read), out (write) and st (read).
// NOTE(review): original lines 536 and 540 are absent from this listing.
// Presumably they open a view on U and expand KERNEL_CALL_ID, which is the
// macro that writes into ids; confirm against the repository.
531template <class Impl>
532void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
533 int Ls, int Nsite, const FermionField &in, FermionField &out,
534 uint64_t *ids)
535{
537 autoView(in_v , in,AcceleratorRead);
538 autoView(out_v,out,AcceleratorWrite);
539 autoView(st_v , st,AcceleratorRead);
541}
// DhopDagKernel: host-side dispatcher for the daggered hopping term; same
// parameter list and branch structure as the interior/exterior DhopKernel.
// Opens accelerator views on in (read), out (write) and st (read). If control
// reaches the end of the function, the final assert fires with
// " Kernel optimisation case not covered ".
// NOTE(review): this listing omits most of the body (original lines 547,
// 553-555, 557, 560-561, 563, 567-569 and 571 are absent). Presumably those
// lines open a view on U and, per branch, launch a kernel and return; confirm
// against the repository before relying on this description.
542 template <class Impl>
543 void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
544 int Ls, int Nsite, const FermionField &in, FermionField &out,
545 int interior,int exterior)
546 {
548 autoView(in_v ,in,AcceleratorRead);
549 autoView(out_v,out,AcceleratorWrite);
550 autoView(st_v ,st,AcceleratorRead);
551
552 if( interior && exterior ) {
556#ifndef GRID_CUDA
558#endif
559 } else if( interior ) {
562#ifndef GRID_CUDA
564#endif
565 } else if( exterior ) {
566 // Dependent on result of merge
570#ifndef GRID_CUDA
572#endif
573 }
574 assert(0 && " Kernel optimisation case not covered ");
575 }
576
// Retire the launch helpers once the dispatchers above have been defined.
// NOTE(review): only these three are removed. KERNEL_CALL_ID, KERNEL_CALL_EXT,
// ASM_CALL_SLICE, MAKE_ID, DhopDirMacro and the GENERIC_* leg macros stay
// defined for any code that follows this file -- confirm whether that is
// intentional.
577#undef KERNEL_CALLNB
578#undef KERNEL_CALL
579#undef ASM_CALL
580
accelerator_inline int acceleratorSIMTlane(int Nsimd)
#define accelerator_inline
void acceleratorFenceComputeStream(void)
#define accelerator_for(iterator, num, nsimd,...)
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int Xm
Definition QCD.h:45
static constexpr int Tm
Definition QCD.h:48
static constexpr int Tp
Definition QCD.h:44
static constexpr int Zp
Definition QCD.h:43
static constexpr int Zm
Definition QCD.h:47
static constexpr int Xp
Definition QCD.h:41
static constexpr int Yp
Definition QCD.h:42
static constexpr int Ym
Definition QCD.h:46
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline void spProjXp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:75
accelerator_inline void spReconZm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:269
accelerator_inline void accumReconYp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:241
accelerator_inline void spProjYm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:95
accelerator_inline void accumReconZm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:283
accelerator_inline void spProjTm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:129
accelerator_inline void spProjZp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:106
accelerator_inline void spProjTp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:123
accelerator_inline void spReconXp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:193
accelerator_inline void spReconTp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:296
accelerator_inline void spProjZm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:111
accelerator_inline void spReconTm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:303
accelerator_inline void spReconYp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:227
accelerator_inline void accumReconYm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:248
accelerator_inline void spProjXm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:80
accelerator_inline void accumReconTp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:310
accelerator_inline void spProjYp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:90
accelerator_inline void accumReconZp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:276
accelerator_inline void spReconXm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:200
accelerator_inline void accumReconXp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:207
accelerator_inline void accumReconTm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:317
accelerator_inline void accumReconXm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:214
accelerator_inline void spReconYm(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:234
accelerator_inline void spReconZp(iVector< vtype, Ns > &fspin, const iVector< vtype, Nhs > &hspin)
Definition TwoSpinor.h:262
#define GENERIC_STENCIL_LEG_INT(Dir, spProj, Recon)
#define KERNEL_CALL_ID(A)
#define KERNEL_CALL_EXT(A)
#define GENERIC_STENCIL_LEG_EXT(Dir, spProj, Recon)
#define DhopDirMacro(Dir, spProj, spRecon)
#define KERNEL_CALLNB(A)
#define ASM_CALL(A)
#define GENERIC_DHOPDIR_LEG(Dir, spProj, Recon)
#define LoopBody(Dir)
#define GENERIC_STENCIL_LEG(Dir, spProj, Recon)
#define KERNEL_CALL(A)
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
static accelerator_inline void DhopDirXm(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator_inline void DhopDirYp(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma)
static void AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static accelerator_inline void DhopDirTm(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void DhopDagKernel(int Opt, StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int Ls, int Nsite, const FermionField &in, FermionField &out, int interior=1, int exterior=1)
static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator_inline void DhopDirXp(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static accelerator_inline void DhopDirTp(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator_inline void DhopDirZm(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator_inline void DhopDirYm(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static void DhopDirAll(StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int Ls, int Nsite, const FermionField &in, std::vector< FermionField > &out)
static accelerator_inline void DhopDirZp(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp)
static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out)
static void DhopKernel(int Opt, StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int Ls, int Nsite, const FermionField &in, FermionField &out, int interior=1, int exterior=1)
Definition Simd.h:194