Grid 0.7.0
StaggeredKernelsHand.h
Go to the documentation of this file.
1 /*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/qcd/action/fermion/StaggeredKernelsHand.cc
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10Author: paboyle <paboyle@ph.ed.ac.uk>
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License along
23 with this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25
26 See the full license in the file "LICENSE" in the top level distribution directory
27 *************************************************************************************/
28 /* END LEGAL */
29#include <Grid/Grid.h>
30
31#pragma once
32
34
// Site-load helpers. The GRID_SIMT build and the fallback build give
// different bodies to the same three macro names.
// All of them rely on names supplied by the expansion site: `offset`,
// `perm`, `lane` and the Chi_0/Chi_1/Chi_2 temporaries. Each LOAD_* macro
// also declares a local reference named `ref`, so it must be expanded inside
// its own brace scope (the stencil-leg macros below do this).
35#ifdef GRID_SIMT
36
// LOAD_CHI(ptype,b): bind `ref` to b[offset] and read its three components
// ref()()(0..2) into Chi_0..Chi_2 through coalescedReadPermute<ptype>,
// forwarding `perm` and `lane`.
37#define LOAD_CHI(ptype,b) \
38 const SiteSpinor & ref (b[offset]); \
39 Chi_0=coalescedReadPermute<ptype>(ref()()(0),perm,lane); \
40 Chi_1=coalescedReadPermute<ptype>(ref()()(1),perm,lane); \
41 Chi_2=coalescedReadPermute<ptype>(ref()()(2),perm,lane);
42
// LOAD_CHI_COMMS(b): same three reads, but through coalescedRead with only
// `lane`; `perm` is not consulted.
43#define LOAD_CHI_COMMS(b) \
44 const SiteSpinor & ref (b[offset]); \
45 Chi_0=coalescedRead(ref()()(0),lane); \
46 Chi_1=coalescedRead(ref()()(1),lane); \
47 Chi_2=coalescedRead(ref()()(2),lane);
48
// PERMUTE_DIR expands to an empty statement in this build; `perm` has
// already been handed to coalescedReadPermute inside LOAD_CHI.
49#define PERMUTE_DIR(dir) ;
50#else
// Fallback build: LOAD_CHI ignores `ptype` and forwards to LOAD_CHI_COMMS.
// LOAD_CHI_COMMS is defined a few lines further down; that is fine because
// the preprocessor only resolves it when LOAD_CHI is actually expanded.
51#define LOAD_CHI(ptype,b) LOAD_CHI_COMMS(b)
52
// LOAD_CHI_COMMS(b): bind `ref` to b[offset] and copy its three components
// ref()()(0..2) straight into Chi_0..Chi_2.
53#define LOAD_CHI_COMMS(b) \
54 const SiteSpinor & ref (b[offset]); \
55 Chi_0=ref()()(0); \
56 Chi_1=ref()()(1); \
57 Chi_2=ref()()(2);
58
// PERMUTE_DIR(dir): token-pastes `permute` with `dir` to form the callee name
// and calls it once per Chi_k with Chi_k as both arguments.
59#define PERMUTE_DIR(dir) \
60 permute##dir(Chi_0,Chi_0); \
61 permute##dir(Chi_1,Chi_1); \
62 permute##dir(Chi_2,Chi_2);
63
64#endif
65
66
// MULT(A,UChi): 3x3 matrix times 3-vector, overwriting the result.
//   A    - index applied to U[sU] to select the matrix.
//   UChi - name prefix of the destination; UChi_0, UChi_1, UChi_2 are assigned.
// The nine entries ref()(i,j) are read with coalescedRead(...,lane) into
// U_ij, then UChi_i = U_i0*Chi_0 + U_i1*Chi_1 + U_i2*Chi_2 is formed: the
// first column uses `=`, the remaining two use `+=`.
// `U`, `sU`, `lane`, Chi_* and U_* are taken from the expansion site; `U` is
// not a parameter of this macro. The macro declares a local `ref`, so it must
// be expanded inside its own brace scope.
67// To splat or not to splat depends on the implementation
68#define MULT(A,UChi) \
69 auto & ref(U[sU](A)); \
70 U_00=coalescedRead(ref()(0,0),lane); \
71 U_10=coalescedRead(ref()(1,0),lane); \
72 U_20=coalescedRead(ref()(2,0),lane); \
73 U_01=coalescedRead(ref()(0,1),lane); \
74 U_11=coalescedRead(ref()(1,1),lane); \
75 U_21=coalescedRead(ref()(2,1),lane); \
76 U_02=coalescedRead(ref()(0,2),lane); \
77 U_12=coalescedRead(ref()(1,2),lane); \
78 U_22=coalescedRead(ref()(2,2),lane); \
79 UChi ## _0 = U_00*Chi_0; \
80 UChi ## _1 = U_10*Chi_0;\
81 UChi ## _2 = U_20*Chi_0;\
82 UChi ## _0 += U_01*Chi_1;\
83 UChi ## _1 += U_11*Chi_1;\
84 UChi ## _2 += U_21*Chi_1;\
85 UChi ## _0 += U_02*Chi_2;\
86 UChi ## _1 += U_12*Chi_2;\
87 UChi ## _2 += U_22*Chi_2;
88
// MULT_ADD(U,A,UChi): 3x3 matrix times 3-vector, accumulated into the result.
//   U    - field to read the matrix from (indexed as U[sU](A)).
//   A    - index selecting the matrix at site sU.
//   UChi - name prefix of the accumulator; UChi_0..UChi_2 are updated with `+=`.
// Every term uses `+=`, so UChi_0..UChi_2 must already hold a value when this
// is expanded. `sU`, `lane`, Chi_* and U_* come from the expansion site.
// The macro declares a local `ref`, so it must be expanded inside its own
// brace scope.
89#define MULT_ADD(U,A,UChi) \
90 auto & ref(U[sU](A)); \
91 U_00=coalescedRead(ref()(0,0),lane); \
92 U_10=coalescedRead(ref()(1,0),lane); \
93 U_20=coalescedRead(ref()(2,0),lane); \
94 U_01=coalescedRead(ref()(0,1),lane); \
95 U_11=coalescedRead(ref()(1,1),lane); \
96 U_21=coalescedRead(ref()(2,1),lane); \
97 U_02=coalescedRead(ref()(0,2),lane); \
98 U_12=coalescedRead(ref()(1,2),lane); \
99 U_22=coalescedRead(ref()(2,2),lane); \
100 UChi ## _0 += U_00*Chi_0; \
101 UChi ## _1 += U_10*Chi_0;\
102 UChi ## _2 += U_20*Chi_0;\
103 UChi ## _0 += U_01*Chi_1;\
104 UChi ## _1 += U_11*Chi_1;\
105 UChi ## _2 += U_21*Chi_1;\
106 UChi ## _0 += U_02*Chi_2;\
107 UChi ## _1 += U_12*Chi_2;\
108 UChi ## _2 += U_22*Chi_2;
109
110
// HAND_STENCIL_LEG_BASE(Dir,Perm,skew): fetch the neighbour for one stencil leg.
//   Dir+skew - stencil point passed to st.GetEntry together with ptype and sF.
//   Perm     - selector forwarded to LOAD_CHI and PERMUTE_DIR.
// Copies SE->_offset, SE->_is_local and SE->_permute into `offset`, `local`
// and `perm`. A local entry is loaded from `in` (and PERMUTE_DIR is applied
// when `perm` is set); any other entry is loaded from `buf`.
// Uses `st`, `SE`, `ptype`, `sF`, `in`, `buf`, `offset`, `local`, `perm` and
// Chi_* from the expansion site. The result is left in Chi_0..Chi_2.
111#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
112 SE=st.GetEntry(ptype,Dir+skew,sF); \
113 offset = SE->_offset; \
114 local = SE->_is_local; \
115 perm = SE->_permute; \
116 if ( local ) { \
117 LOAD_CHI(Perm,in); \
118 if ( perm) { \
119 PERMUTE_DIR(Perm); \
120 } \
121 } else { \
122 LOAD_CHI_COMMS(buf); \
123 }
124
// HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even): first leg for an accumulator.
// Loads the neighbour with HAND_STENCIL_LEG_BASE, then expands MULT(Dir,even),
// which assigns (rather than accumulates into) <even>_0..<even>_2. `even` is
// only a name prefix; the callers in this file pass either `even` or `odd`.
// MULT reads from whatever `U` names at the expansion site.
125#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \
126 HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
127 { \
128 MULT(Dir,even); \
129 }
130
// HAND_STENCIL_LEG(U,Dir,Perm,skew,even): subsequent leg for an accumulator.
// Loads the neighbour with HAND_STENCIL_LEG_BASE, then expands
// MULT_ADD(U,Dir,even), adding the product into <even>_0..<even>_2.
// The accumulator must already have been set (e.g. by HAND_STENCIL_LEG_BEGIN).
131#define HAND_STENCIL_LEG(U,Dir,Perm,skew,even) \
132 HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
133 { \
134 MULT_ADD(U,Dir,even); \
135 }
136
137
// HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even): leg that contributes only when
// the stencil entry is local or st.same_node[Dir] is set.
//   - local entry: load from `in`, applying PERMUTE_DIR when `perm` is set.
//   - not local but same_node[Dir]: load from `buf`.
//   - otherwise: nothing is loaded and the accumulator is left untouched.
// When a load happened, MULT_ADD(U,Dir,even) adds into <even>_0..<even>_2,
// so the accumulator must be initialised before the first expansion.
// NOTE(review): GetEntry is addressed with Dir+skew but same_node with Dir
// alone - confirm that same_node is meant to be indexed without the skew.
138#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \
139 SE=st.GetEntry(ptype,Dir+skew,sF); \
140 offset = SE->_offset; \
141 local = SE->_is_local; \
142 perm = SE->_permute; \
143 if ( local ) { \
144 LOAD_CHI(Perm,in); \
145 if ( perm) { \
146 PERMUTE_DIR(Perm); \
147 } \
148 } else if ( st.same_node[Dir] ) { \
149 LOAD_CHI_COMMS(buf); \
150 } \
151 if (local || st.same_node[Dir] ) { \
152 MULT_ADD(U,Dir,even); \
153 }
154
// HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even): leg that contributes only when
// the stencil entry is neither local nor flagged in st.same_node[Dir], i.e.
// the cases HAND_STENCIL_LEG_INT skips.
// In that case it increments `nmu` (which must be declared at the expansion
// site), loads the neighbour from `buf` and adds the product into
// <even>_0..<even>_2 via MULT_ADD. SE->_permute is not read and `Perm` is
// unused in this macro.
// NOTE(review): as in HAND_STENCIL_LEG_INT, same_node is indexed with Dir
// while GetEntry uses Dir+skew - confirm.
155#define HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even) \
156 SE=st.GetEntry(ptype,Dir+skew,sF); \
157 offset = SE->_offset; \
158 local = SE->_is_local; \
159 if ((!local) && (!st.same_node[Dir]) ) { \
160 nmu++; \
161 { LOAD_CHI_COMMS(buf); } \
162 { MULT_ADD(U,Dir,even); } \
163 }
164
// HAND_DECLARATIONS(Simd): declare every temporary the macros above expect,
// all of type `Simd`:
//   even_0..2, odd_0..2 - the two accumulators written by MULT / MULT_ADD;
//   Chi_0..2            - the three components filled by LOAD_CHI*;
//   U_00..U_22          - the nine matrix entries read inside MULT / MULT_ADD.
// No initialiser is given here; callers either assign through MULT first or
// clear the accumulators themselves.
165#define HAND_DECLARATIONS(Simd) \
166 Simd even_0; \
167 Simd even_1; \
168 Simd even_2; \
169 Simd odd_0; \
170 Simd odd_1; \
171 Simd odd_2; \
172 \
173 Simd Chi_0; \
174 Simd Chi_1; \
175 Simd Chi_2; \
176 \
177 Simd U_00; \
178 Simd U_10; \
179 Simd U_20; \
180 Simd U_01; \
181 Simd U_11; \
182 Simd U_21; \
183 Simd U_02; \
184 Simd U_12; \
185 Simd U_22;
186
187
// Single-site kernel that sums every stencil leg and writes the result to
// out[sF].
// NOTE(review): the listing line that carried the return type, the function
// name and the first parameter is missing here. The body reads a stencil
// object `st`, and it uses the unconditional HAND_STENCIL_LEG macros, so this
// is presumably DhopSiteHand from the index at the end of the page - confirm
// against the real header.
//
// Template parameter:
//   Naik - when non-zero, a second set of eight legs is added using UUU with
//          skew 8.
// Parameters visible in this listing:
//   U, UUU - fields the 3x3 matrices are read from (indexed as X[sU](Dir)).
//   buf    - source for neighbours whose stencil entry is not local.
//   sF     - site index passed to st.GetEntry and used to index `out`.
//   sU     - site index used for U / UUU.
//   in     - source for neighbours whose stencil entry is local.
//   out    - destination; out[sF] is overwritten.
//   dag    - when non-zero the summed result is negated before the write.
188template <class Impl>
189template <int Naik> accelerator_inline
191 DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
192 SiteSpinor *buf, int sF, int sU,
193 const FermionFieldView &in, FermionFieldView &out,int dag)
194{
// S and V are not referenced anywhere else in this body.
195 typedef typename Simd::scalar_type S;
196 typedef typename Simd::vector_type V;
197
198
199 const int Nsimd = SiteHalfSpinor::Nsimd();
200 const int lane=acceleratorSIMTlane(Nsimd);
// Simt is whatever coalescedRead yields for a single component of a site;
// all temporaries declared by HAND_DECLARATIONS have this type.
201 typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
202 HAND_DECLARATIONS(Simt);
203
204 typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
205 calcSiteSpinor result;
// offset/local/perm are assigned inside the leg macros; ptype is handed to
// st.GetEntry (presumably filled in by it - TODO confirm).
206 int offset,local,perm, ptype;
207
208 StencilEntry *SE;
209 int skew;
210
211 // for(int s=0;s<LLs;s++){
212 // int sF=s+LLs*sU;
213 {
214
215 skew = 0;
// The first leg for each accumulator uses the *_BEGIN form, which assigns
// even_* / odd_* (reading the function parameter U); that is why no explicit
// zeroing is done here. The remaining legs accumulate, alternating between
// the even and odd accumulators.
216 HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);
217 HAND_STENCIL_LEG_BEGIN(Yp,2,skew,odd);
218 HAND_STENCIL_LEG (U,Zp,1,skew,even);
219 HAND_STENCIL_LEG (U,Tp,0,skew,odd);
220 HAND_STENCIL_LEG (U,Xm,3,skew,even);
221 HAND_STENCIL_LEG (U,Ym,2,skew,odd);
222 HAND_STENCIL_LEG (U,Zm,1,skew,even);
223 HAND_STENCIL_LEG (U,Tm,0,skew,odd);
// Optional second set of legs: stencil points Dir+8, matrices from UUU.
224 if (Naik) {
225 skew = 8;
226 HAND_STENCIL_LEG(UUU,Xp,3,skew,even);
227 HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);
228 HAND_STENCIL_LEG(UUU,Zp,1,skew,even);
229 HAND_STENCIL_LEG(UUU,Tp,0,skew,odd);
230 HAND_STENCIL_LEG(UUU,Xm,3,skew,even);
231 HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);
232 HAND_STENCIL_LEG(UUU,Zm,1,skew,even);
233 HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);
234 }
// Combine the two accumulators component by component; dag flips the sign.
235 if ( dag ) {
236 result()()(0) = - even_0 - odd_0;
237 result()()(1) = - even_1 - odd_1;
238 result()()(2) = - even_2 - odd_2;
239 } else {
240 result()()(0) = even_0 + odd_0;
241 result()()(1) = even_1 + odd_1;
242 result()()(2) = even_2 + odd_2;
243 }
244 coalescedWrite(out[sF],result);
245 }
246}
247
248
// Single-site kernel that sums only the legs HAND_STENCIL_LEG_INT accepts
// (stencil entry local, or st.same_node[Dir] set) and writes the result to
// out[sF].
// NOTE(review): the listing line that carried the return type, the function
// name and the first parameter is missing here. The body reads a stencil
// object `st` and uses the *_INT leg macro, so this is presumably
// DhopSiteHandInt from the index at the end of the page - confirm against the
// real header.
//
// Template parameter:
//   Naik - when non-zero, a second set of eight legs is added using UUU with
//          skew 8.
// Parameters visible in this listing:
//   U, UUU - fields the 3x3 matrices are read from (indexed as X[sU](Dir)).
//   buf    - source for non-local neighbours flagged in st.same_node.
//   sF     - site index passed to st.GetEntry and used to index `out`.
//   sU     - site index used for U / UUU.
//   in     - source for neighbours whose stencil entry is local.
//   out    - destination; out[sF] is overwritten.
//   dag    - when non-zero the summed result is negated before the write.
249template <class Impl>
250template <int Naik> accelerator_inline
252 DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
253 SiteSpinor *buf, int sF, int sU,
254 const FermionFieldView &in, FermionFieldView &out,int dag)
255{
// S and V are not referenced anywhere else in this body.
256 typedef typename Simd::scalar_type S;
257 typedef typename Simd::vector_type V;
258
259 const int Nsimd = SiteHalfSpinor::Nsimd();
260 const int lane=acceleratorSIMTlane(Nsimd);
// Simt is whatever coalescedRead yields for a single component of a site;
// all temporaries declared by HAND_DECLARATIONS have this type.
261 typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
262 HAND_DECLARATIONS(Simt);
263
264 typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
265 calcSiteSpinor result;
266 int offset, ptype, local, perm;
267
268 StencilEntry *SE;
269 int skew;
270
271 // for(int s=0;s<LLs;s++){
272 // int sF=s+LLs*sU;
273 {
274
// Every leg below only accumulates, and any leg may be skipped, so both
// accumulators are cleared first.
275 zeroit(even_0); zeroit(even_1); zeroit(even_2);
276 zeroit(odd_0); zeroit(odd_1); zeroit(odd_2);
277
278 skew = 0;
279 HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
280 HAND_STENCIL_LEG_INT(U,Yp,2,skew,odd);
281 HAND_STENCIL_LEG_INT(U,Zp,1,skew,even);
282 HAND_STENCIL_LEG_INT(U,Tp,0,skew,odd);
283 HAND_STENCIL_LEG_INT(U,Xm,3,skew,even);
284 HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);
285 HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);
286 HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);
// Optional second set of legs: stencil points Dir+8, matrices from UUU.
287 if (Naik) {
288 skew = 8;
289 HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);
290 HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);
291 HAND_STENCIL_LEG_INT(UUU,Zp,1,skew,even);
292 HAND_STENCIL_LEG_INT(UUU,Tp,0,skew,odd);
293 HAND_STENCIL_LEG_INT(UUU,Xm,3,skew,even);
294 HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);
295 HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);
296 HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);
297 }
// The write below is unconditional: out[sF] is overwritten even if no leg
// contributed, which is what the assumption in the next comment covers.
298 // Assume every site must be connected to at least one interior point. No 1^4 subvols.
299 if ( dag ) {
300 result()()(0) = - even_0 - odd_0;
301 result()()(1) = - even_1 - odd_1;
302 result()()(2) = - even_2 - odd_2;
303 } else {
304 result()()(0) = even_0 + odd_0;
305 result()()(1) = even_1 + odd_1;
306 result()()(2) = even_2 + odd_2;
307 }
308 coalescedWrite(out[sF],result);
309 }
310}
311
312
// Single-site kernel that sums only the legs HAND_STENCIL_LEG_EXT accepts
// (stencil entry not local and st.same_node[Dir] not set) and, if there was
// at least one such leg, adds the result onto the value already in out[sF].
// NOTE(review): the listing line that carried the return type, the function
// name and the first parameter is missing here. The body reads a stencil
// object `st` and uses the *_EXT leg macro, so this is presumably
// DhopSiteHandExt from the index at the end of the page - confirm against the
// real header.
//
// Template parameter:
//   Naik - when non-zero, a second set of eight legs is added using UUU with
//          skew 8.
// Parameters visible in this listing:
//   U, UUU - fields the 3x3 matrices are read from (indexed as X[sU](Dir)).
//   buf    - source for all neighbours loaded by this kernel.
//   sF     - site index passed to st.GetEntry and used to index `out`.
//   sU     - site index used for U / UUU.
//   in     - only used here to derive the Simt / calcSiteSpinor types.
//   out    - destination; out[sF] is read and updated when a leg contributed.
//   dag    - when non-zero the summed result is negated before it is added.
313template <class Impl>
314template <int Naik> accelerator_inline
316 DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
317 SiteSpinor *buf, int sF, int sU,
318 const FermionFieldView &in, FermionFieldView &out,int dag)
319{
// S and V are not referenced anywhere else in this body.
320 typedef typename Simd::scalar_type S;
321 typedef typename Simd::vector_type V;
322
323 const int Nsimd = SiteHalfSpinor::Nsimd();
324 const int lane=acceleratorSIMTlane(Nsimd);
// Simt is whatever coalescedRead yields for a single component of a site;
// all temporaries declared by HAND_DECLARATIONS have this type.
325 typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
326 HAND_DECLARATIONS(Simt);
327
328 typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
329 calcSiteSpinor result;
// No `perm` here: HAND_STENCIL_LEG_EXT never reads SE->_permute.
330 int offset, ptype, local;
331
332 StencilEntry *SE;
333 int skew;
334
335 // for(int s=0;s<LLs;s++){
336 // int sF=s+LLs*sU;
337 {
338
// Every leg below only accumulates, and any leg may be skipped, so both
// accumulators are cleared first.
339 zeroit(even_0); zeroit(even_1); zeroit(even_2);
340 zeroit(odd_0); zeroit(odd_1); zeroit(odd_2);
// nmu is incremented by HAND_STENCIL_LEG_EXT for each leg that contributes.
341 int nmu=0;
342 skew = 0;
343 HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
344 HAND_STENCIL_LEG_EXT(U,Yp,2,skew,odd);
345 HAND_STENCIL_LEG_EXT(U,Zp,1,skew,even);
346 HAND_STENCIL_LEG_EXT(U,Tp,0,skew,odd);
347 HAND_STENCIL_LEG_EXT(U,Xm,3,skew,even);
348 HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);
349 HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);
350 HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);
// Optional second set of legs: stencil points Dir+8, matrices from UUU.
351 if (Naik) {
352 skew = 8;
353 HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);
354 HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);
355 HAND_STENCIL_LEG_EXT(UUU,Zp,1,skew,even);
356 HAND_STENCIL_LEG_EXT(UUU,Tp,0,skew,odd);
357 HAND_STENCIL_LEG_EXT(UUU,Xm,3,skew,even);
358 HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);
359 HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);
360 HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);
361 }
362 // Add sum of all exterior connected stencil legs
// out[sF] is left untouched when no leg contributed (nmu == 0); otherwise the
// new sum is added to the value read back through out(sF).
363 if ( nmu ) {
364 if ( dag ) {
365 result()()(0) = - even_0 - odd_0;
366 result()()(1) = - even_1 - odd_1;
367 result()()(2) = - even_2 - odd_2;
368 } else {
369 result()()(0) = even_0 + odd_0;
370 result()()(1) = even_1 + odd_1;
371 result()()(2) = even_2 + odd_2;
372 }
373 coalescedWrite(out[sF] , out(sF)+ result);
374 }
375 }
376}
377
// Drop two of the helper macros so they do not leak past this header.
// NOTE(review): the other helpers defined in this listing (LOAD_CHI_COMMS,
// PERMUTE_DIR, MULT, MULT_ADD and the HAND_STENCIL_LEG* family) have no
// matching #undef in the visible text - confirm whether that is intentional.
378#undef LOAD_CHI
379#undef HAND_DECLARATIONS
380
382
383
accelerator_inline int acceleratorSIMTlane(int Nsimd)
#define accelerator_inline
accelerator_inline void zeroit(Grid_simd2< S, V > &z)
#define perm(a, b, n, w)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int Xm
Definition QCD.h:45
static constexpr int Tm
Definition QCD.h:48
static constexpr int Tp
Definition QCD.h:44
static constexpr int Zp
Definition QCD.h:43
static constexpr int Zm
Definition QCD.h:47
static constexpr int Xp
Definition QCD.h:41
static constexpr int Yp
Definition QCD.h:42
static constexpr int Ym
Definition QCD.h:46
#define HAND_STENCIL_LEG(U, Dir, Perm, skew, even)
#define HAND_STENCIL_LEG_EXT(U, Dir, Perm, skew, even)
#define HAND_STENCIL_LEG_BEGIN(Dir, Perm, skew, even)
#define HAND_STENCIL_LEG_INT(U, Dir, Perm, skew, even)
#define HAND_DECLARATIONS(Simd)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
static accelerator_inline void DhopSiteHandExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
static accelerator_inline void DhopSiteHand(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
static accelerator_inline void DhopSiteHandInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag)