Grid 0.7.0
WilsonKernelsHandGparityImplementation.h
Go to the documentation of this file.
1 /*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/qcd/action/fermion/WilsonKernelsHand.cc
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10Author: paboyle <paboyle@ph.ed.ac.uk>
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License along
23 with this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25
26 See the full license in the file "LICENSE" in the top level distribution directory
27 *************************************************************************************/
28 /* END LEGAL */
29
30#pragma once
31
33
// Expands to nothing. No macro in the visible part of this header uses it.
#define REGISTER
35
// Copy the twelve components ref(F)(s)(c), s = 0..3 and c = 0..2, into the
// working variables Chimu_sc. A `ref` usable as ref(F)(s)(c) must be in scope
// where the macro is expanded. The final statement is left unterminated so the
// caller supplies the closing ';'.
#define LOAD_CHIMU_BODY(F) \
  Chimu_00 = ref(F)(0)(0); \
  Chimu_01 = ref(F)(0)(1); \
  Chimu_02 = ref(F)(0)(2); \
  Chimu_10 = ref(F)(1)(0); \
  Chimu_11 = ref(F)(1)(1); \
  Chimu_12 = ref(F)(1)(2); \
  Chimu_20 = ref(F)(2)(0); \
  Chimu_21 = ref(F)(2)(1); \
  Chimu_22 = ref(F)(2)(2); \
  Chimu_30 = ref(F)(3)(0); \
  Chimu_31 = ref(F)(3)(1); \
  Chimu_32 = ref(F)(3)(2)
49
// Bind `ref` to in[offset] (as a const SiteSpinor &) and load all twelve
// Chimu_sc components of flavour F from it. `in` and `offset` come from the
// expansion scope. DIR and PERM are accepted but not used by the expansion.
#define LOAD_CHIMU(DIR,F,PERM) \
  { const SiteSpinor &ref(in[offset]); LOAD_CHIMU_BODY(F); }
52
// Copy the six components ref(F)(s)(c), s = 0..1 and c = 0..2, into the
// working variables Chi_sc. A `ref` usable as ref(F)(s)(c) must be in scope
// where the macro is expanded. The final statement is left unterminated so the
// caller supplies the closing ';'.
#define LOAD_CHI_BODY(F) \
  Chi_00 = ref(F)(0)(0); \
  Chi_01 = ref(F)(0)(1); \
  Chi_02 = ref(F)(0)(2); \
  Chi_10 = ref(F)(1)(0); \
  Chi_11 = ref(F)(1)(1); \
  Chi_12 = ref(F)(1)(2)
60
// Bind `ref` to buf[offset] (as a const SiteHalfSpinor &) and load the six
// Chi_sc components of flavour F from it. `buf` and `offset` come from the
// expansion scope. DIR and PERM are accepted but not used by the expansion.
#define LOAD_CHI(DIR,F,PERM) \
  { const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
63
64
// G-parity implementations using in-place intrinsic ops

// 1l 1h -> 1h 1l
// 0l 0h , 1h 1l -> 0l 1h 0h,1l
// 0h,1l -> 1l,0h
// if( (distance == 1 && !perm_will_occur) || (distance == -1 && perm_will_occur) )
// Pulled fermion through forwards face, GPBC on upper component
// Need 0= 0l 1h 1= 1l 0h
// else if( (distance == -1 && !perm) || (distance == 1 && perm) )
// Pulled fermion through backwards face, GPBC on lower component
// Need 0= 1l 0h 1= 0l 1h
76
// 1l 1h -> 1h 1l
// 0l 0h , 1h 1l -> 0l 1h 0h,1l
//
// Build one twisted component for spin S, colour C:
//   1. permute##PERM(tmp1, ref(1)(S)(C))
//   2. exchange##PERM(tmp2, tmp3, ref(0)(S)(C), tmp1)
//   3. INTO = tmp2
// tmp1..tmp3 are caller-supplied scratch variables and are overwritten.
// F is accepted but not used by the expansion.
// The statements are braced so that the expansion is one compound statement.
#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
  { \
    permute##PERM(tmp1, ref(1)(S)(C)); \
    exchange##PERM(tmp2, tmp3, ref(0)(S)(C), tmp1); \
    INTO = tmp2; \
  }
83
// 0l 0h -> 0h 0l
// 1l 1h, 0h 0l -> 1l 0h, 1h 0l
//
// Build one twisted component for spin S, colour C:
//   1. permute##PERM(tmp1, ref(0)(S)(C))
//   2. exchange##PERM(tmp2, tmp3, ref(1)(S)(C), tmp1)
//   3. INTO = tmp2
// tmp1..tmp3 are caller-supplied scratch variables and are overwritten.
// F is accepted but not used by the expansion.
// The statements are braced so that the expansion is one compound statement.
#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
  { \
    permute##PERM(tmp1, ref(0)(S)(C)); \
    exchange##PERM(tmp2, tmp3, ref(1)(S)(C), tmp1); \
    INTO = tmp2; \
  }
90
91
92
93
// Work out how flavour F has to be loaded for stencil leg DIR. Reads `st` and
// `SE` from the expansion scope and assigns these caller-declared variables:
//   g             - flavour index to load: F, or the other flavour (see below)
//   direction     - st._directions[DIR]
//   distance      - st._distances[DIR]
//   sl            - st._simd_layout[direction]
//   inplace_twist - 0 for a plain load of flavour g, 1 to request the
//                   in-place twist path
// When SE->_around_the_world is set and st.parameters.twists[DIR % 4] is true:
// with sl == 1 the flavour index is flipped, otherwise inplace_twist is set.
// Macro arguments are parenthesised where they appear inside arithmetic so
// that expression arguments (e.g. `a+b`) keep their intended value.
#define LOAD_CHI_SETUP(DIR,F) \
  g = (F); \
  direction = st._directions[DIR]; \
  distance = st._distances[DIR]; \
  sl = st._simd_layout[direction]; \
  inplace_twist = 0; \
  if(SE->_around_the_world && st.parameters.twists[(DIR) % 4]){ \
    if(sl == 1){ \
      g = ((F)+1) % 2; \
    }else{ \
      inplace_twist = 1; \
    } \
  }
107
// Load the twelve Chimu_sc components of flavour F from in[offset], applying
// the G-parity twist when LOAD_CHI_SETUP asks for it.
//   - inplace_twist == 0: plain LOAD_CHIMU_BODY of flavour g (g may be the
//     flipped flavour).
//   - inplace_twist == 1: each component is built with DO_TWIST_0L_1H or
//     DO_TWIST_1L_0H. The 0L_1H form is used for
//       F == 0 with (distance ==  1 && !perm) || (distance == -1 && perm), or
//       F == 1 with (distance == -1 && !perm) || (distance ==  1 && perm);
//     every other case uses 1L_0H.
// The six U_* variables serve as scratch for the twist, alternating between
// (U_00,U_01,U_10) and (U_11,U_20,U_21), and are overwritten.
// Uses in, offset, perm, st, SE and the LOAD_CHI_SETUP outputs from the
// expansion scope.
#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
  { const SiteSpinor &ref(in[offset]); \
    LOAD_CHI_SETUP(DIR,F); \
    if(!inplace_twist){ \
      LOAD_CHIMU_BODY(g); \
    }else{ \
      if( ( (F)==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
          ( (F)==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
        DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
      }else{ \
        DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
      } \
    } \
  }
144
145
// Load the six Chi_sc components of flavour F from buf[offset], applying the
// G-parity twist when LOAD_CHI_SETUP asks for it.
//   - inplace_twist == 0: plain LOAD_CHI_BODY of flavour g (g may be the
//     flipped flavour).
//   - inplace_twist == 1: each component is built with DO_TWIST_0L_1H or
//     DO_TWIST_1L_0H. The 0L_1H form is used for
//       F == 0 with (distance ==  1 && !perm) || (distance == -1 && perm), or
//       F == 1 with (distance == -1 && !perm) || (distance ==  1 && perm);
//     every other case uses 1L_0H.
// The U_* and UChi_* variables serve as scratch for the twist and are
// overwritten.
// Uses buf, offset, perm, st, SE and the LOAD_CHI_SETUP outputs from the
// expansion scope.
#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
  { const SiteHalfSpinor &ref(buf[offset]); \
    LOAD_CHI_SETUP(DIR,F); \
    if(!inplace_twist){ \
      LOAD_CHI_BODY(g); \
    }else{ \
      if( ( (F)==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
          ( (F)==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
        DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
        DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
        DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
      }else{ \
        DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
        DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
        DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
        DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
        DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
      } \
    } \
  }
170
171
// Half-spinor loader used by the G-parity kernels; forwards to the
// in-place-twist implementation.
#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
// Full-spinor loader used by the G-parity kernels; forwards to the
// in-place-twist implementation.
#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
174
// To splat or not to splat depends on the implementation
//
// Multiply the two-spin object Chi by the 3x3 link `ref()`:
//   UChi_sc = sum over j of ref()(c,j) * Chi_sj,   s = 0..1, c = 0..2.
// Link elements are fetched with Impl::loadLinkElement. Columns j = 0 and
// j = 1 are loaded first into U_00..U_21; U_00, U_10 and U_20 are then
// reloaded with column j = 2, so the statement order must be kept.
// The final statement is left unterminated so the caller supplies the ';'.
#define MULT_2SPIN_BODY \
  Impl::loadLinkElement(U_00,ref()(0,0)); \
  Impl::loadLinkElement(U_10,ref()(1,0)); \
  Impl::loadLinkElement(U_20,ref()(2,0)); \
  Impl::loadLinkElement(U_01,ref()(0,1)); \
  Impl::loadLinkElement(U_11,ref()(1,1)); \
  Impl::loadLinkElement(U_21,ref()(2,1)); \
  UChi_00 = U_00*Chi_00; \
  UChi_10 = U_00*Chi_10; \
  UChi_01 = U_10*Chi_00; \
  UChi_11 = U_10*Chi_10; \
  UChi_02 = U_20*Chi_00; \
  UChi_12 = U_20*Chi_10; \
  UChi_00+= U_01*Chi_01; \
  UChi_10+= U_01*Chi_11; \
  UChi_01+= U_11*Chi_01; \
  UChi_11+= U_11*Chi_11; \
  UChi_02+= U_21*Chi_01; \
  UChi_12+= U_21*Chi_11; \
  Impl::loadLinkElement(U_00,ref()(0,2)); \
  Impl::loadLinkElement(U_10,ref()(1,2)); \
  Impl::loadLinkElement(U_20,ref()(2,2)); \
  UChi_00+= U_00*Chi_02; \
  UChi_10+= U_00*Chi_12; \
  UChi_01+= U_10*Chi_02; \
  UChi_11+= U_10*Chi_12; \
  UChi_02+= U_20*Chi_02; \
  UChi_12+= U_20*Chi_12
204
205
// Bind `ref` to the link U[sU](A) and apply MULT_2SPIN_BODY to Chi.
// `U` and `sU` come from the expansion scope. F is accepted but not used.
#define MULT_2SPIN(A,F) \
  { auto &ref(U[sU](A)); MULT_2SPIN_BODY; }
208
// Bind `ref` to the flavour-F link U[sU](F)(A) and apply MULT_2SPIN_BODY to
// Chi. `U` and `sU` come from the expansion scope.
#define MULT_2SPIN_GPARITY(A,F) \
  { auto &ref(U[sU](F)(A)); MULT_2SPIN_BODY; }
211
212
// Call permute##dir(x, x) on each of the six Chi_sc components, i.e. with the
// same variable passed as both arguments.
#define PERMUTE_DIR(dir) \
  permute##dir(Chi_00,Chi_00); \
  permute##dir(Chi_01,Chi_01); \
  permute##dir(Chi_02,Chi_02); \
  permute##dir(Chi_10,Chi_10); \
  permute##dir(Chi_11,Chi_11); \
  permute##dir(Chi_12,Chi_12);
220
// hspin(0)=fspin(0)+timesI(fspin(3));
// hspin(1)=fspin(1)+timesI(fspin(2));
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define XP_PROJ \
  Chi_00 = Chimu_00+timesI(Chimu_30); \
  Chi_01 = Chimu_01+timesI(Chimu_31); \
  Chi_02 = Chimu_02+timesI(Chimu_32); \
  Chi_10 = Chimu_10+timesI(Chimu_20); \
  Chi_11 = Chimu_11+timesI(Chimu_21); \
  Chi_12 = Chimu_12+timesI(Chimu_22);
230
// hspin(0)=fspin(0)-fspin(3);
// hspin(1)=fspin(1)+fspin(2);
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define YP_PROJ \
  Chi_00 = Chimu_00-Chimu_30; \
  Chi_01 = Chimu_01-Chimu_31; \
  Chi_02 = Chimu_02-Chimu_32; \
  Chi_10 = Chimu_10+Chimu_20; \
  Chi_11 = Chimu_11+Chimu_21; \
  Chi_12 = Chimu_12+Chimu_22;
238
// hspin(0)=fspin(0)+timesI(fspin(2));
// hspin(1)=fspin(1)-timesI(fspin(3));
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define ZP_PROJ \
  Chi_00 = Chimu_00+timesI(Chimu_20); \
  Chi_01 = Chimu_01+timesI(Chimu_21); \
  Chi_02 = Chimu_02+timesI(Chimu_22); \
  Chi_10 = Chimu_10-timesI(Chimu_30); \
  Chi_11 = Chimu_11-timesI(Chimu_31); \
  Chi_12 = Chimu_12-timesI(Chimu_32);
246
// hspin(0)=fspin(0)+fspin(2);
// hspin(1)=fspin(1)+fspin(3);
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define TP_PROJ \
  Chi_00 = Chimu_00+Chimu_20; \
  Chi_01 = Chimu_01+Chimu_21; \
  Chi_02 = Chimu_02+Chimu_22; \
  Chi_10 = Chimu_10+Chimu_30; \
  Chi_11 = Chimu_11+Chimu_31; \
  Chi_12 = Chimu_12+Chimu_32;
254
255
// hspin(0)=fspin(0)-timesI(fspin(3));
// hspin(1)=fspin(1)-timesI(fspin(2));
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define XM_PROJ \
  Chi_00 = Chimu_00-timesI(Chimu_30); \
  Chi_01 = Chimu_01-timesI(Chimu_31); \
  Chi_02 = Chimu_02-timesI(Chimu_32); \
  Chi_10 = Chimu_10-timesI(Chimu_20); \
  Chi_11 = Chimu_11-timesI(Chimu_21); \
  Chi_12 = Chimu_12-timesI(Chimu_22);
265
// hspin(0)=fspin(0)+fspin(3);
// hspin(1)=fspin(1)-fspin(2);
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define YM_PROJ \
  Chi_00 = Chimu_00+Chimu_30; \
  Chi_01 = Chimu_01+Chimu_31; \
  Chi_02 = Chimu_02+Chimu_32; \
  Chi_10 = Chimu_10-Chimu_20; \
  Chi_11 = Chimu_11-Chimu_21; \
  Chi_12 = Chimu_12-Chimu_22;
273
// hspin(0)=fspin(0)-timesI(fspin(2));
// hspin(1)=fspin(1)+timesI(fspin(3));
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define ZM_PROJ \
  Chi_00 = Chimu_00-timesI(Chimu_20); \
  Chi_01 = Chimu_01-timesI(Chimu_21); \
  Chi_02 = Chimu_02-timesI(Chimu_22); \
  Chi_10 = Chimu_10+timesI(Chimu_30); \
  Chi_11 = Chimu_11+timesI(Chimu_31); \
  Chi_12 = Chimu_12+timesI(Chimu_32);
281
// hspin(0)=fspin(0)-fspin(2);
// hspin(1)=fspin(1)-fspin(3);
// Applied per colour c = 0..2: Chi_sc is written from the Chimu_sc values.
#define TM_PROJ \
  Chi_00 = Chimu_00-Chimu_20; \
  Chi_01 = Chimu_01-Chimu_21; \
  Chi_02 = Chimu_02-Chimu_22; \
  Chi_10 = Chimu_10-Chimu_30; \
  Chi_11 = Chimu_11-Chimu_31; \
  Chi_12 = Chimu_12-Chimu_32;
289
// fspin(0)=hspin(0);
// fspin(1)=hspin(1);
// fspin(2)=timesMinusI(hspin(1));
// fspin(3)=timesMinusI(hspin(0));
// Assigns (does not accumulate into) all twelve result_sc from UChi_sc.
#define XP_RECON \
  result_00 = UChi_00; \
  result_01 = UChi_01; \
  result_02 = UChi_02; \
  result_10 = UChi_10; \
  result_11 = UChi_11; \
  result_12 = UChi_12; \
  result_20 = timesMinusI(UChi_10); \
  result_21 = timesMinusI(UChi_11); \
  result_22 = timesMinusI(UChi_12); \
  result_30 = timesMinusI(UChi_00); \
  result_31 = timesMinusI(UChi_01); \
  result_32 = timesMinusI(UChi_02);
307
// Accumulating form of the X-plus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)-=timesI(hspin(1)); fspin(3)-=timesI(hspin(0));
#define XP_RECON_ACCUM \
  result_00+=UChi_00; \
  result_01+=UChi_01; \
  result_02+=UChi_02; \
  result_10+=UChi_10; \
  result_11+=UChi_11; \
  result_12+=UChi_12; \
  result_20-=timesI(UChi_10); \
  result_21-=timesI(UChi_11); \
  result_22-=timesI(UChi_12); \
  result_30-=timesI(UChi_00); \
  result_31-=timesI(UChi_01); \
  result_32-=timesI(UChi_02);
321
// fspin(0)=hspin(0);
// fspin(1)=hspin(1);
// fspin(2)=timesI(hspin(1));
// fspin(3)=timesI(hspin(0));
// Assigns (does not accumulate into) all twelve result_sc from UChi_sc.
#define XM_RECON \
  result_00 = UChi_00; \
  result_01 = UChi_01; \
  result_02 = UChi_02; \
  result_10 = UChi_10; \
  result_11 = UChi_11; \
  result_12 = UChi_12; \
  result_20 = timesI(UChi_10); \
  result_21 = timesI(UChi_11); \
  result_22 = timesI(UChi_12); \
  result_30 = timesI(UChi_00); \
  result_31 = timesI(UChi_01); \
  result_32 = timesI(UChi_02);
335
// Accumulating form of the X-minus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)+=timesI(hspin(1)); fspin(3)+=timesI(hspin(0));
#define XM_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20+= timesI(UChi_10); \
  result_21+= timesI(UChi_11); \
  result_22+= timesI(UChi_12); \
  result_30+= timesI(UChi_00); \
  result_31+= timesI(UChi_01); \
  result_32+= timesI(UChi_02);
349
// Accumulating Y-plus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)+=hspin(1); fspin(3)-=hspin(0);
#define YP_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20+= UChi_10; \
  result_21+= UChi_11; \
  result_22+= UChi_12; \
  result_30-= UChi_00; \
  result_31-= UChi_01; \
  result_32-= UChi_02;
363
// Accumulating Y-minus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)-=hspin(1); fspin(3)+=hspin(0);
#define YM_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20-= UChi_10; \
  result_21-= UChi_11; \
  result_22-= UChi_12; \
  result_30+= UChi_00; \
  result_31+= UChi_01; \
  result_32+= UChi_02;
377
// Accumulating Z-plus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)-=timesI(hspin(0)); fspin(3)+=timesI(hspin(1));
#define ZP_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20-= timesI(UChi_00); \
  result_21-= timesI(UChi_01); \
  result_22-= timesI(UChi_02); \
  result_30+= timesI(UChi_10); \
  result_31+= timesI(UChi_11); \
  result_32+= timesI(UChi_12);
391
// Accumulating Z-minus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)+=timesI(hspin(0)); fspin(3)-=timesI(hspin(1));
#define ZM_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20+= timesI(UChi_00); \
  result_21+= timesI(UChi_01); \
  result_22+= timesI(UChi_02); \
  result_30-= timesI(UChi_10); \
  result_31-= timesI(UChi_11); \
  result_32-= timesI(UChi_12);
405
// Accumulating T-plus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)+=hspin(0); fspin(3)+=hspin(1);
#define TP_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20+= UChi_00; \
  result_21+= UChi_01; \
  result_22+= UChi_02; \
  result_30+= UChi_10; \
  result_31+= UChi_11; \
  result_32+= UChi_12;
419
// Accumulating T-minus reconstruction:
// fspin(0)+=hspin(0); fspin(1)+=hspin(1);
// fspin(2)-=hspin(0); fspin(3)-=hspin(1);
#define TM_RECON_ACCUM \
  result_00+= UChi_00; \
  result_01+= UChi_01; \
  result_02+= UChi_02; \
  result_10+= UChi_10; \
  result_11+= UChi_11; \
  result_12+= UChi_12; \
  result_20-= UChi_00; \
  result_21-= UChi_01; \
  result_22-= UChi_02; \
  result_30-= UChi_10; \
  result_31-= UChi_11; \
  result_32-= UChi_12;
433
// One stencil leg, handled unconditionally.
//   PROJ            - projection macro applied after a full-spinor load
//   PERM            - permute index, pasted into permute##PERM via PERMUTE_DIR
//   DIR             - stencil direction index
//   RECON           - reconstruction macro (assigning or accumulating)
//   F               - flavour index
//   LOAD_CHI_IMPL   - macro that loads a half spinor   (DIR,F,PERM)
//   LOAD_CHIMU_IMPL - macro that loads a full spinor   (DIR,F,PERM)
//   MULT_2SPIN_IMPL - macro that applies the gauge link (DIR,F)
// Steps: fetch the stencil entry for (DIR, ss) and copy its _offset,
// _is_local and _permute fields; if the entry is local, load the full spinor,
// project it and permute when `perm` is set, otherwise load the half spinor
// through LOAD_CHI_IMPL; then multiply by the link and reconstruct.
// Uses SE, st, ptype, ss, offset, local and perm from the expansion scope.
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  SE=st.GetEntry(ptype,DIR,ss); \
  offset = SE->_offset; \
  local = SE->_is_local; \
  perm = SE->_permute; \
  if ( local ) { \
    LOAD_CHIMU_IMPL(DIR,F,PERM); \
    PROJ; \
    if ( perm) { \
      PERMUTE_DIR(PERM); \
    } \
  } else { \
    LOAD_CHI_IMPL(DIR,F,PERM); \
  } \
  MULT_2SPIN_IMPL(DIR,F); \
  RECON;
450
451
// One stencil leg for the "interior" pass. Parameters are the same as for
// HAND_STENCIL_LEG. The leg contributes only when the stencil entry is local
// or st.same_node[DIR] is set:
//   - local entry: load the full spinor, project, permute when `perm` is set;
//   - non-local entry with same_node[DIR]: load the half spinor through
//     LOAD_CHI_IMPL;
//   - otherwise nothing is loaded and the multiply/reconstruct step is skipped.
// Uses SE, st, ptype, ss, offset, local and perm from the expansion scope.
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  SE=st.GetEntry(ptype,DIR,ss); \
  offset = SE->_offset; \
  local = SE->_is_local; \
  perm = SE->_permute; \
  if ( local ) { \
    LOAD_CHIMU_IMPL(DIR,F,PERM); \
    PROJ; \
    if ( perm) { \
      PERMUTE_DIR(PERM); \
    } \
  } else if ( st.same_node[DIR] ) { \
    LOAD_CHI_IMPL(DIR,F,PERM); \
  } \
  if (local || st.same_node[DIR] ) { \
    MULT_2SPIN_IMPL(DIR,F); \
    RECON; \
  }
470
// One stencil leg for the "exterior" pass. Parameters are the same as for
// HAND_STENCIL_LEG; PROJ and LOAD_CHIMU_IMPL are accepted but not expanded.
// The leg contributes only when the stencil entry is not local and
// st.same_node[DIR] is not set: the half spinor is loaded through
// LOAD_CHI_IMPL, multiplied by the link, reconstructed, and `nmu` is
// incremented to record that a contribution was made.
// Uses SE, st, ptype, ss, offset, perm and nmu from the expansion scope.
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  SE=st.GetEntry(ptype,DIR,ss); \
  offset = SE->_offset; \
  perm = SE->_permute; \
  if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
    LOAD_CHI_IMPL(DIR,F,PERM); \
    MULT_2SPIN_IMPL(DIR,F); \
    RECON; \
    nmu++; \
  }
481
// Write the twelve result_sc values to flavour F of out[ss], one vstream call
// per component (overwriting, not accumulating). `out` comes from the
// expansion scope.
#define HAND_RESULT(ss,F) \
  { \
    SiteSpinor &ref(out[ss]); \
    vstream(ref(F)(0)(0),result_00); \
    vstream(ref(F)(0)(1),result_01); \
    vstream(ref(F)(0)(2),result_02); \
    vstream(ref(F)(1)(0),result_10); \
    vstream(ref(F)(1)(1),result_11); \
    vstream(ref(F)(1)(2),result_12); \
    vstream(ref(F)(2)(0),result_20); \
    vstream(ref(F)(2)(1),result_21); \
    vstream(ref(F)(2)(2),result_22); \
    vstream(ref(F)(3)(0),result_30); \
    vstream(ref(F)(3)(1),result_31); \
    vstream(ref(F)(3)(2),result_32); \
  }
498
// Add the twelve result_sc values onto flavour F of out[ss], but only when
// `nmu` is non-zero (i.e. at least one exterior leg contributed). `out` and
// `nmu` come from the expansion scope.
// NOTE: the expansion is a bare `if (...) { ... }`; do not follow it with an
// `else` at the call site.
#define HAND_RESULT_EXT(ss,F) \
  if (nmu){ \
    SiteSpinor &ref(out[ss]); \
    ref(F)(0)(0)+=result_00; \
    ref(F)(0)(1)+=result_01; \
    ref(F)(0)(2)+=result_02; \
    ref(F)(1)(0)+=result_10; \
    ref(F)(1)(1)+=result_11; \
    ref(F)(1)(2)+=result_12; \
    ref(F)(2)(0)+=result_20; \
    ref(F)(2)(1)+=result_21; \
    ref(F)(2)(2)+=result_22; \
    ref(F)(3)(0)+=result_30; \
    ref(F)(3)(1)+=result_31; \
    ref(F)(3)(2)+=result_32; \
  }
515
516
// Declare the Simd working variables used by the hand-unrolled kernels:
//   result_sc (s = 0..3, c = 0..2) - output accumulators
//   Chi_sc    (s = 0..1, c = 0..2) - projected half spinor
//   UChi_sc   (s = 0..1, c = 0..2) - link times half spinor
//   U_ij      (i = 0..2, j = 0..1) - link elements / scratch
// The argument `a` is accepted but not used.
#define HAND_DECLARATIONS(a) \
  Simd result_00; \
  Simd result_01; \
  Simd result_02; \
  Simd result_10; \
  Simd result_11; \
  Simd result_12; \
  Simd result_20; \
  Simd result_21; \
  Simd result_22; \
  Simd result_30; \
  Simd result_31; \
  Simd result_32; \
  Simd Chi_00; \
  Simd Chi_01; \
  Simd Chi_02; \
  Simd Chi_10; \
  Simd Chi_11; \
  Simd Chi_12; \
  Simd UChi_00; \
  Simd UChi_01; \
  Simd UChi_02; \
  Simd UChi_10; \
  Simd UChi_11; \
  Simd UChi_12; \
  Simd U_00; \
  Simd U_10; \
  Simd U_20; \
  Simd U_01; \
  Simd U_11; \
  Simd U_21;
548
// Assign Zero() to each of the twelve result_sc accumulators.
#define ZERO_RESULT \
  result_00=Zero(); \
  result_01=Zero(); \
  result_02=Zero(); \
  result_10=Zero(); \
  result_11=Zero(); \
  result_12=Zero(); \
  result_20=Zero(); \
  result_21=Zero(); \
  result_22=Zero(); \
  result_30=Zero(); \
  result_31=Zero(); \
  result_32=Zero();
562
// The full-spinor names Chimu_sc are aliases, not separate variables:
//   spins 0,1 -> Chi_0c,  Chi_1c
//   spins 2,3 -> UChi_0c, UChi_1c
// Loading a full spinor therefore overwrites Chi_* and UChi_*, and the *_PROJ
// macros update Chi_* in place.
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
#define Chimu_02 Chi_02
#define Chimu_10 Chi_10
#define Chimu_11 Chi_11
#define Chimu_12 Chi_12
#define Chimu_20 UChi_00
#define Chimu_21 UChi_01
#define Chimu_22 UChi_02
#define Chimu_30 UChi_10
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
575
577
// Full hopping term for one site and flavour F: eight stencil legs followed by
// a store of the result. The first leg uses the assigning XM_RECON, which
// initialises result_*; the remaining seven accumulate. Forward directions
// (Xp..Tp) use the *M projectors, backward directions (Xm..Tm) the *P ones.
// The three *_IMPL arguments are forwarded to HAND_STENCIL_LEG.
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_RESULT(ss,F)
588
// Daggered hopping term for one site and flavour F. Same structure as
// HAND_DOP_SITE with the projectors swapped: forward directions (Xp..Tp) use
// the *P projectors, backward directions (Xm..Tm) the *M ones. The first leg
// uses the assigning XP_RECON, which initialises result_*; the rest accumulate.
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_RESULT(ss,F)
599
// Interior-only hopping term for one site and flavour F. result_* is zeroed
// first because any leg may be skipped by HAND_STENCIL_LEG_INT, so every leg
// uses an accumulating reconstruction. The result is then stored with
// HAND_RESULT.
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  ZERO_RESULT; \
  HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_RESULT(ss,F)
611
// Interior-only daggered hopping term for one site and flavour F. Same as
// HAND_DOP_SITE_INT with the projectors swapped: forward directions use the *P
// projectors, backward directions the *M ones. result_* is zeroed first and
// every leg accumulates.
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  ZERO_RESULT; \
  HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_RESULT(ss,F)
623
// Exterior-only hopping term for one site and flavour F. result_* is zeroed,
// each leg contributes only when HAND_STENCIL_LEG_EXT's condition holds (and
// then bumps `nmu`), and HAND_RESULT_EXT adds the sum onto out[ss] when nmu is
// non-zero. The caller must set nmu to 0 before each expansion.
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  ZERO_RESULT; \
  HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_RESULT_EXT(ss,F)
635
// Exterior-only daggered hopping term for one site and flavour F. Same as
// HAND_DOP_SITE_EXT with the projectors swapped: forward directions use the *P
// projectors, backward directions the *M ones. The caller must set nmu to 0
// before each expansion.
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
  ZERO_RESULT; \
  HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
  HAND_RESULT_EXT(ss,F)
647
// Emit the six explicit specialisations of the WilsonKernels<IMPL> hand-coded
// site kernels for a G-parity implementation IMPL:
//   HandDhopSite / HandDhopSiteDag       - all legs
//   HandDhopSiteInt / HandDhopSiteDagInt - interior legs only
//   HandDhopSiteExt / HandDhopSiteDagExt - exterior legs only
// Each kernel declares the working variables, then processes flavour 0 and
// flavour 1 in turn with the G-parity load and multiply macros. The Ext
// variants keep a per-flavour counter `nmu`, reset to 0 before each flavour.
// `ptype` is passed uninitialised to st.GetEntry by the stencil-leg macros.
// NOTE(review): presumably GetEntry takes ptype by reference and sets it;
// confirm against the stencil class.
#define HAND_SPECIALISE_GPARITY(IMPL) \
  template<> accelerator_inline void \
  WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
                                    int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
  { \
    typedef IMPL Impl; \
    typedef typename Simd::scalar_type S; \
    typedef typename Simd::vector_type V; \
    \
    HAND_DECLARATIONS(ignore); \
    \
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
    StencilEntry *SE; \
    HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
    HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
  } \
  \
  template<> accelerator_inline void \
  WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
                                       int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
  { \
    typedef IMPL Impl; \
    typedef typename Simd::scalar_type S; \
    typedef typename Simd::vector_type V; \
    \
    HAND_DECLARATIONS(ignore); \
    \
    StencilEntry *SE; \
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
    HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
    HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
  } \
  \
  template<> accelerator_inline void \
  WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
                                       int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
  { \
    typedef IMPL Impl; \
    typedef typename Simd::scalar_type S; \
    typedef typename Simd::vector_type V; \
    \
    HAND_DECLARATIONS(ignore); \
    \
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
    StencilEntry *SE; \
    HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
    HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
  } \
  \
  template<> accelerator_inline void \
  WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
                                          int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
  { \
    typedef IMPL Impl; \
    typedef typename Simd::scalar_type S; \
    typedef typename Simd::vector_type V; \
    \
    HAND_DECLARATIONS(ignore); \
    \
    StencilEntry *SE; \
    int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
    HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
    HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
  } \
  \
  template<> accelerator_inline void \
  WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
                                       int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
  { \
    typedef IMPL Impl; \
    typedef typename Simd::scalar_type S; \
    typedef typename Simd::vector_type V; \
    \
    HAND_DECLARATIONS(ignore); \
    \
    int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
    StencilEntry *SE; \
    int nmu=0; \
    HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
    nmu = 0; \
    HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
  } \
  \
  template<> accelerator_inline void \
  WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
                                          int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
  { \
    typedef IMPL Impl; \
    typedef typename Simd::scalar_type S; \
    typedef typename Simd::vector_type V; \
    \
    HAND_DECLARATIONS(ignore); \
    \
    StencilEntry *SE; \
    int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
    int nmu=0; \
    HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
    nmu = 0; \
    HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
  }
747
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36