// LOAD_CHIMU_BODY(F): copies the components ref(F)(s)(c) into the Chimu_sc
// working variables. A variable named `ref` must be in scope where this expands.
// NOTE(review): the visible body stops at Chimu_31 with a trailing continuation;
// a Chimu_32 load from ref(F)(3)(2) presumably follows -- confirm against the full file.
36#define LOAD_CHIMU_BODY(F) \
37 Chimu_00=ref(F)(0)(0); \
38 Chimu_01=ref(F)(0)(1); \
39 Chimu_02=ref(F)(0)(2); \
40 Chimu_10=ref(F)(1)(0); \
41 Chimu_11=ref(F)(1)(1); \
42 Chimu_12=ref(F)(1)(2); \
43 Chimu_20=ref(F)(2)(0); \
44 Chimu_21=ref(F)(2)(1); \
45 Chimu_22=ref(F)(2)(2); \
46 Chimu_30=ref(F)(3)(0); \
47 Chimu_31=ref(F)(3)(1); \
// LOAD_CHIMU(DIR,F,PERM): in its own brace scope, binds `ref` to in[offset] as a
// const SiteSpinor reference and expands LOAD_CHIMU_BODY(F).
// DIR and PERM are accepted but not used by this definition.
50#define LOAD_CHIMU(DIR,F,PERM) \
51 { const SiteSpinor & ref (in[offset]); LOAD_CHIMU_BODY(F); }
// LOAD_CHI_BODY(F): copies the components ref(F)(s)(c) into the Chi_sc working
// variables. A variable named `ref` must be in scope where this expands.
// NOTE(review): the visible body stops at Chi_11 with a trailing continuation;
// a Chi_12 load from ref(F)(1)(2) presumably follows -- confirm against the full file.
53#define LOAD_CHI_BODY(F) \
54 Chi_00 = ref(F)(0)(0);\
55 Chi_01 = ref(F)(0)(1);\
56 Chi_02 = ref(F)(0)(2);\
57 Chi_10 = ref(F)(1)(0);\
58 Chi_11 = ref(F)(1)(1);\
// LOAD_CHI(DIR,F,PERM): in its own brace scope, binds `ref` to buf[offset] as a
// const SiteHalfSpinor reference and expands LOAD_CHI_BODY(F).
// DIR and PERM are accepted but not used by this definition.
61#define LOAD_CHI(DIR,F,PERM) \
62 {const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
// DO_TWIST_0L_1H(INTO,S,C,F,PERM,tmp1,tmp2,tmp3): visible steps are
//   1. permute##PERM(tmp1, ref(1)(S)(C))
//   2. exchange##PERM(tmp2, tmp3, ref(0)(S)(C), tmp1)
// PERM is token-pasted onto the permute/exchange names. INTO and F are not used
// in the visible lines.
// NOTE(review): the body ends on a continuation, so further statements
// (presumably the assignment to INTO) are not shown here -- confirm.
79#define DO_TWIST_0L_1H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
80 permute##PERM(tmp1, ref(1)(S)(C)); \
81 exchange##PERM(tmp2,tmp3, ref(0)(S)(C), tmp1); \
// DO_TWIST_1L_0H(INTO,S,C,F,PERM,tmp1,tmp2,tmp3): same visible steps as
// DO_TWIST_0L_1H but with the first index of `ref` swapped:
//   1. permute##PERM(tmp1, ref(0)(S)(C))
//   2. exchange##PERM(tmp2, tmp3, ref(1)(S)(C), tmp1)
// INTO and F are not used in the visible lines.
// NOTE(review): the body ends on a continuation, so further statements
// (presumably the assignment to INTO) are not shown here -- confirm.
86#define DO_TWIST_1L_0H(INTO,S,C,F, PERM, tmp1, tmp2, tmp3) \
87 permute##PERM(tmp1, ref(0)(S)(C)); \
88 exchange##PERM(tmp2,tmp3, ref(1)(S)(C), tmp1); \
// LOAD_CHI_SETUP(DIR,F): fills the caller-declared locals used by the G-parity loads:
//   direction <- st._directions[DIR]
//   distance  <- st._distances[DIR]
//   sl        <- st._simd_layout[direction]
// and then tests SE->_around_the_world together with st.parameters.twists[DIR % 4].
// NOTE(review): several original lines are not visible, including the body of the
// `if`. The callers read `g` and `inplace_twist` right after this macro, so those are
// presumably assigned in the missing lines -- confirm against the full file.
94#define LOAD_CHI_SETUP(DIR,F) \
96 direction = st._directions[DIR]; \
97 distance = st._distances[DIR]; \
98 sl = st._simd_layout[direction]; \
100 if(SE->_around_the_world && st.parameters.twists[DIR % 4]){ \
// LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM): full-spinor load for the G-parity kernels.
// Binds `ref` to in[offset], runs LOAD_CHI_SETUP(DIR,F), then:
//   * if !inplace_twist: plain load via LOAD_CHIMU_BODY(g);
//   * otherwise one of two twelve-component twist sequences is used. The visible
//     condition on F, distance and perm guards the DO_TWIST_0L_1H sequence; the
//     DO_TWIST_1L_0H sequence follows it.
// The twist calls alternate between two scratch triples, (U_00,U_01,U_10) and
// (U_11,U_20,U_21), as tmp1..tmp3.
// NOTE(review): the `else` lines and closing braces between the visible statements
// are not shown here; the branch structure described above is inferred from the
// visible conditions -- confirm against the full file.
108#define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
109 { const SiteSpinor &ref(in[offset]); \
110 LOAD_CHI_SETUP(DIR,F); \
111 if(!inplace_twist){ \
112 LOAD_CHIMU_BODY(g); \
114 if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
115 ( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
116 DO_TWIST_0L_1H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
117 DO_TWIST_0L_1H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
118 DO_TWIST_0L_1H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
119 DO_TWIST_0L_1H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
120 DO_TWIST_0L_1H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
121 DO_TWIST_0L_1H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
122 DO_TWIST_0L_1H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
123 DO_TWIST_0L_1H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
124 DO_TWIST_0L_1H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
125 DO_TWIST_0L_1H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
126 DO_TWIST_0L_1H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
127 DO_TWIST_0L_1H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
129 DO_TWIST_1L_0H(Chimu_00,0,0,F,PERM, U_00,U_01,U_10); \
130 DO_TWIST_1L_0H(Chimu_01,0,1,F,PERM, U_11,U_20,U_21); \
131 DO_TWIST_1L_0H(Chimu_02,0,2,F,PERM, U_00,U_01,U_10); \
132 DO_TWIST_1L_0H(Chimu_10,1,0,F,PERM, U_11,U_20,U_21); \
133 DO_TWIST_1L_0H(Chimu_11,1,1,F,PERM, U_00,U_01,U_10); \
134 DO_TWIST_1L_0H(Chimu_12,1,2,F,PERM, U_11,U_20,U_21); \
135 DO_TWIST_1L_0H(Chimu_20,2,0,F,PERM, U_00,U_01,U_10); \
136 DO_TWIST_1L_0H(Chimu_21,2,1,F,PERM, U_11,U_20,U_21); \
137 DO_TWIST_1L_0H(Chimu_22,2,2,F,PERM, U_00,U_01,U_10); \
138 DO_TWIST_1L_0H(Chimu_30,3,0,F,PERM, U_11,U_20,U_21); \
139 DO_TWIST_1L_0H(Chimu_31,3,1,F,PERM, U_00,U_01,U_10); \
140 DO_TWIST_1L_0H(Chimu_32,3,2,F,PERM, U_11,U_20,U_21); \
// LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM): half-spinor counterpart of
// LOAD_CHIMU_GPARITY_INPLACE_TWIST. Binds `ref` to buf[offset], runs
// LOAD_CHI_SETUP(DIR,F), then branches on inplace_twist. The visible condition on
// F, distance and perm guards the six-component DO_TWIST_0L_1H sequence; the
// DO_TWIST_1L_0H sequence follows it.
// Scratch triples passed as tmp1..tmp3 are drawn from the U_* and UChi_* variables.
// NOTE(review): the statement inside `if(!inplace_twist)` (presumably
// LOAD_CHI_BODY(g)), the `else` lines and the closing braces are not visible here
// -- confirm against the full file.
146#define LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM) \
147 { const SiteHalfSpinor &ref(buf[offset]); \
148 LOAD_CHI_SETUP(DIR,F); \
149 if(!inplace_twist){ \
152 if( ( F==0 && ((distance == 1 && !perm) || (distance == -1 && perm)) ) || \
153 ( F==1 && ((distance == -1 && !perm) || (distance == 1 && perm)) ) ){ \
154 DO_TWIST_0L_1H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
155 DO_TWIST_0L_1H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
156 DO_TWIST_0L_1H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
157 DO_TWIST_0L_1H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
158 DO_TWIST_0L_1H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
159 DO_TWIST_0L_1H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
161 DO_TWIST_1L_0H(Chi_00,0,0,F,PERM, U_00,U_01,U_10); \
162 DO_TWIST_1L_0H(Chi_01,0,1,F,PERM, U_11,U_20,U_21); \
163 DO_TWIST_1L_0H(Chi_02,0,2,F,PERM, UChi_00,UChi_01,UChi_02); \
164 DO_TWIST_1L_0H(Chi_10,1,0,F,PERM, UChi_10,UChi_11,UChi_12); \
165 DO_TWIST_1L_0H(Chi_11,1,1,F,PERM, U_00,U_01,U_10); \
166 DO_TWIST_1L_0H(Chi_12,1,2,F,PERM, U_11,U_20,U_21); \
// Short names for the G-parity loaders: both forward their arguments unchanged to
// the corresponding *_INPLACE_TWIST macro.
172#define LOAD_CHI_GPARITY(DIR,F,PERM) LOAD_CHI_GPARITY_INPLACE_TWIST(DIR,F,PERM)
173#define LOAD_CHIMU_GPARITY(DIR,F,PERM) LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM)
// MULT_2SPIN_BODY: accumulates, for s in {0,1} and c in {0,1,2},
//   UChi_sc = sum over k of L(c,k) * Chi_sk
// where L(c,k) is the value Impl::loadLinkElement places in its first argument
// for ref()(c,k). A variable named `ref` must be in scope where this expands.
// Columns k=0 and k=1 are loaded first into U_c0 / U_c1. Column k=2 is then loaded
// into U_00, U_10, U_20, overwriting the k=0 values, so the statement order is
// significant and must not be rearranged.
// The final line carries no semicolon; the expansion site supplies it.
176#define MULT_2SPIN_BODY \
177 Impl::loadLinkElement(U_00,ref()(0,0)); \
178 Impl::loadLinkElement(U_10,ref()(1,0)); \
179 Impl::loadLinkElement(U_20,ref()(2,0)); \
180 Impl::loadLinkElement(U_01,ref()(0,1)); \
181 Impl::loadLinkElement(U_11,ref()(1,1)); \
182 Impl::loadLinkElement(U_21,ref()(2,1)); \
183 UChi_00 = U_00*Chi_00; \
184 UChi_10 = U_00*Chi_10; \
185 UChi_01 = U_10*Chi_00; \
186 UChi_11 = U_10*Chi_10; \
187 UChi_02 = U_20*Chi_00; \
188 UChi_12 = U_20*Chi_10; \
189 UChi_00+= U_01*Chi_01; \
190 UChi_10+= U_01*Chi_11; \
191 UChi_01+= U_11*Chi_01; \
192 UChi_11+= U_11*Chi_11; \
193 UChi_02+= U_21*Chi_01; \
194 UChi_12+= U_21*Chi_11; \
195 Impl::loadLinkElement(U_00,ref()(0,2)); \
196 Impl::loadLinkElement(U_10,ref()(1,2)); \
197 Impl::loadLinkElement(U_20,ref()(2,2)); \
198 UChi_00+= U_00*Chi_02; \
199 UChi_10+= U_00*Chi_12; \
200 UChi_01+= U_10*Chi_02; \
201 UChi_11+= U_10*Chi_12; \
202 UChi_02+= U_20*Chi_02; \
203 UChi_12+= U_20*Chi_12
// MULT_2SPIN(A,F): in its own brace scope, binds `ref` to U[sU](A) and expands
// MULT_2SPIN_BODY. F is accepted but not used by this definition.
206#define MULT_2SPIN(A,F) \
207 {auto & ref(U[sU](A)); MULT_2SPIN_BODY; }
// MULT_2SPIN_GPARITY(A,F): like MULT_2SPIN, but `ref` is bound to U[sU](F)(A),
// i.e. the link is additionally indexed by F before the direction index A.
209#define MULT_2SPIN_GPARITY(A,F) \
210 {auto & ref(U[sU](F)(A)); MULT_2SPIN_BODY; }
// PERMUTE_DIR(dir): calls permute##dir on each of the six Chi_sc variables,
// passing the same variable as both arguments (in-place form).
// `dir` is token-pasted onto `permute` to select the function.
213#define PERMUTE_DIR(dir) \
214 permute##dir(Chi_00,Chi_00);\
215 permute##dir(Chi_01,Chi_01);\
216 permute##dir(Chi_02,Chi_02);\
217 permute##dir(Chi_10,Chi_10);\
218 permute##dir(Chi_11,Chi_11);\
219 permute##dir(Chi_12,Chi_12);
// Projector body: Chi_0c = Chimu_0c + timesI(Chimu_3c), Chi_1c = Chimu_1c + timesI(Chimu_2c).
// NOTE(review): the #define line for this body is not visible here; from its
// position and sign pattern it is presumably XP_PROJ -- confirm against the full file.
224 Chi_00 = Chimu_00+timesI(Chimu_30);\
225 Chi_01 = Chimu_01+timesI(Chimu_31);\
226 Chi_02 = Chimu_02+timesI(Chimu_32);\
227 Chi_10 = Chimu_10+timesI(Chimu_20);\
228 Chi_11 = Chimu_11+timesI(Chimu_21);\
229 Chi_12 = Chimu_12+timesI(Chimu_22);
// Projector body: Chi_0c = Chimu_0c - Chimu_3c, Chi_1c = Chimu_1c + Chimu_2c.
// NOTE(review): the #define line for this body is not visible here; it is
// presumably YP_PROJ -- confirm against the full file.
232 Chi_00 = Chimu_00-Chimu_30;\
233 Chi_01 = Chimu_01-Chimu_31;\
234 Chi_02 = Chimu_02-Chimu_32;\
235 Chi_10 = Chimu_10+Chimu_20;\
236 Chi_11 = Chimu_11+Chimu_21;\
237 Chi_12 = Chimu_12+Chimu_22;
// Projector body: Chi_0c = Chimu_0c + timesI(Chimu_2c), Chi_1c = Chimu_1c - timesI(Chimu_3c).
// NOTE(review): the #define line for this body is not visible here; it is
// presumably ZP_PROJ -- confirm against the full file.
240 Chi_00 = Chimu_00+timesI(Chimu_20); \
241 Chi_01 = Chimu_01+timesI(Chimu_21); \
242 Chi_02 = Chimu_02+timesI(Chimu_22); \
243 Chi_10 = Chimu_10-timesI(Chimu_30); \
244 Chi_11 = Chimu_11-timesI(Chimu_31); \
245 Chi_12 = Chimu_12-timesI(Chimu_32);
// Projector body: Chi_0c = Chimu_0c + Chimu_2c, Chi_1c = Chimu_1c + Chimu_3c.
// NOTE(review): the #define line for this body is not visible here; it is
// presumably TP_PROJ -- confirm against the full file.
248 Chi_00 = Chimu_00+Chimu_20; \
249 Chi_01 = Chimu_01+Chimu_21; \
250 Chi_02 = Chimu_02+Chimu_22; \
251 Chi_10 = Chimu_10+Chimu_30; \
252 Chi_11 = Chimu_11+Chimu_31; \
253 Chi_12 = Chimu_12+Chimu_32;
// Projector body: Chi_0c = Chimu_0c - timesI(Chimu_3c), Chi_1c = Chimu_1c - timesI(Chimu_2c).
// NOTE(review): the #define line for this body is not visible here; it is
// presumably XM_PROJ -- confirm against the full file.
259 Chi_00 = Chimu_00-timesI(Chimu_30);\
260 Chi_01 = Chimu_01-timesI(Chimu_31);\
261 Chi_02 = Chimu_02-timesI(Chimu_32);\
262 Chi_10 = Chimu_10-timesI(Chimu_20);\
263 Chi_11 = Chimu_11-timesI(Chimu_21);\
264 Chi_12 = Chimu_12-timesI(Chimu_22);
// Projector body: Chi_0c = Chimu_0c + Chimu_3c, Chi_1c = Chimu_1c - Chimu_2c.
// NOTE(review): the #define line for this body is not visible here; it is
// presumably YM_PROJ -- confirm against the full file.
267 Chi_00 = Chimu_00+Chimu_30;\
268 Chi_01 = Chimu_01+Chimu_31;\
269 Chi_02 = Chimu_02+Chimu_32;\
270 Chi_10 = Chimu_10-Chimu_20;\
271 Chi_11 = Chimu_11-Chimu_21;\
272 Chi_12 = Chimu_12-Chimu_22;
// Projector body: Chi_0c = Chimu_0c - timesI(Chimu_2c), Chi_1c = Chimu_1c + timesI(Chimu_3c).
// NOTE(review): the #define line for this body is not visible here; it is
// presumably ZM_PROJ -- confirm against the full file.
275 Chi_00 = Chimu_00-timesI(Chimu_20); \
276 Chi_01 = Chimu_01-timesI(Chimu_21); \
277 Chi_02 = Chimu_02-timesI(Chimu_22); \
278 Chi_10 = Chimu_10+timesI(Chimu_30); \
279 Chi_11 = Chimu_11+timesI(Chimu_31); \
280 Chi_12 = Chimu_12+timesI(Chimu_32);
// Projector body: Chi_0c = Chimu_0c - Chimu_2c, Chi_1c = Chimu_1c - Chimu_3c.
// NOTE(review): the #define line for this body is not visible here; it is
// presumably TM_PROJ -- confirm against the full file.
283 Chi_00 = Chimu_00-Chimu_20; \
284 Chi_01 = Chimu_01-Chimu_21; \
285 Chi_02 = Chimu_02-Chimu_22; \
286 Chi_10 = Chimu_10-Chimu_30; \
287 Chi_11 = Chimu_11-Chimu_31; \
288 Chi_12 = Chimu_12-Chimu_32;
// Reconstruct body (assigning form, overwrites result_*):
//   result_0c = UChi_0c, result_1c = UChi_1c,
//   result_2c = timesMinusI(UChi_1c), result_3c = timesMinusI(UChi_0c).
// NOTE(review): the #define line for this body is not visible here; it is
// presumably XP_RECON -- confirm against the full file.
295 result_00 = UChi_00;\
296 result_01 = UChi_01;\
297 result_02 = UChi_02;\
298 result_10 = UChi_10;\
299 result_11 = UChi_11;\
300 result_12 = UChi_12;\
301 result_20 = timesMinusI(UChi_10);\
302 result_21 = timesMinusI(UChi_11);\
303 result_22 = timesMinusI(UChi_12);\
304 result_30 = timesMinusI(UChi_00);\
305 result_31 = timesMinusI(UChi_01);\
306 result_32 = timesMinusI(UChi_02);
// XP_RECON_ACCUM: accumulating reconstruct. Visible updates:
//   result_2c -= timesI(UChi_1c), result_3c -= timesI(UChi_0c).
// NOTE(review): the lines between the header and the result_20 update are not
// visible; by analogy with the other *_RECON_ACCUM macros they presumably add
// UChi_0c / UChi_1c into result_0c / result_1c -- confirm against the full file.
308#define XP_RECON_ACCUM\
315 result_20-=timesI(UChi_10);\
316 result_21-=timesI(UChi_11);\
317 result_22-=timesI(UChi_12);\
318 result_30-=timesI(UChi_00);\
319 result_31-=timesI(UChi_01);\
320 result_32-=timesI(UChi_02);
// Reconstruct body (assigning form, overwrites result_*):
//   result_0c = UChi_0c, result_1c = UChi_1c,
//   result_2c = timesI(UChi_1c), result_3c = timesI(UChi_0c).
// NOTE(review): the #define line for this body is not visible here; it is
// presumably XM_RECON (the name used by HAND_DOP_SITE) -- confirm against the full file.
323 result_00 = UChi_00;\
324 result_01 = UChi_01;\
325 result_02 = UChi_02;\
326 result_10 = UChi_10;\
327 result_11 = UChi_11;\
328 result_12 = UChi_12;\
329 result_20 = timesI(UChi_10);\
330 result_21 = timesI(UChi_11);\
331 result_22 = timesI(UChi_12);\
332 result_30 = timesI(UChi_00);\
333 result_31 = timesI(UChi_01);\
334 result_32 = timesI(UChi_02);
// XM_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c += timesI(UChi_1c), result_3c += timesI(UChi_0c).
336#define XM_RECON_ACCUM\
337 result_00+= UChi_00;\
338 result_01+= UChi_01;\
339 result_02+= UChi_02;\
340 result_10+= UChi_10;\
341 result_11+= UChi_11;\
342 result_12+= UChi_12;\
343 result_20+= timesI(UChi_10);\
344 result_21+= timesI(UChi_11);\
345 result_22+= timesI(UChi_12);\
346 result_30+= timesI(UChi_00);\
347 result_31+= timesI(UChi_01);\
348 result_32+= timesI(UChi_02);
// YP_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c += UChi_1c, result_3c -= UChi_0c.
// NOTE(review): the visible body stops at result_31 with a trailing continuation;
// a `result_32-= UChi_02` line presumably follows -- confirm against the full file.
350#define YP_RECON_ACCUM\
351 result_00+= UChi_00;\
352 result_01+= UChi_01;\
353 result_02+= UChi_02;\
354 result_10+= UChi_10;\
355 result_11+= UChi_11;\
356 result_12+= UChi_12;\
357 result_20+= UChi_10;\
358 result_21+= UChi_11;\
359 result_22+= UChi_12;\
360 result_30-= UChi_00;\
361 result_31-= UChi_01;\
// YM_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c -= UChi_1c, result_3c += UChi_0c.
// NOTE(review): the visible body stops at result_31 with a trailing continuation;
// a `result_32+= UChi_02` line presumably follows -- confirm against the full file.
364#define YM_RECON_ACCUM\
365 result_00+= UChi_00;\
366 result_01+= UChi_01;\
367 result_02+= UChi_02;\
368 result_10+= UChi_10;\
369 result_11+= UChi_11;\
370 result_12+= UChi_12;\
371 result_20-= UChi_10;\
372 result_21-= UChi_11;\
373 result_22-= UChi_12;\
374 result_30+= UChi_00;\
375 result_31+= UChi_01;\
// ZP_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c -= timesI(UChi_0c), result_3c += timesI(UChi_1c).
378#define ZP_RECON_ACCUM\
379 result_00+= UChi_00;\
380 result_01+= UChi_01;\
381 result_02+= UChi_02;\
382 result_10+= UChi_10;\
383 result_11+= UChi_11;\
384 result_12+= UChi_12;\
385 result_20-= timesI(UChi_00); \
386 result_21-= timesI(UChi_01); \
387 result_22-= timesI(UChi_02); \
388 result_30+= timesI(UChi_10); \
389 result_31+= timesI(UChi_11); \
390 result_32+= timesI(UChi_12);
// ZM_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c += timesI(UChi_0c), result_3c -= timesI(UChi_1c).
392#define ZM_RECON_ACCUM\
393 result_00+= UChi_00;\
394 result_01+= UChi_01;\
395 result_02+= UChi_02;\
396 result_10+= UChi_10;\
397 result_11+= UChi_11;\
398 result_12+= UChi_12;\
399 result_20+= timesI(UChi_00); \
400 result_21+= timesI(UChi_01); \
401 result_22+= timesI(UChi_02); \
402 result_30-= timesI(UChi_10); \
403 result_31-= timesI(UChi_11); \
404 result_32-= timesI(UChi_12);
// TP_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c += UChi_0c, result_3c += UChi_1c.
// NOTE(review): the visible body stops at result_31 with a trailing continuation;
// a `result_32+= UChi_12` line presumably follows -- confirm against the full file.
406#define TP_RECON_ACCUM\
407 result_00+= UChi_00;\
408 result_01+= UChi_01;\
409 result_02+= UChi_02;\
410 result_10+= UChi_10;\
411 result_11+= UChi_11;\
412 result_12+= UChi_12;\
413 result_20+= UChi_00; \
414 result_21+= UChi_01; \
415 result_22+= UChi_02; \
416 result_30+= UChi_10; \
417 result_31+= UChi_11; \
// TM_RECON_ACCUM: accumulating reconstruct.
//   result_0c += UChi_0c, result_1c += UChi_1c,
//   result_2c -= UChi_0c, result_3c -= UChi_1c.
// NOTE(review): the visible body stops at result_31 with a trailing continuation;
// a `result_32-= UChi_12` line presumably follows -- confirm against the full file.
420#define TM_RECON_ACCUM\
421 result_00+= UChi_00;\
422 result_01+= UChi_01;\
423 result_02+= UChi_02;\
424 result_10+= UChi_10;\
425 result_11+= UChi_11;\
426 result_12+= UChi_12;\
427 result_20-= UChi_00; \
428 result_21-= UChi_01; \
429 result_22-= UChi_02; \
430 result_30-= UChi_10; \
431 result_31-= UChi_11; \
// HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL):
// processes one stencil direction. Visible steps:
//   * SE = st.GetEntry(ptype,DIR,ss); then offset, local and perm are read from
//     SE->_offset, SE->_is_local and SE->_permute;
//   * LOAD_CHIMU_IMPL(DIR,F,PERM) and LOAD_CHI_IMPL(DIR,F,PERM) are the two load paths;
//   * MULT_2SPIN_IMPL(DIR,F) applies the link multiply.
// NOTE(review): the conditionals selecting between the two loads, and the uses of
// PROJ and RECON, are on lines not visible here. Presumably `local` selects the
// full-spinor load followed by PROJ, and RECON runs after the multiply -- confirm.
434#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
435 SE=st.GetEntry(ptype,DIR,ss); \
436 offset = SE->_offset; \
437 local = SE->_is_local; \
438 perm = SE->_permute; \
440 LOAD_CHIMU_IMPL(DIR,F,PERM); \
446 LOAD_CHI_IMPL(DIR,F,PERM); \
448 MULT_2SPIN_IMPL(DIR,F); \
// HAND_STENCIL_LEG_INT(...): variant of HAND_STENCIL_LEG with the same parameters.
// Visible differences: the LOAD_CHI_IMPL path is taken in an
// `else if ( st.same_node[DIR] )` branch, and MULT_2SPIN_IMPL only runs when
// `local || st.same_node[DIR]`.
// NOTE(review): the opening `if` of the load selection, the uses of PROJ and RECON,
// and the closing braces are on lines not visible here -- confirm against the full file.
452#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
453 SE=st.GetEntry(ptype,DIR,ss); \
454 offset = SE->_offset; \
455 local = SE->_is_local; \
456 perm = SE->_permute; \
458 LOAD_CHIMU_IMPL(DIR,F,PERM); \
463 } else if ( st.same_node[DIR] ) { \
464 LOAD_CHI_IMPL(DIR,F,PERM); \
466 if (local || st.same_node[DIR] ) { \
467 MULT_2SPIN_IMPL(DIR,F); \
// HAND_STENCIL_LEG_EXT(...): variant of HAND_STENCIL_LEG with the same parameters.
// Reads offset and perm from the stencil entry (it does not assign `local`) and acts
// only when the entry is neither local nor on the same node:
// (!SE->_is_local) && (!st.same_node[DIR]). In that case it runs
// LOAD_CHI_IMPL(DIR,F,PERM) and MULT_2SPIN_IMPL(DIR,F).
// PROJ and LOAD_CHIMU_IMPL are not used in the visible lines.
// NOTE(review): the rest of the `if` body (presumably RECON and the closing brace)
// is not visible here -- confirm against the full file.
471#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
472 SE=st.GetEntry(ptype,DIR,ss); \
473 offset = SE->_offset; \
474 perm = SE->_permute; \
475 if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
476 LOAD_CHI_IMPL(DIR,F,PERM); \
477 MULT_2SPIN_IMPL(DIR,F); \
// HAND_RESULT(ss,F): binds `ref` to out[ss] and writes each result_sc into
// ref(F)(s)(c) with vstream, overwriting the destination components.
// NOTE(review): the lines carrying the enclosing braces are not visible here
// -- confirm against the full file.
482#define HAND_RESULT(ss,F) \
484 SiteSpinor & ref (out[ss]); \
485 vstream(ref(F)(0)(0),result_00); \
486 vstream(ref(F)(0)(1),result_01); \
487 vstream(ref(F)(0)(2),result_02); \
488 vstream(ref(F)(1)(0),result_10); \
489 vstream(ref(F)(1)(1),result_11); \
490 vstream(ref(F)(1)(2),result_12); \
491 vstream(ref(F)(2)(0),result_20); \
492 vstream(ref(F)(2)(1),result_21); \
493 vstream(ref(F)(2)(2),result_22); \
494 vstream(ref(F)(3)(0),result_30); \
495 vstream(ref(F)(3)(1),result_31); \
496 vstream(ref(F)(3)(2),result_32); \
// HAND_RESULT_EXT(ss,F): binds `ref` to out[ss] and adds each result_sc into
// ref(F)(s)(c) with +=, so it accumulates onto the existing output rather than
// overwriting it (compare HAND_RESULT, which uses vstream).
// NOTE(review): the lines carrying the enclosing braces, and any guard around the
// accumulation, are not visible here -- confirm against the full file.
499#define HAND_RESULT_EXT(ss,F) \
501 SiteSpinor & ref (out[ss]); \
502 ref(F)(0)(0)+=result_00; \
503 ref(F)(0)(1)+=result_01; \
504 ref(F)(0)(2)+=result_02; \
505 ref(F)(1)(0)+=result_10; \
506 ref(F)(1)(1)+=result_11; \
507 ref(F)(1)(2)+=result_12; \
508 ref(F)(2)(0)+=result_20; \
509 ref(F)(2)(1)+=result_21; \
510 ref(F)(2)(2)+=result_22; \
511 ref(F)(3)(0)+=result_30; \
512 ref(F)(3)(1)+=result_31; \
513 ref(F)(3)(2)+=result_32; \
// HAND_DECLARATIONS(a): invoked as HAND_DECLARATIONS(ignore) at the top of every
// kernel specialisation.
// NOTE(review): only the header line is visible here; the trailing continuation
// shows the body follows on lines not shown. It presumably declares the result_*,
// Chi_*, UChi_* and U_* working variables used by the other macros -- confirm.
517#define HAND_DECLARATIONS(a) \
// Name aliases: the twelve Chimu_sc names are mapped onto existing working
// variables instead of getting storage of their own.
//   Chimu_0c, Chimu_1c -> Chi_0c, Chi_1c
//   Chimu_2c, Chimu_3c -> UChi_0c, UChi_1c
// Any macro that writes a Chimu_* name therefore overwrites the corresponding
// Chi_* / UChi_* variable.
563#define Chimu_00 Chi_00
564#define Chimu_01 Chi_01
565#define Chimu_02 Chi_02
566#define Chimu_10 Chi_10
567#define Chimu_11 Chi_11
568#define Chimu_12 Chi_12
569#define Chimu_20 UChi_00
570#define Chimu_21 UChi_01
571#define Chimu_22 UChi_02
572#define Chimu_30 UChi_10
573#define Chimu_31 UChi_11
574#define Chimu_32 UChi_12
// HAND_DOP_SITE(F,...): expands HAND_STENCIL_LEG for the eight directions in the
// order Xp,Yp,Zp,Tp,Xm,Ym,Zm,Tm. Each direction is paired with the opposite-signed
// projector name (Xp with XM_PROJ, Xm with XP_PROJ, ...) and with PERM = 3,2,1,0
// for X,Y,Z,T. The first leg uses XM_RECON; all later legs use the *_RECON_ACCUM
// variants.
// NOTE(review): the body ends on a continuation; the final statement (presumably
// HAND_RESULT(ss,F)) is not visible here -- confirm against the full file.
578#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
579 HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
580 HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
581 HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
582 HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
583 HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
584 HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
585 HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
586 HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
// HAND_DOP_SITE_DAG(F,...): same eight-leg sequence as HAND_DOP_SITE, but each
// direction is paired with the same-signed projector name (Xp with XP_PROJ,
// Xm with XM_PROJ, ...). The first leg uses XP_RECON; all later legs use the
// *_RECON_ACCUM variants.
// NOTE(review): the body ends on a continuation; the final statement (presumably
// HAND_RESULT(ss,F)) is not visible here -- confirm against the full file.
589#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
590 HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
591 HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
592 HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
593 HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
594 HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
595 HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
596 HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
597 HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
// HAND_DOP_SITE_INT(F,...): eight-leg sequence built from HAND_STENCIL_LEG_INT, with
// the same direction/projector pairing as HAND_DOP_SITE. Every leg, including the
// first, uses a *_RECON_ACCUM variant, so result_* must already hold its starting
// value before the first leg runs.
// NOTE(review): the line after the header (presumably zeroing result_*) and the
// final statement after the last leg are not visible here -- confirm against the full file.
600#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
602 HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
603 HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
604 HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
605 HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
606 HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
607 HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
608 HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
609 HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
// HAND_DOP_SITE_DAG_INT(F,...): eight-leg sequence built from HAND_STENCIL_LEG_INT,
// with the same direction/projector pairing as HAND_DOP_SITE_DAG. Every leg uses a
// *_RECON_ACCUM variant, so result_* must already hold its starting value.
// NOTE(review): the line after the header (presumably zeroing result_*) and the
// final statement after the last leg are not visible here -- confirm against the full file.
612#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
614 HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
615 HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
616 HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
617 HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
618 HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
619 HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
620 HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
621 HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
// HAND_DOP_SITE_EXT(F,...): eight-leg sequence built from HAND_STENCIL_LEG_EXT, with
// the same direction/projector pairing as HAND_DOP_SITE. Every leg uses a
// *_RECON_ACCUM variant, and the macro finishes with HAND_RESULT_EXT(ss,F), which
// adds result_* onto out[ss].
// NOTE(review): the line after the header (presumably zeroing result_*) is not
// visible here -- confirm against the full file.
624#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
626 HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
627 HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
628 HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
629 HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
630 HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
631 HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
632 HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
633 HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
634 HAND_RESULT_EXT(ss,F)
// HAND_DOP_SITE_DAG_EXT(F,...): eight-leg sequence built from HAND_STENCIL_LEG_EXT,
// with the same direction/projector pairing as HAND_DOP_SITE_DAG. Every leg uses a
// *_RECON_ACCUM variant, and the macro finishes with HAND_RESULT_EXT(ss,F), which
// adds result_* onto out[ss].
// NOTE(review): the line after the header (presumably zeroing result_*) is not
// visible here -- confirm against the full file.
636#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
638 HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
639 HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
640 HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
641 HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
642 HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
643 HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
644 HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
645 HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
646 HAND_RESULT_EXT(ss,F)
// HAND_SPECIALISE_GPARITY(IMPL): emits explicit specialisations of six
// WilsonKernels<IMPL> member functions -- HandDhopSite, HandDhopSiteDag,
// HandDhopSiteInt, HandDhopSiteDagInt, HandDhopSiteExt and HandDhopSiteDagExt --
// all with the signature (st, U, buf, ss, sU, in, out).
// Each body declares the S and V typedefs, expands HAND_DECLARATIONS(ignore),
// declares the int locals used by the load/stencil macros, and then runs the
// matching HAND_DOP_SITE* macro twice, for F=0 and F=1, with the G-parity
// loaders and MULT_2SPIN_GPARITY.
// The two Ext variants do not declare `local`, matching HAND_STENCIL_LEG_EXT,
// which does not assign it.
// NOTE(review): the function braces and several other lines of each body are not
// visible here (for example any declaration of SE or initialisation of ptype)
// -- confirm against the full file.
648#define HAND_SPECIALISE_GPARITY(IMPL) \
649 template<> accelerator_inline void \
650 WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
651 int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
654 typedef typename Simd::scalar_type S; \
655 typedef typename Simd::vector_type V; \
657 HAND_DECLARATIONS(ignore); \
659 int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
661 HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
662 HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
665 template<> accelerator_inline void \
666 WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
667 int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
670 typedef typename Simd::scalar_type S; \
671 typedef typename Simd::vector_type V; \
673 HAND_DECLARATIONS(ignore); \
676 int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
677 HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
678 HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
681 template<> accelerator_inline void \
682 WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
683 int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
686 typedef typename Simd::scalar_type S; \
687 typedef typename Simd::vector_type V; \
689 HAND_DECLARATIONS(ignore); \
691 int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
693 HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
694 HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
697 template<> accelerator_inline void \
698 WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
699 int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
702 typedef typename Simd::scalar_type S; \
703 typedef typename Simd::vector_type V; \
705 HAND_DECLARATIONS(ignore); \
708 int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \
709 HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
710 HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
713 template<> accelerator_inline void \
714 WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
715 int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
718 typedef typename Simd::scalar_type S; \
719 typedef typename Simd::vector_type V; \
721 HAND_DECLARATIONS(ignore); \
723 int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
726 HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
728 HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
730 template<> accelerator_inline void \
731 WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
732 int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
735 typedef typename Simd::scalar_type S; \
736 typedef typename Simd::vector_type V; \
738 HAND_DECLARATIONS(ignore); \
741 int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \
743 HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
745 HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
#define NAMESPACE_BEGIN(A) /* empty replacement list: NAMESPACE_BEGIN(A) expands to nothing */