Grid 0.7.0
WilsonKernelsAsmA64FX.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5
6
7 Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmA64FX.h
8
9 Copyright (C) 2020
10
11Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
12
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26
27 See the full license in the file "LICENSE" in the top level distribution directory
28*************************************************************************************/
29/* END LEGAL */
30#pragma once
31
32//#if defined(A64FXASM)
33#if defined(A64FX)
34
35// safety include
36#include <arm_sve.h>
37
38// undefine everything related to kernels
40
41
43 // If we are A64FX specialise the single precision routine
// Chooses the implementation header for the single-precision kernels below.
// When DSLASHINTRIN is not defined, the build emits the message
// "A64FX Dslash: asm" and includes simd/Fujitsu_A64FX_asm_single.h.
// NOTE(review): the DSLASHINTRIN branch shows no #include in this listing (the
// embedded line numbering jumps from 46 to 48) -- check the upstream file for a
// matching intrinsics header before relying on that branch.
45#if defined(DSLASHINTRIN)
46//#pragma message ("A64FX Dslash: intrin")
48#else
49#pragma message ("A64FX Dslash: asm")
50#include <simd/Fujitsu_A64FX_asm_single.h>
51#endif
52
54#undef DWFVEC5D
55
57// XYZT vectorised, undag Kernel, single
// Preprocessor state for the kernel-body includes that follow: KERNEL_DAG is
// cleared and, of INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR, only
// INTERIOR_AND_EXTERIOR is defined. The body header is not visible from here;
// presumably it branches on these macros -- confirm in WilsonKernelsAsmBodyA64FX.h.
59#undef KERNEL_DAG
60#define INTERIOR_AND_EXTERIOR
61#undef INTERIOR
62#undef EXTERIOR
63
// Explicit specialisation of WilsonKernels<WilsonImplF>::AsmDhopSite.
// Only the declarator is written here; the #include directly after it is
// expected to supply the braces and statements of the function body. If that
// body refers to the parameters by name (st, U, buf, ss, ssU, Ls, Ns, in, out),
// those names must not be changed -- assumption, verify in the body header.
// The pragma passes -O3 and disables both instruction-scheduling passes; GCC
// documents `#pragma GCC optimize` as affecting functions defined after it.
64#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
65template<> void
66WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
67 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
68#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
69
// Explicit specialisation of WilsonKernels<ZWilsonImplF>::AsmDhopSite.
// Declarator only; the function body is pulled in textually by the #include
// that follows. No macro is changed between the preceding WilsonImplF
// specialisation and this one, so both includes see the same KERNEL_DAG /
// INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR settings unless the included
// header alters them itself (not visible here).
70#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
71template<> void
72WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
73 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
74#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
75
76//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
77//template<> void
78//WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
79// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
80//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
81
82//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
83//template<> void
84//WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
85// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
86//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
87
88
// Switches the kernel-body configuration so that only INTERIOR is defined
// (INTERIOR_AND_EXTERIOR and EXTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
89#undef INTERIOR_AND_EXTERIOR
90#define INTERIOR
91#undef EXTERIOR
92
// Explicit specialisation of WilsonKernels<WilsonImplF>::AsmDhopSiteInt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
93#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
94template<> void
95WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
96 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
97#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
98
// Explicit specialisation of WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplF specialisation, so this include is
// preprocessed with INTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
99#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
100template<> void
101WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
102 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
103#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
104
105//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
106//template<> void
107//WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
108// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
109//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
110
111//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
112//template<> void
113//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
114// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
115//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
116
117
// Switches the kernel-body configuration so that only EXTERIOR is defined
// (INTERIOR_AND_EXTERIOR and INTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
118#undef INTERIOR_AND_EXTERIOR
119#undef INTERIOR
120#define EXTERIOR
121
// Explicit specialisation of WilsonKernels<WilsonImplF>::AsmDhopSiteExt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
122#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
123template<> void
124WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
125 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
126#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
127
// Explicit specialisation of WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplF specialisation, so this include is
// preprocessed with EXTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
128#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
129template<> void
130WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
131 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
132#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
133
134//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
135//template<> void
136//WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
137// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
138//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
139
140//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
141//template<> void
142//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
143// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
144//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
145
146
147
149// XYZT vectorised, dag Kernel, single
// Start of the section the file labels "dag Kernel, single": KERNEL_DAG is now
// defined, and the configuration is reset so that only INTERIOR_AND_EXTERIOR
// is defined among INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR.
151#define KERNEL_DAG
152#define INTERIOR_AND_EXTERIOR
153#undef INTERIOR
154#undef EXTERIOR
155
// Explicit specialisation of WilsonKernels<WilsonImplF>::AsmDhopSiteDag.
// Only the declarator appears here; the #include that follows is expected to
// provide the function body, preprocessed under the macro state set above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
156#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
157template<> void
158WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
159 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
160#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
161
// Explicit specialisation of WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplF specialisation, so KERNEL_DAG and
// INTERIOR_AND_EXTERIOR are still defined here unless the body header changes
// them (not visible from this file).
162#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
163template<> void
164WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
165 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
166#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
167
168//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
169//template<> void
170//WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
171// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
172//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
173
174//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
175//template<> void
176//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
177// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
178//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
179
180
// Switches the kernel-body configuration so that only INTERIOR is defined
// (INTERIOR_AND_EXTERIOR and EXTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
181#undef INTERIOR_AND_EXTERIOR
182#define INTERIOR
183#undef EXTERIOR
184
// Explicit specialisation of WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
185#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
186template<> void
187WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
188 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
189#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
190
// Explicit specialisation of WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplF specialisation, so this include is
// preprocessed with INTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
191#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
192template<> void
193WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
194 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
195#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
196
197//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
198//template<> void
199//WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
200// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
201//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
202
203//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
204//template<> void
205//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
206// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
207//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
208
209
// Switches the kernel-body configuration so that only EXTERIOR is defined
// (INTERIOR_AND_EXTERIOR and INTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
210#undef INTERIOR_AND_EXTERIOR
211#undef INTERIOR
212#define EXTERIOR
213
// Explicit specialisation of WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
214#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
215template<> void
216WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
217 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
218#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
219
// Explicit specialisation of WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplF specialisation, so this include is
// preprocessed with EXTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
220#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
221template<> void
222WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
223 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
224#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
225
226//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
227//template<> void
228//WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
229// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
230//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
231
232//#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
233//template<> void
234//WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
235// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
236//#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
237
238
239
240// undefine
242
244// If we are A64FX specialise the double precision routine
246
// Chooses the implementation header for the double-precision kernels below.
// When DSLASHINTRIN is not defined, simd/Fujitsu_A64FX_asm_double.h is included.
// NOTE(review): the DSLASHINTRIN branch is empty in this listing (the embedded
// line numbering jumps from 247 to 249) -- check the upstream file for a
// matching intrinsics header before relying on that branch.
247#if defined(DSLASHINTRIN)
249#else
250#include <simd/Fujitsu_A64FX_asm_double.h>
251#endif
252
253// former KNL
254//#define MAYBEPERM(A,perm) if (perm) { A ; }
255//#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
256//#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0];
257
258
// Resets the kernel-body configuration so that only INTERIOR_AND_EXTERIOR is
// defined among INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR.
// NOTE(review): the same three directives are repeated a few lines further
// down, after `#undef KERNEL_DAG`, with identical values; one of the two sets
// looks redundant.
259#define INTERIOR_AND_EXTERIOR
260#undef INTERIOR
261#undef EXTERIOR
262
264// XYZT vectorised, undag Kernel, double
266#undef KERNEL_DAG
267#define INTERIOR_AND_EXTERIOR
268#undef INTERIOR
269#undef EXTERIOR
270
// Explicit specialisation of WilsonKernels<WilsonImplD>::AsmDhopSite.
// Only the declarator is written here; the #include directly after it is
// expected to supply the function body, preprocessed with KERNEL_DAG cleared
// and INTERIOR_AND_EXTERIOR defined as set above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
271#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
272template<> void
273WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
274 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
275#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
276
// Explicit specialisation of WilsonKernels<ZWilsonImplD>::AsmDhopSite.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplD specialisation, so both includes see
// the same KERNEL_DAG / INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR settings
// unless the included header alters them itself (not visible here).
277#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
278template<> void
279WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
280 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
281#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
282
283// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
284// template<> void
285// WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
286// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
287// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
288
289// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
290// template<> void
291// WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
292// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
293// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
294
295
// Switches the kernel-body configuration so that only INTERIOR is defined
// (INTERIOR_AND_EXTERIOR and EXTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
296#undef INTERIOR_AND_EXTERIOR
297#define INTERIOR
298#undef EXTERIOR
299
// Explicit specialisation of WilsonKernels<WilsonImplD>::AsmDhopSiteInt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
300#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
301template<> void
302WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
303 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
304#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
305
// Explicit specialisation of WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplD specialisation, so this include is
// preprocessed with INTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
306#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
307template<> void
308WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
309 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
310#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
311
312// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
313// template<> void
314// WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
315// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
316// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
317
318// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
319// template<> void
320// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
321// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
322// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
323
324
// Switches the kernel-body configuration so that only EXTERIOR is defined
// (INTERIOR_AND_EXTERIOR and INTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
325#undef INTERIOR_AND_EXTERIOR
326#undef INTERIOR
327#define EXTERIOR
328
// Explicit specialisation of WilsonKernels<WilsonImplD>::AsmDhopSiteExt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
329#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
330template<> void
331WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
332 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
333#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
334
// Explicit specialisation of WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplD specialisation, so this include is
// preprocessed with EXTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
335#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
336template<> void
337WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
338 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
339#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
340
341// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
342// template<> void
343// WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
344// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
345// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
346
347// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
348// template<> void
349// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
350// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
351// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
352
353
355// XYZT vectorised, dag Kernel, double
// Start of the section the file labels "dag Kernel, double": KERNEL_DAG is now
// defined, and the configuration is reset so that only INTERIOR_AND_EXTERIOR
// is defined among INTERIOR_AND_EXTERIOR / INTERIOR / EXTERIOR.
357#define KERNEL_DAG
358#define INTERIOR_AND_EXTERIOR
359#undef INTERIOR
360#undef EXTERIOR
361
// Explicit specialisation of WilsonKernels<WilsonImplD>::AsmDhopSiteDag.
// Only the declarator appears here; the #include that follows is expected to
// provide the function body, preprocessed under the macro state set above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
362#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
363template<> void
364WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
365 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
366#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
367
// Explicit specialisation of WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplD specialisation, so KERNEL_DAG and
// INTERIOR_AND_EXTERIOR are still defined here unless the body header changes
// them (not visible from this file).
368#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
369template<> void
370WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
371 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
372#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
373
374// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
375// template<> void
376// WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
377// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
378// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
379
380// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
381// template<> void
382// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
383// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
384// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
385
386
// Switches the kernel-body configuration so that only INTERIOR is defined
// (INTERIOR_AND_EXTERIOR and EXTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
387#undef INTERIOR_AND_EXTERIOR
388#define INTERIOR
389#undef EXTERIOR
390
// Explicit specialisation of WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
391#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
392template<> void
393WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
394 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
395#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
396
// Explicit specialisation of WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplD specialisation, so this include is
// preprocessed with INTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
397#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
398template<> void
399WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
400 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
401#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
402
403// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
404// template<> void
405// WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
406// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
407// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
408
409// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
410// template<> void
411// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
412// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
413// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
414
415
// Switches the kernel-body configuration so that only EXTERIOR is defined
// (INTERIOR_AND_EXTERIOR and INTERIOR are cleared). KERNEL_DAG is not touched
// by these directives.
416#undef INTERIOR_AND_EXTERIOR
417#undef INTERIOR
418#define EXTERIOR
419
// Explicit specialisation of WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt.
// The declarator has no body of its own; the #include on the next line is
// expected to supply it, preprocessed under the macro state set just above.
// The pragma requests -O3 with both instruction-scheduling passes disabled.
420#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
421template<> void
422WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
423 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
424#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
425
// Explicit specialisation of WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt.
// Declarator only; the body comes from the #include that follows. No macro is
// changed since the preceding WilsonImplD specialisation, so this include is
// preprocessed with EXTERIOR still defined (unless the body header changes it,
// which cannot be seen from here).
426#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
427template<> void
428WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
429 int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
430#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
431
432// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
433// template<> void
434// WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
435// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
436// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
437
438// #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
439// template<> void
440// WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
441// int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
442// #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
443
444
445
446
447// undefs
449
450#endif // A64FX
static constexpr int Ns
Definition QCD.h:51
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
static void AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)
static void AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out)