Grid
0.7.0
WilsonKernelsAsmBody.h
Go to the documentation of this file.
1
// Bind the eight per-leg projector (DIRn_PROJMEM) and reconstruct (DIRn_RECON)
// macro names to the concrete XP/YP/ZP/TP and XM/YM/ZM/TM variants.
// With KERNEL_DAG defined, legs 0-3 take the *P variants and legs 4-7 the *M
// variants; without it the two groups are swapped.
// Each DIRn_PROJMEM expansion already carries a trailing semicolon.
#ifdef KERNEL_DAG
2
#define DIR0_PROJMEM(base) XP_PROJMEM(base);
3
#define DIR1_PROJMEM(base) YP_PROJMEM(base);
4
#define DIR2_PROJMEM(base) ZP_PROJMEM(base);
5
#define DIR3_PROJMEM(base) TP_PROJMEM(base);
6
#define DIR4_PROJMEM(base) XM_PROJMEM(base);
7
#define DIR5_PROJMEM(base) YM_PROJMEM(base);
8
#define DIR6_PROJMEM(base) ZM_PROJMEM(base);
9
#define DIR7_PROJMEM(base) TM_PROJMEM(base);
10
// Leg 0 is the only one mapped to a plain *_RECON; legs 1-7 use *_RECON_ACCUM.
// NOTE(review): presumably leg 0 initialises the result and the rest accumulate
// into it -- confirm against the *_RECON / *_RECON_ACCUM definitions.
#define DIR0_RECON XP_RECON
11
#define DIR1_RECON YP_RECON_ACCUM
12
#define DIR2_RECON ZP_RECON_ACCUM
13
#define DIR3_RECON TP_RECON_ACCUM
14
#define DIR4_RECON XM_RECON_ACCUM
15
#define DIR5_RECON YM_RECON_ACCUM
16
#define DIR6_RECON ZM_RECON_ACCUM
17
#define DIR7_RECON TM_RECON_ACCUM
18
// KERNEL_DAG not defined: same layout with the P and M variants exchanged.
#else
19
#define DIR0_PROJMEM(base) XM_PROJMEM(base);
20
#define DIR1_PROJMEM(base) YM_PROJMEM(base);
21
#define DIR2_PROJMEM(base) ZM_PROJMEM(base);
22
#define DIR3_PROJMEM(base) TM_PROJMEM(base);
23
#define DIR4_PROJMEM(base) XP_PROJMEM(base);
24
#define DIR5_PROJMEM(base) YP_PROJMEM(base);
25
#define DIR6_PROJMEM(base) ZP_PROJMEM(base);
26
#define DIR7_PROJMEM(base) TP_PROJMEM(base);
27
#define DIR0_RECON XM_RECON
28
#define DIR1_RECON YM_RECON_ACCUM
29
#define DIR2_RECON ZM_RECON_ACCUM
30
#define DIR3_RECON TM_RECON_ACCUM
31
#define DIR4_RECON XP_RECON_ACCUM
32
#define DIR5_RECON YP_RECON_ACCUM
33
#define DIR6_RECON ZP_RECON_ACCUM
34
#define DIR7_RECON TP_RECON_ACCUM
35
#endif
36
38
// Comms then compute kernel
// Macro set selected by INTERIOR_AND_EXTERIOR: every leg is processed
// unconditionally (there is no same_node test in this variant).
40
#ifdef INTERIOR_AND_EXTERIOR
41
42
// ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON): one leg of the kernel.
//   1. basep = st.GetPFInfo(nent,plocal), then nent is incremented.
//   2. If `local` is set: reload %r10 with isigns, run PROJ(base) and
//      MAYBEPERM(PERMUTE_DIR,perm); otherwise LOAD_CHI(base).
//   3. base is then replaced by the st.GetInfo(...) lookup for NxtDir (the
//      following leg), ent is incremented, and PREFETCH_CHIMU(base) is issued.
//   4. MULT_2SPIN_DIR_PF(Dir,basep), reload %r10 with isigns, then RECON.
// Relies on st, local, perm, ptype, base, basep, ent, nent, plocal and isigns
// being in scope at the expansion site.
// NOTE(review): st.GetInfo presumably writes ptype/local/perm through its
// arguments -- confirm against the stencil class.
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
43
basep = st.GetPFInfo(nent,plocal); nent++; \
44
if ( local ) { \
45
LOAD64(%r10,isigns); \
46
PROJ(base); \
47
MAYBEPERM(PERMUTE_DIR,perm); \
48
} else { \
49
LOAD_CHI(base); \
50
} \
51
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
52
PREFETCH_CHIMU(base); \
53
MULT_2SPIN_DIR_PF(Dir,basep); \
54
LOAD64(%r10,isigns); \
55
RECON; \
56
57
// ASM_LEG_XP: first leg of a site. Performs the st.GetInfo lookup for Dir
// itself (later legs get theirs from the previous leg's NxtDir lookup), issues
// PF_GAUGE(Xp) and PREFETCH1_CHIMU(base), then expands the regular ASM_LEG.
// PF_GAUGE is given the literal Xp rather than the Dir parameter.
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
58
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
59
PF_GAUGE(Xp); \
60
PREFETCH1_CHIMU(base); \
61
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
62
63
// RESULT forwards both arguments to SAVE_RESULT (expansion ends with a semicolon).
#define RESULT(base,basep) SAVE_RESULT(base,basep);
64
65
#endif
66
68
// Pre comms kernel -- prefetch like normal because it is mostly right
// Macro set selected by INTERIOR: a leg contributes only when `local` is set
// or st.same_node[Dir] is true; other legs are skipped here.
70
#ifdef INTERIOR
71
72
// ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON): one leg of the kernel.
//   1. basep = st.GetPFInfo(nent,plocal), then nent is incremented.
//   2. If `local`: reload %r10 with isigns, PROJ(base), MAYBEPERM(PERMUTE_DIR,perm);
//      else if st.same_node[Dir]: LOAD_CHI(base); otherwise nothing is loaded.
//   3. Only when local || st.same_node[Dir]: MULT_2SPIN_DIR_PF(Dir,basep),
//      reload %r10 with isigns, RECON.
//   4. Always: base = st.GetInfo(...) for NxtDir, ent is incremented, and
//      PREFETCH_CHIMU(base) is issued.
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
73
basep = st.GetPFInfo(nent,plocal); nent++; \
74
if ( local ) { \
75
LOAD64(%r10,isigns); \
76
PROJ(base); \
77
MAYBEPERM(PERMUTE_DIR,perm); \
78
}else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
79
if ( local || st.same_node[Dir] ) { \
80
MULT_2SPIN_DIR_PF(Dir,basep); \
81
LOAD64(%r10,isigns); \
82
RECON; \
83
} \
84
base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
85
PREFETCH_CHIMU(base); \
86
87
// ASM_LEG_XP: first leg of a site. Performs the st.GetInfo lookup for Dir,
// issues PF_GAUGE(Xp) and PREFETCH1_CHIMU(base), runs ZERO_PSI, then expands
// the regular ASM_LEG.
// NOTE(review): ZERO_PSI presumably clears the accumulator because leg 0's
// RECON may be skipped in this variant -- confirm against its definition.
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
88
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
89
PF_GAUGE(Xp); \
90
PREFETCH1_CHIMU(base); \
91
{ ZERO_PSI; } \
92
ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
93
94
// RESULT forwards both arguments to SAVE_RESULT (expansion ends with a semicolon).
#define RESULT(base,basep) SAVE_RESULT(base,basep);
95
96
#endif
98
// Post comms kernel
// Macro set selected by EXTERIOR: a leg contributes only when it is neither
// `local` nor on the same node (the complement of the INTERIOR condition).
// nmu counts how many legs contributed for the current site.
100
#ifdef EXTERIOR
101
102
103
// ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON): one leg of the kernel.
// Looks up base for Dir itself (NxtDir, PERMUTE_DIR and PROJ are unused in this
// variant) and increments ent. When !local && !st.same_node[Dir] it runs
// LOAD_CHI(base), MULT_2SPIN_DIR_PF(Dir,base), reloads %r10 with isigns,
// runs RECON and increments nmu.
#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
104
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
105
if((!local)&&(!st.same_node[Dir]) ) { \
106
LOAD_CHI(base); \
107
MULT_2SPIN_DIR_PF(Dir,base); \
108
LOAD64(%r10,isigns); \
109
RECON; \
110
nmu++; \
111
}
112
113
// ASM_LEG_XP: first leg of a site. Resets nmu to 0 and runs ZERO_PSI, then
// performs the same steps as the EXTERIOR ASM_LEG above (written out inline
// rather than by expanding ASM_LEG).
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
114
nmu=0; \
115
{ ZERO_PSI;} \
116
base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
117
if((!local)&&(!st.same_node[Dir]) ) { \
118
LOAD_CHI(base); \
119
MULT_2SPIN_DIR_PF(Dir,base); \
120
LOAD64(%r10,isigns); \
121
RECON; \
122
nmu++; \
123
}
124
125
// RESULT calls ADD_RESULT only when at least one leg contributed (nmu != 0).
// The basep parameter is not used: base is passed for both arguments.
#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}
126
127
#endif
128
// Kernel body shared by all variants. It uses st, in, out, U, Ns, Ls, ss and
// ssU, none of which are declared in this fragment, so they must come from
// the enclosing context.
{
129
// nmu is only assigned by the EXTERIOR macro set (leg contribution counter).
int
nmu;
130
// Per-leg state passed to st.GetInfo(...) by the ASM_LEG macros.
int
local
,
perm
,
ptype
;
131
// base / basep hold addresses as integers: base is the current operand
// address, basep the value returned by st.GetPFInfo(...).
uint64_t
base
;
132
uint64_t
basep
;
133
// Address of in[0], passed to every st.GetInfo / st.GetPFInfo call.
const
uint64_t
plocal
=(uint64_t) & in[0];
134
135
COMPLEX_SIGNS
(isigns);
136
MASK_REGS
;
137
// nmax is the wrap-around bound used when computing the next-site index below.
int
nmax
=
U
.oSites();
138
// Outer loop: Ns iterations, advancing ssU by one per iteration.
for
(
int
site=0;site<
Ns
;site++) {
139
// Both preprocessor branches compute the same sU / ssn / sUn; only the
// non-EXTERIOR branch also calls LOCK_GAUGE(0).
#ifndef EXTERIOR
140
// int sU =lo.Reorder(ssU);
141
int
sU =ssU;
142
// ssn is the index after ssU, wrapping to 0 once it reaches nmax.
int
ssn=ssU+1;
if
(ssn>=
nmax
) ssn=0;
143
// int sUn=lo.Reorder(ssn);
144
int
sUn=ssn;
145
LOCK_GAUGE
(0);
146
#else
147
int
sU =ssU;
148
int
ssn=ssU+1;
if
(ssn>=
nmax
) ssn=0;
149
int
sUn=ssn;
150
#endif
151
// Inner loop over s in [0, Ls).
for
(
int
s=0;s<Ls;s++) {
152
// ss indexes the current (site, s) element; ssn is reused here to hold the
// same-s element of the next site.
ss =sU*Ls+s;
153
ssn=sUn*Ls+s;
154
// ent / nent are the starting stencil-entry indices (8 entries per element)
// for the current and next element; the ASM_LEG macros increment them.
int
ent=ss*8;
// 2*Ndim
155
int
nent=ssn*8;
156
157
// Eight legs in the order Xp,Yp,Zp,Tp,Xm,Ym,Zm,Tm. The second argument names
// the following leg (the last one wraps back to Xp); PERMUTE_DIR3..0 is used
// for X,Y,Z,T respectively.
ASM_LEG_XP(
Xp
,
Yp
,
PERMUTE_DIR3
,
DIR0_PROJMEM
,
DIR0_RECON
);
158
ASM_LEG(
Yp
,
Zp
,
PERMUTE_DIR2
,
DIR1_PROJMEM
,
DIR1_RECON
);
159
ASM_LEG(
Zp
,
Tp
,
PERMUTE_DIR1
,
DIR2_PROJMEM
,
DIR2_RECON
);
160
ASM_LEG(
Tp
,
Xm
,
PERMUTE_DIR0
,
DIR3_PROJMEM
,
DIR3_RECON
);
161
162
ASM_LEG(
Xm
,
Ym
,
PERMUTE_DIR3
,
DIR4_PROJMEM
,
DIR4_RECON
);
163
ASM_LEG(
Ym
,
Zm
,
PERMUTE_DIR2
,
DIR5_PROJMEM
,
DIR5_RECON
);
164
ASM_LEG(
Zm
,
Tm
,
PERMUTE_DIR1
,
DIR6_PROJMEM
,
DIR6_RECON
);
165
ASM_LEG(
Tm
,
Xp
,
PERMUTE_DIR0
,
DIR7_PROJMEM
,
DIR7_RECON
);
166
167
// EXTERIOR only: if no leg contributed for this element, leave the s loop
// entirely (remaining s values for this site are skipped as well).
// NOTE(review): presumably valid because the local/same_node pattern does not
// depend on s -- confirm against the stencil layout.
#ifdef EXTERIOR
168
if
(nmu==0)
break
;
169
// if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
170
#endif
171
// Write-back: base is the address of out[ss]; basep comes from one more
// st.GetPFInfo lookup, after which nent is incremented.
base
= (uint64_t) &out[ss];
172
basep
= st.GetPFInfo(nent,
plocal
); nent++;
173
RESULT(
base
,
basep
);
174
}
175
ssU++;
176
// NOTE(review): UNLOCK_GAUGE(0) runs in every variant, whereas LOCK_GAUGE(0)
// is compiled only when EXTERIOR is not defined -- confirm this is intended.
UNLOCK_GAUGE
(0);
177
}
178
}
179
180
// Remove every helper macro defined by this file so that it can be included
// again with a different KERNEL_DAG / INTERIOR / EXTERIOR selection.
#undef DIR0_PROJMEM
181
#undef DIR1_PROJMEM
182
#undef DIR2_PROJMEM
183
#undef DIR3_PROJMEM
184
#undef DIR4_PROJMEM
185
#undef DIR5_PROJMEM
186
#undef DIR6_PROJMEM
187
#undef DIR7_PROJMEM
188
#undef DIR0_RECON
189
#undef DIR1_RECON
190
#undef DIR2_RECON
191
#undef DIR3_RECON
192
#undef DIR4_RECON
193
#undef DIR5_RECON
194
#undef DIR6_RECON
195
#undef DIR7_RECON
196
#undef ASM_LEG
197
#undef ASM_LEG_XP
198
#undef RESULT
LOCK_GAUGE
#define LOCK_GAUGE(dir)
Definition
BGQQPX.h:130
UNLOCK_GAUGE
#define UNLOCK_GAUGE(dir)
Definition
BGQQPX.h:138
MASK_REGS
#define MASK_REGS
Definition
BGQQPX.h:65
perm
#define perm(a, b, n, w)
Definition
Grid_generic.h:379
Xm
static constexpr int Xm
Definition
QCD.h:45
Tm
static constexpr int Tm
Definition
QCD.h:48
Ns
static constexpr int Ns
Definition
QCD.h:51
Tp
static constexpr int Tp
Definition
QCD.h:44
Zp
static constexpr int Zp
Definition
QCD.h:43
Zm
static constexpr int Zm
Definition
QCD.h:47
Xp
static constexpr int Xp
Definition
QCD.h:41
Yp
static constexpr int Yp
Definition
QCD.h:42
Ym
static constexpr int Ym
Definition
QCD.h:46
PERMUTE_DIR2
#define PERMUTE_DIR2
Definition
StaggeredKernelsAsm.h:802
PERMUTE_DIR1
#define PERMUTE_DIR1
Definition
StaggeredKernelsAsm.h:807
PERMUTE_DIR0
#define PERMUTE_DIR0
Definition
StaggeredKernelsAsm.h:812
PERMUTE_DIR3
#define PERMUTE_DIR3
Definition
StaggeredKernelsAsm.h:797
DIR7_PROJMEM
#define DIR7_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:26
plocal
const uint64_t plocal
Definition
WilsonKernelsAsmBody.h:133
COMPLEX_SIGNS
COMPLEX_SIGNS(isigns)
ptype
int ptype
Definition
WilsonKernelsAsmBody.h:130
basep
uint64_t basep
Definition
WilsonKernelsAsmBody.h:132
local
int local
Definition
WilsonKernelsAsmBody.h:130
DIR1_RECON
#define DIR1_RECON
Definition
WilsonKernelsAsmBody.h:28
DIR4_PROJMEM
#define DIR4_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:23
nmax
int nmax
Definition
WilsonKernelsAsmBody.h:137
DIR5_PROJMEM
#define DIR5_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:24
DIR1_PROJMEM
#define DIR1_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:20
DIR6_PROJMEM
#define DIR6_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:25
DIR3_RECON
#define DIR3_RECON
Definition
WilsonKernelsAsmBody.h:30
DIR7_RECON
#define DIR7_RECON
Definition
WilsonKernelsAsmBody.h:34
DIR6_RECON
#define DIR6_RECON
Definition
WilsonKernelsAsmBody.h:33
DIR2_RECON
#define DIR2_RECON
Definition
WilsonKernelsAsmBody.h:29
DIR5_RECON
#define DIR5_RECON
Definition
WilsonKernelsAsmBody.h:32
DIR4_RECON
#define DIR4_RECON
Definition
WilsonKernelsAsmBody.h:31
DIR2_PROJMEM
#define DIR2_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:21
DIR3_PROJMEM
#define DIR3_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:22
base
uint64_t base
Definition
WilsonKernelsAsmBody.h:131
DIR0_RECON
#define DIR0_RECON
Definition
WilsonKernelsAsmBody.h:27
DIR0_PROJMEM
#define DIR0_PROJMEM(base)
Definition
WilsonKernelsAsmBody.h:19
U
static INTERNAL_PRECISION U
Definition
Zolotarev.cc:230
Grid
qcd
action
fermion
implementation
WilsonKernelsAsmBody.h
Generated by
1.16.1