Grid 0.7.0
WilsonKernelsAsmBody.h
Go to the documentation of this file.
// Bind the generic per-leg names DIR0..DIR7 to concrete projector
// (*_PROJMEM) and reconstruct (*_RECON / *_RECON_ACCUM) macros.
// With KERNEL_DAG defined, legs 0-3 use the XP/YP/ZP/TP variants and
// legs 4-7 use the XM/YM/ZM/TM variants; without it the two groups are swapped.
// Every DIRn_PROJMEM expansion already ends in ';'.
// DIR0_RECON is the only plain *_RECON; DIR1..DIR7 all map to *_RECON_ACCUM.
1#ifdef KERNEL_DAG
2#define DIR0_PROJMEM(base) XP_PROJMEM(base);
3#define DIR1_PROJMEM(base) YP_PROJMEM(base);
4#define DIR2_PROJMEM(base) ZP_PROJMEM(base);
5#define DIR3_PROJMEM(base) TP_PROJMEM(base);
6#define DIR4_PROJMEM(base) XM_PROJMEM(base);
7#define DIR5_PROJMEM(base) YM_PROJMEM(base);
8#define DIR6_PROJMEM(base) ZM_PROJMEM(base);
9#define DIR7_PROJMEM(base) TM_PROJMEM(base);
10#define DIR0_RECON XP_RECON
11#define DIR1_RECON YP_RECON_ACCUM
12#define DIR2_RECON ZP_RECON_ACCUM
13#define DIR3_RECON TP_RECON_ACCUM
14#define DIR4_RECON XM_RECON_ACCUM
15#define DIR5_RECON YM_RECON_ACCUM
16#define DIR6_RECON ZM_RECON_ACCUM
17#define DIR7_RECON TM_RECON_ACCUM
18#else
// KERNEL_DAG not defined: same leg numbering, opposite-sign variants.
19#define DIR0_PROJMEM(base) XM_PROJMEM(base);
20#define DIR1_PROJMEM(base) YM_PROJMEM(base);
21#define DIR2_PROJMEM(base) ZM_PROJMEM(base);
22#define DIR3_PROJMEM(base) TM_PROJMEM(base);
23#define DIR4_PROJMEM(base) XP_PROJMEM(base);
24#define DIR5_PROJMEM(base) YP_PROJMEM(base);
25#define DIR6_PROJMEM(base) ZP_PROJMEM(base);
26#define DIR7_PROJMEM(base) TP_PROJMEM(base);
27#define DIR0_RECON XM_RECON
28#define DIR1_RECON YM_RECON_ACCUM
29#define DIR2_RECON ZM_RECON_ACCUM
30#define DIR3_RECON TM_RECON_ACCUM
31#define DIR4_RECON XP_RECON_ACCUM
32#define DIR5_RECON YP_RECON_ACCUM
33#define DIR6_RECON ZP_RECON_ACCUM
34#define DIR7_RECON TP_RECON_ACCUM
35#endif
36
38// Comms then compute kernel
40#ifdef INTERIOR_AND_EXTERIOR
41
// ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON): one stencil leg.
//   1. basep <- st.GetPFInfo(nent,plocal), then nent is advanced.
//   2. If `local`: load isigns into %r10, run PROJ(base), then
//      MAYBEPERM(PERMUTE_DIR,perm). Otherwise: LOAD_CHI(base).
//   3. base is then overwritten with the st.GetInfo(...) result for NxtDir
//      (ent is advanced) and PREFETCH_CHIMU(base) is issued for it.
//   4. MULT_2SPIN_DIR_PF(Dir,basep), reload isigns into %r10, then RECON.
// On entry `base` must already hold the info for Dir (ASM_LEG_XP or the
// previous ASM_LEG sets it).
// NOTE(review): ptype, local and perm are not declared in the visible code;
// presumably they come from the including scope and are updated by
// st.GetInfo - confirm.
// The expansion is a bare statement sequence (no do/while(0) wrapper), so it
// must be used as a stand-alone statement inside a block.
42#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
43 basep = st.GetPFInfo(nent,plocal); nent++; \
44 if ( local ) { \
45 LOAD64(%r10,isigns); \
46 PROJ(base); \
47 MAYBEPERM(PERMUTE_DIR,perm); \
48 } else { \
49 LOAD_CHI(base); \
50 } \
51 base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
52 PREFETCH_CHIMU(base); \
53 MULT_2SPIN_DIR_PF(Dir,basep); \
54 LOAD64(%r10,isigns); \
55 RECON; \
56
// ASM_LEG_XP: first leg of a site. Fetches the info for Dir itself, issues
// PF_GAUGE(Xp) (hard-coded to Xp, independent of Dir) and
// PREFETCH1_CHIMU(base), then runs a normal ASM_LEG.
// No ZERO_PSI is issued in this variant.
57#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
58 base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
59 PF_GAUGE(Xp); \
60 PREFETCH1_CHIMU(base); \
61 ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
62
// RESULT: forwards both arguments to SAVE_RESULT.
63#define RESULT(base,basep) SAVE_RESULT(base,basep);
64
65#endif
66
68// Pre comms kernel -- prefetch like normal because it is mostly right
70#ifdef INTERIOR
71
// ASM_LEG (INTERIOR variant): one stencil leg, computed only when the
// neighbour is `local` or st.same_node[Dir] is set.
//   - local:              load isigns, PROJ(base), MAYBEPERM(PERMUTE_DIR,perm)
//   - st.same_node[Dir]:  LOAD_CHI(base)
//   - otherwise:          nothing is loaded, multiplied or reconstructed
// When the leg is computed: MULT_2SPIN_DIR_PF(Dir,basep), reload isigns into
// %r10, then RECON.
// In every case basep/nent and base/ent are advanced, and PREFETCH_CHIMU is
// issued for NxtDir, so the ent/nent counters stay in step whether or not the
// leg was computed.
// The expansion is a bare statement sequence (no do/while(0) wrapper).
72#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
73 basep = st.GetPFInfo(nent,plocal); nent++; \
74 if ( local ) { \
75 LOAD64(%r10,isigns); \
76 PROJ(base); \
77 MAYBEPERM(PERMUTE_DIR,perm); \
78 }else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
79 if ( local || st.same_node[Dir] ) { \
80 MULT_2SPIN_DIR_PF(Dir,basep); \
81 LOAD64(%r10,isigns); \
82 RECON; \
83 } \
84 base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
85 PREFETCH_CHIMU(base); \
86
// ASM_LEG_XP (INTERIOR variant): first leg of a site. Fetches the info for
// Dir, issues PF_GAUGE(Xp) (hard-coded to Xp) and PREFETCH1_CHIMU(base), runs
// ZERO_PSI, then a normal ASM_LEG.
// NOTE(review): ZERO_PSI presumably clears the accumulator because this first
// leg may be skipped entirely - confirm against the ZERO_PSI definition.
87#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
88 base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
89 PF_GAUGE(Xp); \
90 PREFETCH1_CHIMU(base); \
91 { ZERO_PSI; } \
92 ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)
93
// RESULT: forwards both arguments to SAVE_RESULT.
94#define RESULT(base,basep) SAVE_RESULT(base,basep);
95
96#endif
98// Post comms kernel
100#ifdef EXTERIOR
101
102
// ASM_LEG (EXTERIOR variant): fetches the info for Dir itself (ent advanced)
// and computes the leg only when it is neither `local` nor on the same node
// (!st.same_node[Dir]): LOAD_CHI(base), MULT_2SPIN_DIR_PF(Dir,base), reload
// isigns into %r10, RECON, then nmu is incremented.
// NxtDir, PERMUTE_DIR and PROJ are accepted but unused in this variant, and
// `base` (not basep) is passed as the second MULT_2SPIN_DIR_PF argument.
// nmu therefore counts the legs actually computed for the current site.
103#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
104 base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
105 if((!local)&&(!st.same_node[Dir]) ) { \
106 LOAD_CHI(base); \
107 MULT_2SPIN_DIR_PF(Dir,base); \
108 LOAD64(%r10,isigns); \
109 RECON; \
110 nmu++; \
111 }
112
// ASM_LEG_XP (EXTERIOR variant): first leg of a site. Resets nmu to 0 and
// runs ZERO_PSI, then performs the same conditional leg as ASM_LEG above.
113#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
114 nmu=0; \
115 { ZERO_PSI;} \
116 base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
117 if((!local)&&(!st.same_node[Dir]) ) { \
118 LOAD_CHI(base); \
119 MULT_2SPIN_DIR_PF(Dir,base); \
120 LOAD64(%r10,isigns); \
121 RECON; \
122 nmu++; \
123 }
124
// RESULT (EXTERIOR variant): calls ADD_RESULT only if at least one leg was
// computed (nmu != 0).
// NOTE(review): the basep parameter is ignored - `base` is passed for both
// ADD_RESULT arguments. Confirm this is intentional.
// NOTE(review): expands to a bare `if`; an `else` written after
// RESULT(...) would bind to it.
125#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}
126
127#endif
// Kernel body: loops over Ns sites starting at ssU and, for each, over the
// Ls values of s, writing one result per (site, s) through RESULT.
// Names used but not declared here (ssU, ss, Ls, Ns, st, in, out, U, ptype,
// local, perm, isigns) must be provided by the scope that includes this file.
128{
// Count of legs computed for the current site. It is only assigned by the
// EXTERIOR ASM_LEG_XP / ASM_LEG macros and is not initialised here.
129 int nmu;
131 uint64_t base;
132 uint64_t basep;
// Address of in[0] as an integer; passed to st.GetInfo / st.GetPFInfo.
133 const uint64_t plocal =(uint64_t) & in[0];
134
137 int nmax=U.oSites();
138 for(int site=0;site<Ns;site++) {
139#ifndef EXTERIOR
140 // int sU =lo.Reorder(ssU);
141 int sU =ssU;
// Next site index, wrapping to 0 at nmax; used for the prefetch tables.
142 int ssn=ssU+1; if(ssn>=nmax) ssn=0;
143 // int sUn=lo.Reorder(ssn);
144 int sUn=ssn;
145 LOCK_GAUGE(0);
146#else
147 int sU =ssU;
148 int ssn=ssU+1; if(ssn>=nmax) ssn=0;
149 int sUn=ssn;
150#endif
151 for(int s=0;s<Ls;s++) {
// Flattened (site, s) indices for the current and the next site. Note that
// ssn is reused here and now holds the flattened next-site index.
152 ss =sU*Ls+s;
153 ssn=sUn*Ls+s;
// Eight stencil entries per flattened site.
154 int ent=ss*8;// 2*Ndim
155 int nent=ssn*8;
156
// NOTE(review): the listing skips numbered lines 157-160 and 162-165 here;
// presumably the ASM_LEG_XP / ASM_LEG invocations for the eight legs live
// there - confirm against the original header.
161
166
167#ifdef EXTERIOR
// Leaves the s loop when no leg was computed. In the visible code nothing
// assigns nmu before this read; it relies on ASM_LEG_XP (nmu=0) having been
// expanded above.
168 if (nmu==0) break;
169 // if (nmu!=0) std::cout << "EXT "<<sU<<std::endl;
170#endif
// Destination for this (site, s) plus the next prefetch pointer.
171 base = (uint64_t) &out[ss];
172 basep= st.GetPFInfo(nent,plocal); nent++;
173 RESULT(base,basep);
174 }
175 ssU++;
// NOTE(review): UNLOCK_GAUGE(0) runs in every configuration, whereas
// LOCK_GAUGE(0) only appears in the #ifndef EXTERIOR branch above.
176 UNLOCK_GAUGE(0);
177 }
178}
179
// Undefine every macro this file defines (the DIRn_PROJMEM / DIRn_RECON
// mappings, ASM_LEG, ASM_LEG_XP and RESULT).
// NOTE(review): presumably this lets the file be included again under a
// different KERNEL_DAG / INTERIOR / EXTERIOR configuration - confirm.
180#undef DIR0_PROJMEM
181#undef DIR1_PROJMEM
182#undef DIR2_PROJMEM
183#undef DIR3_PROJMEM
184#undef DIR4_PROJMEM
185#undef DIR5_PROJMEM
186#undef DIR6_PROJMEM
187#undef DIR7_PROJMEM
188#undef DIR0_RECON
189#undef DIR1_RECON
190#undef DIR2_RECON
191#undef DIR3_RECON
192#undef DIR4_RECON
193#undef DIR5_RECON
194#undef DIR6_RECON
195#undef DIR7_RECON
196#undef ASM_LEG
197#undef ASM_LEG_XP
198#undef RESULT
#define LOCK_GAUGE(dir)
Definition BGQQPX.h:130
#define UNLOCK_GAUGE(dir)
Definition BGQQPX.h:138
#define MASK_REGS
Definition BGQQPX.h:65
#define perm(a, b, n, w)
static constexpr int Xm
Definition QCD.h:45
static constexpr int Tm
Definition QCD.h:48
static constexpr int Ns
Definition QCD.h:51
static constexpr int Tp
Definition QCD.h:44
static constexpr int Zp
Definition QCD.h:43
static constexpr int Zm
Definition QCD.h:47
static constexpr int Xp
Definition QCD.h:41
static constexpr int Yp
Definition QCD.h:42
static constexpr int Ym
Definition QCD.h:46
#define PERMUTE_DIR2
#define PERMUTE_DIR1
#define PERMUTE_DIR0
#define PERMUTE_DIR3
#define DIR7_PROJMEM(base)
const uint64_t plocal
COMPLEX_SIGNS(isigns)
uint64_t basep
#define DIR1_RECON
#define DIR4_PROJMEM(base)
#define DIR5_PROJMEM(base)
#define DIR1_PROJMEM(base)
#define DIR6_PROJMEM(base)
#define DIR3_RECON
#define DIR7_RECON
#define DIR6_RECON
#define DIR2_RECON
#define DIR5_RECON
#define DIR4_RECON
#define DIR2_PROJMEM(base)
#define DIR3_PROJMEM(base)
uint64_t base
#define DIR0_RECON
#define DIR0_PROJMEM(base)
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230