Grid 0.7.0
WilsonCompressor.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/qcd/action/fermion/WilsonCompressor.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
11Author: paboyle <paboyle@ph.ed.ac.uk>
12
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26
27 See the full license in the file "LICENSE" in the top level distribution directory
28*************************************************************************************/
29/* END LEGAL */
30#ifndef GRID_QCD_WILSON_COMPRESSOR_H
31#define GRID_QCD_WILSON_COMPRESSOR_H
32
34
36// optimised versions supporting half precision too??? Deprecate
38
39
40//Could make FaceGather a template param, but then behaviour is runtime not compile time
41template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
43{
44public:
45
46 int mu,dag;
47
48 void Point(int p) { mu=p; };
49
51 dag = _dag;
52 }
53
54 typedef _Spinor SiteSpinor;
55 typedef _Hspinor SiteHalfSpinor;
56 typedef _HCspinor SiteHalfCommSpinor;
57 typedef typename SiteHalfCommSpinor::vector_type vComplexLow;
58 typedef typename SiteHalfSpinor::vector_type vComplexHigh;
59 constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh);
60
62 return sizeof(SiteHalfCommSpinor);
63 }
64
65 /*****************************************************/
66 /* Compress includes precision change if mpi data is not same */
67 /*****************************************************/
69 typedef decltype(coalescedRead(buf)) sobj;
70 sobj sp;
71 auto sin = coalescedRead(in);
72 projector::Proj(sp,sin,mu,dag);
73 coalescedWrite(buf,sp);
74 }
75
76 /*****************************************************/
77 /* Exchange includes precision change if mpi data is not same */
78 /*****************************************************/
80 SiteHalfSpinor &mp1,
81 const SiteHalfSpinor & vp0,
82 const SiteHalfSpinor & vp1,
83 Integer type) const {
84#ifdef GRID_SIMT
85 exchangeSIMT(mp0,mp1,vp0,vp1,type);
86#else
87 SiteHalfSpinor tmp1;
88 SiteHalfSpinor tmp2;
89 exchange(tmp1,tmp2,vp0,vp1,type);
90 vstream(mp0,tmp1);
91 vstream(mp1,tmp2);
92#endif
93 }
94
95
96 /*****************************************************/
97 /* Have a decompression step if mpi data is not same */
98 /*****************************************************/
100 SiteHalfSpinor &in) const {
101 out = in;
102 }
103
104 /*****************************************************/
105 /* Compress Exchange */
106 /*****************************************************/
108 SiteHalfSpinor &out1,
109 const SiteSpinor &in0,
110 const SiteSpinor &in1,
111 Integer type) const
112 {
113#ifdef GRID_SIMT
114 typedef SiteSpinor vobj;
115 typedef SiteHalfSpinor hvobj;
116 typedef decltype(coalescedRead(in0)) sobj;
117 typedef decltype(coalescedRead(out0)) hsobj;
118
119 constexpr unsigned int Nsimd = vobj::Nsimd();
120 unsigned int mask = Nsimd >> (type + 1);
121 int lane = acceleratorSIMTlane(Nsimd);
122 int j0 = lane &(~mask); // inner coor zero
123 int j1 = lane |(mask) ; // inner coor one
124 const vobj *vp0 = &in0;
125 const vobj *vp1 = &in1;
126 const vobj *vp = (lane&mask) ? vp1:vp0;
127 auto sa = coalescedRead(*vp,j0);
128 auto sb = coalescedRead(*vp,j1);
129 hsobj psa, psb;
130 projector::Proj(psa,sa,mu,dag);
131 projector::Proj(psb,sb,mu,dag);
132 coalescedWrite(out0,psa);
133 coalescedWrite(out1,psb);
134#else
135 SiteHalfSpinor temp1, temp2;
136 SiteHalfSpinor temp3, temp4;
137 projector::Proj(temp1,in0,mu,dag);
138 projector::Proj(temp2,in1,mu,dag);
139 exchange(temp3,temp4,temp1,temp2,type);
140 vstream(out0,temp3);
141 vstream(out1,temp4);
142#endif
143 }
144
145 /*****************************************************/
146 /* Pass the info to the stencil */
147 /*****************************************************/
149 return false;
150 }
151
152};
153
154#define DECLARE_PROJ(Projector,Compressor,spProj) \
155 class Projector { \
156 public: \
157 template<class hsp,class fsp> \
158 static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){ \
159 spProj(result,in); \
160 } \
161 }; \
162 template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
163
164DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp);
165DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp);
166DECLARE_PROJ(WilsonZpProjector,WilsonZpCompressor,spProjZp);
167DECLARE_PROJ(WilsonTpProjector,WilsonTpCompressor,spProjTp);
168DECLARE_PROJ(WilsonXmProjector,WilsonXmCompressor,spProjXm);
169DECLARE_PROJ(WilsonYmProjector,WilsonYmCompressor,spProjYm);
170DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm);
171DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm);
172
174public:
175 template<class hsp,class fsp>
176 static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){
177 int mudag=dag? mu : (mu+Nd)%(2*Nd);
178 switch(mudag) {
179 case Xp: spProjXp(result,in); break;
180 case Yp: spProjYp(result,in); break;
181 case Zp: spProjZp(result,in); break;
182 case Tp: spProjTp(result,in); break;
183 case Xm: spProjXm(result,in); break;
184 case Ym: spProjYm(result,in); break;
185 case Zm: spProjZm(result,in); break;
186 case Tm: spProjTm(result,in); break;
187 default: assert(0); break;
188 }
189 }
190};
// General compressor: WilsonCompressorTemplate bound to WilsonProjector as its projector policy.
// NOTE(review): WilsonProjector is presumably the class whose Proj() switches on (mu,dag) just
// above; its `class` line is absent from this listing -- confirm against the real header.
191template<typename HCS,typename HS,typename S> using WilsonCompressor = WilsonCompressorTemplate<HCS,HS,S,WilsonProjector>;
192
193// Fast comms buffer manipulation which should inline right through (avoid direction
194// dependent logic that prevents inlining)
195template<class vobj,class cobj,class Parameters>
196class WilsonStencil : public CartesianStencil<vobj,cobj,Parameters> {
197public:
198
200 typedef typename Base::View_type View_type;
201
202 // Vector<int> surface_list;
204 int npoints,
205 int checkerboard,
206 const std::vector<int> &directions,
207 const std::vector<int> &distances,Parameters p)
208 : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
209 {
210 // surface_list.resize(0);
211 this->same_node.resize(npoints);
212 };
213
214 template < class compressor>
215 void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
216 {
217 std::vector<std::vector<CommsRequest_t> > reqs;
218 this->HaloExchangeOptGather(source,compress);
219 // Asynchronous MPI calls multidirectional, Isend etc...
220 // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
221 this->Communicate();
222 this->CommsMerge(compress);
223 this->CommsMergeSHM(compress);
224 }
225
226 template <class compressor>
227 void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
228 {
229 this->Prepare();
230 this->HaloGatherOpt(source,compress);
231 }
232
// Gather the halo of `source` for the eight stencil points Xp..Tm, using the
// fixed-direction compressors generated by DECLARE_PROJ instead of the
// caller's compressor.
//
// source   : lattice to gather from; its Grid() must be this stencil's _grid (asserted).
// compress : only its nested types (SiteSpinor, SiteHalfSpinor, SiteHalfCommSpinor)
//            and its `dag` member are used here.
//
// The HaloGatherDir calls share `face_idx` (passed by reference) and run between
// the reset of u_comm_offset and the final assert on it, so their order matters.
233 template <class compressor>
234 void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
235 {
236 // Strategy. Inherit types from Compressor.
237 // Use types to select the write direction by direction compressor
238 typedef typename compressor::SiteSpinor SiteSpinor;
239 typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
240 typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
241
242 this->_grid->StencilBarrier();
243
244 assert(source.Grid()==this->_grid);
245
// Restart the comms-buffer offset; it is checked against _unified_buffer_size at the end.
246 this->u_comm_offset=0;
247
// One compressor per fixed projection direction, each declared without arguments.
248 WilsonXpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XpCompress;
249 WilsonYpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YpCompress;
250 WilsonZpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZpCompress;
251 WilsonTpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TpCompress;
252 WilsonXmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XmCompress;
253 WilsonYmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YmCompress;
254 WilsonZmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZmCompress;
255 WilsonTmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TmCompress;
256
257 int dag = compress.dag;
258 int face_idx=0;
// vet_same_node(a,b) only evaluates `b` and discards the result; `a` is ignored,
// so this->same_node[] is neither read nor written by the calls below.
// NOTE(review): the macro is never #undef'd, so it stays defined after this function.
259#define vet_same_node(a,b) \
260 { auto tmp = b; }
// dag set: point P is gathered with the P projector.
// dag clear: point P is gathered with the opposite-sign projector (Xp <-> Xm, ...).
261 if ( dag ) {
262 vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XpCompress,Xp,face_idx));
263 vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YpCompress,Yp,face_idx));
264 vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
265 vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TpCompress,Tp,face_idx));
266 vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XmCompress,Xm,face_idx));
267 vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YmCompress,Ym,face_idx));
268 vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
269 vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TmCompress,Tm,face_idx));
270 } else {
271 vet_same_node(this->same_node[Xp],this->HaloGatherDir(source,XmCompress,Xp,face_idx));
272 vet_same_node(this->same_node[Yp],this->HaloGatherDir(source,YmCompress,Yp,face_idx));
273 vet_same_node(this->same_node[Zp],this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
274 vet_same_node(this->same_node[Tp],this->HaloGatherDir(source,TmCompress,Tp,face_idx));
275 vet_same_node(this->same_node[Xm],this->HaloGatherDir(source,XpCompress,Xm,face_idx));
276 vet_same_node(this->same_node[Ym],this->HaloGatherDir(source,YpCompress,Ym,face_idx));
277 vet_same_node(this->same_node[Zm],this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
278 vet_same_node(this->same_node[Tm],this->HaloGatherDir(source,TpCompress,Tm,face_idx));
279 }
280 this->face_table_computed=1;
// Sanity check: after all eight gathers the offset must equal _unified_buffer_size.
281 assert(this->u_comm_offset==this->_unified_buffer_size);
// NOTE(review): the listing jumps from source line 281 to 283; whatever stood on
// line 282 is not shown here -- check the real header before editing this region.
283#ifdef NVLINK_GET
284 this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his
285 // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
286 // Or issue barrier AFTER the DMA is running
287#endif
288 }
289
290};
291
293#endif
accelerator_inline int acceleratorSIMTlane(int Nsimd)
#define accelerator_inline
#define accelerator
#define accelerator_barrier(dummy)
accelerator_inline void vstream(Grid_simd2< S, V > &out, const Grid_simd2< S, V > &in)
accelerator_inline Grid_simd< S, V > sin(const Grid_simd< S, V > &r)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int Xm
Definition QCD.h:45
static constexpr int Tm
Definition QCD.h:48
static constexpr int Nd
Definition QCD.h:52
static constexpr int Tp
Definition QCD.h:44
static constexpr int Zp
Definition QCD.h:43
static constexpr int Zm
Definition QCD.h:47
static constexpr int Xp
Definition QCD.h:41
static constexpr int Yp
Definition QCD.h:42
static constexpr int Ym
Definition QCD.h:46
uint32_t Integer
Definition Simd.h:58
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline void exchangeSIMT(vobj &mp0, vobj &mp1, const vobj &vp0, const vobj &vp1, Integer type)
Definition Tensor_SIMT.h:38
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline void spProjXp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:75
accelerator_inline void spProjYm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:95
accelerator_inline void spProjTm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:129
accelerator_inline void spProjZp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:106
accelerator_inline void spProjTp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:123
accelerator_inline void spProjZm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:111
accelerator_inline void spProjXm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:80
accelerator_inline void spProjYp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
Definition TwoSpinor.h:90
#define vet_same_node(a, b)
#define DECLARE_PROJ(Projector, Compressor, spProj)
WilsonCompressorTemplate< HCS, HS, S, WilsonProjector > WilsonCompressor
const CartesianStencilView< vobj, cobj, Parameters > View_type
Definition Stencil.h:206
void CommsMergeSHM(decompressor decompress)
Definition Stencil.h:809
void Prepare(void)
Definition Stencil.h:701
void CommsMerge(decompressor decompress)
Definition Stencil.h:805
void Communicate(void)
Definition Stencil.h:600
CartesianStencil(GridBase *grid, int npoints, int checkerboard, const std::vector< int > &directions, const std::vector< int > &distances, Parameters p=Parameters(), bool preserve_shm=false)
Definition Stencil.h:931
int HaloGatherDir(const Lattice< vobj > &source, compressor &compress, int point, int &face_idx)
Definition Stencil.h:620
GridBase * Grid(void) const
accelerator_inline void Compress(SiteHalfSpinor &buf, const SiteSpinor &in) const
SiteHalfCommSpinor::vector_type vComplexLow
accelerator_inline bool DecompressionStep(void) const
accelerator_inline void Exchange(SiteHalfSpinor &mp0, SiteHalfSpinor &mp1, const SiteHalfSpinor &vp0, const SiteHalfSpinor &vp1, Integer type) const
accelerator_inline void CompressExchange(SiteHalfSpinor &out0, SiteHalfSpinor &out1, const SiteSpinor &in0, const SiteSpinor &in1, Integer type) const
SiteHalfSpinor::vector_type vComplexHigh
accelerator_inline void Decompress(SiteHalfSpinor &out, SiteHalfSpinor &in) const
static accelerator void Proj(hsp &result, const fsp &in, int mu, int dag)
CartesianStencil< SiteSpinor, SiteHalfSpinor, ImplParams > Base
void HaloExchangeOptGather(const Lattice< vobj > &source, compressor &compress)
WilsonStencil(GridBase *grid, int npoints, int checkerboard, const std::vector< int > &directions, const std::vector< int > &distances, Parameters p)
void HaloGatherOpt(const Lattice< vobj > &source, compressor &compress)
void HaloExchangeOpt(const Lattice< vobj > &source, compressor &compress)