30#ifndef GRID_QCD_WILSON_COMPRESSOR_H
31#define GRID_QCD_WILSON_COMPRESSOR_H
41template<
class _HCspinor,
class _Hspinor,
class _Spinor,
class projector>
89 exchange(tmp1,tmp2,vp0,vp1,
type);
119 constexpr unsigned int Nsimd = vobj::Nsimd();
120 unsigned int mask = Nsimd >> (
type + 1);
122 int j0 = lane &(~mask);
123 int j1 = lane |(mask) ;
124 const vobj *vp0 = &in0;
125 const vobj *vp1 = &in1;
126 const vobj *vp = (lane&mask) ? vp1:vp0;
130 projector::Proj(psa,sa,
mu,
dag);
131 projector::Proj(psb,sb,
mu,
dag);
137 projector::Proj(temp1,in0,
mu,
dag);
138 projector::Proj(temp2,in1,
mu,
dag);
139 exchange(temp3,temp4,temp1,temp2,
type);
154#define DECLARE_PROJ(Projector,Compressor,spProj) \
157 template<class hsp,class fsp> \
158 static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){ \
162 template<typename HCS,typename HS,typename S> using Compressor = WilsonCompressorTemplate<HCS,HS,S,Projector>;
175 template<
class hsp,
class fsp>
177 int mudag=dag? mu : (mu+
Nd)%(2*
Nd);
187 default: assert(0);
break;
195template<
class vobj,
class cobj,
class Parameters>
206 const std::vector<int> &directions,
207 const std::vector<int> &distances,Parameters p)
208 :
CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
214 template <
class compressor>
217 std::vector<std::vector<CommsRequest_t> > reqs;
226 template <
class compressor>
233 template <
class compressor>
238 typedef typename compressor::SiteSpinor SiteSpinor;
239 typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
240 typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
242 this->
_grid->StencilBarrier();
244 assert(source.
Grid()==this->_grid);
248 WilsonXpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XpCompress;
249 WilsonYpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YpCompress;
250 WilsonZpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZpCompress;
251 WilsonTpCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TpCompress;
252 WilsonXmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> XmCompress;
253 WilsonYmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> YmCompress;
254 WilsonZmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> ZmCompress;
255 WilsonTmCompressor<SiteHalfCommSpinor,SiteHalfSpinor,SiteSpinor> TmCompress;
257 int dag = compress.dag;
259#define vet_same_node(a,b) \
284 this->
_grid->StencilBarrier();
accelerator_inline int acceleratorSIMTlane(int Nsimd)
#define accelerator_inline
#define accelerator_barrier(dummy)
accelerator_inline void vstream(Grid_simd2< S, V > &out, const Grid_simd2< S, V > &in)
accelerator_inline Grid_simd< S, V > sin(const Grid_simd< S, V > &r)
#define NAMESPACE_BEGIN(A)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline void exchangeSIMT(vobj &mp0, vobj &mp1, const vobj &vp0, const vobj &vp1, Integer type)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline void spProjXp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjYm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjTm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjZp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjTp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjZm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjXm(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
accelerator_inline void spProjYp(iVector< vtype, Nhs > &hspin, const iVector< vtype, Ns > &fspin)
#define vet_same_node(a, b)
#define DECLARE_PROJ(Projector, Compressor, spProj)
WilsonCompressorTemplate< HCS, HS, S, WilsonProjector > WilsonCompressor
const CartesianStencilView< vobj, cobj, Parameters > View_type
void CommsMergeSHM(decompressor decompress)
void CommsMerge(decompressor decompress)
CartesianStencil(GridBase *grid, int npoints, int checkerboard, const std::vector< int > &directions, const std::vector< int > &distances, Parameters p=Parameters(), bool preserve_shm=false)
int HaloGatherDir(const Lattice< vobj > &source, compressor &compress, int point, int &face_idx)
GridBase * Grid(void) const
accelerator_inline void Compress(SiteHalfSpinor &buf, const SiteSpinor &in) const
SiteHalfCommSpinor::vector_type vComplexLow
WilsonCompressorTemplate(int _dag=0)
accelerator_inline int CommDatumSize(void) const
accelerator_inline bool DecompressionStep(void) const
accelerator_inline void Exchange(SiteHalfSpinor &mp0, SiteHalfSpinor &mp1, const SiteHalfSpinor &vp0, const SiteHalfSpinor &vp1, Integer type) const
accelerator_inline void CompressExchange(SiteHalfSpinor &out0, SiteHalfSpinor &out1, const SiteSpinor &in0, const SiteSpinor &in1, Integer type) const
SiteHalfSpinor::vector_type vComplexHigh
_HCspinor SiteHalfCommSpinor
accelerator_inline void Decompress(SiteHalfSpinor &out, SiteHalfSpinor &in) const
static accelerator void Proj(hsp &result, const fsp &in, int mu, int dag)
CartesianStencil< SiteSpinor, SiteHalfSpinor, ImplParams > Base
void HaloExchangeOptGather(const Lattice< vobj > &source, compressor &compress)
WilsonStencil(GridBase *grid, int npoints, int checkerboard, const std::vector< int > &directions, const std::vector< int > &distances, Parameters p)
void HaloGatherOpt(const Lattice< vobj > &source, compressor &compress)
void HaloExchangeOpt(const Lattice< vobj > &source, compressor &compress)
Base::View_type View_type