41 unsigned int Nsimd = vobj::Nsimd();
42 unsigned int mask = Nsimd >> (
type + 1);
44 int j0 = lane &(~mask);
45 int j1 = lane |(mask) ;
46 const vobj *vpa = &vp0;
47 const vobj *vpb = &vp1;
48 const vobj *vp = (lane&mask) ? (vpb) : (vpa);
81 for (
int d=0;d<nd;d++)
82 if (perm_mask & (0x1 << d)) {
permute(obj,tmp,d); tmp=obj;}
103typename vsimd::scalar_type
106 typedef typename vsimd::scalar_type S;
107 S * __restrict__ p=(S *)&
vec;
111typename vsimd::scalar_type
114 typedef typename vsimd::scalar_type S;
116 S * __restrict__ p=(S *)&
vec;
117 int mask = vsimd::Nsimd() >> (
ptype + 1);
118 int plane= doperm ? lane ^ mask : lane;
123 const typename vsimd::scalar_type & __restrict__ extracted,
126 typedef typename vsimd::scalar_type S;
127 S * __restrict__ p=(S *)&
vec;
134typename vsimd::vector_type::datum
137 typedef typename vsimd::vector_type::datum S;
138 S * __restrict__ p=(S *)&
vec;
142typename vsimd::vector_type::datum
145 typedef typename vsimd::vector_type::datum S;
147 S * __restrict__ p=(S *)&
vec;
148 int mask = vsimd::Nsimd() >> (
ptype + 1);
149 int plane= doperm ? lane ^ mask : lane;
154 const typename vsimd::vector_type::datum & __restrict__ extracted,
157 typedef typename vsimd::vector_type::datum S;
158 S * __restrict__ p=(S *)&
vec;
174 int mask = vobj::Nsimd() >> (
ptype + 1);
175 int plane= doperm ? lane ^ mask : lane;
182 for (
int d=0;d<nd;d++)
183 plane = (perm_mask & (0x1 << d)) ? plane ^ (vobj::Nsimd() >> (d + 1)) : plane;
accelerator_inline int acceleratorSIMTlane(int Nsimd)
#define accelerator_inline
accelerator_inline void vstream(Grid_simd2< S, V > &out, const Grid_simd2< S, V > &in)
accelerator_inline void permute(ComplexD &y, ComplexD b, int perm)
#define NAMESPACE_BEGIN(A)
accelerator_inline vobj coalescedReadPermute(const vobj &__restrict__ vec, int ptype, int doperm, int lane=0)
accelerator_inline void coalescedWriteNonTemporal(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline void exchangeSIMT(vobj &mp0, vobj &mp1, const vobj &vp0, const vobj &vp1, Integer type)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline vobj coalescedReadGeneralPermute(const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0)