89 out.
v[2*i] = a[i].real();
90 out.
v[2*i+1] = a[i].imag();
112 template <
typename T>
118 out.
v[i] = a.
v[i] + b.
v[i];
127 template <
typename T>
133 out.
v[i] = a.
v[i] - b.
v[i];
142 template <
typename T>
148 out.
v[i] = a.
v[i]*b.
v[i];
// cmul(a, b, c, i): for the adjacent element pairs starting at index i, stores
//   c[i]   = a[i]*b[i]   - a[i+1]*b[i+1]
//   c[i+1] = a[i]*b[i+1] + a[i+1]*b[i]
// which is the complex product when each pair holds (real, imaginary).
//
// Every parameter use is parenthesized so that an index expression containing
// an operator that binds looser than '+' (e.g. `1 << k`, `x & m`, `p ? q : r`)
// still addresses element i+1 rather than being re-associated by precedence.
//
// Notes for callers:
//  - the expansion is two statements and already ends in ';', so invoke it
//    inside a braced scope;
//  - each argument is evaluated several times, so pass side-effect-free
//    expressions;
//  - c must not alias a or b: c[i] is written before a[i]/b[i] are read again
//    for c[i+1].
#define cmul(a, b, c, i) \
  (c)[(i)]     = (a)[(i)]*(b)[(i)]     - (a)[(i) + 1]*(b)[(i) + 1]; \
  (c)[(i) + 1] = (a)[(i)]*(b)[(i) + 1] + (a)[(i) + 1]*(b)[(i)];
160 template <
typename T>
166 out.
v[2*i] = a.
v[2*i]*b.
v[2*i];
167 out.
v[2*i+1] = a.
v[2*i]*b.
v[2*i+1];
174 template <
typename T>
180 out.
v[2*i] = a.
v[2*i]*b.
v[2*i] + c.
v[2*i];
181 out.
v[2*i+1] = a.
v[2*i]*b.
v[2*i+1] + c.
v[2*i+1];
189 template <
typename T>
206 template <
typename T>
212 out.
v[i] = a.
v[i]/b.
v[i];
219#define conj(a, b, i) \
225 template <
typename T>
240#define timesmi(a, b, i) \
246 template <
typename T>
261#define timesi(a, b, i) \
267 template <
typename T>
344 template <
typename T,
int n>
347 unsigned int mask = w >> (n + 1);
351 if ( (i&mask) == 0 ) { out1.
v[i]=in1.
v[j1];}
352 else { out1.
v[i]=in2.
v[j1];}
354 if ( (i&mask) == 0 ) { out2.
v[i]=in1.
v[j2];}
355 else { out2.
v[i]=in2.
v[j2];}
358 template <
typename T>
362 template <
typename T>
366 template <
typename T>
370 template <
typename T>
379#define perm(a, b, n, w) \
380 unsigned int _mask = w >> (n + 1); \
381 VECTOR_FOR(i, w, 1) \
386#define DECL_PERMUTE_N(n) \
387 template <typename T> \
388 static accelerator_inline vec<T> Permute##n(vec<T> in) { \
390 perm(in.v, out.v, n, W<T>::r); \
404#define rot(a, b, n, w) \
405 VECTOR_FOR(i, w, 1) \
407 b[i] = a[(i + n)%w]; \
416 template <
typename T>
428#define acc(v, a, off, step, n) \
429 for (unsigned int i = off; i < n; i += step) \
434template <
typename Out_type,
typename In_type>
439 printf(
"Error, using wrong Reduce function\n");
448 float a = 0.f, b = 0.f;
453 return Grid::ComplexF(a, b);
469 double a = 0., b = 0.;
474 return Grid::ComplexD(a, b);
517template <
typename S,
typename T>
using ReduceSIMD = Optimization::Reduce<S,T>;
520typedef Optimization::Sum
SumSIMD;
521typedef Optimization::Sub
SubSIMD;
522typedef Optimization::Div
DivSIMD;
#define accelerator_inline
Optimization::Vstream VstreamSIMD
Optimization::TimesMinusI TimesMinusISIMD
Optimization::MultComplex MultComplexSIMD
Optimization::TimesI TimesISIMD
Optimization::Reduce< S, T > ReduceSIMD
Optimization::Mult MultSIMD
Optimization::MaddRealPart MaddRealPartSIMD
Optimization::vecd SIMD_Dtype
Optimization::veci SIMD_Itype
Optimization::Vstore VstoreSIMD
Optimization::Conj ConjSIMD
Optimization::vecf SIMD_Ftype
Optimization::Vsplat VsplatSIMD
Optimization::Sum SumSIMD
Optimization::Sub SubSIMD
Optimization::Div DivSIMD
Optimization::MultRealPart MultRealPartSIMD
Optimization::Vset VsetSIMD
Optimization::vech SIMD_Htype
accelerator_inline void v_prefetch0(int size, const char *ptr)
#define acc(v, a, off, step, n)
accelerator_inline void prefetch_HINT_T0(const char *ptr)
#define VECTOR_FOR(i, w, inc)
#define NAMESPACE_BEGIN(A)
accelerator_inline vec< T > operator()(vec< T > a)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
static accelerator_inline void Exchange1(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
static accelerator_inline void Exchange3(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
static accelerator_inline void Exchange2(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
static accelerator_inline void ExchangeN(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
static accelerator_inline void Exchange0(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b, vec< T > c)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
static vech StoH(const vecf &sa, const vecf &sb)
static accelerator_inline vech StoH(const vecf &a, const vecf &b)
static void StoD(vecf s, vecd &a, vecd &b)
static accelerator_inline void HtoD(vech h, vecd &a, vecd &b, vecd &c, vecd &d)
static accelerator_inline vecf DtoS(vecd a, vecd b)
static accelerator_inline void HtoS(vech h, vecf &sa, vecf &sb)
static vecf DtoS(vecd a, vecd b)
static accelerator_inline void StoD(vecf s, vecd &a, vecd &b)
static void HtoS(vech h, vecf &sa, vecf &sb)
static accelerator_inline vech DtoH(vecd a, vecd b, vecd c, vecd d)
accelerator_inline Out_type operator()(In_type in)
Out_type operator()(In_type in)
static vec< T > rotate(vec< T > in, int n)
static accelerator_inline vec< T > tRotate(vec< T > in)
static accelerator_inline vec< T > rotate(vec< T > in, int n)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
accelerator_inline vec< T > operator()(vec< T > a)
accelerator_inline vec< T > operator()(vec< T > a)
accelerator_inline vec< T > operator()(T *a)
accelerator_inline vec< T > operator()(std::complex< T > *a)
accelerator_inline vec< T > operator()(T a, T b)
accelerator_inline vec< T > operator()(T a)
accelerator_inline void operator()(vec< T > a, T *D)
accelerator_inline void operator()(T *a, vec< T > b)