Grid 0.7.0
Grid_doubled_vector.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/simd/Grid_doubled_vector.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#pragma once
29
31
32template <class Scalar_type, class Vector_type>
34public:
36 typedef Vector_type vector_type;
37 typedef Scalar_type scalar_type;
38
39 typedef union conv_t_union {
40 Vector_type v;
41 Scalar_type s[sizeof(Vector_type) / sizeof(Scalar_type)];
44
45 static constexpr int nvec=2;
46 Vector_type v[nvec];
47
48 static accelerator_inline constexpr int Nsimd(void) {
49 static_assert( (sizeof(Vector_type) / sizeof(Scalar_type) >= 1), " size mismatch " );
50
51 return nvec*sizeof(Vector_type) / sizeof(Scalar_type);
52 }
53
55 for(int n=0;n<nvec;n++) v[n] = rhs.v[n];
56 return *this;
57 };
59 for(int n=0;n<nvec;n++) v[n] = rhs.v[n];
60 return *this;
61 }; // faster than not declaring it and leaving to the compiler
62
64 accelerator_inline Grid_simd2(const Grid_simd2 &rhs) { for(int n=0;n<nvec;n++) v[n] = rhs.v[n]; };
65 accelerator_inline Grid_simd2(const Grid_simd2 &&rhs){ for(int n=0;n<nvec;n++) v[n] = rhs.v[n]; };
66 accelerator_inline Grid_simd2(const Real a) { vsplat(*this, Scalar_type(a)); };
67 // Enable if complex type
68 template <typename S = Scalar_type> accelerator_inline
69 Grid_simd2(const typename std::enable_if<is_complex<S>::value, S>::type a) {
70 vsplat(*this, a);
71 };
72
74 // Constructors
77 vzero(*this);
78 return (*this);
79 }
80
82 // mac, mult, sub, add, adj
84
85 friend accelerator_inline void mac(Grid_simd2 *__restrict__ y,
86 const Grid_simd2 *__restrict__ a,
87 const Grid_simd2 *__restrict__ x) {
88 *y = (*a) * (*x) + (*y);
89 };
90
91 friend accelerator_inline void mult(Grid_simd2 *__restrict__ y,
92 const Grid_simd2 *__restrict__ l,
93 const Grid_simd2 *__restrict__ r) {
94 *y = (*l) * (*r);
95 }
96
97 friend accelerator_inline void sub(Grid_simd2 *__restrict__ y,
98 const Grid_simd2 *__restrict__ l,
99 const Grid_simd2 *__restrict__ r) {
100 *y = (*l) - (*r);
101 }
102 friend accelerator_inline void add(Grid_simd2 *__restrict__ y,
103 const Grid_simd2 *__restrict__ l,
104 const Grid_simd2 *__restrict__ r) {
105 *y = (*l) + (*r);
106 }
107 friend accelerator_inline void mac(Grid_simd2 *__restrict__ y,
108 const Scalar_type *__restrict__ a,
109 const Grid_simd2 *__restrict__ x) {
110 *y = (*a) * (*x) + (*y);
111 };
112 friend accelerator_inline void mult(Grid_simd2 *__restrict__ y,
113 const Scalar_type *__restrict__ l,
114 const Grid_simd2 *__restrict__ r) {
115 *y = (*l) * (*r);
116 }
117 friend accelerator_inline void sub(Grid_simd2 *__restrict__ y,
118 const Scalar_type *__restrict__ l,
119 const Grid_simd2 *__restrict__ r) {
120 *y = (*l) - (*r);
121 }
122 friend accelerator_inline void add(Grid_simd2 *__restrict__ y,
123 const Scalar_type *__restrict__ l,
124 const Grid_simd2 *__restrict__ r) {
125 *y = (*l) + (*r);
126 }
127
128 friend accelerator_inline void mac(Grid_simd2 *__restrict__ y,
129 const Grid_simd2 *__restrict__ a,
130 const Scalar_type *__restrict__ x) {
131 *y = (*a) * (*x) + (*y);
132 };
133 friend accelerator_inline void mult(Grid_simd2 *__restrict__ y,
134 const Grid_simd2 *__restrict__ l,
135 const Scalar_type *__restrict__ r) {
136 *y = (*l) * (*r);
137 }
138 friend accelerator_inline void sub(Grid_simd2 *__restrict__ y,
139 const Grid_simd2 *__restrict__ l,
140 const Scalar_type *__restrict__ r) {
141 *y = (*l) - (*r);
142 }
143 friend accelerator_inline void add(Grid_simd2 *__restrict__ y,
144 const Grid_simd2 *__restrict__ l,
145 const Scalar_type *__restrict__ r) {
146 *y = (*l) + (*r);
147 }
148
150 // FIXME: the load/store, get, set and prefetch helpers below are slated for removal
152 friend accelerator_inline void vset(Grid_simd2 &ret, Scalar_type *a) {
153 for(int n=0;n<nvec;n++) vset(ret.v[n],a);
154 }
155
157 // Vstore
159 friend accelerator_inline void vstore(const Grid_simd2 &ret, Scalar_type *a) {
160 for(int n=0;n<nvec;n++) vstore(ret.v[n],a);
161 }
162
164 // Vprefetch
167 vprefetch(v.v[0]);
168 }
169
171 // Reduce
173 friend accelerator_inline Scalar_type Reduce(const Grid_simd2 &in) {
174 return Reduce(in.v[0])+ Reduce(in.v[1]);
175 }
176
178 // operator scalar * simd
180 friend accelerator_inline Grid_simd2 operator*(const Scalar_type &a, Grid_simd2 b) {
181 Grid_simd2 va;
182 vsplat(va, a);
183 return va * b;
184 }
185 friend accelerator_inline Grid_simd2 operator*(Grid_simd2 b, const Scalar_type &a) {
186 return a * b;
187 }
188
190 // Divides
192 friend accelerator_inline Grid_simd2 operator/(const Scalar_type &a, Grid_simd2 b) {
193 Grid_simd2 va;
194 vsplat(va, a);
195 return va / b;
196 }
197 friend accelerator_inline Grid_simd2 operator/(Grid_simd2 b, const Scalar_type &a) {
198 Grid_simd2 va;
199 vsplat(va, a);
200 return b / a;
201 }
202
204 // Unary negation
207 Grid_simd2 ret;
208 vzero(ret);
209 ret = ret - r;
210 return ret;
211 }
212 // *=,+=,-= operators
214 *this = (*this) * r;
215 return *this;
216 }
218 *this = *this + r;
219 return *this;
220 }
222 *this = *this - r;
223 return *this;
224 }
225
227 // Not all functions are supported
228 // through SIMD and must breakout to
229 // scalar type and back again. This
230 // provides support
232 template <class functor>
233 friend accelerator_inline Grid_simd2 SimdApply(const functor &func, const Grid_simd2 &v) {
234 Grid_simd2 ret;
235 for(int n=0;n<nvec;n++){
236 ret.v[n]=SimdApply(func,v.v[n]);
237 }
238 return ret;
239 }
240 template <class functor>
241 friend accelerator_inline Grid_simd2 SimdApplyBinop(const functor &func,
242 const Grid_simd2 &x,
243 const Grid_simd2 &y) {
244 Grid_simd2 ret;
245 for(int n=0;n<nvec;n++){
246 ret.v[n]=SimdApplyBinop(func,x.v[n],y.v[n]);
247 }
248 return ret;
249 }
250
251 // Exchange
252 // Al Ah , Bl Bh -> Al Bl Ah,Bh
255 out1.v[0] = in1.v[0];
256 out1.v[1] = in2.v[0];
257 out2.v[0] = in1.v[1];
258 out2.v[1] = in2.v[1];
259 }
261 exchange0(out1.v[0],out2.v[0],in1.v[0],in2.v[0]);
262 exchange0(out1.v[1],out2.v[1],in1.v[1],in2.v[1]);
263 }
265 exchange1(out1.v[0],out2.v[0],in1.v[0],in2.v[0]);
266 exchange1(out1.v[1],out2.v[1],in1.v[1],in2.v[1]);
267 }
269 exchange2(out1.v[0],out2.v[0],in1.v[0],in2.v[0]);
270 exchange2(out1.v[1],out2.v[1],in1.v[1],in2.v[1]);
271 }
273 exchange3(out1.v[0],out2.v[0],in1.v[0],in2.v[0]);
274 exchange3(out1.v[1],out2.v[1],in1.v[1],in2.v[1]);
275 }
277 {
278 if (n==3) {
279 exchange3(out1,out2,in1,in2);
280 } else if(n==2) {
281 exchange2(out1,out2,in1,in2);
282 } else if(n==1) {
283 exchange1(out1,out2,in1,in2);
284 } else if(n==0) {
285 exchange0(out1,out2,in1,in2);
286 }
287 }
288
289 // General permute; assumes vector length is same across
290 // all subtypes; may not be a good assumption, but could
291 // add the vector width as a template param for BG/Q for example
294 y.v[0]=b.v[1];
295 y.v[1]=b.v[0];
296 }
298 permute0(y.v[0],b.v[0]);
299 permute0(y.v[1],b.v[1]);
300 }
302 permute1(y.v[0],b.v[0]);
303 permute1(y.v[1],b.v[1]);
304 }
306 permute2(y.v[0],b.v[0]);
307 permute2(y.v[1],b.v[1]);
308 }
310 permute3(y.v[0],b.v[0]);
311 permute3(y.v[1],b.v[1]);
312 }
314 if(perm==3) permute3(y, b);
315 else if(perm==2) permute2(y, b);
316 else if(perm==1) permute1(y, b);
317 else if(perm==0) permute0(y, b);
318 }
319
321 // Getting single lanes
323 accelerator_inline Scalar_type getlane(int lane) const {
324 if(lane < vector_type::Nsimd() ) return v[0].getlane(lane);
325 else return v[1].getlane(lane%vector_type::Nsimd());
326 }
327
328 accelerator_inline void putlane(const Scalar_type &S, int lane){
329 if(lane < vector_type::Nsimd() ) v[0].putlane(S,lane);
330 else v[1].putlane(S,lane%vector_type::Nsimd());
331 }
332}; // end of Grid_simd2 class definition
333
335// Define available types
337
340
341
342
344// Some traits to recognise the types
346template <typename T>
347struct is_simd : public std::false_type {};
348template <> struct is_simd<vRealF> : public std::true_type {};
349template <> struct is_simd<vRealD> : public std::true_type {};
350template <> struct is_simd<vRealH> : public std::true_type {};
351template <> struct is_simd<vComplexF> : public std::true_type {};
352template <> struct is_simd<vComplexD> : public std::true_type {};
353template <> struct is_simd<vComplexH> : public std::true_type {};
354template <> struct is_simd<vInteger> : public std::true_type {};
355template <> struct is_simd<vRealD2> : public std::true_type {};
356template <> struct is_simd<vComplexD2> : public std::true_type {};
357
358template <typename T> using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >;
359template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >;
360
362// insert / extract with complex support
364template <class S, class V>
366 return in.getlane(lane);
367}
368template <class S, class V>
369accelerator_inline void putlane(Grid_simd<S, V> &vec,const S &_S, int lane){
370 vec.putlane(_S,lane);
371}
372template <class S,IfNotSimd<S> = 0 >
373accelerator_inline S getlane(const S &in,int lane) {
374 return in;
375}
376template <class S,IfNotSimd<S> = 0 >
377accelerator_inline void putlane(S &vec,const S &_S, int lane){
378 vec = _S;
379}
380template <class S, class V>
382 return in.getlane(lane);
383}
384template <class S, class V>
385accelerator_inline void putlane(Grid_simd2<S, V> &vec,const S &_S, int lane){
386 vec.putlane(_S,lane);
387}
388
389
391// General rotate
393
394template <class S, class V>
396 S* typepun =(S*) &src;
397 vsplat(ret,typepun[lane]);
398}
399template <class S, class V, IfComplex<S> =0>
401 typedef typename V::vector_type vector_type;
402 S* typepun =(S*) &src;
403 ret.v[0].v = unary<vector_type>(real(typepun[lane]), VsplatSIMD());
404 ret.v[1].v = unary<vector_type>(real(typepun[lane]), VsplatSIMD());
405}
406
407
409// Splat
411
412// this is only for the complex version
413template <class S, class V, IfComplex<S> = 0, class ABtype>
414accelerator_inline void vsplat(Grid_simd2<S, V> &ret, ABtype a, ABtype b) {
415 vsplat(ret.v[0],a,b);
416 vsplat(ret.v[1],a,b);
417}
418
419// overload if complex
420template <class S, class V>
424template <class S, class V>
428
429// if real fill with a, if complex fill with a in the real part (first function
430// above)
431template <class S, class V>
433{
434 vsplat(ret.v[0],a);
435 vsplat(ret.v[1],a);
436}
437
438
440// Initialise to 1,0,i for the correct types
442// For complex types
443template <class S, class V, IfComplex<S> = 0>
445 vsplat(ret, S(1.0, 0.0));
446}
447template <class S, class V, IfComplex<S> = 0>
449 vsplat(ret, S(0.0, 0.0));
450} // use xor?
451template <class S, class V, IfComplex<S> = 0>
453 vsplat(ret, S(0.0, 1.0));
454}
455
456template <class S, class V, IfComplex<S> = 0>
458 vsplat(ret, S(1.0, -1.0));
459}
460template <class S, class V, IfComplex<S> = 0>
462 vsplat(ret, S(-1.0, 1.0));
463}
464
465// if not complex overload here
466template <class S, class V, IfReal<S> = 0>
468 vsplat(ret, S(1.0));
469}
470template <class S, class V, IfReal<S> = 0>
472 vsplat(ret, S(0.0));
473}
474
475// For integral types
476template <class S, class V, IfInteger<S> = 0>
478 vsplat(ret, 1);
479}
480template <class S, class V, IfInteger<S> = 0>
482 vsplat(ret, 0);
483}
484template <class S, class V, IfInteger<S> = 0>
486 vsplat(ret, 0xFFFFFFFF);
487}
488template <class S, class V, IfInteger<S> = 0>
490 vsplat(ret, 0);
491}
492template <class S, class V>
496
498// Vstream
500template <class S, class V, IfReal<S> = 0>
502 vstream(out.v[0],in.v[0]);
503 vstream(out.v[1],in.v[1]);
504}
505template <class S, class V, IfComplex<S> = 0>
507 vstream(out.v[0],in.v[0]);
508 vstream(out.v[1],in.v[1]);
509}
510template <class S, class V, IfInteger<S> = 0>
512 vstream(out.v[0],in.v[0]);
513 vstream(out.v[1],in.v[1]);
514}
515
517// Arithmetic operator overloads +,-,*
519template <class S, class V>
522 ret.v[0] = a.v[0]+b.v[0];
523 ret.v[1] = a.v[1]+b.v[1];
524 return ret;
525};
526
527template <class S, class V>
530 ret.v[0] = a.v[0]-b.v[0];
531 ret.v[1] = a.v[1]-b.v[1];
532 return ret;
533};
534
535// Distinguish between complex types and others
536template <class S, class V, IfComplex<S> = 0>
539 ret.v[0] =real_mult(a.v[0],b.v[0]);
540 ret.v[1] =real_mult(a.v[1],b.v[1]);
541 return ret;
542};
543template <class S, class V, IfComplex<S> = 0>
546 ret.v[0] =real_madd(a.v[0],b.v[0],c.v[0]);
547 ret.v[1] =real_madd(a.v[1],b.v[1],c.v[1]);
548 return ret;
549};
550
551
552// Distinguish between complex types and others
553template <class S, class V>
556 ret.v[0] = a.v[0]*b.v[0];
557 ret.v[1] = a.v[1]*b.v[1];
558 return ret;
559};
560
561// Distinguish between complex types and others
562template <class S, class V>
565 ret.v[0] = a.v[0]/b.v[0];
566 ret.v[1] = a.v[1]/b.v[1];
567 return ret;
568};
569
571// Conjugate
573template <class S, class V>
576 ret.v[0] = conjugate(in.v[0]);
577 ret.v[1] = conjugate(in.v[1]);
578 return ret;
579}
580template <class S, class V, IfNotInteger<S> = 0>
584
586// timesMinusI
588template <class S, class V>
590 timesMinusI(ret.v[0],in.v[0]);
591 timesMinusI(ret.v[1],in.v[1]);
592}
593template <class S, class V>
596 timesMinusI(ret.v[0],in.v[0]);
597 timesMinusI(ret.v[1],in.v[1]);
598 return ret;
599}
600
602// timesI
604template <class S, class V>
606 timesI(ret.v[0],in.v[0]);
607 timesI(ret.v[1],in.v[1]);
608}
609template <class S, class V>
612 timesI(ret.v[0],in.v[0]);
613 timesI(ret.v[1],in.v[1]);
614 return ret;
615}
616
618// Inner, outer
620template <class S, class V>
624template <class S, class V>
628
629template <class S, class V>
633
635// copy/splat complex real parts into real;
636// insert real into complex and zero imag;
639 Optimization::PrecisionChange::StoD(in.v,out.v[0].v,out.v[1].v);
640}
642 out.v=Optimization::PrecisionChange::DtoS(in.v[0].v,in.v[1].v);
643}
645 for(int m=0;m<nvec;m++){ precisionChange(out[m],in[m]); }
646}
648 for(int m=0;m<nvec;m++){ precisionChange(out[m],in[m]); }
649}
650
652 Optimization::PrecisionChange::StoD(in.v,out.v[0].v,out.v[1].v);
653}
655 out.v=Optimization::PrecisionChange::DtoS(in.v[0].v,in.v[1].v);
656}
657accelerator_inline void precisionChange(vRealD2 *out,const vRealF *in,int nvec){
658 for(int m=0;m<nvec;m++){ precisionChange(out[m],in[m]); }
659}
660accelerator_inline void precisionChange(vRealF *out,const vRealD2 *in,int nvec){
661 for(int m=0;m<nvec;m++){ precisionChange(out[m],in[m]); }
662}
663
665
666
#define accelerator_inline
#define accelerator
Optimization::Vsplat VsplatSIMD
accelerator_inline void vrsign(Grid_simd2< S, V > &ret)
accelerator_inline S getlane(const Grid_simd< S, V > &in, int lane)
accelerator_inline void vzero(Grid_simd2< S, V > &ret)
accelerator_inline void vtrue(Grid_simd2< S, V > &ret)
accelerator_inline Grid_simd2< S, V > real_mult(Grid_simd2< S, V > a, Grid_simd2< S, V > b)
accelerator_inline Grid_simd2< S, V > innerProduct(const Grid_simd2< S, V > &l, const Grid_simd2< S, V > &r)
accelerator_inline void vstream(Grid_simd2< S, V > &out, const Grid_simd2< S, V > &in)
accelerator_inline void vfalse(Grid_simd2< S, V > &ret)
accelerator_inline void vcomplex_i(Grid_simd2< S, V > &ret)
accelerator_inline void precisionChange(vComplexD2 &out, const vComplexF &in)
accelerator_inline Grid_simd2< S, V > operator*(Grid_simd2< S, V > a, Grid_simd2< S, V > b)
accelerator_inline void vone(Grid_simd2< S, V > &ret)
accelerator_inline Grid_simd2< S, V > conjugate(const Grid_simd2< S, V > &in)
Invoke< std::enable_if< is_simd< T >::value, int > > IfSimd
accelerator_inline Grid_simd2< S, V > real_madd(Grid_simd2< S, V > a, Grid_simd2< S, V > b, Grid_simd2< S, V > c)
accelerator_inline Grid_simd2< S, V > adj(const Grid_simd2< S, V > &in)
accelerator_inline void timesMinusI(Grid_simd2< S, V > &ret, const Grid_simd2< S, V > &in)
accelerator_inline void zeroit(Grid_simd2< S, V > &z)
accelerator_inline Grid_simd2< S, V > operator-(Grid_simd2< S, V > a, Grid_simd2< S, V > b)
accelerator_inline void rsplat(Grid_simd2< S, V > &ret, EnableIf< is_complex< S >, S > c)
accelerator_inline void putlane(Grid_simd< S, V > &vec, const S &_S, int lane)
accelerator_inline void timesI(Grid_simd2< S, V > &ret, const Grid_simd2< S, V > &in)
Invoke< std::enable_if<!is_simd< T >::value, unsigned > > IfNotSimd
accelerator_inline void rbroadcast(Grid_simd2< S, V > &ret, const Grid_simd2< S, V > &src, int lane)
accelerator_inline Grid_simd2< S, V > trace(const Grid_simd2< S, V > &arg)
accelerator_inline void visign(Grid_simd2< S, V > &ret)
accelerator_inline Grid_simd2< S, V > outerProduct(const Grid_simd2< S, V > &l, const Grid_simd2< S, V > &r)
accelerator_inline Grid_simd2< S, V > operator/(Grid_simd2< S, V > a, Grid_simd2< S, V > b)
Grid_simd2< complex< double >, vComplexD > vComplexD2
Grid_simd2< double, vRealD > vRealD2
accelerator_inline Grid_simd2< S, V > operator+(Grid_simd2< S, V > a, Grid_simd2< S, V > b)
accelerator_inline void vsplat(Grid_simd2< S, V > &ret, ABtype a, ABtype b)
accelerator_inline void vbroadcast(Grid_simd2< S, V > &ret, const Grid_simd2< S, V > &src, int lane)
#define perm(a, b, n, w)
Grid_simd< complex< float >, SIMD_Ftype > vComplexF
Out accelerator_inline unary(Input src, Operation op)
Grid_simd< uint16_t, SIMD_Htype > vRealH
Grid_simd< complex< uint16_t >, SIMD_Htype > vComplexH
Grid_simd< float, SIMD_Ftype > vRealF
Grid_simd< complex< double >, SIMD_Dtype > vComplexD
typename T::type Invoke
Invoke< std::enable_if<!Condition::value, ReturnType > > NotEnableIf
Invoke< std::enable_if< Condition::value, ReturnType > > EnableIf
Grid_simd< Integer, SIMD_Itype > vInteger
Grid_simd< double, SIMD_Dtype > vRealD
Lattice< vobj > real(const Lattice< vobj > &lhs)
Lattice< vobj > imag(const Lattice< vobj > &lhs)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline Grid_simd2 & operator*=(const Grid_simd2 &r)
friend accelerator_inline Scalar_type Reduce(const Grid_simd2 &in)
friend accelerator_inline void sub(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ l, const Grid_simd2 *__restrict__ r)
friend accelerator_inline void exchange(Grid_simd2 &out1, Grid_simd2 &out2, Grid_simd2 in1, Grid_simd2 in2, int n)
friend accelerator_inline void exchange0(Grid_simd2 &out1, Grid_simd2 &out2, Grid_simd2 in1, Grid_simd2 in2)
friend accelerator_inline void permute0(Grid_simd2 &y, Grid_simd2 b)
friend accelerator_inline Grid_simd2 operator/(const Scalar_type &a, Grid_simd2 b)
friend accelerator_inline void mult(Grid_simd2 *__restrict__ y, const Scalar_type *__restrict__ l, const Grid_simd2 *__restrict__ r)
friend accelerator_inline void add(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ l, const Scalar_type *__restrict__ r)
accelerator Grid_simd2()=default
friend accelerator_inline Grid_simd2 SimdApplyBinop(const functor &func, const Grid_simd2 &x, const Grid_simd2 &y)
friend accelerator_inline void permute4(Grid_simd2 &y, Grid_simd2 b)
Scalar_type scalar_type
friend accelerator_inline void exchange2(Grid_simd2 &out1, Grid_simd2 &out2, Grid_simd2 in1, Grid_simd2 in2)
accelerator_inline Grid_simd2 & operator-=(const Grid_simd2 &r)
accelerator_inline Grid_simd2 & operator=(const Grid_simd2 &&rhs)
friend accelerator_inline void sub(Grid_simd2 *__restrict__ y, const Scalar_type *__restrict__ l, const Grid_simd2 *__restrict__ r)
friend accelerator_inline void mult(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ l, const Grid_simd2 *__restrict__ r)
accelerator_inline Grid_simd2(const Grid_simd2 &rhs)
accelerator_inline Grid_simd2(const typename std::enable_if< is_complex< S >::value, S >::type a)
friend accelerator_inline void mac(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ a, const Scalar_type *__restrict__ x)
friend accelerator_inline void mult(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ l, const Scalar_type *__restrict__ r)
friend accelerator_inline void mac(Grid_simd2 *__restrict__ y, const Scalar_type *__restrict__ a, const Grid_simd2 *__restrict__ x)
friend accelerator_inline void permute(Grid_simd2 &y, Grid_simd2 b, int perm)
friend accelerator_inline Grid_simd2 operator*(Grid_simd2 b, const Scalar_type &a)
accelerator_inline void putlane(const Scalar_type &S, int lane)
friend accelerator_inline Grid_simd2 operator/(Grid_simd2 b, const Scalar_type &a)
static accelerator_inline constexpr int Nsimd(void)
friend accelerator_inline void add(Grid_simd2 *__restrict__ y, const Scalar_type *__restrict__ l, const Grid_simd2 *__restrict__ r)
accelerator_inline Grid_simd2 & operator=(const Grid_simd2 &rhs)
accelerator_inline Grid_simd2(const Real a)
accelerator_inline Grid_simd2(const Grid_simd2 &&rhs)
friend accelerator_inline Grid_simd2 operator*(const Scalar_type &a, Grid_simd2 b)
friend accelerator_inline void vprefetch(const Grid_simd2 &v)
friend accelerator_inline void mac(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ a, const Grid_simd2 *__restrict__ x)
friend accelerator_inline void sub(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ l, const Scalar_type *__restrict__ r)
friend accelerator_inline void exchange1(Grid_simd2 &out1, Grid_simd2 &out2, Grid_simd2 in1, Grid_simd2 in2)
friend accelerator_inline Grid_simd2 SimdApply(const functor &func, const Grid_simd2 &v)
union Grid_simd2::conv_t_union conv_t
friend accelerator_inline void vstore(const Grid_simd2 &ret, Scalar_type *a)
friend accelerator_inline void permute1(Grid_simd2 &y, Grid_simd2 b)
accelerator_inline Grid_simd2 & operator=(const Zero &z)
friend accelerator_inline void exchange4(Grid_simd2 &out1, Grid_simd2 &out2, Grid_simd2 in1, Grid_simd2 in2)
friend accelerator_inline void permute2(Grid_simd2 &y, Grid_simd2 b)
friend accelerator_inline Grid_simd2 operator-(const Grid_simd2 &r)
Vector_type vector_type
friend accelerator_inline void exchange3(Grid_simd2 &out1, Grid_simd2 &out2, Grid_simd2 in1, Grid_simd2 in2)
friend accelerator_inline void vset(Grid_simd2 &ret, Scalar_type *a)
accelerator_inline Scalar_type getlane(int lane) const
accelerator_inline Grid_simd2 & operator+=(const Grid_simd2 &r)
friend accelerator_inline void add(Grid_simd2 *__restrict__ y, const Grid_simd2 *__restrict__ l, const Grid_simd2 *__restrict__ r)
friend accelerator_inline void permute3(Grid_simd2 &y, Grid_simd2 b)
RealPart< Scalar_type >::type Real
accelerator_inline Scalar_type getlane(int lane) const
static accelerator_inline constexpr int Nsimd(void)
Vector_type v
Definition Simd.h:194
accelerator_inline conv_t_union()
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)]