Grid 0.7.0
Grid_qpx.h
Go to the documentation of this file.
1/*******************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/simd/Grid_qpx.h
6
7 Copyright (C) 2016
8 Copyright (C) 2017
9
10 Author: Antonin Portelli <antonin.portelli@me.com>
11 Andrew Lawson <andrew.lawson1991@gmail.com>
12
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26
27 See the full license in the file "LICENSE" in the top level distribution directory
28******************************************************************************/
29
30#ifndef GEN_SIMD_WIDTH
31#define GEN_SIMD_WIDTH 32u
32#endif
33#include "Grid_generic_types.h" // Definitions for simulated integer SIMD.
34
36
37#ifdef QPX
38#include <spi/include/kernel/location.h>
39#include <spi/include/l1p/types.h>
40#include <hwi/include/bqc/l1p_mmio.h>
41#include <hwi/include/bqc/A2_inlines.h>
42#endif
43
44NAMESPACE_BEGIN(Optimization);
45
// Plain aggregate of four floats used as the single-precision vector type.
// NOTE(review): the closing line of this typedef was missing from the listing;
// the name vector4float is taken from its uses throughout this file — confirm
// that no attribute (e.g. alignment) was dropped with it.
typedef struct
{
  float v0, v1, v2, v3;
} vector4float;
50
51inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
52{
53 stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
54 return stream;
55};
56
57inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
58{
59 stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
60 return stream;
61};
62
63struct Vsplat{
64 //Complex float
65 inline vector4float operator()(float a, float b){
66 return (vector4float){a, b, a, b};
67 }
68 // Real float
69 inline vector4float operator()(float a){
70 return (vector4float){a, a, a, a};
71 }
72 //Complex double
73 inline vector4double operator()(double a, double b){
74 return (vector4double){a, b, a, b};
75 }
76 //Real double
77 inline vector4double operator()(double a){
78 return (vector4double){a, a, a, a};
79 }
80 //Integer
82 veci out;
83
85 {
86 out.v[i] = a;
87 }
88
89 return out;
90 }
91};
92
93struct Vstore{
94 //Float
95 inline void operator()(vector4double a, float *f){
96 vec_st(a, 0, f);
97 }
98
99 inline void operator()(vector4double a, vector4float &f){
100 vec_st(a, 0, (float *)(&f));
101 }
102
103 inline void operator()(vector4float a, float *f){
104 f[0] = a.v0;
105 f[1] = a.v1;
106 f[2] = a.v2;
107 f[3] = a.v3;
108 }
109
110 //Double
111 inline void operator()(vector4double a, double *d){
112 vec_st(a, 0, d);
113 }
114
115 //Integer
116 inline void operator()(veci a, Integer *i){
117 *((veci *)i) = a;
118 }
119};
120
121struct Vstream{
122 //Float
123 inline void operator()(float *f, vector4double a){
124 vec_st(a, 0, f);
125 }
126
127 inline void operator()(vector4float f, vector4double a){
128 vec_st(a, 0, (float *)(&f));
129 }
130
131 inline void operator()(float *f, vector4float a){
132 f[0] = a.v0;
133 f[1] = a.v1;
134 f[2] = a.v2;
135 f[3] = a.v3;
136 }
137 //Double
138 inline void operator()(double *d, vector4double a){
139 vec_st(a, 0, d);
140 }
141
142};
143
144struct Vset{
145 // Complex float
146 inline vector4float operator()(Grid::ComplexF *a){
147 return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
148 }
149 // Complex double
150 inline vector4double operator()(Grid::ComplexD *a){
151 return vec_ld(0, (double *)a);
152 }
153
154 // Real float
155 inline vector4float operator()(float *a){
156 return (vector4float){a[0], a[1], a[2], a[3]};
157 }
158
159 inline vector4double operator()(vector4float a){
160 return vec_ld(0, (float *)(&a));
161 }
162
163 // Real double
164 inline vector4double operator()(double *a){
165 return vec_ld(0, a);
166 }
167 // Integer
169 veci out;
170
171 out = *((veci *)a);
172
173 return out;
174 }
175};
176
// Generic reduction functor. The class is templated so that the output type
// can be selected; the useful implementations are the explicit
// specializations. Reaching this primary version is an error: it prints a
// message and terminates the process with exit status 1.
template <typename Out_type, typename In_type>
struct Reduce{
  inline Out_type operator()(In_type in)
  {
    printf("Error, using wrong Reduce function\n");
    exit(1);
    // Not reached; present only because the function is non-void.
    return 0;
  }
};
187
189// Arithmetic operations
191
// Generates the vector4float overload of a three-operand operation `fn`:
// each argument is converted to vector4double with Vset, the vector4double
// overload of `fn` is called, and the result is written back into a
// vector4float with Vstore. `pref` is the declaration prefix (e.g. inline).
#define FLOAT_WRAP_3(fn, pref)                                          \
  pref vector4float fn(vector4float a, vector4float b, vector4float c)  \
  {                                                                     \
    vector4float  r;                                                    \
    vector4double ad = Vset()(a);                                       \
    vector4double bd = Vset()(b);                                       \
    vector4double cd = Vset()(c);                                       \
    vector4double rd = fn(ad, bd, cd);                                  \
                                                                        \
    Vstore()(rd, r);                                                    \
    return r;                                                           \
  }
206
// Generates the vector4float overload of a two-operand operation `fn`:
// both arguments are converted to vector4double with Vset, the vector4double
// overload of `fn` is called, and the result is written back into a
// vector4float with Vstore. `pref` is the declaration prefix (e.g. inline).
#define FLOAT_WRAP_2(fn, pref)                          \
  pref vector4float fn(vector4float a, vector4float b)  \
  {                                                     \
    vector4float  r;                                    \
    vector4double ad = Vset()(a);                       \
    vector4double bd = Vset()(b);                       \
    vector4double rd = fn(ad, bd);                      \
                                                        \
    Vstore()(rd, r);                                    \
    return r;                                           \
  }
220
// Generates the vector4float overload of a one-operand operation `fn`:
// the argument is converted to vector4double with Vset, the vector4double
// overload of `fn` is called, and the result is written back into a
// vector4float with Vstore. `pref` is the declaration prefix (e.g. inline).
#define FLOAT_WRAP_1(fn, pref)          \
  pref vector4float fn(vector4float a)  \
  {                                     \
    vector4float  r;                    \
    vector4double ad = Vset()(a);       \
    vector4double rd = fn(ad);          \
                                        \
    Vstore()(rd, r);                    \
    return r;                           \
  }
233
234struct Sum{
235 //Complex/Real double
236 inline vector4double operator()(vector4double a, vector4double b){
237 return vec_add(a, b);
238 }
239
240 //Complex/Real float
241 FLOAT_WRAP_2(operator(), inline)
242
243 //Integer
244 inline veci operator()(veci a, veci b){
245 veci out;
246
248 {
249 out.v[i] = a.v[i] + b.v[i];
250 }
251
252 return out;
253 }
254};
255
256struct Sub{
257 //Complex/Real double
258 inline vector4double operator()(vector4double a, vector4double b){
259 return vec_sub(a, b);
260 }
261
262 //Complex/Real float
263 FLOAT_WRAP_2(operator(), inline)
264
265 //Integer
266 inline veci operator()(veci a, veci b){
267 veci out;
268
270 {
271 out.v[i] = a.v[i] - b.v[i];
272 }
273
274 return out;
275 }
276};
277
278struct MultRealPart{
279 // Complex double
280 inline vector4double operator()(vector4double a, vector4double b){
281 // return vec_xmul(b, a);
282 return vec_xmul(a, b);
283 }
284 FLOAT_WRAP_2(operator(), inline)
285};
286struct MaddRealPart{
287 // Complex double
288 inline vector4double operator()(vector4double a, vector4double b,vector4double c){
289 return vec_xmadd(a, b, c);
290 }
291 FLOAT_WRAP_3(operator(), inline)
292};
293struct MultComplex{
294 // Complex double
295 inline vector4double operator()(vector4double a, vector4double b){
296 return vec_xxnpmadd(a, b, vec_xmul(b, a));
297 }
298
299 // Complex float
300 FLOAT_WRAP_2(operator(), inline)
301};
302
303struct Mult{
304 // Real double
305 inline vector4double operator()(vector4double a, vector4double b){
306 return vec_mul(a, b);
307 }
308
309 // Real float
310 FLOAT_WRAP_2(operator(), inline)
311
312 // Integer
313 inline veci operator()(veci a, veci b){
314 veci out;
315
317 {
318 out.v[i] = a.v[i]*b.v[i];
319 }
320
321 return out;
322 }
323};
324
325struct Div{
326 // Real double
327 inline vector4double operator()(vector4double a, vector4double b){
328 return vec_swdiv(a, b);
329 }
330
331 // Real float
332 FLOAT_WRAP_2(operator(), inline)
333
334 // Integer
335 inline veci operator()(veci a, veci b){
336 veci out;
337
339 {
340 out.v[i] = a.v[i]/b.v[i];
341 }
342
343 return out;
344 }
345};
346
347struct Conj{
348 // Complex double
349 inline vector4double operator()(vector4double v){
350 return vec_mul(v, (vector4double){1., -1., 1., -1.});
351 }
352
353 // Complex float
354 FLOAT_WRAP_1(operator(), inline)
355};
356
357struct TimesMinusI{
358 //Complex double
359 inline vector4double operator()(vector4double v){
360 return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
361 (vector4double){0., 0., 0., 0.});
362 }
363
364 // Complex float
365 FLOAT_WRAP_2(operator(), inline)
366};
367
368struct TimesI{
369 //Complex double
370 inline vector4double operator()(vector4double v){
371 return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
372 (vector4double){0., 0., 0., 0.});
373 }
374
375 // Complex float
376 FLOAT_WRAP_2(operator(), inline)
377};
378#define USE_FP16
379struct PrecisionChange {
380 static inline vech StoH (const vector4float &a, const vector4float &b) {
381 vech ret;
382 std::cout << GridLogError << "QPX single to half precision conversion not yet supported." << std::endl;
383 assert(0);
384 return ret;
385 }
386 static inline void HtoS (vech h, vector4float &sa, vector4float &sb) {
387 std::cout << GridLogError << "QPX half to single precision conversion not yet supported." << std::endl;
388 assert(0);
389 }
390 static inline vector4float DtoS (vector4double a, vector4double b) {
391 vector4float ret;
392 std::cout << GridLogError << "QPX double to single precision conversion not yet supported." << std::endl;
393 assert(0);
394 return ret;
395 }
396 static inline void StoD (vector4float s, vector4double &a, vector4double &b) {
397 std::cout << GridLogError << "QPX single to double precision conversion not yet supported." << std::endl;
398 assert(0);
399 }
400 static inline vech DtoH (vector4double a, vector4double b,
401 vector4double c, vector4double d) {
402 vech ret;
403 std::cout << GridLogError << "QPX double to half precision conversion not yet supported." << std::endl;
404 assert(0);
405 return ret;
406 }
407 static inline void HtoD (vech h, vector4double &a, vector4double &b,
408 vector4double &c, vector4double &d) {
409 std::cout << GridLogError << "QPX half to double precision conversion not yet supported." << std::endl;
410 assert(0);
411 }
412};
413
415// Exchange support
// Generates the vector4float overload of an exchange routine `fn`:
// both inputs are converted to vector4double with Vset, the vector4double
// overload of `fn` is called, and the two results are written back into the
// vector4float outputs with Vstore.
#define FLOAT_WRAP_EXCHANGE(fn)                                   \
  static inline void fn(vector4float &out1, vector4float &out2,   \
                        vector4float in1,  vector4float in2)      \
  {                                                               \
    vector4double out1d, out2d;                                   \
    vector4double in1d = Vset()(in1);                             \
    vector4double in2d = Vset()(in2);                             \
                                                                  \
    fn(out1d, out2d, in1d, in2d);                                 \
    Vstore()(out1d, out1);                                        \
    Vstore()(out2d, out2);                                        \
  }
427
428struct Exchange{
429
430 // double precision
431 static inline void Exchange0(vector4double &out1, vector4double &out2,
432 vector4double in1, vector4double in2) {
433 out1 = vec_perm(in1, in2, vec_gpci(0145));
434 out2 = vec_perm(in1, in2, vec_gpci(02367));
435 }
436 static inline void Exchange1(vector4double &out1, vector4double &out2,
437 vector4double in1, vector4double in2) {
438 out1 = vec_perm(in1, in2, vec_gpci(0426));
439 out2 = vec_perm(in1, in2, vec_gpci(01537));
440 }
441 static inline void Exchange2(vector4double &out1, vector4double &out2,
442 vector4double in1, vector4double in2) {
443 assert(0);
444 }
445 static inline void Exchange3(vector4double &out1, vector4double &out2,
446 vector4double in1, vector4double in2) {
447 assert(0);
448 }
449
450 // single precision
455};
456
457struct Permute{
458 //Complex double
459 static inline vector4double Permute0(vector4double v){ //0123 -> 2301
460 return vec_perm(v, v, vec_gpci(02301));
461 };
462 static inline vector4double Permute1(vector4double v){ //0123 -> 1032
463 return vec_perm(v, v, vec_gpci(01032));
464 };
465 static inline vector4double Permute2(vector4double v){
466 return v;
467 };
468 static inline vector4double Permute3(vector4double v){
469 return v;
470 };
471
472 // Complex float
473 FLOAT_WRAP_1(Permute0, static inline)
474 FLOAT_WRAP_1(Permute1, static inline)
475 FLOAT_WRAP_1(Permute2, static inline)
476 FLOAT_WRAP_1(Permute3, static inline)
477};
478
479struct Rotate{
480
481 template<int n> static inline vector4double tRotate(vector4double v){
482 if ( n==1 ) return vec_perm(v, v, vec_gpci(01230));
483 if ( n==2 ) return vec_perm(v, v, vec_gpci(02301));
484 if ( n==3 ) return vec_perm(v, v, vec_gpci(03012));
485 return v;
486 };
487 template<int n> static inline vector4float tRotate(vector4float a)
488 {
489 vector4double ad, rd;
490 vector4float r;
491 ad = Vset()(a);
492 rd = tRotate<n>(ad);
493 Vstore()(rd, r);
494 return r;
495 };
496
497 static inline vector4double rotate(vector4double v, int n){
498 switch(n){
499 case 0:
500 return v;
501 break;
502 case 1:
503 return tRotate<1>(v);
504 break;
505 case 2:
506 return tRotate<2>(v);
507 break;
508 case 3:
509 return tRotate<3>(v);
510 break;
511 default: assert(0);
512 }
513 }
514
515 static inline vector4float rotate(vector4float v, int n){
516 vector4double vd, rd;
517 vector4float r;
518 vd = Vset()(v);
519 rd = rotate(vd, n);
520 Vstore()(rd, r);
521 return r;
522 }
523};
524
525//Complex float Reduce
526template<>
527inline Grid::ComplexF
529 vector4float v1,v2;
530
531 v1 = Optimization::Permute::Permute0(v);
532 v1 = Optimization::Sum()(v1, v);
533
534 return Grid::ComplexF(v1.v0, v1.v1);
535}
536//Real float Reduce
537template<>
538inline Grid::RealF
540 vector4float v1,v2;
541
542 v1 = Optimization::Permute::Permute0(v);
543 v1 = Optimization::Sum()(v1, v);
544 v2 = Optimization::Permute::Permute1(v1);
545 v1 = Optimization::Sum()(v1, v2);
546
547 return v1.v0;
548}
549
550
551//Complex double Reduce
552template<>
553inline Grid::ComplexD
555 vector4double v1;
556
557 v1 = Optimization::Permute::Permute0(v);
558 v1 = vec_add(v1, v);
559
560 return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1));
561}
562
563//Real double Reduce
564template<>
565inline Grid::RealD
567 vector4double v1,v2;
568
569 v1 = Optimization::Permute::Permute0(v);
570 v1 = vec_add(v1, v);
571 v2 = Optimization::Permute::Permute1(v1);
572 v1 = vec_add(v1, v2);
573
574 return vec_extract(v1, 0);
575}
576
577//Integer Reduce
578template<>
580 Integer a = 0;
581 for (unsigned int i = 0; i < W<Integer>::r; ++i)
582 {
583 a += in.v[i];
584 }
585 return a;
586}
587
588NAMESPACE_END(Optimization);
589
591// Here assign types
592typedef Optimization::vech SIMD_Htype; // Half precision type
593typedef Optimization::vector4float SIMD_Ftype; // Single precision type
594typedef vector4double SIMD_Dtype; // Double precision type
595typedef Optimization::veci SIMD_Itype; // Integer type
596
597// prefetch utilities
// Prefetch hooks: both are deliberate no-ops in this back-end.
inline void v_prefetch0(int size, const char *ptr)
{
}
inline void prefetch_HINT_T0(const char *ptr)
{
}
600
601// Function name aliases
602typedef Optimization::Vsplat VsplatSIMD;
603typedef Optimization::Vstore VstoreSIMD;
604typedef Optimization::Vset VsetSIMD;
605typedef Optimization::Vstream VstreamSIMD;
606template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
607
608// Arithmetic operations
609typedef Optimization::Sum SumSIMD;
610typedef Optimization::Sub SubSIMD;
611typedef Optimization::Mult MultSIMD;
612typedef Optimization::Div DivSIMD;
613typedef Optimization::MultComplex MultComplexSIMD;
614typedef Optimization::MultRealPart MultRealPartSIMD;
615typedef Optimization::MaddRealPart MaddRealPartSIMD;
616typedef Optimization::Conj ConjSIMD;
617typedef Optimization::TimesMinusI TimesMinusISIMD;
618typedef Optimization::TimesI TimesISIMD;
619
Optimization::Vstream VstreamSIMD
Optimization::TimesMinusI TimesMinusISIMD
Optimization::MultComplex MultComplexSIMD
Optimization::TimesI TimesISIMD
Optimization::Reduce< S, T > ReduceSIMD
vec< Integer > veci
Optimization::Mult MultSIMD
Optimization::MaddRealPart MaddRealPartSIMD
Optimization::vecd SIMD_Dtype
Optimization::veci SIMD_Itype
Optimization::Vstore VstoreSIMD
Optimization::Conj ConjSIMD
Optimization::vecf SIMD_Ftype
Optimization::Vsplat VsplatSIMD
Optimization::Sum SumSIMD
Optimization::Sub SubSIMD
Optimization::Div DivSIMD
vec< uint16_t > vech
Optimization::MultRealPart MultRealPartSIMD
Optimization::Vset VsetSIMD
Optimization::vech SIMD_Htype
#define VECTOR_FOR(i, w, inc)
#define FLOAT_WRAP_1(fn, pref)
Definition Grid_qpx.h:221
#define FLOAT_WRAP_3(fn, pref)
Definition Grid_qpx.h:192
#define FLOAT_WRAP_2(fn, pref)
Definition Grid_qpx.h:207
std::ostream & operator<<(std::ostream &stream, const vector4double a)
Definition Grid_qpx.h:51
void prefetch_HINT_T0(const char *ptr)
Definition Grid_sse4.h:517
void v_prefetch0(int size, const char *ptr)
Definition Grid_sse4.h:516
accelerator_inline Grid_simd< S, V > rotate(Grid_simd< S, V > b, int nrot)
GridLogger GridLogError(1, "Error", GridLogColours, "RED")
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
uint32_t Integer
Definition Simd.h:58
vector4double operator()(vector4double v)
Definition Grid_qpx.h:349
vector4double operator()(vector4double a, vector4double b)
Definition Grid_qpx.h:327
static void Exchange1(vec< T > &out1, vec< T > &out2, const vec< T > &in1, const vec< T > &in2)
static void Exchange0(vec< T > &out1, vec< T > &out2, const vec< T > &in1, const vec< T > &in2)
FLOAT_WRAP_EXCHANGE(Exchange3)
FLOAT_WRAP_EXCHANGE(Exchange2)
FLOAT_WRAP_EXCHANGE(Exchange0)
static void Exchange3(vector4double &out1, vector4double &out2, vector4double in1, vector4double in2)
Definition Grid_qpx.h:445
static void Exchange2(vector4double &out1, vector4double &out2, vector4double in1, vector4double in2)
Definition Grid_qpx.h:441
FLOAT_WRAP_EXCHANGE(Exchange1)
static void Exchange2(vec< T > &out1, vec< T > &out2, const vec< T > &in1, const vec< T > &in2)
static void Exchange3(vecf &out1, vecf &out2, const vecf &in1, const vecf &in2)
static void Exchange1(vector4double &out1, vector4double &out2, vector4double in1, vector4double in2)
Definition Grid_qpx.h:436
static void Exchange0(vector4double &out1, vector4double &out2, vector4double in1, vector4double in2)
Definition Grid_qpx.h:431
vector4double operator()(vector4double a, vector4double b, vector4double c)
Definition Grid_qpx.h:288
vector4double operator()(vector4double a, vector4double b)
Definition Grid_qpx.h:295
vector4double operator()(vector4double a, vector4double b)
Definition Grid_qpx.h:280
vector4double operator()(vector4double a, vector4double b)
Definition Grid_qpx.h:305
static vec< T > Permute0(vec< T > in)
static vector4double Permute0(vector4double v)
Definition Grid_qpx.h:459
static vector4double Permute1(vector4double v)
Definition Grid_qpx.h:462
static vector4double Permute2(vector4double v)
Definition Grid_qpx.h:465
static vecd Permute1(vecd in)
static vector4double Permute3(vector4double v)
Definition Grid_qpx.h:468
static vecf Permute3(vecf in)
static vecd Permute2(vecd in)
static void HtoS(vech h, vector4float &sa, vector4float &sb)
Definition Grid_qpx.h:386
static vech StoH(const vector4float &a, const vector4float &b)
Definition Grid_qpx.h:380
static vector4float DtoS(vector4double a, vector4double b)
Definition Grid_qpx.h:390
static vech DtoH(vector4double a, vector4double b, vector4double c, vector4double d)
Definition Grid_qpx.h:400
static void HtoD(vech h, vector4double &a, vector4double &b, vector4double &c, vector4double &d)
Definition Grid_qpx.h:407
static void StoD(vector4float s, vector4double &a, vector4double &b)
Definition Grid_qpx.h:396
Out_type operator()(In_type in)
Definition Grid_qpx.h:181
static vec< T > tRotate(vec< T > in)
static vector4float rotate(vector4float v, int n)
Definition Grid_qpx.h:515
static vector4float tRotate(vector4float a)
Definition Grid_qpx.h:487
static vector4double rotate(vector4double v, int n)
Definition Grid_qpx.h:497
static vector4double tRotate(vector4double v)
Definition Grid_qpx.h:481
vector4double operator()(vector4double a, vector4double b)
Definition Grid_qpx.h:258
vector4double operator()(vector4double a, vector4double b)
Definition Grid_qpx.h:236
vector4double operator()(vector4double v)
Definition Grid_qpx.h:370
vector4double operator()(vector4double v)
Definition Grid_qpx.h:359
veci operator()(Integer *a)
Definition Grid_qpx.h:168
vector4double operator()(Grid::ComplexD *a)
Definition Grid_qpx.h:150
vector4double operator()(double *a)
Definition Grid_qpx.h:164
vector4float operator()(float *a)
Definition Grid_qpx.h:155
vector4double operator()(vector4float a)
Definition Grid_qpx.h:159
vector4float operator()(Grid::ComplexF *a)
Definition Grid_qpx.h:146
vector4double operator()(double a, double b)
Definition Grid_qpx.h:73
vector4float operator()(float a, float b)
Definition Grid_qpx.h:65
veci operator()(Integer a)
Definition Grid_qpx.h:81
vector4double operator()(double a)
Definition Grid_qpx.h:77
vector4float operator()(float a)
Definition Grid_qpx.h:69
void operator()(veci a, Integer *i)
Definition Grid_qpx.h:116
void operator()(vector4double a, vector4float &f)
Definition Grid_qpx.h:99
void operator()(vector4double a, double *d)
Definition Grid_qpx.h:111
void operator()(vector4float a, float *f)
Definition Grid_qpx.h:103
void operator()(vector4double a, float *f)
Definition Grid_qpx.h:95
void operator()(vector4float f, vector4double a)
Definition Grid_qpx.h:127
void operator()(float *f, vector4double a)
Definition Grid_qpx.h:123
void operator()(double *d, vector4double a)
Definition Grid_qpx.h:138
void operator()(float *f, vector4float a)
Definition Grid_qpx.h:131
T v[W< T >::r]