34#if defined(GRID_CUDA) || defined(GRID_HIP)
35#include <thrust/complex.h>
// ---------------------------------------------------------------------------
// Compile-time selector builders.
// Each macro packs its arguments, most significant first, into one integer.
// NOTE(review): presumably used as permute/shuffle immediates - confirm at
// the call sites.
// ---------------------------------------------------------------------------

// Four 2-bit fields -> 8-bit value: A in bits 7:6, B in 5:4, C in 3:2, D in 1:0.
#define _MM_SELECT_FOUR_FOUR(A,B,C,D) (((A)<<6)|((B)<<4)|((C)<<2)|(D))

// Same expression as _MM_SELECT_FOUR_FOUR, but emitted as a string literal
// (arguments are stringized). The emitted text is kept exactly as it was.
#define _MM_SELECT_FOUR_FOUR_STRING(A,B,C,D) "((" #A "<<6)|(" #B "<<4)|(" #C "<<2)|(" #D "))"

// Eight 1-bit fields -> 8-bit value: A in bit 7 down to H in bit 0.
// G is shifted by 1; the previous shift of 4 collided with D and left bit 1
// unreachable.
#define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) (((A)<<7)|((B)<<6)|((C)<<5)|((D)<<4)|((E)<<3)|((F)<<2)|((G)<<1)|(H))

// Four 1-bit fields in the low nibble (upper four fields are zero).
// There must be no whitespace between the macro name and '(' - otherwise the
// preprocessor defines an object-like macro and every use expands to garbage.
#define _MM_SELECT_FOUR_TWO(A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D)

// Two 1-bit fields in bits 1:0.
#define _MM_SELECT_TWO_TWO(A,B) _MM_SELECT_FOUR_TWO(0,0,A,B)

// Flag value 0x100 (bit 8), i.e. just above the 8-bit range the selector
// macros above can produce.
// NOTE(review): presumably OR'ed into a permute argument to request a rotate -
// confirm against the code that tests this bit.
#define RotateBit (0x100)
62#ifdef GRID_DEFAULT_PRECISION_DOUBLE
68#if defined(GRID_CUDA) || defined(GRID_HIP)
69typedef thrust::complex<RealF>
ComplexF;
70typedef thrust::complex<RealD>
ComplexD;
71typedef thrust::complex<Real>
Complex;
72typedef thrust::complex<uint16_t>
ComplexH;
73template<
class T>
using complex = thrust::complex<T>;
// Generic complex<T> spelling for this build: resolves to std::complex<T>.
template <typename T> using complex = std::complex<T>;
225template<
class VectorSIMD>
242 std::vector<ComplexF,alignedAllocator<ComplexF> > buf(nn);
245 for(
int i=0;i<nn;i++){
247 if(i<nn-1) stream<<
",";
255 std::vector<ComplexD,alignedAllocator<ComplexD> > buf(nn);
258 for(
int i=0;i<nn;i++){
260 if(i<nn-1) stream<<
",";
275 std::vector<RealF,alignedAllocator<RealF> > buf(nn);
278 for(
int i=0;i<nn;i++){
280 if(i<nn-1) stream<<
",";
288 std::vector<RealD,alignedAllocator<RealD> > buf(nn);
291 for(
int i=0;i<nn;i++){
293 if(i<nn-1) stream<<
",";
300 std::vector<Integer,alignedAllocator<Integer> > buf(nn);
303 for(
int i=0;i<nn;i++){
305 if(i<nn-1) stream<<
",";
#define accelerator_inline
Grid_simd2< complex< double >, vComplexD > vComplexD2
Defines templated class Grid_simd to deal with inner vector types.
Grid_simd< complex< float >, SIMD_Ftype > vComplexF
Grid_simd< float, SIMD_Ftype > vRealF
Grid_simd< complex< double >, SIMD_Dtype > vComplexD
Grid_simd< Integer, SIMD_Itype > vInteger
Grid_simd< double, SIMD_Dtype > vRealD
#define NAMESPACE_BEGIN(A)
accelerator_inline RealF imag(const ComplexF &r)
accelerator_inline RealD toReal(const ComplexD &r)
accelerator_inline ComplexD pow(const ComplexD &r, RealD y)
accelerator_inline void vstream(ComplexF &l, const ComplexF &r)
accelerator_inline Integer mod(Integer a, Integer y)
accelerator_inline ComplexF timesMinusI(const ComplexF &r)
accelerator_inline ComplexF timesI(const ComplexF &r)
accelerator_inline RealF conjugate(const RealF &r)
accelerator_inline void mult(ComplexD *__restrict__ y, const ComplexD *__restrict__ l, const ComplexD *__restrict__ r)
std::complex< uint16_t > ComplexH
std::complex< T > complex
accelerator_inline void mac(ComplexD *__restrict__ y, const ComplexD *__restrict__ a, const ComplexD *__restrict__ x)
accelerator_inline void sub(ComplexD *__restrict__ y, const ComplexD *__restrict__ l, const ComplexD *__restrict__ r)
std::ostream & operator<<(std::ostream &stream, const vComplexF &o)
accelerator_inline void add(ComplexD *__restrict__ y, const ComplexD *__restrict__ l, const ComplexD *__restrict__ r)
accelerator_inline ComplexD innerProduct(const ComplexD &l, const ComplexD &r)
accelerator_inline RealF adj(const RealF &r)
std::complex< RealF > ComplexF
accelerator_inline Integer div(Integer a, Integer y)
accelerator_inline ComplexF projImag(const ComplexF &r)
std::complex< RealD > ComplexD
accelerator_inline void zeroit(itype &arg)
accelerator_inline void Gpermute(VectorSIMD &y, const VectorSIMD &b, int perm)
accelerator_inline RealF real(const RealF &r)
accelerator_inline ComplexF projReal(const ComplexF &r)
accelerator_inline ComplexD toComplex(const RealD &in)
std::complex< Real > Complex
accelerator_inline ComplexD Reduce(const ComplexD &r)
static accelerator_inline constexpr int Nsimd(void)