Grid/dev/Grid__generic_8h_source.html

/*************************************************************************************


    Grid physics library, www.github.com/paboyle/Grid


    Source file: ./lib/simd/Grid_generic.h


    Copyright (C) 2015

    Copyright (C) 2017


Author: Antonin Portelli <antonin.portelli@me.com>

        Andrew Lawson    <andrew.lawson1991@gmail.com>


    This program is free software; you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation; either version 2 of the License, or

    (at your option) any later version.


    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU General Public License for more details.


    You should have received a copy of the GNU General Public License along

    with this program; if not, write to the Free Software Foundation, Inc.,

    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


    See the full license in the file "LICENSE" in the top level distribution directory

*************************************************************************************/

/*  END LEGAL */


#include "Grid_generic_types.h"


NAMESPACE_BEGIN(Grid);

NAMESPACE_BEGIN(Optimization);


// Broadcast scalar values into every lane of a generic vec<T>.
struct Vsplat{
  // Complex: fill the vector with repeated (a, b) pairs, a in the even
  // lanes and b in the odd lanes.
  template <typename T>
  accelerator_inline vec<T> operator()(T a, T b){
    vec<T> filled;
    VECTOR_FOR(lane, W<T>::r, 2)
      {
        filled.v[lane]     = a;
        filled.v[lane + 1] = b;
      }
    return filled;
  }

  // Real: copy a into each of the W<T>::r lanes.
  template <typename T>
  accelerator_inline vec<T> operator()(T a){
    vec<T> filled;
    VECTOR_FOR(lane, W<T>::r, 1)
      {
        filled.v[lane] = a;
      }
    return filled;
  }
};


// Store a whole vector to memory.
struct Vstore{
  // Real: D is reinterpreted as a vec<T>* and the full vector a is assigned
  // through it, so D must address storage for an entire vec<T>.
  // NOTE(review): presumably D must also satisfy vec<T>'s alignment -- confirm.
  template <typename T>
  accelerator_inline void operator()(vec<T> a, T *D){
    vec<T> *dst = reinterpret_cast<vec<T> *>(D);
    *dst = a;
  }
};


// Streaming store; in this generic implementation it is a plain assignment.
struct Vstream{
  // Real: a is reinterpreted as a vec<T>* and the full vector b is assigned
  // through it (note the argument order: destination first, value second).
  template <typename T>
  accelerator_inline void operator()(T * a, vec<T> b){
    vec<T> *dst = reinterpret_cast<vec<T> *>(a);
    *dst = b;
  }
};


// Load a vector from an array in memory.
struct Vset{
  // Complex: read W<T>::c complex numbers from a and interleave them,
  // real part in lane 2k and imaginary part in lane 2k+1.
  template <typename T>
  accelerator_inline vec<T> operator()(std::complex<T> *a){
    vec<T> loaded;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        loaded.v[2*k]   = a[k].real();
        loaded.v[2*k+1] = a[k].imag();
      }
    return loaded;
  }

  // Real: a is reinterpreted as a vec<T>* and the vector it addresses is
  // returned by value.
  template <typename T>
  accelerator_inline vec<T> operator()(T *a){
    const vec<T> *src = reinterpret_cast<vec<T> *>(a);
    return *src;
  }
};


// Arithmetic operations

// Lane-wise addition.
struct Sum{
  // Complex/Real: every one of the W<T>::r lanes is added independently, so
  // the same code serves real and interleaved complex data.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> total;
    VECTOR_FOR(lane, W<T>::r, 1)
      {
        total.v[lane] = a.v[lane] + b.v[lane];
      }
    return total;
  }
};


// Lane-wise subtraction.
struct Sub{
  // Complex/Real: computes a - b independently in each of the W<T>::r lanes.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> diff;
    VECTOR_FOR(lane, W<T>::r, 1)
      {
        diff.v[lane] = a.v[lane] - b.v[lane];
      }
    return diff;
  }
};


// Lane-wise multiplication.
struct Mult{
  // Real: computes a * b independently in each of the W<T>::r lanes.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> prod;
    VECTOR_FOR(lane, W<T>::r, 1)
      {
        prod.v[lane] = a.v[lane]*b.v[lane];
      }
    return prod;
  }
};


// cmul(a, b, c, i): complex product of the interleaved pairs starting at
// index i, i.e. (c[i], c[i+1]) = (a[i], a[i+1]) * (b[i], b[i+1]).
// c[i] is written before c[i+1] is computed, so c must not alias a or b.
// Arguments are parenthesised and the body is wrapped in do/while(0) so the
// macro expands to exactly one statement and is safe in an unbraced if/else.
#define cmul(a, b, c, i)                                                  \
  do {                                                                    \
    (c)[(i)]     = (a)[(i)]*(b)[(i)]     - (a)[(i) + 1]*(b)[(i) + 1];     \
    (c)[(i) + 1] = (a)[(i)]*(b)[(i) + 1] + (a)[(i) + 1]*(b)[(i)];         \
  } while (0)


// Scale each lane pair of b by the even lane of the matching pair of a.
// With lanes holding interleaved (re, im) values this is Re(a) * b.
struct MultRealPart{
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> scaled;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        // The odd lane of a is deliberately ignored.
        scaled.v[2*k]   = a.v[2*k]*b.v[2*k];
        scaled.v[2*k+1] = a.v[2*k]*b.v[2*k+1];
      }
    return scaled;
  }
};


// Multiply-add counterpart of MultRealPart: each lane pair of b is scaled by
// the even lane of the matching pair of a, then the matching pair of c is
// added. With interleaved (re, im) lanes this is Re(a) * b + c.
struct MaddRealPart{
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
    vec<T> fused;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        // The odd lane of a is deliberately ignored.
        fused.v[2*k]   = a.v[2*k]*b.v[2*k] + c.v[2*k];
        fused.v[2*k+1] = a.v[2*k]*b.v[2*k+1] + c.v[2*k+1];
      }
    return fused;
  }
};


// Complex multiplication of interleaved (re, im) lane pairs.
struct MultComplex{
  // Complex: for each of the W<T>::c pairs,
  //   out.re = a.re*b.re - a.im*b.im
  //   out.im = a.re*b.im + a.im*b.re
  // where re is lane 2k and im is lane 2k+1.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> prod;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        prod.v[2*k]   = a.v[2*k]*b.v[2*k]   - a.v[2*k+1]*b.v[2*k+1];
        prod.v[2*k+1] = a.v[2*k]*b.v[2*k+1] + a.v[2*k+1]*b.v[2*k];
      }
    return prod;
  }
};


#undef cmul


// Lane-wise division.
struct Div{
  // Real: computes a / b independently in each of the W<T>::r lanes.
  // No check is made for zero divisors.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> quot;
    VECTOR_FOR(lane, W<T>::r, 1)
      {
        quot.v[lane] = a.v[lane]/b.v[lane];
      }
    return quot;
  }
};


// conj(a, b, i): copy the pair starting at a[i] into b, negating the second
// element, i.e. (b[i], b[i+1]) = (a[i], -a[i+1]).
// Arguments are parenthesised and the body is wrapped in do/while(0) so the
// macro expands to exactly one statement and is safe in an unbraced if/else.
#define conj(a, b, i)                 \
  do {                                \
    (b)[(i)]     = (a)[(i)];          \
    (b)[(i) + 1] = -(a)[(i) + 1];     \
  } while (0)


// Complex conjugation of interleaved (re, im) lane pairs.
struct Conj{
  // Complex: the even lane of each pair is copied, the odd lane is negated.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a){
    vec<T> conjugated;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        conjugated.v[2*k]   = a.v[2*k];
        conjugated.v[2*k+1] = -a.v[2*k+1];
      }
    return conjugated;
  }
};


#undef conj


// timesmi(a, b, i): multiply the pair starting at a[i] by -i (minus the
// imaginary unit), i.e. (b[i], b[i+1]) = (a[i+1], -a[i]).
// b[i] is written before a[i] is read again, so b must not alias a.
// Arguments are parenthesised and the body is wrapped in do/while(0) so the
// macro expands to exactly one statement and is safe in an unbraced if/else.
#define timesmi(a, b, i)              \
  do {                                \
    (b)[(i)]     = (a)[(i) + 1];      \
    (b)[(i) + 1] = -(a)[(i)];         \
  } while (0)


// Multiply interleaved (re, im) lane pairs by -i.
struct TimesMinusI{
  // Complex: (re, im) -> (im, -re) for each of the W<T>::c pairs.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a){
    vec<T> turned;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        turned.v[2*k]   = a.v[2*k+1];
        turned.v[2*k+1] = -a.v[2*k];
      }
    return turned;
  }
};


#undef timesmi


// timesi(a, b, i): multiply the pair starting at a[i] by +i (the imaginary
// unit), i.e. (b[i], b[i+1]) = (-a[i+1], a[i]).
// b[i] is written before a[i] is read again, so b must not alias a.
// Arguments are parenthesised and the body is wrapped in do/while(0) so the
// macro expands to exactly one statement and is safe in an unbraced if/else.
#define timesi(a, b, i)               \
  do {                                \
    (b)[(i)]     = -(a)[(i) + 1];     \
    (b)[(i) + 1] = (a)[(i)];          \
  } while (0)


// Multiply interleaved (re, im) lane pairs by +i.
struct TimesI{
  // Complex: (re, im) -> (-im, re) for each of the W<T>::c pairs.
  template <typename T>
  accelerator_inline vec<T> operator()(vec<T> a){
    vec<T> turned;
    VECTOR_FOR(k, W<T>::c, 1)
      {
        turned.v[2*k]   = -a.v[2*k+1];
        turned.v[2*k+1] = a.v[2*k];
      }
    return turned;
  }
};


#undef timesi


// Conversions between half (vech), single (vecf) and double (vecd) vectors.
// A narrower vector packs the lanes of two wider vectors back to back, and
// the widening functions split it again in the same order.
struct PrecisionChange {

  // Pack two single-precision vectors into one half-precision vector.
  // With USE_FP16 each float is narrowed by keeping only the odd-indexed
  // 16-bit word of its storage; a fills lanes [0, nf), b fills [nf, 2*nf).
  // NOTE(review): the odd word is the high half only on little-endian hosts,
  // and the uint16_t view of float storage relies on the compiler tolerating
  // this type punning -- confirm for new targets.
  // Without USE_FP16 the conversion is unsupported: it asserts, and when
  // asserts are compiled out it returns all 2*nf lanes zeroed.
  static accelerator_inline vech StoH (const vecf &a,const vecf &b) {
    vech ret;
    const int nf = W<float>::r;
#ifdef USE_FP16
    // Inputs are only read, so keep the const qualifier on the punned view.
    const vech *ha = (const vech *)&a;
    const vech *hb = (const vech *)&b;
    //      VECTOR_FOR(i, nf,1){ ret.v[i]    = ( (uint16_t *) &a.v[i])[1] ; }
    //      VECTOR_FOR(i, nf,1){ ret.v[i+nf] = ( (uint16_t *) &b.v[i])[1] ; }
    VECTOR_FOR(i, nf,1){ ret.v[i]    = ha->v[2*i+1]; }
    VECTOR_FOR(i, nf,1){ ret.v[i+nf] = hb->v[2*i+1]; }
#else
    // Zero both halves so no indeterminate lane escapes in NDEBUG builds.
    VECTOR_FOR(i, nf,1){ ret.v[i]=0; ret.v[i+nf]=0; }
    assert(0);
#endif
    return ret;
  }

  // Unpack one half-precision vector into two single-precision vectors.
  // With USE_FP16 sa and sb are zeroed, then lane i of h is written into the
  // odd 16-bit word of sa.v[i] and lane i+nf into that of sb.v[i].
  // Without USE_FP16 the conversion is unsupported: it asserts and leaves
  // sa and sb untouched.
  static accelerator_inline void  HtoS (vech h,vecf &sa,vecf &sb) {
#ifdef USE_FP16
    const int nf = W<float>::r;
    vech *ha = (vech *)&sa;
    vech *hb = (vech *)&sb;
    VECTOR_FOR(i, nf, 1){ sb.v[i]= sa.v[i] = 0; }
    //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sa.v[i]))[1] = h.v[i];}
    //      VECTOR_FOR(i, nf, 1){ ( (uint16_t *) (&sb.v[i]))[1] = h.v[i+nf];}
    VECTOR_FOR(i, nf, 1){ ha->v[2*i+1]=h.v[i]; }
    VECTOR_FOR(i, nf, 1){ hb->v[2*i+1]=h.v[i+nf]; }
#else
    assert(0);
#endif
  }

  // Pack two double vectors into one float vector: a fills lanes [0, nd),
  // b fills lanes [nd, 2*nd). Each value is narrowed by plain assignment.
  static accelerator_inline vecf DtoS (vecd a,vecd b) {
    const int nd = W<double>::r;
    vecf ret;
    VECTOR_FOR(i, nd,1){ ret.v[i]    = a.v[i] ; }
    VECTOR_FOR(i, nd,1){ ret.v[i+nd] = b.v[i] ; }
    return ret;
  }

  // Unpack one float vector into two double vectors: lanes [0, nd) go to a,
  // lanes [nd, 2*nd) go to b.
  static accelerator_inline void StoD (vecf s,vecd &a,vecd &b) {
    const int nd = W<double>::r;
    VECTOR_FOR(i, nd,1){ a.v[i] = s.v[i] ; }
    VECTOR_FOR(i, nd,1){ b.v[i] = s.v[i+nd] ; }
  }

  // Pack four double vectors into one half vector by going through single
  // precision: (a,b) and (c,d) are each packed with DtoS, then StoH.
  static accelerator_inline vech DtoH (vecd a,vecd b,vecd c,vecd d) {
    vecf sa,sb;
    sa = DtoS(a,b);
    sb = DtoS(c,d);
    return StoH(sa,sb);
  }

  // Unpack one half vector into four double vectors by going through single
  // precision: HtoS first, then StoD on each half.
  static accelerator_inline void HtoD (vech h,vecd &a,vecd &b,vecd &c,vecd &d) {
    vecf sa,sb;
    HtoS(h,sa,sb);
    StoD(sa,a,b);
    StoD(sb,c,d);
  }

};


// Exchange support

// Exchange (interleave) blocks of lanes between two input vectors.
struct Exchange{

  // Core routine. With mask = W<T>::r >> (n + 1), lanes whose index has the
  // mask bit clear read from in1 and lanes with it set read from in2.
  // out1 takes the source lane with the mask bit cleared, out2 the source
  // lane with the mask bit set.
  template <typename T,int n>
  static accelerator_inline void ExchangeN(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    const int w = W<T>::r;
    unsigned int mask = w >> (n + 1);
    VECTOR_FOR(lane, w, 1) {
      const bool from_first = ((lane & mask) == 0);
      const int  cleared    = lane & (~mask);
      if (from_first) { out1.v[lane] = in1.v[cleared]; }
      else            { out1.v[lane] = in2.v[cleared]; }
      const int  set        = lane | mask;
      if (from_first) { out2.v[lane] = in1.v[set]; }
      else            { out2.v[lane] = in2.v[set]; }
    }
  }

  // Fixed-level entry points; the digit is the template argument n above.
  template <typename T>
  static accelerator_inline void Exchange0(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,0>(out1,out2,in1,in2);
  }

  template <typename T>
  static accelerator_inline void Exchange1(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,1>(out1,out2,in1,in2);
  }

  template <typename T>
  static accelerator_inline void Exchange2(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,2>(out1,out2,in1,in2);
  }

  template <typename T>
  static accelerator_inline void Exchange3(vec<T> &out1,vec<T> &out2,vec<T> &in1,vec<T> &in2){
    ExchangeN<T,3>(out1,out2,in1,in2);
  }

};


// Some Template specialization


// Lane permutations. PermuteK(in) returns a vector whose lane i holds
// in.v[i ^ mask] with mask = W<T>::r >> (K + 1), i.e. it swaps neighbouring
// blocks of mask lanes.
struct Permute{
private:
  // Shared implementation, parameterised on the level n in the same way as
  // Exchange::ExchangeN; replaces the former perm / DECL_PERMUTE_N macros.
  template <int n, typename T>
  static accelerator_inline vec<T> PermuteN(vec<T> in) {
    vec<T> out;
    unsigned int mask = W<T>::r >> (n + 1);
    VECTOR_FOR(i, W<T>::r, 1)
    {
      out.v[i] = in.v[i^mask];
    }
    return out;
  }

public:
  template <typename T>
  static accelerator_inline vec<T> Permute0(vec<T> in) { return PermuteN<0>(in); }

  template <typename T>
  static accelerator_inline vec<T> Permute1(vec<T> in) { return PermuteN<1>(in); }

  template <typename T>
  static accelerator_inline vec<T> Permute2(vec<T> in) { return PermuteN<2>(in); }

  template <typename T>
  static accelerator_inline vec<T> Permute3(vec<T> in) { return PermuteN<3>(in); }
};


// Cyclic lane rotation.
struct Rotate{

  // Compile-time-count wrapper around rotate().
  template <int n, typename T> static accelerator_inline vec<T> tRotate(vec<T> in){
    return rotate(in, n);
  }

  // Returns a vector whose lane i holds in.v[(i + n) mod W<T>::r].
  // n is first reduced to a shift in [0, W<T>::r), so counts outside that
  // range (including negative ones) wrap around instead of depending on how
  // the loop index type handles the addition.
  template <typename T>
  static accelerator_inline vec<T> rotate(vec<T> in, int n){
    const int w = W<T>::r;
    const int shift = ((n % w) + w) % w;
    vec<T> out;
    VECTOR_FOR(i, w, 1)
    {
      out.v[i] = in.v[(i + shift)%w];
    }
    return out;
  }

};


// acc(v, a, off, step, n): add v[off], v[off+step], ... (indices below n)
// onto the accumulator a, in increasing index order.
// The loop variable is named i, so the v argument must not itself use an
// identifier i from the calling scope. Arguments are parenthesised so that
// expressions can be passed safely.
#define acc(v, a, off, step, n)                         \
  for (unsigned int i = (off); i < (n); i += (step))    \
    {                                                   \
      (a) += (v)[i];                                    \
    }


// Horizontal reduction of a vector to a scalar.
// A class template is needed so that the output type can be chosen.
// This primary template is the fallback for unsupported type pairs: it still
// compiles, but calling it prints an error and terminates the program.
template <typename Out_type, typename In_type>
struct Reduce{

  accelerator_inline Out_type operator()(In_type in){
    // Reaching this point means no specialisation matched the requested types.
    printf("Error, using wrong Reduce function\n");
    exit(1);
    return 0;  // not reached; keeps the function well-formed
  }

};


//Complex float Reduce

// Complex float Reduce: sums the even lanes into the real part and the odd
// lanes into the imaginary part.
template <>
accelerator_inline Grid::ComplexF Reduce<Grid::ComplexF, vecf>::operator()(vecf in){
  float re_sum = 0.f;
  float im_sum = 0.f;

  for (unsigned int k = 0; k < W<float>::r; k += 2) { re_sum += in.v[k]; }
  for (unsigned int k = 1; k < W<float>::r; k += 2) { im_sum += in.v[k]; }

  return Grid::ComplexF(re_sum, im_sum);
}


//Real float Reduce

// Real float Reduce: sums all W<float>::r lanes in index order.
template<>
accelerator_inline Grid::RealF Reduce<Grid::RealF, vecf>::operator()(vecf in){
  float total = 0.f;

  for (unsigned int k = 0; k < W<float>::r; k += 1) { total += in.v[k]; }

  return total;
}


//Complex double Reduce

// Complex double Reduce: sums the even lanes into the real part and the odd
// lanes into the imaginary part.
template<>
accelerator_inline Grid::ComplexD Reduce<Grid::ComplexD, vecd>::operator()(vecd in){
  double re_sum = 0.;
  double im_sum = 0.;

  for (unsigned int k = 0; k < W<double>::r; k += 2) { re_sum += in.v[k]; }
  for (unsigned int k = 1; k < W<double>::r; k += 2) { im_sum += in.v[k]; }

  return Grid::ComplexD(re_sum, im_sum);
}


//Real double Reduce

// Real double Reduce: sums all W<double>::r lanes in index order.
template<>
accelerator_inline Grid::RealD Reduce<Grid::RealD, vecd>::operator()(vecd in){
  double total = 0.;

  for (unsigned int k = 0; k < W<double>::r; k += 1) { total += in.v[k]; }

  return total;
}


//Integer Reduce

// Integer Reduce: sums all W<Integer>::r lanes in index order.
template<>
accelerator_inline Integer Reduce<Integer, veci>::operator()(veci in){
  Integer total = 0;

  for (unsigned int k = 0; k < W<Integer>::r; k += 1) { total += in.v[k]; }

  return total;
}


#undef acc  // EIGEN compatibility

NAMESPACE_END(Optimization)


// Here assign types


using SIMD_Htype = Optimization::vech; // Reduced precision type
using SIMD_Ftype = Optimization::vecf; // Single precision type
using SIMD_Dtype = Optimization::vecd; // Double precision type
using SIMD_Itype = Optimization::veci; // Integer type


// prefetch utilities

// Both prefetch hooks have empty bodies here: they are no-ops and their
// arguments are ignored.
accelerator_inline void v_prefetch0(int size, const char *ptr){}
accelerator_inline void prefetch_HINT_T0(const char *ptr){}


// Function name aliases

// Load / store / broadcast functors
using VsplatSIMD  = Optimization::Vsplat;
using VstoreSIMD  = Optimization::Vstore;
using VsetSIMD    = Optimization::Vset;
using VstreamSIMD = Optimization::Vstream;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;

// Arithmetic operations
using SumSIMD          = Optimization::Sum;
using SubSIMD          = Optimization::Sub;
using DivSIMD          = Optimization::Div;
using MultSIMD         = Optimization::Mult;
using MultComplexSIMD  = Optimization::MultComplex;
using MultRealPartSIMD = Optimization::MultRealPart;
using MaddRealPartSIMD = Optimization::MaddRealPart;
using ConjSIMD         = Optimization::Conj;
using TimesMinusISIMD  = Optimization::TimesMinusI;
using TimesISIMD       = Optimization::TimesI;


NAMESPACE_END(Grid)


accelerator_inline
#define accelerator_inline
Definition Accelerator.h:608

VstreamSIMD
Optimization::Vstream VstreamSIMD
Definition Grid_a64fx-2.h:926

TimesMinusISIMD
Optimization::TimesMinusI TimesMinusISIMD
Definition Grid_a64fx-2.h:939

MultComplexSIMD
Optimization::MultComplex MultComplexSIMD
Definition Grid_a64fx-2.h:934

TimesISIMD
Optimization::TimesI TimesISIMD
Definition Grid_a64fx-2.h:940

ReduceSIMD
Optimization::Reduce< S, T > ReduceSIMD
Definition Grid_a64fx-2.h:927

vecd
vec< double > vecd
Definition Grid_a64fx-2.h:97

veci
vec< Integer > veci
Definition Grid_a64fx-2.h:99

MultSIMD
Optimization::Mult MultSIMD
Definition Grid_a64fx-2.h:933

MaddRealPartSIMD
Optimization::MaddRealPart MaddRealPartSIMD
Definition Grid_a64fx-2.h:937

vecf
vec< float > vecf
Definition Grid_a64fx-2.h:96

SIMD_Dtype
Optimization::vecd SIMD_Dtype
Definition Grid_a64fx-2.h:915

SIMD_Itype
Optimization::veci SIMD_Itype
Definition Grid_a64fx-2.h:916

VstoreSIMD
Optimization::Vstore VstoreSIMD
Definition Grid_a64fx-2.h:924

ConjSIMD
Optimization::Conj ConjSIMD
Definition Grid_a64fx-2.h:938

SIMD_Ftype
Optimization::vecf SIMD_Ftype
Definition Grid_a64fx-2.h:914

VsplatSIMD
Optimization::Vsplat VsplatSIMD
Definition Grid_a64fx-2.h:923

SumSIMD
Optimization::Sum SumSIMD
Definition Grid_a64fx-2.h:930

SubSIMD
Optimization::Sub SubSIMD
Definition Grid_a64fx-2.h:931

DivSIMD
Optimization::Div DivSIMD
Definition Grid_a64fx-2.h:932

vech
vec< uint16_t > vech
Definition Grid_a64fx-2.h:98

MultRealPartSIMD
Optimization::MultRealPart MultRealPartSIMD
Definition Grid_a64fx-2.h:936

VsetSIMD
Optimization::Vset VsetSIMD
Definition Grid_a64fx-2.h:925

SIMD_Htype
Optimization::vech SIMD_Htype
Definition Grid_a64fx-2.h:913

v_prefetch0
accelerator_inline void v_prefetch0(int size, const char *ptr)
Definition Grid_generic.h:509

timesmi
#define timesmi(a, b, i)
Definition Grid_generic.h:240

rot
#define rot(a, b, n, w)
Definition Grid_generic.h:404

timesi
#define timesi(a, b, i)
Definition Grid_generic.h:261

acc
#define acc(v, a, off, step, n)
Definition Grid_generic.h:428

conj
#define conj(a, b, i)
Definition Grid_generic.h:219

prefetch_HINT_T0
accelerator_inline void prefetch_HINT_T0(const char *ptr)
Definition Grid_generic.h:510

cmul
#define cmul(a, b, c, i)
Definition Grid_generic.h:155

Grid_generic_types.h

VECTOR_FOR
#define VECTOR_FOR(i, w, inc)
Definition Grid_generic_types.h:50

NAMESPACE_BEGIN
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35

NAMESPACE_END
#define NAMESPACE_END(A)
Definition Namespace.h:36

Integer
uint32_t Integer
Definition Simd.h:58

Grid
Definition Deflation.h:31

Conj
Definition Grid_a64fx-2.h:485

Conj::operator()
accelerator_inline vec< T > operator()(vec< T > a)
Definition Grid_generic.h:226

Div
Definition Grid_a64fx-2.h:470

Div::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
Definition Grid_generic.h:207

Exchange
Definition Grid_a64fx-2.h:641

Exchange::Exchange1
static accelerator_inline void Exchange1(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
Definition Grid_generic.h:363

Exchange::Exchange3
static accelerator_inline void Exchange3(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
Definition Grid_generic.h:371

Exchange::Exchange2
static accelerator_inline void Exchange2(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
Definition Grid_generic.h:367

Exchange::ExchangeN
static accelerator_inline void ExchangeN(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
Definition Grid_generic.h:345

Exchange::Exchange0
static accelerator_inline void Exchange0(vec< T > &out1, vec< T > &out2, vec< T > &in1, vec< T > &in2)
Definition Grid_generic.h:359

MaddRealPart
Definition Grid_a64fx-2.h:413

MaddRealPart::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b, vec< T > c)
Definition Grid_generic.h:175

MultComplex
Definition Grid_a64fx-2.h:431

MultComplex::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
Definition Grid_generic.h:190

MultRealPart
Definition Grid_a64fx-2.h:395

MultRealPart::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
Definition Grid_generic.h:161

Mult
Definition Grid_a64fx-2.h:369

Mult::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
Definition Grid_generic.h:143

Permute
Definition Grid_a64fx-2.h:711

Permute::DECL_PERMUTE_N
DECL_PERMUTE_N(2)

Permute::DECL_PERMUTE_N
DECL_PERMUTE_N(1)

Permute::DECL_PERMUTE_N
DECL_PERMUTE_N(0)

Permute::DECL_PERMUTE_N
DECL_PERMUTE_N(3)

PrecisionChange
Definition Grid_a64fx-2.h:540

PrecisionChange::StoH
static vech StoH(const vecf &sa, const vecf &sb)
Definition Grid_a64fx-2.h:541

PrecisionChange::StoH
static accelerator_inline vech StoH(const vecf &a, const vecf &b)
Definition Grid_generic.h:283

PrecisionChange::StoD
static void StoD(vecf s, vecd &a, vecd &b)
Definition Grid_a64fx-2.h:578

PrecisionChange::HtoD
static accelerator_inline void HtoD(vech h, vecd &a, vecd &b, vecd &c, vecd &d)
Definition Grid_generic.h:332

PrecisionChange::DtoS
static accelerator_inline vecf DtoS(vecd a, vecd b)
Definition Grid_generic.h:314

PrecisionChange::HtoS
static accelerator_inline void HtoS(vech h, vecf &sa, vecf &sb)
Definition Grid_generic.h:299

PrecisionChange::DtoS
static vecf DtoS(vecd a, vecd b)
Definition Grid_a64fx-2.h:565

PrecisionChange::StoD
static accelerator_inline void StoD(vecf s, vecd &a, vecd &b)
Definition Grid_generic.h:321

PrecisionChange::HtoS
static void HtoS(vech h, vecf &sa, vecf &sb)
Definition Grid_a64fx-2.h:554

PrecisionChange::DtoH
static accelerator_inline vech DtoH(vecd a, vecd b, vecd c, vecd d)
Definition Grid_generic.h:326

Reduce
Definition Grid_a64fx-2.h:838

Reduce::operator()
accelerator_inline Out_type operator()(In_type in)
Definition Grid_generic.h:438

Reduce::operator()
Out_type operator()(In_type in)
Definition Grid_a64fx-2.h:841

Rotate
Definition Grid_a64fx-2.h:791

Rotate::rotate
static vec< T > rotate(vec< T > in, int n)
Definition Grid_a64fx-2.h:804

Rotate::tRotate
static accelerator_inline vec< T > tRotate(vec< T > in)
Definition Grid_generic.h:412

Rotate::rotate
static accelerator_inline vec< T > rotate(vec< T > in, int n)
Definition Grid_generic.h:417

Sub
Definition Grid_a64fx-2.h:355

Sub::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
Definition Grid_generic.h:128

Sum
Definition Grid_a64fx-2.h:341

Sum::operator()
accelerator_inline vec< T > operator()(vec< T > a, vec< T > b)
Definition Grid_generic.h:113

TimesI
Definition Grid_a64fx-2.h:520

TimesI::operator()
accelerator_inline vec< T > operator()(vec< T > a)
Definition Grid_generic.h:268

TimesMinusI
Definition Grid_a64fx-2.h:501

TimesMinusI::operator()
accelerator_inline vec< T > operator()(vec< T > a)
Definition Grid_generic.h:247

Vset
Definition Grid_a64fx-2.h:313

Vset::operator()
accelerator_inline vec< T > operator()(T *a)
Definition Grid_generic.h:98

Vset::operator()
accelerator_inline vec< T > operator()(std::complex< T > *a)
Definition Grid_generic.h:84

Vsplat
Definition Grid_a64fx-2.h:240

Vsplat::operator()
accelerator_inline vec< T > operator()(T a, T b)
Definition Grid_generic.h:39

Vsplat::operator()
accelerator_inline vec< T > operator()(T a)
Definition Grid_generic.h:53

Vstore
Definition Grid_a64fx-2.h:292

Vstore::operator()
accelerator_inline void operator()(vec< T > a, T *D)
Definition Grid_generic.h:68

Vstream
Definition Grid_a64fx-2.h:302

Vstream::operator()
accelerator_inline void operator()(T *a, vec< T > b)
Definition Grid_generic.h:76

W
Definition Grid_a64fx-2.h:41

vec
Definition Grid_a64fx-2.h:91

vec::v
T v[W< T >::r]
Definition Grid_a64fx-2.h:92