Grid/dev/Lattice__reduction__sycl_8h_source.html

NAMESPACE_BEGIN(Grid);


// Sum in the input scalar precision, then promote the total to double


/// Sum `osites` elements starting at `lat` into a single scalar object using
/// a SYCL reduction submitted on theGridAccelerator.
///
/// Reduce() is applied to every element and the per-element values are
/// combined with std::plus<> in the input scalar type (sobj). Only the final
/// total is converted to scalar_objectD, via convertType().
///
/// @param lat    pointer to the first of `osites` elements; it is dereferenced
///               inside the kernel (NOTE(review): presumably has to be
///               device-accessible memory - confirm against callers).
/// @param osites number of elements to combine.
/// @return the total, converted to vobj::scalar_objectD.
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
{
  using sobj  = typename vobj::scalar_object;
  using sobjD = typename vobj::scalar_objectD;

  sobj identity;
  zeroit(identity);
  sobj ret;
  zeroit(ret);
  {
    // The buffer wraps `ret`. Leaving this scope destroys the buffer, which
    // blocks until the kernel has finished and the result is written back to
    // `ret`, so no explicit queue wait is needed before reading it.
    sycl::buffer<sobj, 1> abuff(&ret, {1});
    theGridAccelerator->submit([&](sycl::handler &cgh) {
      auto Reduction = sycl::reduction(abuff,cgh,identity,std::plus<>());
      cgh.parallel_for(sycl::range<1>{osites},
                       Reduction,
                       [=] (sycl::id<1> item, auto &sum) {
                         auto osite = item[0];
                         sum += Reduce(lat[osite]);
                       });
    });
  }
  // Widen only after the reduction has completed.
  sobjD dret;
  convertType(dret,ret);
  return dret;
}


/// Entry point for "large" objects; forwards unchanged to sumD_gpu_tensor().
///
/// @param lat    pointer to the first of `osites` elements.
/// @param osites number of elements to combine.
/// @return whatever sumD_gpu_tensor() returns for the same arguments.
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
{
  typename vobj::scalar_objectD total = sumD_gpu_tensor(lat, osites);
  return total;
}


/// Entry point for "small" objects; on this backend it simply forwards to
/// sumD_gpu_large().
///
/// @param lat    pointer to the first of `osites` elements.
/// @param osites number of elements to combine.
/// @return whatever sumD_gpu_large() returns for the same arguments.
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
{
  typename vobj::scalar_objectD total = sumD_gpu_large(lat, osites);
  return total;
}


/// Generic entry point; forwards to sumD_gpu_large().
///
/// @param lat    pointer to the first of `osites` elements.
/// @param osites number of elements to combine.
/// @return the total as vobj::scalar_objectD.
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
{
  typename vobj::scalar_objectD total = sumD_gpu_large(lat, osites);
  return total;
}


// Return the sum in the same precision as the input; the sumD_* routines hand back a scalar_objectD total, which is converted back on assignment

/// Sum `osites` elements starting at `lat` and return the total in the
/// input's own scalar type.
///
/// The work is delegated to sumD_gpu(), whose scalar_objectD result is
/// assigned into a vobj::scalar_object.
///
/// @param lat    pointer to the first of `osites` elements.
/// @param osites number of elements to combine.
/// @return the total as vobj::scalar_object.
template <class vobj>
inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
{
  using sobj = typename vobj::scalar_object;

  // Default-construct first, then assign: the conversion from scalar_objectD
  // goes through the assignment operator.
  sobj total;
  total = sumD_gpu(lat, osites);
  return total;
}


/// "Large object" variant of sum_gpu(): delegates to sumD_gpu_large() and
/// returns the total in the input's own scalar type.
///
/// @param lat    pointer to the first of `osites` elements.
/// @param osites number of elements to combine.
/// @return the total as vobj::scalar_object.
template <class vobj>
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
{
  using sobj = typename vobj::scalar_object;

  // Default-construct first, then assign: the conversion from scalar_objectD
  // goes through the assignment operator.
  sobj total;
  total = sumD_gpu_large(lat, osites);
  return total;
}


template<class Word> Word svm_xor(Word *vec,uint64_t L)

{

  Word identity;  identity=0;

  Word ret = 0;

  {

    sycl::buffer<Word, 1> abuff(&ret, {1});

    theGridAccelerator->submit([&](sycl::handler &cgh) {

      auto Reduction = sycl::reduction(abuff,cgh,identity,std::bit_xor<>());

      cgh.parallel_for(sycl::range<1>{L},

                      Reduction,

                      [=] (sycl::id<1> index, auto &sum) {

                        sum ^=vec[index];

                      });

    });

  }

  theGridAccelerator->wait();

  return ret;

}


NAMESPACE_END(Grid);


zeroit
accelerator_inline void zeroit(Grid_simd2< S, V > &z)
Definition Grid_doubled_vector.h:493

sum
vobj::scalar_object sum(const vobj *arg, Integer osites)
Definition Lattice_reduction.h:129

sumD_gpu_small
vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
Definition Lattice_reduction_sycl.h:39

svm_xor
Word svm_xor(Word *vec, uint64_t L)
Definition Lattice_reduction_sycl.h:72

sum_gpu_large
vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
Definition Lattice_reduction_sycl.h:63

sumD_gpu
vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
Definition Lattice_reduction_sycl.h:45

sumD_gpu_large
vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
Definition Lattice_reduction_sycl.h:34

sumD_gpu_tensor
vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
Definition Lattice_reduction_sycl.h:9

sum_gpu
vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
Definition Lattice_reduction_sycl.h:54

convertType
accelerator_inline void convertType(ComplexD &out, const std::complex< double > &in)
Definition Lattice_transfer.h:162

NAMESPACE_BEGIN
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35

NAMESPACE_END
#define NAMESPACE_END(A)
Definition Namespace.h:36

Integer
uint32_t Integer
Definition Simd.h:58

Reduce
accelerator_inline ComplexD Reduce(const ComplexD &r)
Definition Simd.h:129

Grid
Definition Deflation.h:31

vec
Definition Grid_a64fx-2.h:91