Grid 0.7.0
Lattice_reduction_sycl.h
Go to the documentation of this file.
2
4// Possibly promote to double and sum
6
7
8template <class vobj>
9inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
10{
11 typedef typename vobj::scalar_object sobj;
12 typedef typename vobj::scalar_objectD sobjD;
13
14 sobj identity; zeroit(identity);
15 sobj ret; zeroit(ret);
16 Integer nsimd= vobj::Nsimd();
17 {
18 sycl::buffer<sobj, 1> abuff(&ret, {1});
19 theGridAccelerator->submit([&](sycl::handler &cgh) {
20 auto Reduction = sycl::reduction(abuff,cgh,identity,std::plus<>());
21 cgh.parallel_for(sycl::range<1>{osites},
22 Reduction,
23 [=] (sycl::id<1> item, auto &sum) {
24 auto osite = item[0];
25 sum +=Reduce(lat[osite]);
26 });
27 });
28 }
29 sobjD dret; convertType(dret,ret);
30 return dret;
31}
32
33template <class vobj>
34inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
35{
36 return sumD_gpu_tensor(lat,osites);
37}
38template <class vobj>
39inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
40{
41 return sumD_gpu_large(lat,osites);
42}
43
44template <class vobj>
45inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
46{
47 return sumD_gpu_large(lat,osites);
48}
49
// Return the result in the same precision as the input, converting from the double-precision result of the sumD_gpu* routines
53template <class vobj>
54inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
55{
56 typedef typename vobj::scalar_object sobj;
57 sobj result;
58 result = sumD_gpu(lat,osites);
59 return result;
60}
61
62template <class vobj>
63inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
64{
65 typedef typename vobj::scalar_object sobj;
66 sobj result;
67 result = sumD_gpu_large(lat,osites);
68 return result;
69}
70
71
72template<class Word> Word svm_xor(Word *vec,uint64_t L)
73{
74 Word identity; identity=0;
75 Word ret = 0;
76 {
77 sycl::buffer<Word, 1> abuff(&ret, {1});
78 theGridAccelerator->submit([&](sycl::handler &cgh) {
79 auto Reduction = sycl::reduction(abuff,cgh,identity,std::bit_xor<>());
80 cgh.parallel_for(sycl::range<1>{L},
81 Reduction,
82 [=] (sycl::id<1> index, auto &sum) {
83 sum ^=vec[index];
84 });
85 });
86 }
87 theGridAccelerator->wait();
88 return ret;
89}
90
92
accelerator_inline void zeroit(Grid_simd2< S, V > &z)
vobj::scalar_object sum(const vobj *arg, Integer osites)
vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
Word svm_xor(Word *vec, uint64_t L)
vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
accelerator_inline void convertType(ComplexD &out, const std::complex< double > &in)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
uint32_t Integer
Definition Simd.h:58
accelerator_inline ComplexD Reduce(const ComplexD &r)
Definition Simd.h:129