29#ifndef GRID_LATTICE_ARITH_H
30#define GRID_LATTICE_ARITH_H
37template<
class obj1,
class obj2,
class obj3>
inline
47 decltype(coalescedRead(obj1())) tmp;
48 auto lhs_t = lhs_v(ss);
49 auto rhs_t = rhs_v(ss);
50 mult(&tmp,&lhs_t,&rhs_t);
51 coalescedWrite(ret_v[ss],tmp);
55template<
class obj1,
class obj2,
class obj3>
inline
68 mac(&tmp,&lhs_t,&rhs_t);
69 coalescedWrite(ret_v[ss],tmp);
73template<
class obj1,
class obj2,
class obj3>
inline
83 decltype(coalescedRead(obj1())) tmp;
86 sub(&tmp,&lhs_t,&rhs_t);
87 coalescedWrite(ret_v[ss],tmp);
90template<
class obj1,
class obj2,
class obj3>
inline
100 decltype(coalescedRead(obj1())) tmp;
101 auto lhs_t=lhs_v(ss);
102 auto rhs_t=rhs_v(ss);
103 add(&tmp,&lhs_t,&rhs_t);
104 coalescedWrite(ret_v[ss],tmp);
111template<
class obj1,
class obj2,
class obj3>
inline
119 decltype(coalescedRead(obj1())) tmp;
120 mult(&tmp,&lhs_v(ss),&rhs);
121 coalescedWrite(ret_v[ss],tmp);
125template<
class obj1,
class obj2,
class obj3>
inline
134 auto lhs_t=lhs_v(ss);
135 mac(&tmp,&lhs_t,&rhs);
136 coalescedWrite(ret_v[ss],tmp);
140template<
class obj1,
class obj2,
class obj3>
inline
148 decltype(coalescedRead(obj1())) tmp;
149 auto lhs_t=lhs_v(ss);
150 sub(&tmp,&lhs_t,&rhs);
151 coalescedWrite(ret_v[ss],tmp);
154template<
class obj1,
class obj2,
class obj3>
inline
162 decltype(coalescedRead(obj1())) tmp;
163 auto lhs_t=lhs_v(ss);
164 add(&tmp,&lhs_t,&rhs);
165 coalescedWrite(ret_v[ss],tmp);
172template<
class obj1,
class obj2,
class obj3>
inline
180 decltype(coalescedRead(obj1())) tmp;
181 auto rhs_t=rhs_v(ss);
182 mult(&tmp,&lhs,&rhs_t);
183 coalescedWrite(ret_v[ss],tmp);
187template<
class obj1,
class obj2,
class obj3>
inline
196 auto rhs_t=rhs_v(ss);
197 mac(&tmp,&lhs,&rhs_t);
198 coalescedWrite(ret_v[ss],tmp);
202template<
class obj1,
class obj2,
class obj3>
inline
210 decltype(coalescedRead(obj1())) tmp;
211 auto rhs_t=rhs_v(ss);
212 sub(&tmp,&lhs,&rhs_t);
213 coalescedWrite(ret_v[ss],tmp);
216template<
class obj1,
class obj2,
class obj3>
inline
224 decltype(coalescedRead(obj1())) tmp;
225 auto rhs_t=rhs_v(ss);
226 add(&tmp,&lhs,&rhs_t);
227 coalescedWrite(ret_v[ss],tmp);
231template<
class sobj,
class vobj>
inline
241 auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]);
242 coalescedWrite(ret_v[ss],tmp);
245template<
class sobj,
class vobj>
inline
255 auto tmp = a*x_v(ss)+b*y_v(ss);
256 coalescedWrite(ret_v[ss],tmp);
260#define FAST_AXPY_NORM
261template<
class sobj,
class vobj>
inline
273template<
class sobj,
class vobj>
inline
290 typedef decltype(
trace(obj())) robj;
295 ret.Checkerboard() = rhs_1.Checkerboard();
297 coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2(ss)));
305 typedef decltype(
trace(obj1())) robj;
309 ret.Checkerboard() = rhs_1.Checkerboard();
311 coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2));
#define accelerator_for(iterator, num, nsimd,...)
accelerator_inline Grid_simd2< S, V > trace(const Grid_simd2< S, V > &arg)
RealD axpy_norm(Lattice< vobj > &ret, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
void add(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void mac(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void axpy(Lattice< vobj > &ret, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
void sub(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void axpby(Lattice< vobj > &ret, sobj a, sobj b, const Lattice< vobj > &x, const Lattice< vobj > &y)
RealD axpby_norm(Lattice< vobj > &ret, sobj a, sobj b, const Lattice< vobj > &x, const Lattice< vobj > &y)
auto traceProduct(const Lattice< obj > &rhs_1, const Lattice< obj > &rhs_2) -> Lattice< decltype(trace(obj()))>
Trace product.
void mult(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
strong_inline RealD axpby_norm_fast(Lattice< vobj > &z, sobj a, sobj b, const Lattice< vobj > &x, const Lattice< vobj > &y)
strong_inline RealD axpy_norm_fast(Lattice< vobj > &z, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
RealD norm2(const Lattice< vobj > &arg)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
accelerator_inline int Checkerboard(void) const