28#ifndef GRID_LATTICE_LOCALREDUCTION_H
29#define GRID_LATTICE_LOCALREDUCTION_H
49 coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss)));
63 coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss)));
70template<
class ll,
class rr>
82 ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
#define accelerator_for(iterator, num, nsimd,...)
auto localInnerProduct(const Lattice< vobj > &lhs, const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
auto outerProduct(const Lattice< ll > &lhs, const Lattice< rr > &rhs) -> Lattice< decltype(outerProduct(ll(), rr()))>
auto localNorm2(const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)