Grid 0.7.0
Lattice_local.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/lattice/Lattice_local.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#ifndef GRID_LATTICE_LOCALREDUCTION_H
29#define GRID_LATTICE_LOCALREDUCTION_H
30
32// localInner, localNorm, outerProduct
34
36
// Site-local routines: each site is reduced on its own, with no reduction across sites
40
41// localNorm2,
42template<class vobj>
44{
46 autoView( rhs_v , rhs, AcceleratorRead);
47 autoView( ret_v , ret, AcceleratorWrite);
48 accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
49 coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss)));
50 });
51 return ret;
52}
53
54// localInnerProduct
55template<class vobj>
57{
59 autoView( lhs_v , lhs, AcceleratorRead);
60 autoView( rhs_v , rhs, AcceleratorRead);
61 autoView( ret_v , ret, AcceleratorWrite);
62 accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
63 coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss)));
64 });
65 return ret;
66}
67
68// outerProduct Scalar x Scalar -> Scalar
69// Vector x Vector -> Matrix
70template<class ll,class rr>
71inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
72{
73 typedef decltype(coalescedRead(ll())) sll;
74 typedef decltype(coalescedRead(rr())) srr;
75 Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
76 autoView( lhs_v , lhs, AcceleratorRead);
77 autoView( rhs_v , rhs, AcceleratorRead);
78 autoView( ret_v , ret, AcceleratorWrite);
79 accelerator_for(ss,rhs_v.size(),1,{
80 // FIXME had issues with scalar version of outer
81 // Use vector [] operator and don't read coalesce this loop
82 ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
83 });
84 return ret;
85}
87#endif
#define accelerator_for(iterator, num, nsimd,...)
auto localInnerProduct(const Lattice< vobj > &lhs, const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
auto outerProduct(const Lattice< ll > &lhs, const Lattice< rr > &rhs) -> Lattice< decltype(outerProduct(ll(), rr()))>
auto localNorm2(const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61