Grid 0.7.0
Lattice_arith.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/lattice/Lattice_arith.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10Author: Christoph Lehner <christoph@lhnr.de>
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License along
23 with this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25
26 See the full license in the file "LICENSE" in the top level distribution directory
27*************************************************************************************/
28/* END LEGAL */
29#ifndef GRID_LATTICE_ARITH_H
30#define GRID_LATTICE_ARITH_H
31
33
35// avoid copy back routines for mult, mac, sub, add
37template<class obj1,class obj2,class obj3> inline
38void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
39 GRID_TRACE("mult");
40 ret.Checkerboard() = lhs.Checkerboard();
41 autoView( ret_v , ret, AcceleratorWrite);
42 autoView( lhs_v , lhs, AcceleratorRead);
43 autoView( rhs_v , rhs, AcceleratorRead);
44 conformable(ret,rhs);
45 conformable(lhs,rhs);
46 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
47 decltype(coalescedRead(obj1())) tmp;
48 auto lhs_t = lhs_v(ss);
49 auto rhs_t = rhs_v(ss);
50 mult(&tmp,&lhs_t,&rhs_t);
51 coalescedWrite(ret_v[ss],tmp);
52 });
53}
54
55template<class obj1,class obj2,class obj3> inline
56void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
57 GRID_TRACE("mac");
58 ret.Checkerboard() = lhs.Checkerboard();
59 conformable(ret,rhs);
60 conformable(lhs,rhs);
61 autoView( ret_v , ret, AcceleratorWrite);
62 autoView( lhs_v , lhs, AcceleratorRead);
63 autoView( rhs_v , rhs, AcceleratorRead);
64 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
65 auto lhs_t=lhs_v(ss);
66 auto rhs_t=rhs_v(ss);
67 auto tmp =ret_v(ss);
68 mac(&tmp,&lhs_t,&rhs_t);
69 coalescedWrite(ret_v[ss],tmp);
70 });
71}
72
73template<class obj1,class obj2,class obj3> inline
74void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
75 GRID_TRACE("sub");
76 ret.Checkerboard() = lhs.Checkerboard();
77 conformable(ret,rhs);
78 conformable(lhs,rhs);
79 autoView( ret_v , ret, AcceleratorWrite);
80 autoView( lhs_v , lhs, AcceleratorRead);
81 autoView( rhs_v , rhs, AcceleratorRead);
82 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
83 decltype(coalescedRead(obj1())) tmp;
84 auto lhs_t=lhs_v(ss);
85 auto rhs_t=rhs_v(ss);
86 sub(&tmp,&lhs_t,&rhs_t);
87 coalescedWrite(ret_v[ss],tmp);
88 });
89}
90template<class obj1,class obj2,class obj3> inline
91void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
92 GRID_TRACE("add");
93 ret.Checkerboard() = lhs.Checkerboard();
94 conformable(ret,rhs);
95 conformable(lhs,rhs);
96 autoView( ret_v , ret, AcceleratorWrite);
97 autoView( lhs_v , lhs, AcceleratorRead);
98 autoView( rhs_v , rhs, AcceleratorRead);
99 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
100 decltype(coalescedRead(obj1())) tmp;
101 auto lhs_t=lhs_v(ss);
102 auto rhs_t=rhs_v(ss);
103 add(&tmp,&lhs_t,&rhs_t);
104 coalescedWrite(ret_v[ss],tmp);
105 });
106}
107
109// avoid copy back routines for mult, mac, sub, add
111template<class obj1,class obj2,class obj3> inline
112void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
113 GRID_TRACE("mult");
114 ret.Checkerboard() = lhs.Checkerboard();
115 conformable(lhs,ret);
116 autoView( ret_v , ret, AcceleratorWrite);
117 autoView( lhs_v , lhs, AcceleratorRead);
118 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
119 decltype(coalescedRead(obj1())) tmp;
120 mult(&tmp,&lhs_v(ss),&rhs);
121 coalescedWrite(ret_v[ss],tmp);
122 });
123}
124
125template<class obj1,class obj2,class obj3> inline
126void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
127 GRID_TRACE("mac");
128 ret.Checkerboard() = lhs.Checkerboard();
129 conformable(ret,lhs);
130 autoView( ret_v , ret, AcceleratorWrite);
131 autoView( lhs_v , lhs, AcceleratorRead);
132 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
133 auto tmp =ret_v(ss);
134 auto lhs_t=lhs_v(ss);
135 mac(&tmp,&lhs_t,&rhs);
136 coalescedWrite(ret_v[ss],tmp);
137 });
138}
139
140template<class obj1,class obj2,class obj3> inline
141void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
142 GRID_TRACE("sub");
143 ret.Checkerboard() = lhs.Checkerboard();
144 conformable(ret,lhs);
145 autoView( ret_v , ret, AcceleratorWrite);
146 autoView( lhs_v , lhs, AcceleratorRead);
147 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
148 decltype(coalescedRead(obj1())) tmp;
149 auto lhs_t=lhs_v(ss);
150 sub(&tmp,&lhs_t,&rhs);
151 coalescedWrite(ret_v[ss],tmp);
152 });
153}
154template<class obj1,class obj2,class obj3> inline
155void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
156 GRID_TRACE("add");
157 ret.Checkerboard() = lhs.Checkerboard();
158 conformable(lhs,ret);
159 autoView( ret_v , ret, AcceleratorWrite);
160 autoView( lhs_v , lhs, AcceleratorRead);
161 accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
162 decltype(coalescedRead(obj1())) tmp;
163 auto lhs_t=lhs_v(ss);
164 add(&tmp,&lhs_t,&rhs);
165 coalescedWrite(ret_v[ss],tmp);
166 });
167}
168
170// avoid copy back routines for mult, mac, sub, add
172template<class obj1,class obj2,class obj3> inline
173void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
174 GRID_TRACE("mult");
175 ret.Checkerboard() = rhs.Checkerboard();
176 conformable(ret,rhs);
177 autoView( ret_v , ret, AcceleratorWrite);
178 autoView( rhs_v , lhs, AcceleratorRead);
179 accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
180 decltype(coalescedRead(obj1())) tmp;
181 auto rhs_t=rhs_v(ss);
182 mult(&tmp,&lhs,&rhs_t);
183 coalescedWrite(ret_v[ss],tmp);
184 });
185}
186
187template<class obj1,class obj2,class obj3> inline
188void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
189 GRID_TRACE("mac");
190 ret.Checkerboard() = rhs.Checkerboard();
191 conformable(ret,rhs);
192 autoView( ret_v , ret, AcceleratorWrite);
193 autoView( rhs_v , lhs, AcceleratorRead);
194 accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
195 auto tmp =ret_v(ss);
196 auto rhs_t=rhs_v(ss);
197 mac(&tmp,&lhs,&rhs_t);
198 coalescedWrite(ret_v[ss],tmp);
199 });
200}
201
202template<class obj1,class obj2,class obj3> inline
203void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
204 GRID_TRACE("sub");
205 ret.Checkerboard() = rhs.Checkerboard();
206 conformable(ret,rhs);
207 autoView( ret_v , ret, AcceleratorWrite);
208 autoView( rhs_v , lhs, AcceleratorRead);
209 accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
210 decltype(coalescedRead(obj1())) tmp;
211 auto rhs_t=rhs_v(ss);
212 sub(&tmp,&lhs,&rhs_t);
213 coalescedWrite(ret_v[ss],tmp);
214 });
215}
216template<class obj1,class obj2,class obj3> inline
217void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
218 GRID_TRACE("add");
219 ret.Checkerboard() = rhs.Checkerboard();
220 conformable(ret,rhs);
221 autoView( ret_v , ret, AcceleratorWrite);
222 autoView( rhs_v , lhs, AcceleratorRead);
223 accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
224 decltype(coalescedRead(obj1())) tmp;
225 auto rhs_t=rhs_v(ss);
226 add(&tmp,&lhs,&rhs_t);
227 coalescedWrite(ret_v[ss],tmp);
228 });
229}
230
231template<class sobj,class vobj> inline
232void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
233 GRID_TRACE("axpy");
234 ret.Checkerboard() = x.Checkerboard();
235 conformable(ret,x);
236 conformable(x,y);
237 autoView( ret_v , ret, AcceleratorWrite);
238 autoView( x_v , x, AcceleratorRead);
239 autoView( y_v , y, AcceleratorRead);
240 accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
241 auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]);
242 coalescedWrite(ret_v[ss],tmp);
243 });
244}
245template<class sobj,class vobj> inline
246void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
247 GRID_TRACE("axpby");
248 ret.Checkerboard() = x.Checkerboard();
249 conformable(ret,x);
250 conformable(x,y);
251 autoView( ret_v , ret, AcceleratorWrite);
252 autoView( x_v , x, AcceleratorRead);
253 autoView( y_v , y, AcceleratorRead);
254 accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
255 auto tmp = a*x_v(ss)+b*y_v(ss);
256 coalescedWrite(ret_v[ss],tmp);
257 });
258}
259
260#define FAST_AXPY_NORM
261template<class sobj,class vobj> inline
263{
264 GRID_TRACE("axpy_norm");
265#ifdef FAST_AXPY_NORM
266 return axpy_norm_fast(ret,a,x,y);
267#else
268 ret = a*x+y;
269 RealD nn=norm2(ret);
270 return nn;
271#endif
272}
273template<class sobj,class vobj> inline
274RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
275{
276 GRID_TRACE("axpby_norm");
277#ifdef FAST_AXPY_NORM
278 return axpby_norm_fast(ret,a,b,x,y);
279#else
280 ret = a*x+b*y;
281 RealD nn=norm2(ret);
282 return nn;
283#endif
284}
285
287template<class obj> auto traceProduct(const Lattice<obj> &rhs_1,const Lattice<obj> &rhs_2)
288 -> Lattice<decltype(trace(obj()))>
289{
290 typedef decltype(trace(obj())) robj;
291 Lattice<robj> ret_i(rhs_1.Grid());
292 autoView( rhs1 , rhs_1, AcceleratorRead);
293 autoView( rhs2 , rhs_2, AcceleratorRead);
294 autoView( ret , ret_i, AcceleratorWrite);
295 ret.Checkerboard() = rhs_1.Checkerboard();
296 accelerator_for(ss,rhs1.size(),obj::Nsimd(),{
297 coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2(ss)));
298 });
299 return ret_i;
300}
301
302template<class obj1,class obj2> auto traceProduct(const Lattice<obj1> &rhs_1,const obj2 &rhs2)
303 -> Lattice<decltype(trace(obj1()))>
304{
305 typedef decltype(trace(obj1())) robj;
306 Lattice<robj> ret_i(rhs_1.Grid());
307 autoView( rhs1 , rhs_1, AcceleratorRead);
308 autoView( ret , ret_i, AcceleratorWrite);
309 ret.Checkerboard() = rhs_1.Checkerboard();
310 accelerator_for(ss,rhs1.size(),obj1::Nsimd(),{
311 coalescedWrite(ret[ss],traceProduct(rhs1(ss),rhs2));
312 });
313 return ret_i;
314}
315template<class obj1,class obj2> auto traceProduct(const obj2 &rhs_2,const Lattice<obj1> &rhs_1)
316 -> Lattice<decltype(trace(obj1()))>
317{
318 return traceProduct(rhs_1,rhs_2);
319}
320
321
322
324#endif
#define accelerator_for(iterator, num, nsimd,...)
accelerator_inline Grid_simd2< S, V > trace(const Grid_simd2< S, V > &arg)
RealD axpy_norm(Lattice< vobj > &ret, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
void add(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void mac(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void axpy(Lattice< vobj > &ret, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
void sub(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void axpby(Lattice< vobj > &ret, sobj a, sobj b, const Lattice< vobj > &x, const Lattice< vobj > &y)
RealD axpby_norm(Lattice< vobj > &ret, sobj a, sobj b, const Lattice< vobj > &x, const Lattice< vobj > &y)
auto traceProduct(const Lattice< obj > &rhs_1, const Lattice< obj > &rhs_2) -> Lattice< decltype(trace(obj()))>
Trace product.
void mult(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void conformable(const Lattice< obj1 > &lhs, const Lattice< obj2 > &rhs)
strong_inline RealD axpby_norm_fast(Lattice< vobj > &z, sobj a, sobj b, const Lattice< vobj > &x, const Lattice< vobj > &y)
strong_inline RealD axpy_norm_fast(Lattice< vobj > &z, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
RealD norm2(const Lattice< vobj > &arg)
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
double RealD
Definition Simd.h:61
#define GRID_TRACE(name)
Definition Tracing.h:68
accelerator_inline int Checkerboard(void) const