Grid 0.7.0
Tensor_inner.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/tensors/Tensor_inner.h
6
7 Copyright (C) 2015
8
9Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: Christoph Lehner <christoph@lhnr.de>
12
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2 of the License, or
16 (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License along
24 with this program; if not, write to the Free Software Foundation, Inc.,
25 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26
27 See the full license in the file "LICENSE" in the top level distribution directory
28*************************************************************************************/
29/* END LEGAL */
30#ifndef GRID_MATH_INNER_H
31#define GRID_MATH_INNER_H
32
34
36// innerProduct Scalar x Scalar -> Scalar
37// innerProduct Vector x Vector -> Scalar
38// innerProduct Matrix x Matrix -> Scalar
40template<class sobj> accelerator_inline RealD norm2(const sobj &arg){
41 auto nrm = innerProductD(arg,arg);
42 RealD ret = real(nrm);
43 return ret;
44}
45
47// FIXME horizontal sum: single promote to double and sum
48// Not yet used; make sum_cpu in LatticeReduction.h call these
49// GPU sum_gpu is using a double precision promotion.
51#if 0
52accelerator_inline ComplexD ReduceD(const ComplexF &l) { return l; };
53accelerator_inline ComplexD ReduceD(const ComplexD &l) { return l; };
54accelerator_inline RealD ReduceD(const RealD &l) { return l; };
55accelerator_inline RealD ReduceD(const RealF &l) { return l; };
56
57accelerator_inline ComplexD ReduceD(const vComplexD &l) { return Reduce(l); };
58accelerator_inline RealD ReduceD(const vRealD &l) { return Reduce(l); };
59accelerator_inline ComplexD ReduceD(const vComplexF &l)
60{
61 vComplexD la,lb;
62 Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
63 return Reduce(la)+Reduce(lb);
64}
65accelerator_inline RealD ReduceD(const vRealF &l)
66{
67 vRealD la,lb;
68 Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
69 return Reduce(la)+Reduce(lb);
70};
72// Now do it for vector, matrix, scalar
74template<class l,int N> accelerator_inline
75auto ReduceD (const iVector<l,N>& lhs) -> iVector<decltype(ReduceD(lhs._internal[0])),N>
76{
77 typedef decltype(ReduceD(lhs._internal[0])) ret_t;
79 for(int c1=0;c1<N;c1++){
80 ret._internal[c1] = ReduceD(lhs._internal[c1]);
81 }
82 return ret;
83}
84template<class l,int N> accelerator_inline
85auto ReduceD (const iMatrix<l,N>& lhs) -> iMatrix<decltype(ReduceD(lhs._internal[0][0])),N>
86{
87 typedef decltype(ReduceD(lhs._internal[0][0])) ret_t;
89 for(int c1=0;c1<N;c1++){
90 for(int c2=0;c2<N;c2++){
91 ret._internal[c1][c2]=ReduceD(lhs._internal[c1][c2]);
92 }}
93 return ret;
94}
95template<class l> accelerator_inline
96auto ReduceD (const iScalar<l>& lhs) -> iScalar<decltype(ReduceD(lhs._internal))>
97{
98 typedef decltype(ReduceD(lhs._internal)) ret_t;
100 ret._internal = ReduceD(lhs._internal);
101 return ret;
102}
103#endif
105// Now do Reduce for vector, matrix, scalar
107template<class l,int N> accelerator_inline
108auto Reduce (const iVector<l,N>& lhs) -> iVector<decltype(Reduce(lhs._internal[0])),N>
109{
110 typedef decltype(Reduce(lhs._internal[0])) ret_t;
112 for(int c1=0;c1<N;c1++){
113 ret._internal[c1] = Reduce(lhs._internal[c1]);
114 }
115 return ret;
116}
117template<class l,int N> accelerator_inline
118auto Reduce (const iMatrix<l,N>& lhs) -> iMatrix<decltype(Reduce(lhs._internal[0][0])),N>
119{
120 typedef decltype(Reduce(lhs._internal[0][0])) ret_t;
122 for(int c1=0;c1<N;c1++){
123 for(int c2=0;c2<N;c2++){
124 ret._internal[c1][c2]=Reduce(lhs._internal[c1][c2]);
125 }}
126 return ret;
127}
128template<class l> accelerator_inline
129auto Reduce (const iScalar<l>& lhs) -> iScalar<decltype(Reduce(lhs._internal))>
130{
131 typedef decltype(Reduce(lhs._internal)) ret_t;
132 iScalar<ret_t> ret;
133 ret._internal = Reduce(lhs._internal);
134 return ret;
135}
136
137
138
140// innerProductD : if single promote to double and evaluate with sum 2x
146
150{
151 vComplexD la,lb;
152 vComplexD ra,rb;
153 Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
154 Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
155 return innerProduct(la,ra) + innerProduct(lb,rb);
156}
158{
159 vRealD la,lb;
160 vRealD ra,rb;
161 Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
162 Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
163 return innerProduct(la,ra) + innerProduct(lb,rb);
164}
165
166// Now do it for vector, matrix, scalar
167template<class l,class r,int N> accelerator_inline
168auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
169{
170 typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t;
171 iScalar<ret_t> ret;
172 zeroit(ret);
173 for(int c1=0;c1<N;c1++){
174 ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]);
175 }
176 return ret;
177}
178template<class l,class r,int N> accelerator_inline
179auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))>
180{
181 typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
182 iScalar<ret_t> ret;
183 ret=Zero();
184 for(int c1=0;c1<N;c1++){
185 for(int c2=0;c2<N;c2++){
186 ret._internal+=innerProductD(lhs._internal[c1][c2],rhs._internal[c1][c2]);
187 }}
188 return ret;
189}
190template<class l,class r> accelerator_inline
191auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))>
192{
193 typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t;
194 iScalar<ret_t> ret;
195 ret._internal = innerProductD(lhs._internal,rhs._internal);
196 return ret;
197}
198
199
201// innerProductD2: precision promotion without inner sum
203
206
211
214
216{
217 vComplexD2 dl,dr;
218 vComplexD2 ret;
219 precisionChange(dl,l);
220 precisionChange(dr,r);
221 ret = innerProduct(dl,dr);
222 return ret;
223}
225{
226 vRealD2 dl,dr;
227 vRealD2 ret;
228 precisionChange(dl,l);
229 precisionChange(dr,r);
230 ret=innerProduct(dl,dr);
231 return ret;
232}
233
234// Now do it for vector, matrix, scalar
235template<class l,class r,int N> accelerator_inline
236 auto innerProductD2 (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal[0],rhs._internal[0]))>
237{
238 typedef decltype(innerProductD2(lhs._internal[0],rhs._internal[0])) ret_t;
239 iScalar<ret_t> ret;
240 zeroit(ret);
241 for(int c1=0;c1<N;c1++){
242 ret._internal += innerProductD2(lhs._internal[c1],rhs._internal[c1]);
243 }
244 return ret;
245}
246template<class l,class r,int N> accelerator_inline
247 auto innerProductD2 (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0]))>
248{
249 typedef decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
250 iScalar<ret_t> ret;
251 ret=Zero();
252 for(int c1=0;c1<N;c1++){
253 for(int c2=0;c2<N;c2++){
254 ret._internal+=innerProductD2(lhs._internal[c1][c2],rhs._internal[c1][c2]);
255 }}
256 return ret;
257}
258template<class l,class r> accelerator_inline
259 auto innerProductD2 (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal,rhs._internal))>
260{
261 typedef decltype(innerProductD2(lhs._internal,rhs._internal)) ret_t;
262 iScalar<ret_t> ret;
263 ret._internal = innerProductD2(lhs._internal,rhs._internal);
264 return ret;
265}
266
268// Keep same precison
270template<class l,class r,int N> accelerator_inline
271auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
272{
273 typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
274 iScalar<ret_t> ret;
275 ret=Zero();
276 for(int c1=0;c1<N;c1++){
277 ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
278 }
279 return ret;
280}
281template<class l,class r,int N> accelerator_inline
282auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
283{
284 typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
285 iScalar<ret_t> ret;
286 ret=Zero();
287 for(int c1=0;c1<N;c1++){
288 for(int c2=0;c2<N;c2++){
289 ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
290 }}
291 return ret;
292}
293template<class l,class r> accelerator_inline
294auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
295{
296 typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
297 iScalar<ret_t> ret;
298 ret._internal = innerProduct(lhs._internal,rhs._internal);
299 return ret;
300}
301
303
304#endif
#define accelerator_inline
accelerator_inline void zeroit(Grid_simd2< S, V > &z)
Grid_simd2< complex< double >, vComplexD > vComplexD2
Grid_simd2< double, vRealD > vRealD2
Grid_simd< complex< float >, SIMD_Ftype > vComplexF
Grid_simd< float, SIMD_Ftype > vRealF
Grid_simd< complex< double >, SIMD_Dtype > vComplexD
Grid_simd< double, SIMD_Dtype > vRealD
Lattice< vobj > real(const Lattice< vobj > &lhs)
void precisionChange(Lattice< VobjOut > &out, const Lattice< VobjIn > &in, const precisionChangeWorkspace &workspace)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
std::complex< RealF > ComplexF
Definition Simd.h:78
float RealF
Definition Simd.h:60
std::complex< RealD > ComplexD
Definition Simd.h:79
double RealD
Definition Simd.h:61
accelerator_inline vComplexD2 TensorRemove(const vComplexD2 &x)
accelerator_inline ComplexD innerProductD2(const ComplexF &l, const ComplexF &r)
accelerator_inline RealD norm2(const sobj &arg)
accelerator_inline auto Reduce(const iVector< l, N > &lhs) -> iVector< decltype(Reduce(lhs._internal[0])), N >
accelerator_inline ComplexD innerProductD(const ComplexF &l, const ComplexF &r)
accelerator_inline auto innerProduct(const iVector< l, N > &lhs, const iVector< r, N > &rhs) -> iScalar< decltype(innerProduct(lhs._internal[0], rhs._internal[0]))>
Vector_type v
Definition Simd.h:194
vtype _internal