Grid 0.7.0
Tensor_arith_mul.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/tensors/Tensor_arith_mul.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#ifndef GRID_MATH_ARITH_MUL_H
29#define GRID_MATH_ARITH_MUL_H
30
32
36
37template<class rtype,class vtype,class mtype>
38accelerator_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
39 mult(&ret->_internal,&lhs->_internal,&rhs->_internal);
40}
41
42template<class rrtype,class ltype,class rtype,int N>
43accelerator_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
44 for(int c1=0;c1<N;c1++){
45 for(int c2=0;c2<N;c2++){
46 mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
47 }
48 }
49 for(int c1=0;c1<N;c1++){
50 for(int c3=1;c3<N;c3++){
51 for(int c2=0;c2<N;c2++){
52 mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
53 }
54 }
55 }
56 return;
57}
58
59template<class rrtype,class ltype,class rtype,int N>
60accelerator_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
61 for(int c2=0;c2<N;c2++){
62 for(int c1=0;c1<N;c1++){
63 mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
64 }}
65 return;
66}
67
68template<class rrtype,class ltype,class rtype, int N>
69accelerator_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
70 for(int c2=0;c2<N;c2++){
71 for(int c1=0;c1<N;c1++){
72 mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
73 }}
74 return;
75}
76// Matrix left multiplies vector
77template<class rtype,class vtype,class mtype,int N>
78accelerator_inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,N> * __restrict__ lhs,const iVector<vtype,N> * __restrict__ rhs)
79{
80 for(int c1=0;c1<N;c1++){
81 mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]);
82 for(int c2=1;c2<N;c2++){
83 mac(&ret->_internal[c1],&lhs->_internal[c1][c2],&rhs->_internal[c2]);
84 }
85 }
86 return;
87}
88template<class rtype,class vtype,class mtype,int N>
90 const iScalar<mtype> * __restrict__ lhs,
91 const iVector<vtype,N> * __restrict__ rhs){
92 for(int c1=0;c1<N;c1++){
93 mult(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
94 }
95}
96template<class rtype,class vtype,class mtype,int N>
98 const iVector<vtype,N> * __restrict__ rhs,
99 const iScalar<mtype> * __restrict__ lhs){
100 for(int c1=0;c1<N;c1++){
101 mult(&ret->_internal[c1],&rhs->_internal[c1],&lhs->_internal);
102 }
103}
104
105
106
107template<class rtype,class vtype,class mtype,int N> accelerator_inline
109{
111 mult(&ret,&lhs,&rhs);
112 return ret;
113}
114
115template<class rtype,class vtype,class mtype,int N> accelerator_inline
117{
119 mult(&ret,&lhs,&rhs);
120 return ret;
121}
122
123template<class rtype,class vtype,class mtype,int N> accelerator_inline
125{
127 mult(&ret,&lhs,&rhs);
128 return ret;
129}
130
132// Divide by scalar
134template<class rtype,class vtype> accelerator_inline
136{
137 iScalar<rtype> ret;
138 ret._internal = lhs._internal/rhs._internal;
139 return ret;
140}
141template<class rtype,class vtype,int N> accelerator_inline
143{
145 for(int i=0;i<N;i++){
146 ret._internal[i] = lhs._internal[i]/rhs._internal;
147 }
148 return ret;
149}
150template<class rtype,class vtype,int N> accelerator_inline
152{
154 for(int i=0;i<N;i++){
155 for(int j=0;j<N;j++){
156 ret._internal[i][j] = lhs._internal[i][j]/rhs._internal;
157 }}
158 return ret;
159}
160
162// Glue operators to mult routines. Must resolve return type cleverly from typeof(internal)
163// since nesting matrix<scalar> x matrix<matrix>-> matrix<matrix>
164// while matrix<scalar> x matrix<scalar>-> matrix<scalar>
165// so return type depends on argument types in nasty way.
167// scal x scal = scal
168// mat x mat = mat
169// mat x scal = mat
170// scal x mat = mat
171// mat x vec = vec
172// vec x scal = vec
173// scal x vec = vec
174//
175// We can special case scalar_type ??
176template<class l,class r>
177accelerator_inline auto operator * (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(lhs._internal * rhs._internal)>
178{
179 typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t;
180 ret_t ret;
181 mult(&ret,&lhs,&rhs);
182 return ret;
183}
184template<class l,class r,int N> accelerator_inline
185auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal[0][0]),N>
186{
187 typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t;
189 mult(&ret,&lhs,&rhs);
190 return ret;
191}
192template<class l,class r, int N> accelerator_inline
193auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal),N>
194{
195 typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t;
196
198 for(int c1=0;c1<N;c1++){
199 for(int c2=0;c2<N;c2++){
200 mult(&ret._internal[c1][c2],&lhs._internal[c1][c2],&rhs._internal);
201 }}
202 return ret;
203}
204template<class l,class r,int N> accelerator_inline
205auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal*rhs._internal[0][0]),N>
206{
207 typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t;
209 for(int c1=0;c1<N;c1++){
210 for(int c2=0;c2<N;c2++){
211 mult(&ret._internal[c1][c2],&lhs._internal,&rhs._internal[c1][c2]);
212 }}
213 return ret;
214}
215template<class l,class r,int N> accelerator_inline
216auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal[0][0]*rhs._internal[0]),N>
217{
218 typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t;
220 for(int c1=0;c1<N;c1++){
221 mult(&ret._internal[c1],&lhs._internal[c1][0],&rhs._internal[0]);
222 for(int c2=1;c2<N;c2++){
223 mac(&ret._internal[c1],&lhs._internal[c1][c2],&rhs._internal[c2]);
224 }
225 }
226 return ret;
227}
228template<class l,class r,int N> accelerator_inline
229auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal*rhs._internal[0]),N>
230{
231 typedef decltype(lhs._internal*rhs._internal[0]) ret_t;
233 for(int c1=0;c1<N;c1++){
234 mult(&ret._internal[c1],&lhs._internal,&rhs._internal[c1]);
235 }
236 return ret;
237}
238template<class l,class r,int N> accelerator_inline
239auto operator * (const iVector<l,N>& lhs,const iScalar<r>& rhs) -> iVector<decltype(lhs._internal[0]*rhs._internal),N>
240{
241 typedef decltype(lhs._internal[0]*rhs._internal) ret_t;
243 for(int c1=0;c1<N;c1++){
244 mult(&ret._internal[c1],&lhs._internal[c1],&rhs._internal);
245 }
246 return ret;
247}
248
250
251
252#endif
#define accelerator_inline
void mac(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline iScalar< rtype > operator/(const iScalar< rtype > &lhs, const iScalar< vtype > &rhs)
accelerator_inline void mult(iScalar< rtype > *__restrict__ ret, const iScalar< mtype > *__restrict__ lhs, const iScalar< vtype > *__restrict__ rhs)
accelerator_inline iVector< rtype, N > operator*(const iMatrix< mtype, N > &lhs, const iVector< vtype, N > &rhs)
vtype _internal[N][N]
vtype _internal
vtype _internal[N]