Grid 0.7.0
Tensor_extract_merge.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/tensors/Tensor_extract_merge.h
6
7 Copyright (C) 2015
8
9Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: neo <cossu@post.kek.jp>
12Author: paboyle <paboyle@ph.ed.ac.uk>
13Author: Christopher Kelly <ckelly@phys.columbia.edu>
14
15 This program is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; either version 2 of the License, or
18 (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License along
26 with this program; if not, write to the Free Software Foundation, Inc.,
27 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28
29 See the full license in the file "LICENSE" in the top level distribution directory
30*************************************************************************************/
31/* END LEGAL */
32#pragma once
33
34#include <string.h>
35
36//#pragma GCC optimize("no-strict-aliasing")
37
39
41// Generic extract/merge/permute
43
46
47//void extract(const vobj &vec,ExtractBuffer<sobj> &extracted);
48//void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset);
49//void merge(vobj &vec,ExtractBuffer<sobj> &extracted);
50//void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset);
51
53// Extract to contiguous array scalar object
55template<class vobj,class sobj> accelerator
56void extract(const vobj &vec,ExtractBuffer<sobj> &extracted)
57{
58 typedef typename GridTypeMapper<sobj>::scalar_type sobj_scalar_type;
60 typedef typename GridTypeMapper<vobj>::vector_type vector_type;
61
62 const int words=sizeof(vobj)/sizeof(vector_type);
63 const int Nsimd=vector_type::Nsimd();
64 const int Nextr=extracted.size();
65 vector_type * vp = (vector_type *)&vec;
66 const int s=Nsimd/Nextr;
67 sobj_scalar_type *sp = (sobj_scalar_type *) &extracted[0];
68 sobj_scalar_type stmp;
69 for(int w=0;w<words;w++){
70 for(int i=0;i<Nextr;i++){
71 stmp = vp[w].getlane(i*s);
72 sp[i*words+w] =stmp;
73 // memcpy((char *)&sp[i*words+w],(char *)&stmp,sizeof(stmp));
74 }
75 }
76 /*
77 scalar_type *vp = (scalar_type *)&vec;
78 scalar_type vtmp;
79 sobj_scalar_type stmp;
80 for(int w=0;w<words;w++){
81 for(int i=0;i<Nextr;i++){
82 memcpy((char *)&vtmp,(char *)&vp[w*Nsimd+i*s],sizeof(vtmp));
83 stmp = vtmp;
84 memcpy((char *)&sp[i*words+w],(char *)&stmp,sizeof(stmp));
85 }
86 }
87 */
88
89 return;
90}
91
93// Merge a contiguous array of scalar objects
95template<class vobj,class sobj> accelerator
96void merge(vobj &vec,ExtractBuffer<sobj> &extracted)
97{
98 typedef typename GridTypeMapper<sobj>::scalar_type sobj_scalar_type;
100 typedef typename GridTypeMapper<vobj>::vector_type vector_type;
101
102 const int words=sizeof(vobj)/sizeof(vector_type);
103 const int Nsimd=vector_type::Nsimd();
104 const int Nextr = extracted.size();
105 const int s=Nsimd/Nextr;
106
107 sobj_scalar_type *sp = (sobj_scalar_type *)&extracted[0];
108 vector_type *vp = (vector_type *)&vec;
109 scalar_type vtmp;
110 sobj_scalar_type stmp;
111 for(int w=0;w<words;w++){
112 for(int i=0;i<Nextr;i++){
113 for(int ii=0;ii<s;ii++){
114 memcpy((char *)&stmp,(char *)&sp[i*words+w],sizeof(stmp));
115 vtmp = stmp;
116 vp[w].putlane(vtmp,i*s+ii);
117 // memcpy((char *)&vp[w*Nsimd+i*s+ii],(char *)&vtmp,sizeof(vtmp));
118 }
119 }
120 }
121}
122
124// Extract/Insert a single lane
126template<class vobj> accelerator_inline
127typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec)
128{
129 typedef typename vobj::scalar_type scalar_type;
130 typedef typename vobj::scalar_object scalar_object;
131 typedef typename vobj::vector_type vector_type;
132 typedef typename ExtractTypeMap<scalar_type>::extract_type extract_type;
133 typedef scalar_type * pointer;
134
135 constexpr int words=sizeof(vobj)/sizeof(vector_type);
136
137 scalar_object extracted;
138 pointer __restrict__ sp = (pointer)&extracted; // Type pun
139 vector_type *vp = (vector_type *)&vec;
140 for(int w=0;w<words;w++){
141 sp[w]=vp[w].getlane(lane);
142 }
143 return extracted;
144}
145
146template<class vobj> accelerator_inline
147void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted)
148{
149 typedef typename vobj::vector_type vector_type;
150 typedef typename vector_type::scalar_type scalar_type;
151 typedef typename ExtractTypeMap<scalar_type>::extract_type extract_type;
152 typedef scalar_type * pointer;
153
154 constexpr int words=sizeof(vobj)/sizeof(vector_type);
155
156 pointer __restrict__ sp = (pointer)&extracted;
157 vector_type *vp = (vector_type *)&vec;
158 for(int w=0;w<words;w++){
159 vp[w].putlane(sp[w],lane);
160 }
161}
162
164// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
166template<class vobj, class sobj> accelerator
167void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
168{
169 typedef typename GridTypeMapper<sobj>::scalar_type sobj_scalar_type;
171 typedef typename GridTypeMapper<vobj>::vector_type vector_type;
172
173 const int words=sizeof(vobj)/sizeof(vector_type);
174 const int Nsimd=vector_type::Nsimd();
175 const int Nextr=extracted.size();
176 const int s = Nsimd/Nextr;
177
178 vector_type * vp = (vector_type *)&vec;
179 for(int w=0;w<words;w++){
180 for(int i=0;i<Nextr;i++){
181 sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
182 pointer[w] = vp[w].getlane(i*s);
183 }
184 }
185}
186
188// Merge bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
190template<class vobj, class sobj> accelerator
191void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
192{
193 typedef typename GridTypeMapper<sobj>::scalar_type sobj_scalar_type;
195 typedef typename GridTypeMapper<vobj>::vector_type vector_type;
196
197 const int words=sizeof(vobj)/sizeof(vector_type);
198 const int Nsimd=vector_type::Nsimd();
199 const int Nextr=extracted.size();
200 const int s = Nsimd/Nextr;
201
202 vector_type * vp = (vector_type *)&vec;
203 scalar_type vtmp;
204 for(int w=0;w<words;w++){
205 for(int i=0;i<Nextr;i++){
206 sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
207 for(int ii=0;ii<s;ii++){
208 vtmp=pointer[w];
209 vp[w].putlane(vtmp,i*s+ii);
210 }
211 }
212 }
213}
214
215
217//Copy a single lane of a SIMD tensor type from one object to another
218//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
220template<class vobjOut, class vobjIn>
222void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
223{
224 static_assert( std::is_same<typename vobjOut::scalar_typeD, typename vobjIn::scalar_typeD>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
225
226 typedef typename vobjOut::vector_type ovector_type;
227 typedef typename vobjIn::vector_type ivector_type;
228 constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
229 constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
230 static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
231
232 typedef typename vobjOut::scalar_type oscalar_type;
233 typedef typename vobjIn::scalar_type iscalar_type;
234 typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
235 typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
236
237 typedef oextract_type * opointer;
238 typedef iextract_type * ipointer;
239
240 iscalar_type itmp;
241 oscalar_type otmp;
242
243 ovector_type * __restrict__ op = (ovector_type *)&vecOut;
244 ivector_type * __restrict__ ip = (ivector_type *)&vecIn;
245 for(int w=0;w<owords;w++){
246 itmp = ip[w].getlane(lane_in);
247 otmp = itmp; //potential precision change
248 op[w].putlane(otmp,lane_out);
249 }
250}
251
252
254
#define accelerator_inline
#define accelerator
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
AcceleratorVector< __T,GRID_MAX_SIMD > ExtractBuffer
accelerator_inline void copyLane(vobjOut &__restrict__ vecOut, int lane_out, const vobjIn &__restrict__ vecIn, int lane_in)
accelerator_inline void insertLane(int lane, vobj &__restrict__ vec, const typename vobj::scalar_object &__restrict__ extracted)
AcceleratorVector< __T *, GRID_MAX_SIMD > ExtractPointerArray
accelerator_inline vobj::scalar_object extractLane(int lane, const vobj &__restrict__ vec)
accelerator void extract(const vobj &vec, ExtractBuffer< sobj > &extracted)
accelerator void merge(vobj &vec, ExtractBuffer< sobj > &extracted)
accelerator_inline size_type size(void) const
Definition Coordinate.h:52