Grid 0.7.0
PaddedCell.h
Go to the documentation of this file.
1/*************************************************************************************
2 Grid physics library, www.github.com/paboyle/Grid
3
4 Source file: ./lib/lattice/PaddedCell.h
5
6 Copyright (C) 2019
7
8Author: Peter Boyle pboyle@bnl.gov
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License along
21 with this program; if not, write to the Free Software Foundation, Inc.,
22 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23
24 See the full license in the file "LICENSE" in the top level distribution directory
25*************************************************************************************/
26/* END LEGAL */
27#pragma once
28
30
32
33//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
34template<typename vobj>
36 virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
37 virtual ~CshiftImplBase(){}
38};
39template<typename vobj>
40struct CshiftImplDefault: public CshiftImplBase<vobj>{
41 Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
42};
43template<typename Gimpl>
44struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
45 typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
46};
47
48
/*
 *
 * TODO:
 *  -- address elements of vobj via thread block in Scatter/Gather
 *  -- overlap comms with motion in Face_exchange
 *
 */
56
57template<class vobj> inline void ScatterSlice(const deviceVector<vobj> &buf,
58 Lattice<vobj> &lat,
59 int x,
60 int dim,
61 int offset=0)
62{
63 const int Nsimd=vobj::Nsimd();
64 typedef typename vobj::scalar_object sobj;
65 typedef typename vobj::scalar_type scalar_type;
66 typedef typename vobj::vector_type vector_type;
67
68 GridBase *grid = lat.Grid();
69 Coordinate simd = grid->_simd_layout;
70 int Nd = grid->Nd();
71 int block = grid->_slice_block[dim];
72 int stride = grid->_slice_stride[dim];
73 int nblock = grid->_slice_nblock[dim];
74 int rd = grid->_rdimensions[dim];
75
76 int ox = x%rd;
77 int ix = x/rd;
78
79 int isites = 1; for(int d=0;d<Nd;d++) if( d!=dim) isites*=simd[d];
80
81 Coordinate rsimd= simd; rsimd[dim]=1; // maybe reduce Nsimd
82
83 int rNsimd = 1; for(int d=0;d<Nd;d++) rNsimd*=rsimd[d];
84 int rNsimda= Nsimd/simd[dim]; // should be equal
85 assert(rNsimda==rNsimd);
86 int face_ovol=block*nblock;
87
88 // assert(buf.size()==face_ovol*rNsimd);
89
90 /*This will work GPU ONLY unless rNsimd is put in the lexico index*/
91 //Let's make it work on GPU and then make a special accelerator_for that
92 //doesn't hide the SIMD direction and keeps explicit in the threadIdx
93 //for cross platform
94 // FIXME -- can put internal indices into thread loop
95 auto buf_p = & buf[0];
96 autoView(lat_v, lat, AcceleratorWrite);
97 accelerator_for(ss, face_ovol/simd[dim],Nsimd,{
98
99 // scalar layout won't coalesce
100#ifdef GRID_SIMT
101 {
102 int blane=acceleratorSIMTlane(Nsimd); // buffer lane
103#else
104 for(int blane=0;blane<Nsimd;blane++) {
105#endif
106 int olane=blane%rNsimd; // reduced lattice lane
107 int obit =blane/rNsimd;
108
110 // osite -- potentially one bit from simd in the buffer: (ss<<1)|obit
112 int ssp = ss*simd[dim]+obit;
113 int b = ssp%block;
114 int n = ssp/block;
115 int osite= b+n*stride + ox*block;
116
118 // isite -- map lane within buffer to lane within lattice
120 Coordinate icoor;
121 int lane;
122 Lexicographic::CoorFromIndex(icoor,olane,rsimd);
123 icoor[dim]=ix;
124 Lexicographic::IndexFromCoor(icoor,lane,simd);
125
127 // Transfer into lattice - will coalesce
129 // sobj obj = extractLane(blane,buf_p[ss+offset]);
130 // insertLane(lane,lat_v[osite],obj);
131 const int words=sizeof(vobj)/sizeof(vector_type);
132 vector_type * from = (vector_type *)&buf_p[ss+offset];
133 vector_type * to = (vector_type *)&lat_v[osite];
134 scalar_type stmp;
135 for(int w=0;w<words;w++){
136 stmp = getlane(from[w], blane);
137 putlane(to[w], stmp, lane);
138 }
139 }
140 });
141}
142
143template<class vobj> inline void GatherSlice(deviceVector<vobj> &buf,
144 const Lattice<vobj> &lat,
145 int x,
146 int dim,
147 int offset=0)
148{
149 const int Nsimd=vobj::Nsimd();
150 typedef typename vobj::scalar_object sobj;
151 typedef typename vobj::scalar_type scalar_type;
152 typedef typename vobj::vector_type vector_type;
153
154 autoView(lat_v, lat, AcceleratorRead);
155
156 GridBase *grid = lat.Grid();
157 Coordinate simd = grid->_simd_layout;
158 int Nd = grid->Nd();
159 int block = grid->_slice_block[dim];
160 int stride = grid->_slice_stride[dim];
161 int nblock = grid->_slice_nblock[dim];
162 int rd = grid->_rdimensions[dim];
163
164 int ox = x%rd;
165 int ix = x/rd;
166
167 int isites = 1; for(int d=0;d<Nd;d++) if( d!=dim) isites*=simd[d];
168
169 Coordinate rsimd= simd; rsimd[dim]=1; // maybe reduce Nsimd
170
171 int rNsimd = 1; for(int d=0;d<Nd;d++) rNsimd*=rsimd[d];
172
173 int face_ovol=block*nblock;
174
175 // assert(buf.size()==face_ovol*rNsimd);
176
177 /*This will work GPU ONLY unless rNsimd is put in the lexico index*/
178 //Let's make it work on GPU and then make a special accelerator_for that
179 //doesn't hide the SIMD direction and keeps explicit in the threadIdx
180 //for cross platform
181 //For CPU perhaps just run a loop over Nsimd
182 auto buf_p = & buf[0];
183 accelerator_for(ss, face_ovol/simd[dim],Nsimd,{
184
185 // scalar layout won't coalesce
186#ifdef GRID_SIMT
187 {
188 int blane=acceleratorSIMTlane(Nsimd); // buffer lane
189#else
190 for(int blane=0;blane<Nsimd;blane++) {
191#endif
192 int olane=blane%rNsimd; // reduced lattice lane
193 int obit =blane/rNsimd;
194
196 // osite
198 int ssp = ss*simd[dim]+obit;
199 int b = ssp%block;
200 int n = ssp/block;
201 int osite= b+n*stride + ox*block;
202
204 // isite -- map lane within buffer to lane within lattice
206 Coordinate icoor;
207 int lane;
208 Lexicographic::CoorFromIndex(icoor,olane,rsimd);
209 icoor[dim]=ix;
210 Lexicographic::IndexFromCoor(icoor,lane,simd);
211
213 // Take out of lattice
215 // sobj obj = extractLane(lane,lat_v[osite]);
216 // insertLane(blane,buf_p[ss+offset],obj);
217 const int words=sizeof(vobj)/sizeof(vector_type);
218 vector_type * to = (vector_type *)&buf_p[ss+offset];
219 vector_type * from = (vector_type *)&lat_v[osite];
220 scalar_type stmp;
221 for(int w=0;w<words;w++){
222 stmp = getlane(from[w], lane);
223 putlane(to[w], stmp, blane);
224 }
225 }
226 });
227}
228
229
231public:
233 int dims;
234 int depth;
235 std::vector<GridCartesian *> grids;
236
238 {
239 DeleteGrids();
240 }
241 PaddedCell(int _depth,GridCartesian *_grid)
242 {
243 unpadded_grid = _grid;
244 depth=_depth;
245 dims=_grid->Nd();
247 Coordinate local =unpadded_grid->LocalDimensions();
248 Coordinate procs =unpadded_grid->ProcessorGrid();
249 for(int d=0;d<dims;d++){
250 if ( procs[d] > 1 ) assert(local[d]>=depth);
251 }
252 }
253 void DeleteGrids(void)
254 {
255 Coordinate processors=unpadded_grid->_processors;
256 for(int d=0;d<grids.size();d++){
257 if ( processors[d] > 1 ) {
258 delete grids[d];
259 }
260 }
261 grids.resize(0);
262 };
263 void AllocateGrids(void)
264 {
265 Coordinate local =unpadded_grid->LocalDimensions();
266 Coordinate simd =unpadded_grid->_simd_layout;
267 Coordinate processors=unpadded_grid->_processors;
268 Coordinate plocal =unpadded_grid->LocalDimensions();
269 Coordinate global(dims);
270 GridCartesian *old_grid = unpadded_grid;
271 // expand up one dim at a time
272 for(int d=0;d<dims;d++){
273
274 if ( processors[d] > 1 ) {
275 plocal[d] += 2*depth;
276
277 for(int d=0;d<dims;d++){
278 global[d] = plocal[d]*processors[d];
279 }
280
281 old_grid = new GridCartesian(global,simd,processors);
282 }
283 grids.push_back(old_grid);
284 }
285 };
286 template<class vobj>
287 inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
288 {
289 Coordinate processors=unpadded_grid->_processors;
290
292
293 Coordinate local =unpadded_grid->LocalDimensions();
294 // depends on the MPI spread
295 Coordinate fll(dims,depth);
296 Coordinate tll(dims,0); // depends on the MPI spread
297 for(int d=0;d<dims;d++){
298 if( processors[d]==1 ) fll[d]=0;
299 }
300 localCopyRegion(in,out,fll,tll,local);
301 return out;
302 }
303 template<class vobj>
305 {
306 GridBase *old_grid = in.Grid();
307 int dims = old_grid->Nd();
308 Lattice<vobj> tmp = in;
309 for(int d=0;d<dims;d++){
310 tmp = Expand(d,tmp,cshift); // rvalue && assignment
311 }
312 return tmp;
313 }
314 template<class vobj>
316 {
317 GridBase *old_grid = in.Grid();
318 int dims = old_grid->Nd();
319 Lattice<vobj> tmp = in;
320 for(int d=0;d<dims;d++){
321 tmp = ExpandPeriodic(d,tmp); // rvalue && assignment
322 }
323 return tmp;
324 }
325 // expand up one dim at a time
326 template<class vobj>
327 inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
328 {
329 Coordinate processors=unpadded_grid->_processors;
330 GridBase *old_grid = in.Grid();
331 GridCartesian *new_grid = grids[dim];//These are new grids
332 Lattice<vobj> padded(new_grid);
333 Lattice<vobj> shifted(old_grid);
334 Coordinate local =old_grid->LocalDimensions();
335 Coordinate plocal =new_grid->LocalDimensions();
336 if(dim==0) conformable(old_grid,unpadded_grid);
337 else conformable(old_grid,grids[dim-1]);
338
339 double tins=0, tshift=0;
340
341 int islocal = 0 ;
342 if ( processors[dim] == 1 ) islocal = 1;
343
344 if ( islocal ) {
345
346 // replace with a copy and maybe grid swizzle
347 // return in;??
348 double t = usecond();
349 padded = in;
350 tins += usecond() - t;
351
352 } else {
353
355 // Replace sequence with
356 // ---------------------
357 // (i) Gather high face(s); start comms
358 // (ii) Gather low face(s); start comms
359 // (iii) Copy middle bit with localCopyRegion
360 // (iv) Complete high face(s), insert slice(s)
361 // (iv) Complete low face(s), insert slice(s)
363 // Middle bit
364 double t = usecond();
365 for(int x=0;x<local[dim];x++){
366 InsertSliceLocal(in,padded,x,depth+x,dim);
367 }
368 tins += usecond() - t;
369
370 // High bit
371 t = usecond();
372 shifted = cshift.Cshift(in,dim,depth);
373 tshift += usecond() - t;
374
375 t=usecond();
376 for(int x=0;x<depth;x++){
377 InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
378 }
379 tins += usecond() - t;
380
381 // Low bit
382 t = usecond();
383 shifted = cshift.Cshift(in,dim,-depth);
384 tshift += usecond() - t;
385
386 t = usecond();
387 for(int x=0;x<depth;x++){
388 InsertSliceLocal(shifted,padded,x,x,dim);
389 }
390 tins += usecond() - t;
391
392 }
393 std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
394
395 return padded;
396 }
397
398 template<class vobj>
399 inline Lattice<vobj> ExpandPeriodic(int dim, const Lattice<vobj> &in) const
400 {
401 Coordinate processors=unpadded_grid->_processors;
402 GridBase *old_grid = in.Grid();
403 GridCartesian *new_grid = grids[dim];//These are new grids
404 Lattice<vobj> padded(new_grid);
405 // Lattice<vobj> shifted(old_grid);
406 Coordinate local =old_grid->LocalDimensions();
407 Coordinate plocal =new_grid->LocalDimensions();
408 if(dim==0) conformable(old_grid,unpadded_grid);
409 else conformable(old_grid,grids[dim-1]);
410
411 // std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
412 double tins=0, tshift=0;
413
414 int islocal = 0 ;
415 if ( processors[dim] == 1 ) islocal = 1;
416
417 if ( islocal ) {
418 padded=in; // slightly different interface could avoid a copy operation
419 } else {
420 Face_exchange(in,padded,dim,depth);
421 return padded;
422 }
423 return padded;
424 }
425 template<class vobj>
426 void Face_exchange(const Lattice<vobj> &from,
427 Lattice<vobj> &to,
428 int dimension,int depth) const
429 {
430 typedef typename vobj::vector_type vector_type;
431 typedef typename vobj::scalar_type scalar_type;
432 typedef typename vobj::scalar_object sobj;
433
434 RealD t_gather=0.0;
435 RealD t_scatter=0.0;
436 RealD t_comms=0.0;
437 RealD t_copy=0.0;
438
439 // std::cout << GridLogMessage << "dimension " <<dimension<<std::endl;
440 // DumpSliceNorm(std::string("Face_exchange from"),from,dimension);
441 GridBase *grid=from.Grid();
442 GridBase *new_grid=to.Grid();
443
444 Coordinate lds = from.Grid()->_ldimensions;
445 Coordinate nlds= to.Grid()->_ldimensions;
446 Coordinate simd= from.Grid()->_simd_layout;
447 int ld = lds[dimension];
448 int nld = to.Grid()->_ldimensions[dimension];
449 const int Nsimd = vobj::Nsimd();
450
451 assert(depth<=lds[dimension]); // A must be on neighbouring node
452 assert(depth>0); // A caller bug if zero
453 assert(ld+2*depth==nld);
455 // Face size and byte calculations
457 int buffer_size = 1;
458 for(int d=0;d<lds.size();d++){
459 if ( d!= dimension) buffer_size=buffer_size*lds[d];
460 }
461 buffer_size = buffer_size / Nsimd;
462 int rNsimd = Nsimd / simd[dimension];
463 assert( buffer_size == from.Grid()->_slice_nblock[dimension]*from.Grid()->_slice_block[dimension] / simd[dimension]);
464
465 static deviceVector<vobj> send_buf;
466 static deviceVector<vobj> recv_buf;
467 send_buf.resize(buffer_size*2*depth);
468 recv_buf.resize(buffer_size*2*depth);
469#ifndef ACCELERATOR_AWARE_MPI
470 static hostVector<vobj> hsend_buf;
471 static hostVector<vobj> hrecv_buf;
472 hsend_buf.resize(buffer_size*2*depth);
473 hrecv_buf.resize(buffer_size*2*depth);
474#endif
475
476 std::vector<MpiCommsRequest_t> fwd_req;
477 std::vector<MpiCommsRequest_t> bwd_req;
478
479 int words = buffer_size;
480 int bytes = words * sizeof(vobj);
481
483 // Communication coords
485 int comm_proc = 1;
486 int xmit_to_rank;
487 int recv_from_rank;
488 grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
489
491 // Gather all surface terms up to depth "d"
493 RealD t;
494 RealD t_tot=-usecond();
495 int plane=0;
496 for ( int d=0;d < depth ; d ++ ) {
497 int tag = d*1024 + dimension*2+0;
498
499 t=usecond();
500 GatherSlice(send_buf,from,d,dimension,plane*buffer_size); plane++;
501 t_gather+=usecond()-t;
502
503 t=usecond();
504#ifdef ACCELERATOR_AWARE_MPI
505 grid->SendToRecvFromBegin(fwd_req,
506 (void *)&send_buf[d*buffer_size], xmit_to_rank,
507 (void *)&recv_buf[d*buffer_size], recv_from_rank, bytes, tag);
508#else
509 acceleratorCopyFromDevice(&send_buf[d*buffer_size],&hsend_buf[d*buffer_size],bytes);
510 grid->SendToRecvFromBegin(fwd_req,
511 (void *)&hsend_buf[d*buffer_size], xmit_to_rank,
512 (void *)&hrecv_buf[d*buffer_size], recv_from_rank, bytes, tag);
513#endif
514 t_comms+=usecond()-t;
515 }
516 for ( int d=0;d < depth ; d ++ ) {
517 int tag = d*1024 + dimension*2+1;
518
519 t=usecond();
520 GatherSlice(send_buf,from,ld-depth+d,dimension,plane*buffer_size); plane++;
521 t_gather+= usecond() - t;
522
523 t=usecond();
524#ifdef ACCELERATOR_AWARE_MPI
525 grid->SendToRecvFromBegin(bwd_req,
526 (void *)&send_buf[(d+depth)*buffer_size], recv_from_rank,
527 (void *)&recv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag);
528#else
529 acceleratorCopyFromDevice(&send_buf[(d+depth)*buffer_size],&hsend_buf[(d+depth)*buffer_size],bytes);
530 grid->SendToRecvFromBegin(bwd_req,
531 (void *)&hsend_buf[(d+depth)*buffer_size], recv_from_rank,
532 (void *)&hrecv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag);
533#endif
534 t_comms+=usecond()-t;
535 }
536
538 // Copy interior -- overlap this with comms
540 int Nd = new_grid->Nd();
541 Coordinate LL(Nd,0);
542 Coordinate sz = grid->_ldimensions;
543 Coordinate toLL(Nd,0);
544 toLL[dimension]=depth;
545 t=usecond();
546 localCopyRegion(from,to,LL,toLL,sz);
547 t_copy= usecond() - t;
548
550 // Scatter all faces
552 plane=0;
553
554 t=usecond();
555 grid->CommsComplete(fwd_req);
556#ifndef ACCELERATOR_AWARE_MPI
557 for ( int d=0;d < depth ; d ++ ) {
558 acceleratorCopyToDevice(&hrecv_buf[d*buffer_size],&recv_buf[d*buffer_size],bytes);
559 }
560#endif
561 t_comms+= usecond() - t;
562
563 t=usecond();
564 for ( int d=0;d < depth ; d ++ ) {
565 ScatterSlice(recv_buf,to,nld-depth+d,dimension,plane*buffer_size); plane++;
566 }
567 t_scatter= usecond() - t;
568
569 t=usecond();
570 grid->CommsComplete(bwd_req);
571#ifndef ACCELERATOR_AWARE_MPI
572 for ( int d=0;d < depth ; d ++ ) {
573 acceleratorCopyToDevice(&hrecv_buf[(d+depth)*buffer_size],&recv_buf[(d+depth)*buffer_size],bytes);
574 }
575#endif
576 t_comms+= usecond() - t;
577
578 t=usecond();
579 for ( int d=0;d < depth ; d ++ ) {
580 ScatterSlice(recv_buf,to,d,dimension,plane*buffer_size); plane++;
581 }
582 t_scatter+= usecond() - t;
583 t_tot+=usecond();
584
585 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: gather :" << t_gather/1000 << "ms"<<std::endl;
586 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: scatter:" << t_scatter/1000 << "ms"<<std::endl;
587 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: copy :" << t_copy/1000 << "ms"<<std::endl;
588 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: comms :" << t_comms/1000 << "ms"<<std::endl;
589 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: total :" << t_tot/1000 << "ms"<<std::endl;
590 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: gather :" << depth*4.0*bytes/t_gather << "MB/s"<<std::endl;
591 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: scatter:" << depth*4.0*bytes/t_scatter<< "MB/s"<<std::endl;
592 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: comms :" << (RealD)4.0*bytes/t_comms << "MB/s"<<std::endl;
593 std::cout << GridLogPerformance << "PaddedCell::Expand new timings: face bytes :" << depth*bytes/1e6 << "MB"<<std::endl;
594 }
595
596};
597
598
600
601
accelerator_inline int acceleratorSIMTlane(int Nsimd)
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
void acceleratorCopyFromDevice(void *from, void *to, size_t bytes)
std::vector< T, devAllocator< T > > deviceVector
std::vector< T, alignedAllocator< T > > hostVector
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
accelerator_inline S getlane(const Grid_simd< S, V > &in, int lane)
accelerator_inline void putlane(Grid_simd< S, V > &vec, const S &_S, int lane)
void conformable(const Lattice< obj1 > &lhs, const Lattice< obj2 > &rhs)
void localCopyRegion(const Lattice< vobj > &From, Lattice< vobj > &To, Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize)
void InsertSliceLocal(const Lattice< vobj > &lowDim, Lattice< vobj > &higherDim, int slice_lo, int slice_hi, int orthog)
#define autoView(l_v, l, mode)
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN")
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
void ScatterSlice(const deviceVector< vobj > &buf, Lattice< vobj > &lat, int x, int dim, int offset=0)
Definition PaddedCell.h:57
void GatherSlice(deviceVector< vobj > &buf, const Lattice< vobj > &lat, int x, int dim, int offset=0)
Definition PaddedCell.h:143
static constexpr int Nd
Definition QCD.h:52
double RealD
Definition Simd.h:61
double usecond(void)
Definition Timer.h:50
const uint64_t plocal
accelerator_inline size_type size(void) const
Definition Coordinate.h:52
void CommsComplete(std::vector< MpiCommsRequest_t > &list)
void SendToRecvFromBegin(std::vector< MpiCommsRequest_t > &list, void *xmit, int dest, void *recv, int from, int bytes, int dir)
void ShiftedRanks(int dim, int shift, int &source, int &dest)
Coordinate _slice_stride
Coordinate _slice_nblock
Coordinate _slice_block
Coordinate _rdimensions
int Nd(void) const
Coordinate _simd_layout
Coordinate _ldimensions
const Coordinate & LocalDimensions(void)
GridBase * Grid(void) const
void AllocateGrids(void)
Definition PaddedCell.h:263
void Face_exchange(const Lattice< vobj > &from, Lattice< vobj > &to, int dimension, int depth) const
Definition PaddedCell.h:426
Lattice< vobj > Extract(const Lattice< vobj > &in) const
Definition PaddedCell.h:287
void DeleteGrids(void)
Definition PaddedCell.h:253
std::vector< GridCartesian * > grids
Definition PaddedCell.h:235
Lattice< vobj > ExchangePeriodic(const Lattice< vobj > &in) const
Definition PaddedCell.h:315
Lattice< vobj > Exchange(const Lattice< vobj > &in, const CshiftImplBase< vobj > &cshift=CshiftImplDefault< vobj >()) const
Definition PaddedCell.h:304
GridCartesian * unpadded_grid
Definition PaddedCell.h:232
PaddedCell(int _depth, GridCartesian *_grid)
Definition PaddedCell.h:241
Lattice< vobj > Expand(int dim, const Lattice< vobj > &in, const CshiftImplBase< vobj > &cshift=CshiftImplDefault< vobj >()) const
Definition PaddedCell.h:327
Lattice< vobj > ExpandPeriodic(int dim, const Lattice< vobj > &in) const
Definition PaddedCell.h:399
virtual ~CshiftImplBase()
Definition PaddedCell.h:37
virtual Lattice< vobj > Cshift(const Lattice< vobj > &in, int dir, int shift) const =0
Lattice< vobj > Cshift(const Lattice< vobj > &in, int dir, int shift) const override
Definition PaddedCell.h:41
Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override
Definition PaddedCell.h:45