42 uint64_t sz=Cshift_obj.size();
43 if (Cshift_obj_device.size()!=sz ) {
44 Cshift_obj_device.resize(sz);
47 (
void *)&Cshift_obj_device[0],
48 sizeof(Cshift_obj[0])*sz);
58 return &Cshift_obj_device[0];
79 for(
int n=0;n<e1;n++){
80 for(
int b=0;b<e2;b++){
86 for(
int n=0;n<e1;n++){
87 for(
int b=0;b<e2;b++){
104template<
class vobj>
void
122 if ( cbmask == 0x3 ) {
123 for(
int n=0;n<e1;n++){
124 for(
int b=0;b<e2;b++){
127 Cshift_table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
132 for(
int n=0;n<e1;n++){
133 for(
int b=0;b<e2;b++){
137 Cshift_table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
143 auto buffer_p = & buffer[0];
155template<
class vobj>
void
158 int dimension,
int plane,
int cbmask)
180 vobj temp =rhs_v[so+o+b];
186 std::cout <<
" Dense packed buffer WARNING " <<std::endl;
202 if ( ocb & cbmask ) {
203 vobj temp =rhs_v[so+o+b];
231 if ( cbmask ==0x3 ) {
233 for(
int n=0;n<e1;n++){
234 for(
int b=0;b<e2;b++){
243 for(
int n=0;n<e1;n++){
244 for(
int b=0;b<e2;b++){
247 if ( ocb & cbmask ) {
255 auto buffer_p = & buffer[0];
287 int o = n*_slice_stride;
288 int offset = b+n*_slice_block;
289 merge(rhs_v[so+o+b],pointers,offset);
295 std::cout <<
"Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;
296 std::cout<<
" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
299 for(
int n=0;n<e1;n++){
300 for(
int b=0;b<e2;b++){
305 merge(rhs_v[so+o+b],pointers,offset);
328 coalescedWrite(lhs_v[table[i]+lo],coalescedRead(rhs_v[table[i]+ro]));
342 permute(lhs_v[table[i]+lo],rhs_v[table[i]+ro],permute_type);
356 if ( sshift[0] == sshift[1] ) {
374 shift = (shift+fd)%fd;
376 int cb= (cbmask==0x2)?
Odd :
Even;
383 int permute_type_dist;
402 int wrap = sshift/rd; wrap=wrap % ly;
410 for(
int x=0;x<rd;x++){
412 int sx = (x+sshift)%rd;
416 if ( x< rd-num ) permute_slice=wrap;
417 else permute_slice = (wrap+1)%ly;
419 if ( (ly>2) && (permute_slice) ) {
421 permute_type_dist = permute_type|permute_slice;
423 permute_type_dist = permute_type;
427 if ( permute_slice )
Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
428 else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
std::vector< T, devAllocator< T > > deviceVector
accelerator_inline int RedBlackCheckerBoardFromOindex(int oindex, const Coordinate &rdim, const Coordinate &chk_dim_msk)
AcceleratorVector< int, MaxDims > Coordinate
void Copy_plane(Lattice< vobj > &lhs, const Lattice< vobj > &rhs, int dimension, int lplane, int rplane, int cbmask)
void Copy_plane_permute(Lattice< vobj > &lhs, const Lattice< vobj > &rhs, int dimension, int lplane, int rplane, int cbmask, int permute_type)
deviceVector< std::pair< int, int > > Cshift_table_device
void Gather_plane_extract(const Lattice< vobj > &rhs, ExtractPointerArray< typename vobj::scalar_object > pointers, int dimension, int plane, int cbmask)
std::vector< int > Cshift_vector
void CalculateCshiftVector(Lattice< vobj > &ret, const Lattice< vobj > &rhs, int dimension, int cbmask)
vobj * MapCshift(std::vector< vobj > &Cshift_obj, deviceVector< vobj > &Cshift_obj_device)
void Scatter_plane_simple(Lattice< vobj > &rhs, deviceVector< vobj > &buffer, int dimension, int plane, int cbmask)
void Scatter_plane_merge(Lattice< vobj > &rhs, ExtractPointerArray< typename vobj::scalar_object > pointers, int dimension, int plane, int cbmask)
std::vector< std::pair< int, int > > Cshift_table
void MapCshiftCopy(std::vector< vobj > &Cshift_obj, deviceVector< vobj > &Cshift_obj_device)
void Cshift_local(Lattice< vobj > &ret, const Lattice< vobj > &rhs, int dimension, int shift)
deviceVector< int > Cshift_vector_device
void Gather_plane_simple(const Lattice< vobj > &rhs, deviceVector< vobj > &buffer, int dimension, int plane, int cbmask, int off=0)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
int PermuteDim(int dimension)
virtual int CheckerBoardFromOindex(int Oindex)=0
int PermuteType(int dimension)
Coordinate _checker_dim_mask
virtual int CheckerBoarded(int dim)=0
virtual int CheckerBoardDestination(int source_cb, int shift, int dim)=0
virtual int CheckerBoardShiftForCB(int source_cb, int dim, int shift, int cb)=0
accelerator_inline int Checkerboard(void) const
GridBase * Grid(void) const