60 std::cout <<
"Thread Interleaving"<<std::endl;
74 for(
int t=0;t<threads;t++){
75 for(
int ss=0;ss<vol;ss++){
76 if ( ( ss >> blockbits) % threads == t ) {
77 throrder.push_back(reorder[ss]);
85 std::cout<<
GridLogDebug<<
"Lexicographic : no cache blocking"<<std::endl;
87 for (
int s = 0 ; s!=
grid->oSites();s++){
102 assert(ND==
Block.size());
109 dims[mu] =
grid->_rdimensions[mu];
120 for(xo[dim]=0;xo[dim]<dims[dim];xo[dim]+=
Block[dim]){
136 for(xi[dim]=0;xi[dim]<std::min(dims[dim]-xo[dim],
Block[dim]);xi[dim]++){
140 for(
int d=0;d<ND;d++){
146 Lexicographic::IndexFromCoor(x,index,
grid->_rdimensions);
156 std::cout <<
GridLogDebug <<
" Lebesgue order "<<std::endl;
163 std::vector<std::vector<IndexInteger> > bitlist(ND);
166 dims[mu] =
grid->_rdimensions[mu];
167 assert ( dims[mu] != 0 );
175 for(
int bit=0;bit<32;bit++){
177 for(
int mu=0;mu<ND;mu++){
178 if ( mask&(adims[mu]-1) ){
179 bitlist[mu].push_back(sitebit);
187 for(
int mu=0;mu<ND;mu++) avol = avol * adims[mu];
190 for(
int mu=0;mu<ND;mu++) vol = vol * dims[mu];
194 std::vector<IndexInteger> ax(ND);
199 for(
int mu=0;mu<ND;mu++) ax[mu] = 0;
202 for(
int mu=0;mu<ND;mu++){
205 for(
int bit=0;bit<bitlist[mu].size();bit++){
206 int sbit=bitlist[mu][bit];
208 if(asite&(
one<<sbit)){
214 if ( ax[mu]>dims[mu]-1 ) contained = 0;
221 +dims[0]*dims[1]*ax[2]
222 +dims[0]*dims[1]*dims[2]*ax[3];
std::vector< T, uvmAllocator< T > > Vector
AcceleratorVector< int, MaxDims > Coordinate
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE")
#define NAMESPACE_BEGIN(A)
static int GetThreads(void)
void CartesianBlocking(void)
static int UseLebesgueOrder
LebesgueOrder(GridBase *_grid)
deviceVector< IndexInteger > _LebesgueReorder
void ThreadInterleave(void)
static std::vector< int > Block
void IterateI(int ND, int dim, Coordinate &xo, Coordinate &xi, Coordinate &dims)
void IterateO(int ND, int dim, Coordinate &xo, Coordinate &xi, Coordinate &dims)
IndexInteger alignup(IndexInteger n)