39template<
class Fobj,
class CComplex,
int nbasis>
65 std::vector<CoarseMatrix>
_A;
66 std::vector<CoarseMatrix>
_Adag;
116 int npoint = _geom.
npoint;
149 int npoint =
geom.npoint;
153 const int Nsimd = CComplex::Nsimd();
157 RealD flops = 1.0* npoint * nbasis * nbasis * 8.0 * osites * CComplex::Nsimd();
172 std::vector<Aview> AcceleratorViewContainer_h;
173 std::vector<Vview> AcceleratorVecViewContainer_h;
176 for(
int p=0;p<npoint;p++) {
182 static deviceVector<Aview> AcceleratorViewContainer; AcceleratorViewContainer.resize(npoint);
183 static deviceVector<Vview> AcceleratorVecViewContainer; AcceleratorVecViewContainer.resize(npoint);
185 auto Aview_p = &AcceleratorViewContainer[0];
186 auto Vview_p = &AcceleratorVecViewContainer[0];
189 acceleratorCopyToDevice(&AcceleratorVecViewContainer_h[0],&AcceleratorVecViewContainer[0],npoint *
sizeof(Vview));
195 int32_t ss = spb/(nbasis*npoint);
196 int32_t bp = spb%(nbasis*npoint);
197 int32_t point= bp/nbasis;
198 int32_t b = bp%nbasis;
199 auto SE = Stencil_v.GetEntry(point,ss);
202 for(
int bb=1;bb<nbasis;bb++) {
212 for(
int point=1;point<npoint;point++){
219 for(
int p=0;p<npoint;p++) {
220 AcceleratorViewContainer_h[p].ViewClose();
221 AcceleratorVecViewContainer_h[p].ViewClose();
226 out =
Cell.Extract(pout);
230 std::cout <<
GridLogPerformance<<
"Coarse 1rhs Mult Aviews "<<tviews<<
" us"<<std::endl;
240 std::cout <<
GridLogPerformance<<
"Coarse Kernel flop/s "<< flops/tmult<<
" mflop/s"<<std::endl;
241 std::cout <<
GridLogPerformance<<
"Coarse Kernel bytes/s "<< bytes/tmult<<
" MB/s"<<std::endl;
242 std::cout <<
GridLogPerformance<<
"Coarse overall flops/s "<< flops/ttot<<
" mflop/s"<<std::endl;
253 for(
int p=0;p<
geom.npoint;p++){
255 for(
int mu=0;mu<bcoor.
size();mu++){
257 scoor[mu] = (bcoor[mu] -
geom.shifts[p][mu] + L) % L;
261 int pp =
geom.Reverse(p);
309 std::cout <<
GridLogMessage<<
"GeneralCoarsenMatrix "<< std::endl;
346 Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
347 Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
349 for(
int k=0;k<npoint;k++){
351 for(
int l=0;l<npoint;l++){
353 for(
int mu=0;mu<
Nd;mu++){
361 invMkl = Mkl.inverse();
372 std::vector<CoarseVector> ComputeProj(npoint,
CoarseGrid());
373 std::vector<CoarseVector> FT(npoint,
CoarseGrid());
374 for(
int i=0;i<nbasis;i++){
375 std::cout <<
GridLogMessage<<
"CoarsenMatrixColoured vec "<<i<<
"/"<<nbasis<< std::endl;
376 for(
int p=0;p<npoint;p++){
383 for(
int mu=0;mu<
Nd;mu++){
386 pha = pha + (TwoPiL *
geom.shifts[p][mu]) * coor;
398 linop.Op(phaV,MphaV);
403 coarseInner =
conjugate(pha) * coarseInner;
405 ComputeProj[p] = coarseInner;
411 for(
int k=0;k<npoint;k++){
413 for(
int l=0;l<npoint;l++){
414 FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
421 for(
int j=0;j<nbasis;j++){
422 A_v[sss](i,j) = FT_v[sss](j);
437 std::cout <<
GridLogMessage<<
"CoarsenOperator eigen "<<teigen<<
" us"<<std::endl;
438 std::cout <<
GridLogMessage<<
"CoarsenOperator phase "<<tphase<<
" us"<<std::endl;
439 std::cout <<
GridLogMessage<<
"CoarsenOperator mat "<<tmat <<
" us"<<std::endl;
440 std::cout <<
GridLogMessage<<
"CoarsenOperator proj "<<tproj<<
" us"<<std::endl;
441 std::cout <<
GridLogMessage<<
"CoarsenOperator inv "<<tinv<<
" us"<<std::endl;
459 std::cout <<
GridLogMessage<<
"GeneralCoarsenMatrix "<< std::endl;
476 const int npoint =
geom.npoint;
498 Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
499 Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
501 for(
int k=0;k<npoint;k++){
503 for(
int l=0;l<npoint;l++){
505 for(
int mu=0;mu<
Nd;mu++){
507 phase=phase+TwoPiL*
geom.shifts[k][mu]*
geom.shifts[l][mu];
513 invMkl = Mkl.inverse();
522 std::vector<FineComplexField> phaF(npoint,grid);
523 std::vector<CoarseComplexField> pha(npoint,
CoarseGrid());
527 typedef typename CComplex::scalar_type SComplex;
531 for(
int p=0;p<npoint;p++){
537 for(
int mu=0;mu<
Nd;mu++){
540 pha[p] = pha[p] + (TwoPiL *
geom.shifts[p][mu]) * coor;
542 pha[p] =
exp(pha[p]*ci);
549 std::vector<CoarseVector> ComputeProj(npoint,
CoarseGrid());
550 std::vector<CoarseVector> FT(npoint,
CoarseGrid());
551 for(
int i=0;i<nbasis;i++){
552 std::cout <<
GridLogMessage<<
"CoarsenMatrixColoured vec "<<i<<
"/"<<nbasis<< std::endl;
553 for(
int p=0;p<npoint;p++){
563 linop.Op(phaV,MphaV);
569 coarseInner =
conjugate(pha[p]) * coarseInner;
571 ComputeProj[p] = coarseInner;
578 for(
int k=0;k<npoint;k++){
580 for(
int l=0;l<npoint;l++){
581 FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
588 for(
int j=0;j<nbasis;j++){
589 A_v[sss](i,j) = FT_v[sss](j);
602 for(
int p=0;p<
geom.npoint;p++){
603 std::cout <<
" _A["<<p<<
"] "<<
norm2(
_A[p])<<std::endl;
608 std::cout <<
GridLogMessage<<
"CoarsenOperator eigen "<<teigen<<
" us"<<std::endl;
609 std::cout <<
GridLogMessage<<
"CoarsenOperator phase "<<tphase<<
" us"<<std::endl;
610 std::cout <<
GridLogMessage<<
"CoarsenOperator phaseBZ "<<tphaseBZ<<
" us"<<std::endl;
611 std::cout <<
GridLogMessage<<
"CoarsenOperator mat "<<tmat <<
" us"<<std::endl;
612 std::cout <<
GridLogMessage<<
"CoarsenOperator proj "<<tproj<<
" us"<<std::endl;
613 std::cout <<
GridLogMessage<<
"CoarsenOperator inv "<<tinv<<
" us"<<std::endl;
617 for(
int p=0;p<
geom.npoint;p++){
618 _A[p] =
Cell.ExchangePeriodic(
_A[p]);
624 virtual void MdirAll (
const Field &in, std::vector<Field> &out){assert(0);};
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
std::vector< T, devAllocator< T > > deviceVector
AcceleratorVector< int, MaxDims > Coordinate
accelerator_inline Grid_simd< S, V > exp(const Grid_simd< S, V > &r)
void LatticeCoordinate(Lattice< iobj > &l, int mu)
void pokeSite(const sobj &s, Lattice< vobj > &l, const Coordinate &site)
vobj::scalar_object peekSite(const Lattice< vobj > &l, const Coordinate &site)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
Lattice< vobj > adj(const Lattice< vobj > &lhs)
RealD norm2(const Lattice< vobj > &arg)
void blockOrthogonalise(Lattice< CComplex > &ip, std::vector< Lattice< vobj > > &Basis)
void blockZAXPY(Lattice< vobj > &fineZ, const Lattice< CComplex > &coarseA, const Lattice< vobj2 > &fineX, const Lattice< vobj > &fineY)
void blockProject(Lattice< iVector< CComplex, nbasis > > &coarseData, const Lattice< vobj > &fineData, const VLattice &Basis)
#define autoView(l_v, l, mode)
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN")
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
@ AcceleratorWriteDiscard
#define NAMESPACE_BEGIN(A)
std::complex< RealD > ComplexD
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline vobj coalescedReadGeneralPermute(const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0)
static INTERNAL_PRECISION U
accelerator_inline size_type size(void) const
std::vector< Lattice< Fobj > > subspace
std::vector< CoarseMatrix > _Adag
Lattice< iScalar< CComplex > > CoarseComplexField
void M(const CoarseVector &in, CoarseVector &out)
void ExchangeCoarseLinks(void)
GeneralLocalStencil Stencil
Lattice< CComplex > FineComplexField
Lattice< CComplex > CoarseScalar
Lattice< siteVector > CoarseVector
iMatrix< CComplex, nbasis > Cobj
std::vector< CoarseMatrix > _A
iVector< CComplex, nbasis > Cvec
virtual void Mdiag(const Field &in, Field &out)
GridCartesian * _CoarseGrid
GridBase * FineGrid(void)
virtual void MdirAll(const Field &in, std::vector< Field > &out)
void Mdag(const CoarseVector &in, CoarseVector &out)
void CoarsenOperator(LinearOperatorBase< Lattice< Fobj > > &linop, Aggregation< Fobj, CComplex, nbasis > &U, Aggregation< Fobj, CComplex, nbasis > &V)
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom, GridBase *FineGrid, GridCartesian *CoarseGrid)
iMatrix< CComplex, nbasis > siteMatrix
Lattice< Fobj > FineField
std::vector< CoarseVector > MultTemporaries
void CoarsenOperator(LinearOperatorBase< Lattice< Fobj > > &linop, Aggregation< Fobj, CComplex, nbasis > &Subspace)
iVector< CComplex, nbasis > siteVector
void Mult(std::vector< CoarseMatrix > &A, const CoarseVector &in, CoarseVector &out)
Lattice< iMatrix< CComplex, nbasis > > CoarseMatrix
GridCartesian * CoarseGrid(void)
NonLocalStencilGeometry & geom
GeneralCoarsenedMatrix< Fobj, CComplex, nbasis > GeneralCoarseOp
virtual void Mdir(const Field &in, Field &out, int dir, int disp)
const Coordinate & GlobalDimensions(void)
int64_t gSites(void) const
void GlobalIndexToGlobalCoor(int64_t gidx, Coordinate &gcoor)
accelerator_inline int Checkerboard(void) const
GridBase * Grid(void) const
std::vector< Coordinate > shifts