31#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
32#define GRID_ALGORITHM_COARSENED_MATRIX_H
38template<
class vobj,
class CComplex>
55 mult(fine_inner_msk, fine_inner,FineMask);
56 blockSum(CoarseInner,fine_inner_msk);
61template<
class Fobj,
class CComplex,
int nbasis>
94 std::vector<CoarseMatrix>
A;
95 std::vector<CoarseMatrix>
Aeven;
96 std::vector<CoarseMatrix>
Aodd;
120 Stencil.HaloExchange(in,compressor);
124 int npoint =
geom.npoint;
130 for(
int p=0;p<
geom.npoint;p++) {
132 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
134 Aview *Aview_p = & AcceleratorViewContainer[0];
136 const int Nsimd = CComplex::Nsimd();
145 calcComplex res =
Zero();
150 for(
int point=0;point<npoint;point++){
152 SE=Stencil_v.GetEntry(
ptype,point,ss);
161 for(
int bb=0;bb<nbasis;bb++) {
168 for(
int p=0;p<
geom.npoint;p++) hAcceleratorViewContainer[p].ViewClose();
190 Stencil.HaloExchange(in,compressor);
194 int npoint =
geom.npoint;
201 for(
int p=0;p<
geom.npoint;p++) {
203 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
205 Aview *Aview_p = & AcceleratorViewContainer[0];
207 const int Nsimd = CComplex::Nsimd();
214 for(
int p=0; p<
geom.npoint; p++) {
217 auto points_p = &points[0];
224 calcComplex res =
Zero();
229 for(
int p=0;p<npoint;p++){
230 int point = points_p[p];
232 SE=Stencil_v.GetEntry(
ptype,point,ss);
241 for(
int bb=0;bb<nbasis;bb++) {
242 res = res + dag_factor_p[b*nbasis+bb]*
coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
248 for(
int p=0;p<
geom.npoint;p++) hAcceleratorViewContainer[p].ViewClose();
254 Stencil.HaloExchange(in,compressor);
267 for(
int p=0;p<
geom.npoint;p++) {
269 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
271 Aview *Aview_p = & AcceleratorViewContainer[0];
277 const int Nsimd = CComplex::Nsimd();
284 calcComplex res =
Zero();
289 SE=Stencil_v.GetEntry(
ptype,point,ss);
298 for(
int bb=0;bb<nbasis;bb++) {
303 for(
int p=0;p<
geom.npoint;p++) hAcceleratorViewContainer[p].ViewClose();
308 int ndir=
geom.npoint-1;
309 if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {
310 std::cout <<
"MdirAll out size "<< out.size()<<std::endl;
311 std::cout <<
"MdirAll ndir "<< ndir<<std::endl;
314 for(
int p=0;p<ndir;p++){
327 int point=
geom.npoint-1;
409 assert(Aself !=
nullptr);
414 int point =
geom.npoint-1;
420 const int Nsimd = CComplex::Nsimd();
430 calcComplex res = Zero();
435 SE=st_v.GetEntry(ptype,point,ss);
438 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
440 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
444 for(
int bb=0;bb<nbasis;bb++) {
445 res = res + dag_factor_p[b*nbasis+bb]*
coalescedRead(a_v[ss](b,bb))*nbr(bb);
453 calcComplex res = Zero();
458 SE=st_v.GetEntry(ptype,point,ss);
461 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
463 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
467 for(
int bb=0;bb<nbasis;bb++) {
486 int npoint =
geom.npoint-1;
488 for(
int p=0; p<npoint; p++) {
492 auto points_p = &points[0];
497 for(
int p=0;p<
geom.npoint;p++) {
499 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
501 Aview *Aview_p = & AcceleratorViewContainer[0];
503 const int Nsimd = CComplex::Nsimd();
513 calcComplex res = Zero();
518 for(int p=0;p<npoint;p++){
519 int point = points_p[p];
520 SE=st_v.GetEntry(ptype,point,ss);
523 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
525 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
527 acceleratorSynchronise();
529 for(int bb=0;bb<nbasis;bb++) {
530 res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
539 calcComplex res = Zero();
544 for(int p=0;p<npoint;p++){
545 int point = points_p[p];
546 SE=st_v.GetEntry(ptype,point,ss);
549 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
551 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
553 acceleratorSynchronise();
555 for(int bb=0;bb<nbasis;bb++) {
556 res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
563 for(
int p=0;p<npoint;p++) hAcceleratorViewContainer[p].ViewClose();
569 geom(CoarseGrid._ndimension),
574 A(
geom.npoint,&CoarseGrid),
589 geom(CoarseGrid._ndimension),
594 A(
geom.npoint,&CoarseGrid),
606 Eigen::MatrixXd dag_factor_eigen = Eigen::MatrixXd::Ones(nbasis, nbasis);
608 const int nb = nbasis/2;
609 dag_factor_eigen.block(0,nb,nb,nb) *= -1.0;
610 dag_factor_eigen.block(nb,0,nb,nb) *= -1.0;
614 std::vector<RealD> h_dag_factor(nbasis*nbasis);
618 h_dag_factor[i] = dag_factor_eigen(j, k);
632 FineComplexField zero(FineGrid); zero=
scalar_type(0.0,0.0);
634 std::vector<FineComplexField> masks(
geom.npoint,FineGrid);
635 FineComplexField imask(FineGrid);
636 FineComplexField omask(FineGrid);
638 FineComplexField evenmask(FineGrid);
639 FineComplexField oddmask(FineGrid);
647 std::vector<FineField> Mphi_p(
geom.npoint,FineGrid);
661 std::cout <<
GridLogMessage<<
"CoarsenMatrix Orthog "<< std::endl;
669 for(
int p=0;p<
geom.npoint;p++)
671 int dir =
geom.directions[p];
672 int disp =
geom.displacements[p];
674 if(
geom.displacements[p]==0){
686 bcb = bcb +
div(coor,block);
691 }
else if ( disp==1 ) {
692 masks[p] = where(
mod(coor,block)==(block-1),
one,zero);
693 }
else if ( disp==-1 ) {
698 oddmask =
one-evenmask;
700 assert(self_stencil!=-1);
702 for(
int i=0;i<nbasis;i++){
706 std::cout <<
GridLogMessage<<
"CoarsenMatrix vector "<<i << std::endl;
707 linop.OpDirAll(phi,Mphi_p);
708 linop.OpDiag (phi,Mphi_p[
geom.npoint-1]);
710 for(
int p=0;p<
geom.npoint;p++){
714 int dir =
geom.directions[p];
715 int disp =
geom.displacements[p];
725 for(
int j=0;j<nbasis;j++){
736 for(
int pp=0;pp<
geom.npoint;pp++){
737 int dirp =
geom.directions[pp];
738 int dispp =
geom.displacements[pp];
739 if ( (dirp==dir) && (dispp==1) ){
756 mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
757 mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
766 coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
776 for(
int j=0;j<nbasis;j++){
784 std::cout <<
GridLogMessage <<
" ForceHermitian, new code "<<std::endl;
792 std::cout <<
GridLogDebug <<
"CoarsenedMatrix::InvertSelfStencilLink" << std::endl;
793 int localVolume =
Grid()->lSites();
795 typedef typename Cobj::scalar_object scalar_object;
800 Eigen::MatrixXcd selfLinkEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
801 Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
803 scalar_object selfLink =
Zero();
804 scalar_object selfLinkInv =
Zero();
808 Grid()->LocalIndexToLocalCoor(site, lcoor);
811 for (
int i = 0; i < nbasis; ++i)
812 for (
int j = 0; j < nbasis; ++j)
815 selfLinkInvEigen = selfLinkEigen.inverse();
817 for(
int i = 0; i < nbasis; ++i)
818 for(
int j = 0; j < nbasis; ++j)
819 selfLinkInv(i, j) = selfLinkInvEigen(i, j);
826 std::cout <<
GridLogDebug <<
"CoarsenedMatrix::FillHalfCbs" << std::endl;
827 for(
int p = 0; p <
geom.npoint; ++p) {
void acceleratorPut(T &dev, const T &host)
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
accelerator_inline void acceleratorSynchronise(void)
#define accelerator_for(iterator, num, nsimd,...)
std::vector< T, devAllocator< T > > deviceVector
std::vector< T, alignedAllocator< T > > hostVector
void blockMaskedInnerProduct(Lattice< CComplex > &CoarseInner, const Lattice< decltype(innerProduct(vobj(), vobj()))> &FineMask, const Lattice< vobj > &fineX, const Lattice< vobj > &fineY)
AcceleratorVector< int, MaxDims > Coordinate
auto Cshift(const Expression &expr, int dim, int shift) -> decltype(closure(expr))
void mult(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void LatticeCoordinate(Lattice< iobj > &l, int mu)
auto localInnerProduct(const Lattice< vobj > &lhs, const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
void peekLocalSite(sobj &s, const LatticeView< vobj > &l, Coordinate &site)
void pokeLocalSite(const sobj &s, LatticeView< vobj > &l, Coordinate &site)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
ComplexD innerProduct(const Lattice< vobj > &left, const Lattice< vobj > &right)
void blockOrthogonalise(Lattice< CComplex > &ip, std::vector< Lattice< vobj > > &Basis)
void pickCheckerboard(int cb, Lattice< vobj > &half, const Lattice< vobj > &full)
void blockProject(Lattice< iVector< CComplex, nbasis > > &coarseData, const Lattice< vobj > &fineData, const VLattice &Basis)
void blockSum(Lattice< vobj > &coarseData, const Lattice< vobj > &fineData)
Lattice< obj > mod(const Lattice< obj > &rhs_i, Integer y)
Lattice< obj > div(const Lattice< obj > &rhs_i, Integer y)
#define autoView(l_v, l, mode)
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE")
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
#define NAMESPACE_BEGIN(A)
static constexpr int DaggerYes
static constexpr int DaggerNo
static constexpr int InverseNo
static constexpr int InverseYes
std::complex< RealD > ComplexD
SimpleCompressorGather< vobj, FaceGatherSimple > SimpleCompressor
accelerator_inline vobj coalescedReadPermute(const vobj &__restrict__ vec, int ptype, int doperm, int lane=0)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline std::enable_if<!isGridTensor< T >::value, T >::type TensorRemove(T arg)
#define thread_for(i, num,...)
std::vector< Lattice< Fobj > > subspace
void HaloExchange(const Lattice< vobj > &source, compressor &compress)
void DhopInternal(CartesianStencil< siteVector, siteVector, DefaultImplParams > &st, std::vector< CoarseMatrix > &a, const CoarseVector &in, CoarseVector &out, int dag)
void ImportPhysicalFermionSource(CoarseVector const &input, CoarseVector &imported)
iMatrix< CComplex, nbasis > Cobj
CartesianStencil< siteVector, siteVector, DefaultImplParams > Stencil
CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0)
void ExportPhysicalFermionSolution(CoarseVector const &solution, CoarseVector &exported)
void CoarsenOperator(GridBase *FineGrid, LinearOperatorBase< Lattice< Fobj > > &linop, Aggregation< Fobj, CComplex, nbasis > &Subspace)
void ExportPhysicalFermionSource(CoarseVector const &solution, CoarseVector &exported)
void Mooee(const CoarseVector &in, CoarseVector &out)
std::vector< CoarseMatrix > Aeven
void MdagNonHermitian(const CoarseVector &in, CoarseVector &out)
Lattice< siteVector > CoarseVector
void M(const CoarseVector &in, CoarseVector &out)
void DhopEO(const CoarseVector &in, CoarseVector &out, int dag)
Lattice< Fobj > FineField
void MooeeInv(const CoarseVector &in, CoarseVector &out)
CartesianStencil< siteVector, siteVector, DefaultImplParams > StencilEven
void Meooe(const CoarseVector &in, CoarseVector &out)
void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv)
void Mdiag(const CoarseVector &in, CoarseVector &out)
Lattice< CComplex > CoarseComplexField
void InvertSelfStencilLink()
void MooeeDag(const CoarseVector &in, CoarseVector &out)
void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
void DselfInternal(CartesianStencil< siteVector, siteVector, DefaultImplParams > &st, CoarseMatrix &a, const CoarseVector &in, CoarseVector &out, int dag)
void MeooeDag(const CoarseVector &in, CoarseVector &out)
void MdirComms(const CoarseVector &in)
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
void Dhop(const CoarseVector &in, CoarseVector &out, int dag)
void ImportUnphysicalFermion(CoarseVector const &input, CoarseVector &imported)
std::vector< CoarseMatrix > A
CoarseMatrix AselfInvEven
void Mdag(const CoarseVector &in, CoarseVector &out)
void Dminus(CoarseVector const &in, CoarseVector &out)
Lattice< CComplex > CoarseScalar
void MooeeInvDag(const CoarseVector &in, CoarseVector &out)
std::vector< CoarseMatrix > Aodd
CoarseVector FermionField
Lattice< iMatrix< CComplex, nbasis > > CoarseMatrix
void MdirAll(const CoarseVector &in, std::vector< CoarseVector > &out)
CartesianStencil< siteVector, siteVector, DefaultImplParams > StencilOdd
deviceVector< RealD > dag_factor
void DminusDag(CoarseVector const &in, CoarseVector &out)
GridBase * RedBlackGrid()
CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0)
void DhopOE(const CoarseVector &in, CoarseVector &out, int dag)
iVector< CComplex, nbasis > siteVector
accelerator_inline int Checkerboard(void) const
GridBase * Grid(void) const