Grid 0.7.0
CoarsenedMatrix.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/algorithms/CoarsenedMatrix.h
6
7 Copyright (C) 2015
8
9Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
12Author: paboyle <paboyle@ph.ed.ac.uk>
13
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 2 of the License, or
17 (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License along
25 with this program; if not, write to the Free Software Foundation, Inc.,
26 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27
28 See the full license in the file "LICENSE" in the top level distribution directory
29*************************************************************************************/
30/* END LEGAL */
31#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
32#define GRID_ALGORITHM_COARSENED_MATRIX_H
33
34#include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No)
35
37
38template<class vobj,class CComplex>
40 const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask,
41 const Lattice<vobj> &fineX,
42 const Lattice<vobj> &fineY)
43{
44 typedef decltype(innerProduct(vobj(),vobj())) dotp;
45
46 GridBase *coarse(CoarseInner.Grid());
47 GridBase *fine (fineX.Grid());
48
49 Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
50 Lattice<dotp> fine_inner_msk(fine);
51
52 // Multiply could be fused with innerProduct
53 // Single block sum kernel could do both masks.
54 fine_inner = localInnerProduct(fineX,fineY);
55 mult(fine_inner_msk, fine_inner,FineMask);
56 blockSum(CoarseInner,fine_inner_msk);
57}
58
59// Fine Object == (per site) type of fine field
60// nbasis == number of deflation vectors
61template<class Fobj,class CComplex,int nbasis>
62class CoarsenedMatrix : public CheckerBoardedSparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
63public:
64
70 typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
73
74 // enrich interface, use default implementation as in FermionOperator ///////
75 void Dminus(CoarseVector const& in, CoarseVector& out) { out = in; }
76 void DminusDag(CoarseVector const& in, CoarseVector& out) { out = in; }
77 void ImportPhysicalFermionSource(CoarseVector const& input, CoarseVector& imported) { imported = input; }
78 void ImportUnphysicalFermion(CoarseVector const& input, CoarseVector& imported) { imported = input; }
79 void ExportPhysicalFermionSolution(CoarseVector const& solution, CoarseVector& exported) { exported = solution; };
80 void ExportPhysicalFermionSource(CoarseVector const& solution, CoarseVector& exported) { exported = solution; };
81
83 // Data members
89
93
94 std::vector<CoarseMatrix> A;
95 std::vector<CoarseMatrix> Aeven;
96 std::vector<CoarseMatrix> Aodd;
97
101
103
105 // Interface
107 GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
109
110 int ConstEE() { return 0; }
111
112 void M (const CoarseVector &in, CoarseVector &out)
113 {
114 conformable(_grid,in.Grid());
115 conformable(in.Grid(),out.Grid());
116 out.Checkerboard() = in.Checkerboard();
117
119
120 Stencil.HaloExchange(in,compressor);
121 autoView( in_v , in, AcceleratorRead);
122 autoView( out_v , out, AcceleratorWrite);
123 autoView( Stencil_v , Stencil, AcceleratorRead);
124 int npoint = geom.npoint;
125 typedef LatticeView<Cobj> Aview;
126
127 deviceVector<Aview> AcceleratorViewContainer(geom.npoint);
128 hostVector<Aview> hAcceleratorViewContainer(geom.npoint);
129
130 for(int p=0;p<geom.npoint;p++) {
131 hAcceleratorViewContainer[p] = A[p].View(AcceleratorRead);
132 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
133 }
134 Aview *Aview_p = & AcceleratorViewContainer[0];
135
136 const int Nsimd = CComplex::Nsimd();
137 typedef decltype(coalescedRead(in_v[0])) calcVector;
138 typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
139
140 int osites=Grid()->oSites();
141
142 accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
143 int ss = sss/nbasis;
144 int b = sss%nbasis;
145 calcComplex res = Zero();
146 calcVector nbr;
147 int ptype;
148 StencilEntry *SE;
149
150 for(int point=0;point<npoint;point++){
151
152 SE=Stencil_v.GetEntry(ptype,point,ss);
153
154 if(SE->_is_local) {
155 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
156 } else {
157 nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
158 }
160
161 for(int bb=0;bb<nbasis;bb++) {
162 res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
163 }
164 }
165 coalescedWrite(out_v[ss](b),res);
166 });
167
168 for(int p=0;p<geom.npoint;p++) hAcceleratorViewContainer[p].ViewClose();
169 };
170
171 void Mdag (const CoarseVector &in, CoarseVector &out)
172 {
173 if(hermitian) {
174 // corresponds to Petrov-Galerkin coarsening
175 return M(in,out);
176 } else {
177 // corresponds to Galerkin coarsening
178 return MdagNonHermitian(in, out);
179 }
180 };
181
183 {
184 conformable(_grid,in.Grid());
185 conformable(in.Grid(),out.Grid());
186 out.Checkerboard() = in.Checkerboard();
187
189
190 Stencil.HaloExchange(in,compressor);
191 autoView( in_v , in, AcceleratorRead);
192 autoView( out_v , out, AcceleratorWrite);
193 autoView( Stencil_v , Stencil, AcceleratorRead);
194 int npoint = geom.npoint;
195 typedef LatticeView<Cobj> Aview;
196
197
198 deviceVector<Aview> AcceleratorViewContainer(geom.npoint);
199 hostVector<Aview> hAcceleratorViewContainer(geom.npoint);
200
201 for(int p=0;p<geom.npoint;p++) {
202 hAcceleratorViewContainer[p] = A[p].View(AcceleratorRead);
203 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
204 }
205 Aview *Aview_p = & AcceleratorViewContainer[0];
206
207 const int Nsimd = CComplex::Nsimd();
208 typedef decltype(coalescedRead(in_v[0])) calcVector;
209 typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
210
211 int osites=Grid()->oSites();
212
213 deviceVector<int> points(geom.npoint);
214 for(int p=0; p<geom.npoint; p++) {
215 acceleratorPut(points[p],geom.points_dagger[p]);
216 }
217 auto points_p = &points[0];
218
219 RealD* dag_factor_p = &dag_factor[0];
220
221 accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
222 int ss = sss/nbasis;
223 int b = sss%nbasis;
224 calcComplex res = Zero();
225 calcVector nbr;
226 int ptype;
227 StencilEntry *SE;
228
229 for(int p=0;p<npoint;p++){
230 int point = points_p[p];
231
232 SE=Stencil_v.GetEntry(ptype,point,ss);
233
234 if(SE->_is_local) {
235 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
236 } else {
237 nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
238 }
240
241 for(int bb=0;bb<nbasis;bb++) {
242 res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
243 }
244 }
245 coalescedWrite(out_v[ss](b),res);
246 });
247
248 for(int p=0;p<geom.npoint;p++) hAcceleratorViewContainer[p].ViewClose();
249 }
250
251 void MdirComms(const CoarseVector &in)
252 {
254 Stencil.HaloExchange(in,compressor);
255 }
256 void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
257 {
258 conformable(_grid,in.Grid());
259 conformable(_grid,out.Grid());
260 out.Checkerboard() = in.Checkerboard();
261
262 typedef LatticeView<Cobj> Aview;
263
264 deviceVector<Aview> AcceleratorViewContainer(geom.npoint);
265 hostVector<Aview> hAcceleratorViewContainer(geom.npoint);
266
267 for(int p=0;p<geom.npoint;p++) {
268 hAcceleratorViewContainer[p] = A[p].View(AcceleratorRead);
269 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
270 }
271 Aview *Aview_p = & AcceleratorViewContainer[0];
272
273 autoView( out_v , out, AcceleratorWrite);
274 autoView( in_v , in, AcceleratorRead);
275 autoView( Stencil_v , Stencil, AcceleratorRead);
276
277 const int Nsimd = CComplex::Nsimd();
278 typedef decltype(coalescedRead(in_v[0])) calcVector;
279 typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
280
281 accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
282 int ss = sss/nbasis;
283 int b = sss%nbasis;
284 calcComplex res = Zero();
285 calcVector nbr;
286 int ptype;
287 StencilEntry *SE;
288
289 SE=Stencil_v.GetEntry(ptype,point,ss);
290
291 if(SE->_is_local) {
292 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
293 } else {
294 nbr = coalescedRead(Stencil_v.CommBuf()[SE->_offset]);
295 }
297
298 for(int bb=0;bb<nbasis;bb++) {
299 res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
300 }
301 coalescedWrite(out_v[ss](b),res);
302 });
303 for(int p=0;p<geom.npoint;p++) hAcceleratorViewContainer[p].ViewClose();
304 }
305 void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out)
306 {
307 this->MdirComms(in);
308 int ndir=geom.npoint-1;
309 if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {
310 std::cout <<"MdirAll out size "<< out.size()<<std::endl;
311 std::cout <<"MdirAll ndir "<< ndir<<std::endl;
312 assert(0);
313 }
314 for(int p=0;p<ndir;p++){
315 MdirCalc(in,out[p],p);
316 }
317 };
318 void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
319
320 this->MdirComms(in);
321
322 MdirCalc(in,out,geom.point(dir,disp));
323 };
324
325 void Mdiag(const CoarseVector &in, CoarseVector &out)
326 {
327 int point=geom.npoint-1;
328 MdirCalc(in, out, point); // No comms
329 };
330
331 void Mooee(const CoarseVector &in, CoarseVector &out) {
333 }
334
335 void MooeeInv(const CoarseVector &in, CoarseVector &out) {
337 }
338
339 void MooeeDag(const CoarseVector &in, CoarseVector &out) {
341 }
342
343 void MooeeInvDag(const CoarseVector &in, CoarseVector &out) {
345 }
346
347 void Meooe(const CoarseVector &in, CoarseVector &out) {
348 if(in.Checkerboard() == Odd) {
349 DhopEO(in, out, DaggerNo);
350 } else {
351 DhopOE(in, out, DaggerNo);
352 }
353 }
354
355 void MeooeDag(const CoarseVector &in, CoarseVector &out) {
356 if(in.Checkerboard() == Odd) {
357 DhopEO(in, out, DaggerYes);
358 } else {
359 DhopOE(in, out, DaggerYes);
360 }
361 }
362
363 void Dhop(const CoarseVector &in, CoarseVector &out, int dag) {
364 conformable(in.Grid(), _grid); // verifies full grid
365 conformable(in.Grid(), out.Grid());
366
367 out.Checkerboard() = in.Checkerboard();
368
369 DhopInternal(Stencil, A, in, out, dag);
370 }
371
372 void DhopOE(const CoarseVector &in, CoarseVector &out, int dag) {
373 conformable(in.Grid(), _cbgrid); // verifies half grid
374 conformable(in.Grid(), out.Grid()); // drops the cb check
375
376 assert(in.Checkerboard() == Even);
377 out.Checkerboard() = Odd;
378
379 DhopInternal(StencilEven, Aodd, in, out, dag);
380 }
381
382 void DhopEO(const CoarseVector &in, CoarseVector &out, int dag) {
383 conformable(in.Grid(), _cbgrid); // verifies half grid
384 conformable(in.Grid(), out.Grid()); // drops the cb check
385
386 assert(in.Checkerboard() == Odd);
387 out.Checkerboard() = Even;
388
389 DhopInternal(StencilOdd, Aeven, in, out, dag);
390 }
391
392 void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv) {
393 out.Checkerboard() = in.Checkerboard();
394 assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
395
396 CoarseMatrix *Aself = nullptr;
397 if(in.Grid()->_isCheckerBoarded) {
398 if(in.Checkerboard() == Odd) {
399 Aself = (inv) ? &AselfInvOdd : &Aodd[geom.npoint-1];
400 DselfInternal(StencilOdd, *Aself, in, out, dag);
401 } else {
402 Aself = (inv) ? &AselfInvEven : &Aeven[geom.npoint-1];
403 DselfInternal(StencilEven, *Aself, in, out, dag);
404 }
405 } else {
406 Aself = (inv) ? &AselfInv : &A[geom.npoint-1];
407 DselfInternal(Stencil, *Aself, in, out, dag);
408 }
409 assert(Aself != nullptr);
410 }
411
413 const CoarseVector &in, CoarseVector &out, int dag) {
414 int point = geom.npoint-1;
415 autoView( out_v, out, AcceleratorWrite);
416 autoView( in_v, in, AcceleratorRead);
417 autoView( st_v, st, AcceleratorRead);
418 autoView( a_v, a, AcceleratorRead);
419
420 const int Nsimd = CComplex::Nsimd();
421 typedef decltype(coalescedRead(in_v[0])) calcVector;
422 typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
423
424 RealD* dag_factor_p = &dag_factor[0];
425
426 if(dag) {
427 accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
428 int ss = sss/nbasis;
429 int b = sss%nbasis;
430 calcComplex res = Zero();
431 calcVector nbr;
432 int ptype;
433 StencilEntry *SE;
434
435 SE=st_v.GetEntry(ptype,point,ss);
436
437 if(SE->_is_local) {
438 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
439 } else {
440 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
441 }
443
444 for(int bb=0;bb<nbasis;bb++) {
445 res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(a_v[ss](b,bb))*nbr(bb);
446 }
447 coalescedWrite(out_v[ss](b),res);
448 });
449 } else {
450 accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
451 int ss = sss/nbasis;
452 int b = sss%nbasis;
453 calcComplex res = Zero();
454 calcVector nbr;
455 int ptype;
456 StencilEntry *SE;
457
458 SE=st_v.GetEntry(ptype,point,ss);
459
460 if(SE->_is_local) {
461 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
462 } else {
463 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
464 }
466
467 for(int bb=0;bb<nbasis;bb++) {
468 res = res + coalescedRead(a_v[ss](b,bb))*nbr(bb);
469 }
470 coalescedWrite(out_v[ss](b),res);
471 });
472 }
473 }
474
476 const CoarseVector &in, CoarseVector &out, int dag) {
478
479 st.HaloExchange(in,compressor);
480 autoView( in_v, in, AcceleratorRead);
481 autoView( out_v, out, AcceleratorWrite);
482 autoView( st_v , st, AcceleratorRead);
483 typedef LatticeView<Cobj> Aview;
484
485 // determine in what order we need the points
486 int npoint = geom.npoint-1;
487 deviceVector<int> points(npoint);
488 for(int p=0; p<npoint; p++) {
489 int val = (dag && !hermitian) ? geom.points_dagger[p] : p;
490 acceleratorPut(points[p], val);
491 }
492 auto points_p = &points[0];
493
494 deviceVector<Aview> AcceleratorViewContainer(geom.npoint);
495 hostVector<Aview> hAcceleratorViewContainer(geom.npoint);
496
497 for(int p=0;p<geom.npoint;p++) {
498 hAcceleratorViewContainer[p] = a[p].View(AcceleratorRead);
499 acceleratorPut(AcceleratorViewContainer[p],hAcceleratorViewContainer[p]);
500 }
501 Aview *Aview_p = & AcceleratorViewContainer[0];
502
503 const int Nsimd = CComplex::Nsimd();
504 typedef decltype(coalescedRead(in_v[0])) calcVector;
505 typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
506
507 RealD* dag_factor_p = &dag_factor[0];
508
509 if(dag) {
510 accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
511 int ss = sss/nbasis;
512 int b = sss%nbasis;
513 calcComplex res = Zero();
514 calcVector nbr;
515 int ptype;
516 StencilEntry *SE;
517
518 for(int p=0;p<npoint;p++){
519 int point = points_p[p];
520 SE=st_v.GetEntry(ptype,point,ss);
521
522 if(SE->_is_local) {
523 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
524 } else {
525 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
526 }
527 acceleratorSynchronise();
528
529 for(int bb=0;bb<nbasis;bb++) {
530 res = res + dag_factor_p[b*nbasis+bb]*coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
531 }
532 }
533 coalescedWrite(out_v[ss](b),res);
534 });
535 } else {
536 accelerator_for(sss, in.Grid()->oSites()*nbasis, Nsimd, {
537 int ss = sss/nbasis;
538 int b = sss%nbasis;
539 calcComplex res = Zero();
540 calcVector nbr;
541 int ptype;
542 StencilEntry *SE;
543
544 for(int p=0;p<npoint;p++){
545 int point = points_p[p];
546 SE=st_v.GetEntry(ptype,point,ss);
547
548 if(SE->_is_local) {
549 nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
550 } else {
551 nbr = coalescedRead(st_v.CommBuf()[SE->_offset]);
552 }
553 acceleratorSynchronise();
554
555 for(int bb=0;bb<nbasis;bb++) {
556 res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
557 }
558 }
559 coalescedWrite(out_v[ss](b),res);
560 });
561 }
562
563 for(int p=0;p<npoint;p++) hAcceleratorViewContainer[p].ViewClose();
564 }
565
566 CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) :
567 _grid(&CoarseGrid),
568 _cbgrid(new GridRedBlackCartesian(&CoarseGrid)),
569 geom(CoarseGrid._ndimension),
570 hermitian(hermitian_),
571 Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
572 StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements),
573 StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements),
574 A(geom.npoint,&CoarseGrid),
575 Aeven(geom.npoint,_cbgrid),
576 Aodd(geom.npoint,_cbgrid),
577 AselfInv(&CoarseGrid),
580 dag_factor(nbasis*nbasis)
581 {
582 fillFactor();
583 };
584
585 CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) :
586
587 _grid(&CoarseGrid),
588 _cbgrid(&CoarseRBGrid),
589 geom(CoarseGrid._ndimension),
590 hermitian(hermitian_),
591 Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
592 StencilEven(&CoarseRBGrid,geom.npoint,Even,geom.directions,geom.displacements),
593 StencilOdd(&CoarseRBGrid,geom.npoint,Odd,geom.directions,geom.displacements),
594 A(geom.npoint,&CoarseGrid),
595 Aeven(geom.npoint,&CoarseRBGrid),
596 Aodd(geom.npoint,&CoarseRBGrid),
597 AselfInv(&CoarseGrid),
598 AselfInvEven(&CoarseRBGrid),
599 AselfInvOdd(&CoarseRBGrid),
600 dag_factor(nbasis*nbasis)
601 {
602 fillFactor();
603 };
604
605 void fillFactor() {
606 Eigen::MatrixXd dag_factor_eigen = Eigen::MatrixXd::Ones(nbasis, nbasis);
607 if(!hermitian) {
608 const int nb = nbasis/2;
609 dag_factor_eigen.block(0,nb,nb,nb) *= -1.0;
610 dag_factor_eigen.block(nb,0,nb,nb) *= -1.0;
611 }
612
613 // GPU readable prefactor
614 std::vector<RealD> h_dag_factor(nbasis*nbasis);
615 thread_for(i, nbasis*nbasis, {
616 int j = i/nbasis;
617 int k = i%nbasis;
618 h_dag_factor[i] = dag_factor_eigen(j, k);
619 });
620 acceleratorCopyToDevice(&h_dag_factor[0],&dag_factor[0],dag_factor.size()*sizeof(RealD));
621 }
622
625 {
626 typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
627 typedef typename Fobj::scalar_type scalar_type;
628
629 std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl;
630
631 FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
632 FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
633
634 std::vector<FineComplexField> masks(geom.npoint,FineGrid);
635 FineComplexField imask(FineGrid); // contributions from within this block
636 FineComplexField omask(FineGrid); // contributions from outwith this block
637
638 FineComplexField evenmask(FineGrid);
639 FineComplexField oddmask(FineGrid);
640
641 FineField phi(FineGrid);
642 FineField tmp(FineGrid);
643 FineField zz(FineGrid); zz=Zero();
644 FineField Mphi(FineGrid);
645 FineField Mphie(FineGrid);
646 FineField Mphio(FineGrid);
647 std::vector<FineField> Mphi_p(geom.npoint,FineGrid);
648
649 Lattice<iScalar<vInteger> > coor (FineGrid);
650 Lattice<iScalar<vInteger> > bcoor(FineGrid);
651 Lattice<iScalar<vInteger> > bcb (FineGrid); bcb = Zero();
652
653 CoarseVector iProj(Grid());
654 CoarseVector oProj(Grid());
655 CoarseVector SelfProj(Grid());
656 CoarseComplexField iZProj(Grid());
657 CoarseComplexField oZProj(Grid());
658
659 CoarseScalar InnerProd(Grid());
660
661 std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl;
662 // Orthogonalise the subblocks over the basis
663 blockOrthogonalise(InnerProd,Subspace.subspace);
664
665 // Compute the matrix elements of linop between this orthonormal
666 // set of vectors.
667 std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl;
668 int self_stencil=-1;
669 for(int p=0;p<geom.npoint;p++)
670 {
671 int dir = geom.directions[p];
672 int disp = geom.displacements[p];
673 A[p]=Zero();
674 if( geom.displacements[p]==0){
675 self_stencil=p;
676 }
677
678 Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
679
680 LatticeCoordinate(coor,dir);
681
683 // Work out even and odd block checkerboarding for fast diagonal term
685 if ( disp==1 ) {
686 bcb = bcb + div(coor,block);
687 }
688
689 if ( disp==0 ) {
690 masks[p]= Zero();
691 } else if ( disp==1 ) {
692 masks[p] = where(mod(coor,block)==(block-1),one,zero);
693 } else if ( disp==-1 ) {
694 masks[p] = where(mod(coor,block)==(Integer)0,one,zero);
695 }
696 }
697 evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
698 oddmask = one-evenmask;
699
700 assert(self_stencil!=-1);
701
702 for(int i=0;i<nbasis;i++){
703
704 phi=Subspace.subspace[i];
705
706 std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
707 linop.OpDirAll(phi,Mphi_p);
708 linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
709
710 for(int p=0;p<geom.npoint;p++){
711
712 Mphi = Mphi_p[p];
713
714 int dir = geom.directions[p];
715 int disp = geom.displacements[p];
716
717 if ( (disp==-1) || (!hermitian ) ) {
718
720 // Pick out contributions coming from this cell and neighbour cell
722 omask = masks[p];
723 imask = one-omask;
724
725 for(int j=0;j<nbasis;j++){
726
727 blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi);
728
729 autoView( iZProj_v , iZProj, AcceleratorRead) ;
730 autoView( oZProj_v , oZProj, AcceleratorRead) ;
731 autoView( A_p , A[p], AcceleratorWrite);
732 autoView( A_self , A[self_stencil], AcceleratorWrite);
733
734 accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
735 if ( hermitian && (disp==-1) ) {
736 for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
737 int dirp = geom.directions[pp];
738 int dispp = geom.displacements[pp];
739 if ( (dirp==dir) && (dispp==1) ){
740 auto sft = conjugate(Cshift(oZProj,dir,1));
741 autoView( sft_v , sft , AcceleratorWrite);
742 autoView( A_pp , A[pp], AcceleratorWrite);
743 accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
744 }
745 }
746 }
747
748 }
749 }
750 }
751
753 // Faster alternate self coupling.. use hermiticity to save 2x
755 {
756 mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
757 mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
758
759 {
760 autoView( tmp_ , tmp, AcceleratorWrite);
761 autoView( evenmask_ , evenmask, AcceleratorRead);
762 autoView( oddmask_ , oddmask, AcceleratorRead);
763 autoView( Mphie_ , Mphie, AcceleratorRead);
764 autoView( Mphio_ , Mphio, AcceleratorRead);
765 accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{
766 coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
767 });
768 }
769
770 blockProject(SelfProj,tmp,Subspace.subspace);
771
772 autoView( SelfProj_ , SelfProj, AcceleratorRead);
773 autoView( A_self , A[self_stencil], AcceleratorWrite);
774
775 accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
776 for(int j=0;j<nbasis;j++){
777 coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
778 }
779 });
780
781 }
782 }
783 if(hermitian) {
784 std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
785 }
786
787 InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl;
788 FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl;
789 }
790
792 std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl;
793 int localVolume = Grid()->lSites();
794
795 typedef typename Cobj::scalar_object scalar_object;
796
797 autoView(Aself_v, A[geom.npoint-1], CpuRead);
798 autoView(AselfInv_v, AselfInv, CpuWrite);
799 thread_for(site, localVolume, { // NOTE: Not able to bring this to GPU because of Eigen + peek/poke
800 Eigen::MatrixXcd selfLinkEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
801 Eigen::MatrixXcd selfLinkInvEigen = Eigen::MatrixXcd::Zero(nbasis, nbasis);
802
803 scalar_object selfLink = Zero();
804 scalar_object selfLinkInv = Zero();
805
806 Coordinate lcoor;
807
808 Grid()->LocalIndexToLocalCoor(site, lcoor);
809 peekLocalSite(selfLink, Aself_v, lcoor);
810
811 for (int i = 0; i < nbasis; ++i)
812 for (int j = 0; j < nbasis; ++j)
813 selfLinkEigen(i, j) = static_cast<ComplexD>(TensorRemove(selfLink(i, j)));
814
815 selfLinkInvEigen = selfLinkEigen.inverse();
816
817 for(int i = 0; i < nbasis; ++i)
818 for(int j = 0; j < nbasis; ++j)
819 selfLinkInv(i, j) = selfLinkInvEigen(i, j);
820
821 pokeLocalSite(selfLinkInv, AselfInv_v, lcoor);
822 });
823 }
824
825 void FillHalfCbs() {
826 std::cout << GridLogDebug << "CoarsenedMatrix::FillHalfCbs" << std::endl;
827 for(int p = 0; p < geom.npoint; ++p) {
828 pickCheckerboard(Even, Aeven[p], A[p]);
829 pickCheckerboard(Odd, Aodd[p], A[p]);
830 }
833 }
834};
835
837#endif
void acceleratorPut(T &dev, const T &host)
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
accelerator_inline void acceleratorSynchronise(void)
#define accelerator_for(iterator, num, nsimd,...)
std::vector< T, devAllocator< T > > deviceVector
std::vector< T, alignedAllocator< T > > hostVector
#define one
Definition BGQQPX.h:108
static const int Even
static const int Odd
void blockMaskedInnerProduct(Lattice< CComplex > &CoarseInner, const Lattice< decltype(innerProduct(vobj(), vobj()))> &FineMask, const Lattice< vobj > &fineX, const Lattice< vobj > &fineY)
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
auto Cshift(const Expression &expr, int dim, int shift) -> decltype(closure(expr))
Definition Cshift.h:55
void mult(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
void conformable(const Lattice< obj1 > &lhs, const Lattice< obj2 > &rhs)
void LatticeCoordinate(Lattice< iobj > &l, int mu)
auto localInnerProduct(const Lattice< vobj > &lhs, const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
void peekLocalSite(sobj &s, const LatticeView< vobj > &l, Coordinate &site)
void pokeLocalSite(const sobj &s, LatticeView< vobj > &l, Coordinate &site)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
ComplexD innerProduct(const Lattice< vobj > &left, const Lattice< vobj > &right)
void blockOrthogonalise(Lattice< CComplex > &ip, std::vector< Lattice< vobj > > &Basis)
void pickCheckerboard(int cb, Lattice< vobj > &half, const Lattice< vobj > &full)
void blockProject(Lattice< iVector< CComplex, nbasis > > &coarseData, const Lattice< vobj > &fineData, const VLattice &Basis)
void blockSum(Lattice< vobj > &coarseData, const Lattice< vobj > &fineData)
Lattice< obj > mod(const Lattice< obj > &rhs_i, Integer y)
Lattice< obj > div(const Lattice< obj > &rhs_i, Integer y)
#define autoView(l_v, l, mode)
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE")
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
@ AcceleratorRead
@ CpuRead
@ AcceleratorWrite
@ CpuWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int DaggerYes
Definition QCD.h:70
static constexpr int DaggerNo
Definition QCD.h:69
static constexpr int InverseNo
Definition QCD.h:71
static constexpr int InverseYes
Definition QCD.h:72
uint32_t Integer
Definition Simd.h:58
std::complex< RealD > ComplexD
Definition Simd.h:79
double RealD
Definition Simd.h:61
SimpleCompressorGather< vobj, FaceGatherSimple > SimpleCompressor
accelerator_inline vobj coalescedReadPermute(const vobj &__restrict__ vec, int ptype, int doperm, int lane=0)
Definition Tensor_SIMT.h:66
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline std::enable_if<!isGridTensor< T >::value, T >::type TensorRemove(T arg)
#define thread_for(i, num,...)
Definition Threads.h:60
std::vector< Lattice< Fobj > > subspace
Definition Aggregates.h:58
void HaloExchange(const Lattice< vobj > &source, compressor &compress)
Definition Stencil.h:610
void DhopInternal(CartesianStencil< siteVector, siteVector, DefaultImplParams > &st, std::vector< CoarseMatrix > &a, const CoarseVector &in, CoarseVector &out, int dag)
void ImportPhysicalFermionSource(CoarseVector const &input, CoarseVector &imported)
iMatrix< CComplex, nbasis > Cobj
CartesianStencil< siteVector, siteVector, DefaultImplParams > Stencil
CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0)
void ExportPhysicalFermionSolution(CoarseVector const &solution, CoarseVector &exported)
void CoarsenOperator(GridBase *FineGrid, LinearOperatorBase< Lattice< Fobj > > &linop, Aggregation< Fobj, CComplex, nbasis > &Subspace)
GridBase * Grid(void)
void ExportPhysicalFermionSource(CoarseVector const &solution, CoarseVector &exported)
void Mooee(const CoarseVector &in, CoarseVector &out)
std::vector< CoarseMatrix > Aeven
void MdagNonHermitian(const CoarseVector &in, CoarseVector &out)
Lattice< siteVector > CoarseVector
void M(const CoarseVector &in, CoarseVector &out)
void DhopEO(const CoarseVector &in, CoarseVector &out, int dag)
CoarseMatrix AselfInvOdd
Lattice< Fobj > FineField
void MooeeInv(const CoarseVector &in, CoarseVector &out)
CartesianStencil< siteVector, siteVector, DefaultImplParams > StencilEven
void Meooe(const CoarseVector &in, CoarseVector &out)
void MooeeInternal(const CoarseVector &in, CoarseVector &out, int dag, int inv)
void Mdiag(const CoarseVector &in, CoarseVector &out)
Lattice< CComplex > CoarseComplexField
void MooeeDag(const CoarseVector &in, CoarseVector &out)
void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
void DselfInternal(CartesianStencil< siteVector, siteVector, DefaultImplParams > &st, CoarseMatrix &a, const CoarseVector &in, CoarseVector &out, int dag)
void MeooeDag(const CoarseVector &in, CoarseVector &out)
void MdirComms(const CoarseVector &in)
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
void Dhop(const CoarseVector &in, CoarseVector &out, int dag)
void ImportUnphysicalFermion(CoarseVector const &input, CoarseVector &imported)
std::vector< CoarseMatrix > A
CoarseMatrix AselfInvEven
void Mdag(const CoarseVector &in, CoarseVector &out)
void Dminus(CoarseVector const &in, CoarseVector &out)
Lattice< CComplex > CoarseScalar
void MooeeInvDag(const CoarseVector &in, CoarseVector &out)
std::vector< CoarseMatrix > Aodd
CoarseMatrix AselfInv
CoarseVector FermionField
Lattice< iMatrix< CComplex, nbasis > > CoarseMatrix
void MdirAll(const CoarseVector &in, std::vector< CoarseVector > &out)
CartesianStencil< siteVector, siteVector, DefaultImplParams > StencilOdd
deviceVector< RealD > dag_factor
void DminusDag(CoarseVector const &in, CoarseVector &out)
GridBase * RedBlackGrid()
CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0)
void DhopOE(const CoarseVector &in, CoarseVector &out, int dag)
iVector< CComplex, nbasis > siteVector
bool _isCheckerBoarded
int oSites(void) const
Coordinate _rdimensions
accelerator_inline int Checkerboard(void) const
GridBase * Grid(void) const
Definition Simd.h:194
uint8_t _permute
Definition Stencil.h:93
uint8_t _is_local
Definition Stencil.h:92
uint64_t _offset
Definition Stencil.h:90