Grid 0.7.0
GeneralCoarsenedMatrix.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <pboyle@bnl.gov>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#pragma once
29
30#include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No)
31
34
36
37// Fine Object == (per site) type of fine field
38// nbasis == number of deflation vectors
39template<class Fobj,class CComplex,int nbasis>
40class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
41public:
42
51 typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
56 // Data members
64
 // Forward coarse link fields: one CoarseMatrix per stencil point of the
 // nonlocal geometry (indexed in step with geom.shifts).
65 std::vector<CoarseMatrix> _A;
 // Adjoint link fields for the non-hermitian path.
 // NOTE(review): the resize of _Adag in the constructor is commented out
 // in this file — confirm it is allocated before PopulateAdag() is used.
66 std::vector<CoarseMatrix> _Adag;
 // Per-stencil-point accumulation buffers, kept as members so the
 // device/UVM allocations stay live across repeated Mult() calls.
67 std::vector<CoarseVector> MultTemporaries;
68
70 // Interface
 // Grid accessors. Grid() returns the COARSE grid: this operator acts on
 // coarse vectors, and the coarse grid is all the linalg routines need.
72 GridBase * Grid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
73 GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know
74 GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
75
76 /* void ShiftMatrix(RealD shift)
77 {
78 int Nd=_FineGrid->Nd();
79 Coordinate zero_shift(Nd,0);
80 for(int p=0;p<geom.npoint;p++){
81 if ( zero_shift==geom.shifts[p] ) {
82 _A[p] = _A[p]+shift;
83 // _Adag[p] = _Adag[p]+shift;
84 }
85 }
86 }
87 void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe)
88 {
89 int nfound=0;
90 std::cout << GridLogMessage <<"GeneralCoarsenedMatrix::ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl;
91 for(int p=0;p<geom.npoint;p++){
92 for(int pp=0;pp<CopyMe.geom.npoint;pp++){
93 // Search for the same relative shift
94 // Avoids brutal handling of Grid pointers
95 if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) {
96 _A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]);
97 // _Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]);
98 nfound++;
99 }
100 }
101 }
102 assert(nfound==geom.npoint);
103 ExchangeCoarseLinks();
104 }
105 */
106
108 : geom(_geom),
111 hermitian(1),
112 Cell(_geom.Depth(),_CoarseGrid),
113 Stencil(Cell.grids.back(),geom.shifts)
114 {
115 {
116 int npoint = _geom.npoint;
117 }
118 _A.resize(geom.npoint,CoarseGrid);
119 // _Adag.resize(geom.npoint,CoarseGrid);
120 }
121 void M (const CoarseVector &in, CoarseVector &out)
122 {
123 Mult(_A,in,out);
124 }
125 void Mdag (const CoarseVector &in, CoarseVector &out)
126 {
127 assert(hermitian);
128 Mult(_A,in,out);
129 // if ( hermitian ) M(in,out);
130 // else Mult(_Adag,in,out);
131 }
 // Core stencil matrix-vector multiply:
 //   out = sum_{p in stencil} A[p] * shift_p(in)
 // over the npoint shifts of the nonlocal geometry. Runs entirely on the
 // accelerator after a single halo exchange; per-phase timers (in us) are
 // accumulated below and reported at GridLogPerformance verbosity.
132 void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out)
133 {
134 RealD tviews=0; RealD ttot=0; RealD tmult=0; RealD texch=0; RealD text=0; RealD ttemps=0; RealD tcopy=0;
135 RealD tmult2=0;
136
137 ttot=-usecond();
139 conformable(in.Grid(),out.Grid());
140 out.Checkerboard() = in.Checkerboard();
141 CoarseVector tin=in;
142
 // Halo exchange: pad the input into the cell so every stencil neighbour
 // is locally addressable inside the kernel (no comms in the hot loop).
143 texch-=usecond();
144 CoarseVector pin = Cell.ExchangePeriodic(tin);
145 texch+=usecond();
146
147 CoarseVector pout(pin.Grid());
148
149 int npoint = geom.npoint;
150 typedef LatticeView<Cobj> Aview;
151 typedef LatticeView<Cvec> Vview;
152
153 const int Nsimd = CComplex::Nsimd();
154
155 int64_t osites=pin.Grid()->oSites();
156
 // Flop/byte model for the reporting below: counts a complex
 // multiply-add as 8 flops per (bb,b) matrix element per site per point.
157 RealD flops = 1.0* npoint * nbasis * nbasis * 8.0 * osites * CComplex::Nsimd();
158 RealD bytes = 1.0*osites*sizeof(siteMatrix)*npoint
159 + 2.0*osites*sizeof(siteVector)*npoint;
160
161 {
162 tviews-=usecond();
163 autoView( in_v , pin, AcceleratorRead);
164 autoView( out_v , pout, AcceleratorWriteDiscard);
165 autoView( Stencil_v , Stencil, AcceleratorRead);
166 tviews+=usecond();
167
168 // Static and prereserve to keep UVM region live and not resized across multiple calls
169 ttemps-=usecond();
170 MultTemporaries.resize(npoint,pin.Grid());
171 ttemps+=usecond();
 // Host-side view handles; closed explicitly at the end of this scope.
172 std::vector<Aview> AcceleratorViewContainer_h;
173 std::vector<Vview> AcceleratorVecViewContainer_h;
174
175 tviews-=usecond();
176 for(int p=0;p<npoint;p++) {
177 AcceleratorViewContainer_h.push_back( A[p].View(AcceleratorRead));
178 AcceleratorVecViewContainer_h.push_back(MultTemporaries[p].View(AcceleratorWrite));
179 }
180 tviews+=usecond();
181
 // Device-resident copies of the view arrays so the kernels can index
 // them by stencil point; static so the allocation persists across calls.
182 static deviceVector<Aview> AcceleratorViewContainer; AcceleratorViewContainer.resize(npoint);
183 static deviceVector<Vview> AcceleratorVecViewContainer; AcceleratorVecViewContainer.resize(npoint);
184
185 auto Aview_p = &AcceleratorViewContainer[0];
186 auto Vview_p = &AcceleratorVecViewContainer[0];
187 tcopy-=usecond();
188 acceleratorCopyToDevice(&AcceleratorViewContainer_h[0],&AcceleratorViewContainer[0],npoint *sizeof(Aview));
189 acceleratorCopyToDevice(&AcceleratorVecViewContainer_h[0],&AcceleratorVecViewContainer[0],npoint *sizeof(Vview));
190 tcopy+=usecond();
191
 // Kernel 1: for each (site, point, basis-row b), contract the point's
 // link matrix with the shifted neighbour vector into MultTemporaries[point].
 // Note the (bb,b) index order: A is applied via its column index b here.
192 tmult-=usecond();
193 accelerator_for(spb, osites*nbasis*npoint, Nsimd, {
194 typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
195 int32_t ss = spb/(nbasis*npoint);
196 int32_t bp = spb%(nbasis*npoint);
197 int32_t point= bp/nbasis;
198 int32_t b = bp%nbasis;
199 auto SE = Stencil_v.GetEntry(point,ss);
200 auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd);
201 auto res = coalescedRead(Aview_p[point][ss](0,b))*nbr(0);
202 for(int bb=1;bb<nbasis;bb++) {
203 res = res + coalescedRead(Aview_p[point][ss](bb,b))*nbr(bb);
204 }
205 coalescedWrite(Vview_p[point][ss](b),res);
206 });
 // Kernel 2: reduce the per-point partial results over the stencil ball.
207 tmult2-=usecond();
208 accelerator_for(sb, osites*nbasis, Nsimd, {
209 int ss = sb/nbasis;
210 int b = sb%nbasis;
211 auto res = coalescedRead(Vview_p[0][ss](b));
212 for(int point=1;point<npoint;point++){
213 res = res + coalescedRead(Vview_p[point][ss](b));
214 }
215 coalescedWrite(out_v[ss](b),res);
216 });
217 tmult2+=usecond();
218 tmult+=usecond();
 // Release the host-side views opened above.
219 for(int p=0;p<npoint;p++) {
220 AcceleratorViewContainer_h[p].ViewClose();
221 AcceleratorVecViewContainer_h[p].ViewClose();
222 }
223 }
224
 // Unpad: extract the interior of the padded result back onto out's grid.
225 text-=usecond();
226 out = Cell.Extract(pout);
227 text+=usecond();
228 ttot+=usecond();
229
230 std::cout << GridLogPerformance<<"Coarse 1rhs Mult Aviews "<<tviews<<" us"<<std::endl;
231 std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
232 std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
233 std::cout << GridLogPerformance<<" of which mult2 "<<tmult2<<" us"<<std::endl;
234 std::cout << GridLogPerformance<<"Coarse Mult ext "<<text<<" us"<<std::endl;
235 std::cout << GridLogPerformance<<"Coarse Mult temps "<<ttemps<<" us"<<std::endl;
236 std::cout << GridLogPerformance<<"Coarse Mult copy "<<tcopy<<" us"<<std::endl;
237 std::cout << GridLogPerformance<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
238 // std::cout << GridLogPerformance<<std::endl;
239 std::cout << GridLogPerformance<<"Coarse Kernel flops "<< flops<<std::endl;
240 std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl;
241 std::cout << GridLogPerformance<<"Coarse Kernel bytes/s "<< bytes/tmult<<" MB/s"<<std::endl;
242 std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
243 std::cout << GridLogPerformance<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
244
245 };
246
 // Build the adjoint link fields from the forward ones:
 //   _Adag[Reverse(p)](b) = adj( _A[p](b - shift_p) )
 // by looping over every global coarse site and poking the adjoint of the
 // shifted forward link into the reversed stencil point. Uses global
 // peekSite/pokeSite, so this is a slow, comms-heavy setup-time routine.
 // NOTE(review): requires _Adag to be sized to geom.npoint; its resize in
 // the constructor is commented out in this file — confirm before calling.
247 void PopulateAdag(void)
248 {
249 for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
250 Coordinate bcoor;
251 CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor);
252
253 for(int p=0;p<geom.npoint;p++){
 // Source site for point p: current site displaced by -shift_p,
 // wrapped periodically in every direction.
254 Coordinate scoor = bcoor;
255 for(int mu=0;mu<bcoor.size();mu++){
256 int L = CoarseGrid()->GlobalDimensions()[mu];
257 scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic
258 }
259 // Flip to poke/peekLocalSite and not too bad
260 auto link = peekSite(_A[p],scoor);
 // Reverse(p) maps shift_p to -shift_p in the stencil geometry.
261 int pp = geom.Reverse(p);
262 pokeSite(adj(link),_Adag[pp],bcoor);
263 }
264 }
265 }
266
267 //
268 // A) Only reduced flops option is to use a padded cell of depth 4
269 // and apply MpcDagMpc in the padded cell.
270 //
271 // Makes for ONE application of MpcDagMpc per vector instead of 30 or 80.
272 // With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio
273 // Cost is 81x more, same as stencil size.
274 //
275 // But: can eliminate comms and do as local dirichlet.
276 //
277 // Local exchange gauge field once.
278 // Apply to all vectors, local only computation.
279 // Must exchange ghost subcells in reverse process of PaddedCell to take inner products
280 //
281 // B) Can reduce cost: pad by 1, apply Deo (4^4+6^4+8^4+8^4 )/ (4x 4^4)
282 // pad by 2, apply Doe
283 // pad by 3, apply Deo
284 // then break out 8x directions; cost is ~10x MpcDagMpc per vector
285 //
286 // => almost factor of 10 in setup cost, excluding data rearrangement
287 //
288 // Intermediates -- ignore the corner terms, leave approximate and force Hermitian
289 // Intermediates -- pad by 2 and apply 1+8+24 = 33 times.
291
293 // BFM HDCG style approach: Solve a system of equations to get Aij
295 /*
296 * Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM.
297 *
298 * conj(phases[block]) proj[k][ block*Nvec+j ] = \sum_ball e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >
299 * = \sum_ball e^{iqk.delta} A_ji
300 *
301 * Must invert matrix M_k,l = e^[i q_k . delta_l]
302 *
303 * Where q_k = delta_k . (2*M_PI/global_nb[mu])
304 */
305#if 0
308 {
309 std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
310 GridBase *grid = FineGrid();
311
312 RealD tproj=0.0;
313 RealD teigen=0.0;
314 RealD tmat=0.0;
315 RealD tphase=0.0;
316 RealD tinv=0.0;
317
319 // Orthogonalise the subblocks over the basis
321 CoarseScalar InnerProd(CoarseGrid());
322 blockOrthogonalise(InnerProd,Subspace.subspace);
323
324 const int npoint = geom.npoint;
325
327 int Nd = CoarseGrid()->Nd();
328
329 /*
330 * Here, k,l index which possible momentum/shift within the N-points connected by MdagM.
331 * Matrix index i is mapped to this shift via
332 * geom.shifts[i]
333 *
334 * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]
335 * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >
336 * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l}
337 * = M_{kl} A_ji^{b.b+l}
338 *
339 * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l]
340 *
341 * Where q_k = delta_k . (2*M_PI/global_nb[mu])
342 *
343 * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j}
344 */
345 teigen-=usecond();
346 Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
347 Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
348 ComplexD ci(0.0,1.0);
349 for(int k=0;k<npoint;k++){ // Loop over momenta
350
351 for(int l=0;l<npoint;l++){ // Loop over nbr relative
352 ComplexD phase(0.0,0.0);
353 for(int mu=0;mu<Nd;mu++){
354 RealD TwoPiL = M_PI * 2.0/ clatt[mu];
355 phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu];
356 }
357 phase=exp(phase*ci);
358 Mkl(k,l) = phase;
359 }
360 }
361 invMkl = Mkl.inverse();
362 teigen+=usecond();
363
365 // Now compute the matrix elements of linop between the orthonormal
366 // set of vectors.
368 FineField phaV(grid); // Phased block basis vector
369 FineField MphaV(grid);// Matrix applied
370 CoarseVector coarseInner(CoarseGrid());
371
372 std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid());
373 std::vector<CoarseVector> FT(npoint,CoarseGrid());
374 for(int i=0;i<nbasis;i++){// Loop over basis vectors
375 std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
376 for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
378 // Stick a phase on every block
380 tphase-=usecond();
382 CoarseComplexField pha(CoarseGrid()); pha=Zero();
383 for(int mu=0;mu<Nd;mu++){
384 LatticeCoordinate(coor,mu);
385 RealD TwoPiL = M_PI * 2.0/ clatt[mu];
386 pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor;
387 }
388 pha =exp(pha*ci);
389 phaV=Zero();
390 blockZAXPY(phaV,pha,Subspace.subspace[i],phaV);
391 tphase+=usecond();
392
394 // Multiple phased subspace vector by matrix and project to subspace
395 // Remove local bulk phase to leave relative phases
397 tmat-=usecond();
398 linop.Op(phaV,MphaV);
399 tmat+=usecond();
400
401 tproj-=usecond();
402 blockProject(coarseInner,MphaV,Subspace.subspace);
403 coarseInner = conjugate(pha) * coarseInner;
404
405 ComputeProj[p] = coarseInner;
406 tproj+=usecond();
407
408 }
409
410 tinv-=usecond();
411 for(int k=0;k<npoint;k++){
412 FT[k] = Zero();
413 for(int l=0;l<npoint;l++){
414 FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
415 }
416
417 int osites=CoarseGrid()->oSites();
418 autoView( A_v , _A[k], AcceleratorWrite);
419 autoView( FT_v , FT[k], AcceleratorRead);
420 accelerator_for(sss, osites, 1, {
421 for(int j=0;j<nbasis;j++){
422 A_v[sss](i,j) = FT_v[sss](j);
423 }
424 });
425 }
426 tinv+=usecond();
427 }
428
429 // Only needed if nonhermitian
430 if ( ! hermitian ) {
431 // std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
432 // PopulateAdag();
433 }
434
435 // Need to write something to populate Adag from A
437 std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
438 std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
439 std::cout << GridLogMessage<<"CoarsenOperator mat "<<tmat <<" us"<<std::endl;
440 std::cout << GridLogMessage<<"CoarsenOperator proj "<<tproj<<" us"<<std::endl;
441 std::cout << GridLogMessage<<"CoarsenOperator inv "<<tinv<<" us"<<std::endl;
442 }
443#else
445 // Galerkin projection of matrix
449 {
450 CoarsenOperator(linop,Subspace,Subspace);
451 }
452
453 // Petrov - Galerkin projection of matrix
458 {
459 std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
460 GridBase *grid = FineGrid();
461
462 RealD tproj=0.0;
463 RealD teigen=0.0;
464 RealD tmat=0.0;
465 RealD tphase=0.0;
466 RealD tphaseBZ=0.0;
467 RealD tinv=0.0;
468
470 // Orthogonalise the subblocks over the basis
472 CoarseScalar InnerProd(CoarseGrid());
473 blockOrthogonalise(InnerProd,V.subspace);
474 blockOrthogonalise(InnerProd,U.subspace);
475
476 const int npoint = geom.npoint;
477
479 int Nd = CoarseGrid()->Nd();
480
481 /*
482 * Here, k,l index which possible momentum/shift within the N-points connected by MdagM.
483 * Matrix index i is mapped to this shift via
484 * geom.shifts[i]
485 *
486 * conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]
487 * = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >
488 * = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l}
489 * = M_{kl} A_ji^{b.b+l}
490 *
491 * Must assemble and invert matrix M_k,l = e^[i q_k . delta_l]
492 *
493 * Where q_k = delta_k . (2*M_PI/global_nb[mu])
494 *
495 * Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j}
496 */
497 teigen-=usecond();
498 Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
499 Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
500 ComplexD ci(0.0,1.0);
501 for(int k=0;k<npoint;k++){ // Loop over momenta
502
503 for(int l=0;l<npoint;l++){ // Loop over nbr relative
504 ComplexD phase(0.0,0.0);
505 for(int mu=0;mu<Nd;mu++){
506 RealD TwoPiL = M_PI * 2.0/ clatt[mu];
507 phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu];
508 }
509 phase=exp(phase*ci);
510 Mkl(k,l) = phase;
511 }
512 }
513 invMkl = Mkl.inverse();
514 teigen+=usecond();
515
517 // Now compute the matrix elements of linop between the orthonormal
518 // set of vectors.
520 FineField phaV(grid); // Phased block basis vector
521 FineField MphaV(grid);// Matrix applied
522 std::vector<FineComplexField> phaF(npoint,grid);
523 std::vector<CoarseComplexField> pha(npoint,CoarseGrid());
524
525 CoarseVector coarseInner(CoarseGrid());
526
527 typedef typename CComplex::scalar_type SComplex;
528 FineComplexField one(grid); one=SComplex(1.0);
529 FineComplexField zz(grid); zz = Zero();
530 tphase=-usecond();
531 for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
533 // Stick a phase on every block
536 pha[p]=Zero();
537 for(int mu=0;mu<Nd;mu++){
538 LatticeCoordinate(coor,mu);
539 RealD TwoPiL = M_PI * 2.0/ clatt[mu];
540 pha[p] = pha[p] + (TwoPiL * geom.shifts[p][mu]) * coor;
541 }
542 pha[p] =exp(pha[p]*ci);
543
544 blockZAXPY(phaF[p],pha[p],one,zz);
545
546 }
547 tphase+=usecond();
548
549 std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid());
550 std::vector<CoarseVector> FT(npoint,CoarseGrid());
551 for(int i=0;i<nbasis;i++){// Loop over basis vectors
552 std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
553 for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
554 tphaseBZ-=usecond();
555 phaV = phaF[p]*V.subspace[i];
556 tphaseBZ+=usecond();
557
559 // Multiple phased subspace vector by matrix and project to subspace
560 // Remove local bulk phase to leave relative phases
562 tmat-=usecond();
563 linop.Op(phaV,MphaV);
564 tmat+=usecond();
565 // std::cout << i << " " <<p << " MphaV "<<norm2(MphaV)<<" "<<norm2(phaV)<<std::endl;
566
567 tproj-=usecond();
568 blockProject(coarseInner,MphaV,U.subspace);
569 coarseInner = conjugate(pha[p]) * coarseInner;
570
571 ComputeProj[p] = coarseInner;
572 tproj+=usecond();
573 // std::cout << i << " " <<p << " ComputeProj "<<norm2(ComputeProj[p])<<std::endl;
574
575 }
576
577 tinv-=usecond();
578 for(int k=0;k<npoint;k++){
579 FT[k] = Zero();
580 for(int l=0;l<npoint;l++){
581 FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
582 }
583
584 int osites=CoarseGrid()->oSites();
585 autoView( A_v , _A[k], AcceleratorWrite);
586 autoView( FT_v , FT[k], AcceleratorRead);
587 accelerator_for(sss, osites, 1, {
588 for(int j=0;j<nbasis;j++){
589 A_v[sss](i,j) = FT_v[sss](j);
590 }
591 });
592 }
593 tinv+=usecond();
594 }
595
596 // Only needed if nonhermitian
597 if ( ! hermitian ) {
598 // std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
599 // PopulateAdag();
600 }
601
602 for(int p=0;p<geom.npoint;p++){
603 std::cout << " _A["<<p<<"] "<<norm2(_A[p])<<std::endl;
604 }
605
606 // Need to write something to populate Adag from A
608 std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
609 std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
610 std::cout << GridLogMessage<<"CoarsenOperator phaseBZ "<<tphaseBZ<<" us"<<std::endl;
611 std::cout << GridLogMessage<<"CoarsenOperator mat "<<tmat <<" us"<<std::endl;
612 std::cout << GridLogMessage<<"CoarsenOperator proj "<<tproj<<" us"<<std::endl;
613 std::cout << GridLogMessage<<"CoarsenOperator inv "<<tinv<<" us"<<std::endl;
614 }
615#endif
617 for(int p=0;p<geom.npoint;p++){
618 _A[p] = Cell.ExchangePeriodic(_A[p]);
619 // _Adag[p]= Cell.ExchangePeriodic(_Adag[p]);
620 }
621 }
 // Unimplemented SparseMatrixBase interface stubs: diagonal and
 // directional applications are not provided by this operator, so any
 // call aborts via assert(0).
622 virtual void Mdiag (const Field &in, Field &out){ assert(0);};
623 virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);};
624 virtual void MdirAll (const Field &in, std::vector<Field> &out){assert(0);};
625};
626
627
628
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
std::vector< T, devAllocator< T > > deviceVector
#define one
Definition BGQQPX.h:108
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
accelerator_inline Grid_simd< S, V > exp(const Grid_simd< S, V > &r)
void conformable(const Lattice< obj1 > &lhs, const Lattice< obj2 > &rhs)
void LatticeCoordinate(Lattice< iobj > &l, int mu)
void pokeSite(const sobj &s, Lattice< vobj > &l, const Coordinate &site)
vobj::scalar_object peekSite(const Lattice< vobj > &l, const Coordinate &site)
Lattice< vobj > conjugate(const Lattice< vobj > &lhs)
Lattice< vobj > adj(const Lattice< vobj > &lhs)
RealD norm2(const Lattice< vobj > &arg)
void blockOrthogonalise(Lattice< CComplex > &ip, std::vector< Lattice< vobj > > &Basis)
void blockZAXPY(Lattice< vobj > &fineZ, const Lattice< CComplex > &coarseA, const Lattice< vobj2 > &fineX, const Lattice< vobj > &fineY)
void blockProject(Lattice< iVector< CComplex, nbasis > > &coarseData, const Lattice< vobj > &fineData, const VLattice &Basis)
#define autoView(l_v, l, mode)
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN")
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
@ AcceleratorRead
@ AcceleratorWrite
@ AcceleratorWriteDiscard
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int Nd
Definition QCD.h:52
std::complex< RealD > ComplexD
Definition Simd.h:79
double RealD
Definition Simd.h:61
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61
accelerator_inline vobj coalescedReadGeneralPermute(const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0)
Definition Tensor_SIMT.h:78
double usecond(void)
Definition Timer.h:50
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
#define M_PI
Definition Zolotarev.cc:41
accelerator_inline size_type size(void) const
Definition Coordinate.h:52
std::vector< Lattice< Fobj > > subspace
Definition Aggregates.h:58
std::vector< CoarseMatrix > _Adag
Lattice< iScalar< CComplex > > CoarseComplexField
void M(const CoarseVector &in, CoarseVector &out)
Lattice< CComplex > FineComplexField
Lattice< CComplex > CoarseScalar
Lattice< siteVector > CoarseVector
iMatrix< CComplex, nbasis > Cobj
std::vector< CoarseMatrix > _A
iVector< CComplex, nbasis > Cvec
virtual void Mdiag(const Field &in, Field &out)
virtual void MdirAll(const Field &in, std::vector< Field > &out)
void Mdag(const CoarseVector &in, CoarseVector &out)
void CoarsenOperator(LinearOperatorBase< Lattice< Fobj > > &linop, Aggregation< Fobj, CComplex, nbasis > &U, Aggregation< Fobj, CComplex, nbasis > &V)
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom, GridBase *FineGrid, GridCartesian *CoarseGrid)
iMatrix< CComplex, nbasis > siteMatrix
std::vector< CoarseVector > MultTemporaries
void CoarsenOperator(LinearOperatorBase< Lattice< Fobj > > &linop, Aggregation< Fobj, CComplex, nbasis > &Subspace)
iVector< CComplex, nbasis > siteVector
void Mult(std::vector< CoarseMatrix > &A, const CoarseVector &in, CoarseVector &out)
Lattice< iMatrix< CComplex, nbasis > > CoarseMatrix
GridCartesian * CoarseGrid(void)
NonLocalStencilGeometry & geom
GeneralCoarsenedMatrix< Fobj, CComplex, nbasis > GeneralCoarseOp
virtual void Mdir(const Field &in, Field &out, int dir, int disp)
const Coordinate & GlobalDimensions(void)
int64_t gSites(void) const
int oSites(void) const
int Nd(void) const
void GlobalIndexToGlobalCoor(int64_t gidx, Coordinate &gcoor)
accelerator_inline int Checkerboard(void) const
GridBase * Grid(void) const
std::vector< Coordinate > shifts
Definition Geometry.h:111
Definition Simd.h:194