Grid 0.7.0
BlockConjugateGradient.h
Go to the documentation of this file.
1/*************************************************************************************
2
3Grid physics library, www.github.com/paboyle/Grid
4
5Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
6
7Copyright (C) 2017
8
9Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11
12This program is free software; you can redistribute it and/or modify
13it under the terms of the GNU General Public License as published by
14the Free Software Foundation; either version 2 of the License, or
15(at your option) any later version.
16
17This program is distributed in the hope that it will be useful,
18but WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20GNU General Public License for more details.
21
22You should have received a copy of the GNU General Public License along
23with this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25
26See the full license in the file "LICENSE" in the top level distribution
27directory
28*************************************************************************************/
29/* END LEGAL */
30#pragma once
31
33
34template<class Field>
35void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
36 typedef typename Field::scalar_type scomplex;
37 int Nblock = X.size();
38 for(int b=0;b<Nblock;b++){
39 for(int bp=0;bp<Nblock;bp++) {
40 m(b,bp) = innerProduct(X[b],Y[bp]);
41 }}
42}
43template<class Field>
44void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
45 // Should make this cache friendly with site outermost, parallel_for
46 // Deal with case AP aliases with either Y or X
47 //
48 //Could pack "X" and "AP" into a Nblock x Volume dense array.
49 // AP(Nrhs x vol) = Y(Nrhs x vol) + scale * m(nrhs x nrhs) * X(nrhs*vol)
50 typedef typename Field::scalar_type scomplex;
51 int Nblock = AP.size();
52 std::vector<Field> tmp(Nblock,X[0]);
53 for(int b=0;b<Nblock;b++){
54 tmp[b] = Y[b];
55 for(int bp=0;bp<Nblock;bp++) {
56 tmp[b] = tmp[b] +scomplex(scale*m(bp,b))*X[bp];
57 }
58 }
59 for(int b=0;b<Nblock;b++){
60 AP[b] = tmp[b];
61 }
62}
63template<class Field>
64void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
65 // Should make this cache friendly with site outermost, parallel_for
66 typedef typename Field::scalar_type scomplex;
67 int Nblock = AP.size();
68 for(int b=0;b<Nblock;b++){
69 AP[b] = Zero();
70 for(int bp=0;bp<Nblock;bp++) {
71 AP[b] += scomplex(m(bp,b))*X[bp];
72 }
73 }
74}
75template<class Field>
76double normv(const std::vector<Field> &P){
77 int Nblock = P.size();
78 double nn = 0.0;
79 for(int b=0;b<Nblock;b++) {
80 nn+=norm2(P[b]);
81 }
82 return nn;
83}
84
85
87
89// Block conjugate gradient. Dimension zero should be the block direction
91template <class Field>
93 public:
94
95 typedef typename Field::scalar_type scomplex;
96
98 int Nblock;
99
101 bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
102 // Defaults true.
105 Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
106 Integer PrintInterval; //GridLogMessages or Iterative
108
109 BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
110 : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
111 {};
112
114// Thin QR factorisation (google it)
117 //Dimensions
118 // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
119 //
120 // Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
121 //
122 // Q C = R => Q = R C^{-1}
123 //
124 // Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
125 //
126 // Set C = L^{dag}, and then Q^dag Q = ident
127 //
128 // Checks:
129 // Cdag C = Rdag R ; passes.
130 // QdagQ = 1 ; passes
132void ThinQRfact (Eigen::MatrixXcd &m_rr,
133 Eigen::MatrixXcd &C,
134 Eigen::MatrixXcd &Cinv,
135 Field & Q,
136 const Field & R)
137{
138 int Orthog = blockDim; // First dimension is block dim; this is an assumption
139 sliceInnerProductMatrix(m_rr,R,R,Orthog);
140
141 // Force manifest hermitian to avoid rounding related
142 /*
143 int rank=m_rr.rows();
144 for(int r=0;r<rank;r++){
145 for(int s=0;s<rank;s++){
146 std::cout << "QR m_rr["<<r<<","<<s<<"] "<<m_rr(r,s)<<std::endl;
147 }}
148 */
149 m_rr = 0.5*(m_rr+m_rr.adjoint());
150
151 Eigen::MatrixXcd L = m_rr.llt().matrixL();
152
153// ComplexD det = L.determinant();
154// std::cout << " Det m_rr "<<det<<std::endl;
155 C = L.adjoint();
156 Cinv = C.inverse();
158 // Q = R C^{-1}
159 //
160 // Q_j = R_i Cinv(i,j)
161 //
162 // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
164 sliceMulMatrix(Q,Cinv,R,Orthog);
165}
166// see comments above
167void ThinQRfact (Eigen::MatrixXcd &m_rr,
168 Eigen::MatrixXcd &C,
169 Eigen::MatrixXcd &Cinv,
170 std::vector<Field> & Q,
171 const std::vector<Field> & R)
172{
173 InnerProductMatrix(m_rr,R,R);
174 /*
175 int rank=m_rr.rows();
176 for(int r=0;r<rank;r++){
177 for(int s=0;s<rank;s++){
178 std::cout << "QRvec m_rr["<<r<<","<<s<<"] "<<m_rr(r,s)<<std::endl;
179 }}
180 */
181 m_rr = 0.5*(m_rr+m_rr.adjoint());
182
183 Eigen::MatrixXcd L = m_rr.llt().matrixL();
184
185 // ComplexD det = L.determinant();
186 // std::cout << " Det m_rr "<<det<<std::endl;
187
188 C = L.adjoint();
189 Cinv = C.inverse();
190
191 MulMatrix(Q,Cinv,R);
192}
193
195// Call one of several implementations
197void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
198{
199 if ( CGtype == BlockCGrQ ) {
200 BlockCGrQsolve(Linop,Src,Psi);
201 } else if (CGtype == CGmultiRHS ) {
202 CGmultiRHSsolve(Linop,Src,Psi);
203 } else {
204 assert(0);
205 }
206}
207virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
208{
209 if ( CGtype == BlockCGrQVec ) {
210 BlockCGrQsolveVec(Linop,Src,Psi);
211 } else {
212 assert(0);
213 }
214}
215
217// BlockCGrQ implementation:
218//--------------------------
219// X is guess/Solution
220// B is RHS
221// Solve A X_i = B_i ; i refers to Nblock index
223void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
224{
225 int Orthog = blockDim; // First dimension is block dim; this is an assumption
226 Nblock = B.Grid()->_fdimensions[Orthog];
227/* FAKE */
228 Nblock=8;
229 std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
230
231 X.Checkerboard() = B.Checkerboard();
232 conformable(X, B);
233
234 Field tmp(B);
235 Field Q(B);
236 Field D(B);
237 Field Z(B);
238 Field AD(B);
239
240 Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
241 Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
242 Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
243
244 Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
245 Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
246 Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
247 Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
248
249 Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
250 Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
251
252 // Initial residual computation & set up
253 std::vector<RealD> residuals(Nblock);
254 std::vector<RealD> ssq(Nblock);
255
256 sliceNorm(ssq,B,Orthog);
257 RealD sssum=0;
258 for(int b=0;b<Nblock;b++) sssum+=ssq[b];
259 for(int b=0;b<Nblock;b++) std::cout << "src["<<b<<"]" << ssq[b] <<std::endl;
260
261 sliceNorm(residuals,B,Orthog);
262 for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
263
264 sliceNorm(residuals,X,Orthog);
265 for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
266
267 /************************************************************************
268 * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
269 ************************************************************************
270 * Dimensions:
271 *
272 * X,B==(Nferm x Nblock)
273 * A==(Nferm x Nferm)
274 *
275 * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
276 *
277 * QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
278 * for k:
279 * Z = AD
280 * M = [D^dag Z]^{-1}
281 * X = X + D MC
282 * QS = Q - ZM
283 * D = Q + D S^dag
284 * C = S C
285 */
287 // Initial block: initial search dir is guess
289 std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
290
291 //1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
292 Linop.HermOp(X, AD);
293 tmp = B - AD;
294
295 sliceNorm(residuals,tmp,Orthog);
296 for(int b=0;b<Nblock;b++) std::cout << "res["<<b<<"]" << residuals[b] <<std::endl;
297
298 ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
299 D=Q;
300
301 std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
302
304 // Timers
306 GridStopWatch sliceInnerTimer;
307 GridStopWatch sliceMaddTimer;
308 GridStopWatch QRTimer;
309 GridStopWatch MatrixTimer;
310 GridStopWatch SolverTimer;
311 SolverTimer.Start();
312
313 RealD max_resid=0;
314
315 int k;
316 for (k = 1; k <= MaxIterations; k++) {
317
318 //3. Z = AD
319 MatrixTimer.Start();
320 Linop.HermOp(D, Z);
321 MatrixTimer.Stop();
322
323 //4. M = [D^dag Z]^{-1}
324 sliceInnerTimer.Start();
325 sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
326 sliceInnerTimer.Stop();
327 m_M = m_DZ.inverse();
328
329 //5. X = X + D MC
330 m_tmp = m_M * m_C;
331 sliceMaddTimer.Start();
332 sliceMaddMatrix(X,m_tmp, D,X,Orthog);
333 sliceMaddTimer.Stop();
334
335 //6. QS = Q - ZM
336 sliceMaddTimer.Start();
337 sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
338 sliceMaddTimer.Stop();
339 QRTimer.Start();
340 ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
341 QRTimer.Stop();
342
343 //7. D = Q + D S^dag
344 m_tmp = m_S.adjoint();
345
346 sliceMaddTimer.Start();
347 sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
348 sliceMaddTimer.Stop();
349
350 //8. C = S C
351 m_C = m_S*m_C;
352
353 /*********************
354 * convergence monitor
355 *********************
356 */
357 m_rr = m_C.adjoint() * m_C;
358
359 max_resid=0;
360 RealD rrsum=0;
361 RealD rr;
362
363 for(int b=0;b<Nblock;b++) {
364 rrsum+=real(m_rr(b,b));
365 rr = real(m_rr(b,b))/ssq[b];
366 if ( rr > max_resid ) max_resid = rr;
367 }
368
369 std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
370 <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
371
372 if ( max_resid < Tolerance*Tolerance ) {
373
374 SolverTimer.Stop();
375
376 std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
377
378 for(int b=0;b<Nblock;b++){
379 std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
380 << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
381 }
382 std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
383
384 Linop.HermOp(X, AD);
385 AD = AD-B;
386 TrueResidual = std::sqrt(norm2(AD)/norm2(B));
387 std::cout << GridLogMessage <<"\tTrue residual is " << TrueResidual <<std::endl;
388
389 std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
390 std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
391 std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
392 std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
393 std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
394 std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
395
397 return;
398 }
399
400 }
401
402 std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge "<<k<<" / "<<MaxIterations
403 <<" residual "<< std::sqrt(max_resid)<< std::endl;
404
405 if (ErrorOnNoConverge) assert(0);
407}
408
409// multiRHS conjugate gradient. Dimension zero should be the block direction
410// Use this for spread out across nodes
412void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
413{
414 int Orthog = blockDim; // First dimension is block dim
415 Nblock = Src.Grid()->_fdimensions[Orthog];
416
417 std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
418
419 Psi.Checkerboard() = Src.Checkerboard();
420 conformable(Psi, Src);
421
422 Field P(Src);
423 Field AP(Src);
424 Field R(Src);
425
426 std::vector<ComplexD> v_pAp(Nblock);
427 std::vector<RealD> v_rr (Nblock);
428 std::vector<RealD> v_rr_inv(Nblock);
429 std::vector<RealD> v_alpha(Nblock);
430 std::vector<RealD> v_beta(Nblock);
431
432 // Initial residual computation & set up
433 std::vector<RealD> residuals(Nblock);
434 std::vector<RealD> ssq(Nblock);
435
436 sliceNorm(ssq,Src,Orthog);
437 RealD sssum=0;
438 for(int b=0;b<Nblock;b++) sssum+=ssq[b];
439
440 sliceNorm(residuals,Src,Orthog);
441 for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
442
443 sliceNorm(residuals,Psi,Orthog);
444 for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
445
446 // Initial search dir is guess
447 Linop.HermOp(Psi, AP);
448
449 R = Src - AP;
450 P = R;
451 sliceNorm(v_rr,R,Orthog);
452
453 GridStopWatch sliceInnerTimer;
454 GridStopWatch sliceMaddTimer;
455 GridStopWatch sliceNormTimer;
456 GridStopWatch MatrixTimer;
457 GridStopWatch SolverTimer;
458
459 SolverTimer.Start();
460 int k;
461 for (k = 1; k <= MaxIterations; k++) {
462
463 RealD rrsum=0;
464 for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
465
466 std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
467 <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
468
469 MatrixTimer.Start();
470 Linop.HermOp(P, AP);
471 MatrixTimer.Stop();
472
473 // Alpha
474 sliceInnerTimer.Start();
475 sliceInnerProductVector(v_pAp,P,AP,Orthog);
476 sliceInnerTimer.Stop();
477 for(int b=0;b<Nblock;b++){
478 v_alpha[b] = v_rr[b]/real(v_pAp[b]);
479 }
480
481 // Psi, R update
482 sliceMaddTimer.Start();
483 sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
484 sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
485 sliceMaddTimer.Stop();
486
487 // Beta
488 for(int b=0;b<Nblock;b++){
489 v_rr_inv[b] = 1.0/v_rr[b];
490 }
491 sliceNormTimer.Start();
492 sliceNorm(v_rr,R,Orthog);
493 sliceNormTimer.Stop();
494 for(int b=0;b<Nblock;b++){
495 v_beta[b] = v_rr_inv[b] *v_rr[b];
496 }
497
498 // Search update
499 sliceMaddTimer.Start();
500 sliceMaddVector(P,v_beta,P,R,Orthog);
501 sliceMaddTimer.Stop();
502
503 /*********************
504 * convergence monitor
505 *********************
506 */
507 RealD max_resid=0;
508 for(int b=0;b<Nblock;b++){
509 RealD rr = v_rr[b]/ssq[b];
510 if ( rr > max_resid ) max_resid = rr;
511 }
512
513 if ( max_resid < Tolerance*Tolerance ) {
514
515 SolverTimer.Stop();
516
517 std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
518 for(int b=0;b<Nblock;b++){
519 std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
520 }
521 std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
522
523 Linop.HermOp(Psi, AP);
524 AP = AP-Src;
525 TrueResidual = std::sqrt(norm2(AP)/norm2(Src));
526 std::cout <<GridLogMessage << "\tTrue residual is " << TrueResidual <<std::endl;
527
528 std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
529 std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
530 std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
531 std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
532 std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
533 std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
534
535
537 return;
538 }
539
540 }
541 std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
542
543 if (ErrorOnNoConverge) assert(0);
545}
546
548// BlockCGrQvec implementation:
549//--------------------------
550// X is guess/Solution
551// B is RHS
552// Solve A X_i = B_i ; i refers to Nblock index
554void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
555{
556 Nblock = B.size();
557 assert(Nblock == X.size());
558
559 std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
560
561 for(int b=0;b<Nblock;b++){
562 X[b].Checkerboard() = B[b].Checkerboard();
563 conformable(X[b], B[b]);
564 conformable(X[b], X[0]);
565 }
566
567 Field Fake(B[0]);
568
569 std::vector<Field> tmp(Nblock,Fake);
570 std::vector<Field> Q(Nblock,Fake);
571 std::vector<Field> D(Nblock,Fake);
572 std::vector<Field> Z(Nblock,Fake);
573 std::vector<Field> AD(Nblock,Fake);
574
575 Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
576 Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
577 Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
578
579 Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
580 Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
581 Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
582 Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
583
584 Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
585 Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
586
587 // Initial residual computation & set up
588 std::vector<RealD> residuals(Nblock);
589 std::vector<RealD> ssq(Nblock);
590
591 RealD sssum=0;
592 for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
593 for(int b=0;b<Nblock;b++){ std::cout << "ssq["<<b<<"] "<<ssq[b]<<std::endl;}
594 for(int b=0;b<Nblock;b++) sssum+=ssq[b];
595
596 for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
597 for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
598
599 for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
600 for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
601
602 /************************************************************************
603 * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
604 ************************************************************************
605 * Dimensions:
606 *
607 * X,B==(Nferm x Nblock)
608 * A==(Nferm x Nferm)
609 *
610 * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
611 *
612 * QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
613 * for k:
614 * Z = AD
615 * M = [D^dag Z]^{-1}
616 * X = X + D MC
617 * QS = Q - ZM
618 * D = Q + D S^dag
619 * C = S C
620 */
622 // Initial block: initial search dir is guess
624 std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
625
626 //1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
627 for(int b=0;b<Nblock;b++) {
628 Linop.HermOp(X[b], AD[b]);
629 tmp[b] = B[b] - AD[b];
630 std::cout << "r0["<<b<<"] "<<norm2(tmp[b])<<std::endl;
631 }
632
633 ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
634
635 for(int b=0;b<Nblock;b++) D[b]=Q[b];
636
637 std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
638
640 // Timers
642 GridStopWatch sliceInnerTimer;
643 GridStopWatch sliceMaddTimer;
644 GridStopWatch QRTimer;
645 GridStopWatch MatrixTimer;
646 GridStopWatch SolverTimer;
647 SolverTimer.Start();
648
649 int k;
650 for (k = 1; k <= MaxIterations; k++) {
651
652 //3. Z = AD
653 MatrixTimer.Start();
654 for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
655 MatrixTimer.Stop();
656
657 //4. M = [D^dag Z]^{-1}
658 sliceInnerTimer.Start();
659 InnerProductMatrix(m_DZ,D,Z);
660 sliceInnerTimer.Stop();
661 m_M = m_DZ.inverse();
662
663 //5. X = X + D MC
664 m_tmp = m_M * m_C;
665 sliceMaddTimer.Start();
666 MaddMatrix(X,m_tmp, D,X);
667 sliceMaddTimer.Stop();
668
669 //6. QS = Q - ZM
670 sliceMaddTimer.Start();
671 MaddMatrix(tmp,m_M,Z,Q,-1.0);
672 sliceMaddTimer.Stop();
673 QRTimer.Start();
674 ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
675 QRTimer.Stop();
676
677 //7. D = Q + D S^dag
678 m_tmp = m_S.adjoint();
679 sliceMaddTimer.Start();
680 MaddMatrix(D,m_tmp,D,Q);
681 sliceMaddTimer.Stop();
682
683 //8. C = S C
684 m_C = m_S*m_C;
685
686 /*********************
687 * convergence monitor
688 *********************
689 */
690 m_rr = m_C.adjoint() * m_C;
691
692 RealD max_resid=0;
693 RealD rrsum=0;
694 RealD rr;
695
696 for(int b=0;b<Nblock;b++) {
697 rrsum+=real(m_rr(b,b));
698 rr = real(m_rr(b,b))/ssq[b];
699 if ( rr > max_resid ) max_resid = rr;
700 }
701
702 std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< std::sqrt(rrsum/sssum) << " max "<< std::sqrt(max_resid) <<std::endl;
703
704 if ( max_resid < Tolerance*Tolerance ) {
705
706 SolverTimer.Stop();
707
708 std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
709
710 for(int b=0;b<Nblock;b++){
711 std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
712 }
713 std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
714
715 for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
716 for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
717 TrueResidual = std::sqrt(normv(AD)/normv(B));
718 std::cout << GridLogMessage << "\tTrue residual is " << TrueResidual <<std::endl;
719
720 std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
721 std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
722 std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
723 std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
724 std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
725 std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
726
728 return;
729 }
730
731 }
732 std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
733
734 if (ErrorOnNoConverge) assert(0);
736}
737
738};
739
741
void InnerProductMatrix(Eigen::MatrixXcd &m, const std::vector< Field > &X, const std::vector< Field > &Y)
double normv(const std::vector< Field > &P)
void MaddMatrix(std::vector< Field > &AP, Eigen::MatrixXcd &m, const std::vector< Field > &X, const std::vector< Field > &Y, RealD scale=1.0)
void MulMatrix(std::vector< Field > &AP, Eigen::MatrixXcd &m, const std::vector< Field > &X)
B
void conformable(const Lattice< obj1 > &lhs, const Lattice< obj2 > &rhs)
static void sliceMulMatrix(Lattice< vobj > &R, Eigen::MatrixXcd &aa, const Lattice< vobj > &X, int Orthog, RealD scale=1.0)
static void sliceMaddMatrix(Lattice< vobj > &R, Eigen::MatrixXcd &aa, const Lattice< vobj > &X, const Lattice< vobj > &Y, int Orthog, RealD scale=1.0)
static void sliceInnerProductMatrix(Eigen::MatrixXcd &mat, const Lattice< vobj > &lhs, const Lattice< vobj > &rhs, int Orthog)
Lattice< vobj > real(const Lattice< vobj > &lhs)
ComplexD innerProduct(const Lattice< vobj > &left, const Lattice< vobj > &right)
static void sliceMaddVector(Lattice< vobj > &R, std::vector< RealD > &a, const Lattice< vobj > &X, const Lattice< vobj > &Y, int orthogdim, RealD scale=1.0)
static void sliceNorm(std::vector< RealD > &sn, const Lattice< vobj > &rhs, int Orthog)
static void sliceInnerProductVector(std::vector< ComplexD > &result, const Lattice< vobj > &lhs, const Lattice< vobj > &rhs, int orthogdim)
RealD norm2(const Lattice< vobj > &arg)
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE")
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
uint32_t Integer
Definition Simd.h:58
double RealD
Definition Simd.h:61
void BlockCGrQsolveVec(LinearOperatorBase< Field > &Linop, const std::vector< Field > &B, std::vector< Field > &X)
void BlockCGrQsolve(LinearOperatorBase< Field > &Linop, const Field &B, Field &X)
virtual void operator()(LinearOperatorBase< Field > &Linop, const std::vector< Field > &Src, std::vector< Field > &Psi)
void ThinQRfact(Eigen::MatrixXcd &m_rr, Eigen::MatrixXcd &C, Eigen::MatrixXcd &Cinv, Field &Q, const Field &R)
void CGmultiRHSsolve(LinearOperatorBase< Field > &Linop, const Field &Src, Field &Psi)
void ThinQRfact(Eigen::MatrixXcd &m_rr, Eigen::MatrixXcd &C, Eigen::MatrixXcd &Cinv, std::vector< Field > &Q, const std::vector< Field > &R)
void operator()(LinearOperatorBase< Field > &Linop, const Field &Src, Field &Psi)
BlockConjugateGradient(BlockCGtype cgtype, int _Orthog, RealD tol, Integer maxit, bool err_on_no_conv=true)
void Start(void)
Definition Timer.h:92
GridTime Elapsed(void) const
Definition Timer.h:113
void Stop(void)
Definition Timer.h:99
virtual void HermOp(const Field &in, Field &out)=0
Definition Simd.h:194