148 std::cout <<
GridLogMessage<<
"HDCG: mrhs fPrecBlockcg starting"<<std::endl;
149 src[0].Grid()->Barrier();
150 int nrhs = src.size();
162 std::vector<RealD> ssq(nrhs);
163 for(
int rhs=0;rhs<nrhs;rhs++){
164 ssq[rhs]=
norm2(src[rhs]); assert(ssq[rhs]!=0.0);
170 std::vector<Field> Mtmp(nrhs,
grid);
171 std::vector<Field> tmp(nrhs,
grid);
172 std::vector<Field> Z(nrhs,
grid);
173 std::vector<Field> MZ(nrhs,
grid);
174 std::vector<Field> Q(nrhs,
grid);
175 std::vector<Field> MQ(nrhs,
grid);
176 std::vector<Field> D(nrhs,
grid);
177 std::vector<Field> AD(nrhs,
grid);
205 Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(nrhs,nrhs);
206 Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(nrhs,nrhs);
207 Eigen::MatrixXcd m_zz = Eigen::MatrixXcd::Zero(nrhs,nrhs);
208 Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(nrhs,nrhs);
210 Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(nrhs,nrhs);
211 Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(nrhs,nrhs);
212 Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(nrhs,nrhs);
213 Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(nrhs,nrhs);
215 Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(nrhs,nrhs);
216 Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(nrhs,nrhs);
228 for(
int rhs=0;rhs<nrhs;rhs++){
231 axpy (Z[rhs], -1.0,tmp[rhs], src[rhs]);
247 for(
int b=0;b<nrhs;b++) D[b]=MQ[b];
249 std::cout <<
GridLogMessage<<
"PrecBlockCGrQ vec computed initial residual and QR fact " <<std::endl;
267 std::vector<RealD> rn(nrhs);
274 for(
int b=0;b<nrhs;b++)
_FineLinop.HermOp(D[b], Z[b]);
288 InnerProdTimer.
Start();
290 InnerProdTimer.
Stop();
291 m_M = m_DZ.inverse();
308 ThinQRfact (m_zz, m_S, m_Sinv, Q, MQ, tmp, Mtmp);
314 m_tmp = m_S.adjoint();
327 m_rr = m_C.adjoint() * m_C;
336 for(
int b=0;b<nrhs;b++) {
337 rrsum+=
real(m_rr(b,b));
339 rr =
real(m_rr(b,b))/ssq[b];
340 if ( rr > max_resid ) max_resid = rr;
343 "\t Prec BlockCGrQ Iteration "<<k<<
" ave resid "<< std::sqrt(rrsum/sssum) <<
" max "<< std::sqrt(max_resid) <<std::endl;
349 std::cout<<
GridLogMessage<<
"HDCG: mrhs PrecBlockCGrQ converged in "<<k<<
" iterations and "<<HDCGTimer.
Elapsed()<<std::endl;;
362 for(
int rhs=0;rhs<nrhs;rhs++){
367 axpy(mytmp,-1.0,src[rhs],tmp[rhs]);
372 RealD true_residual = tmpnorm/srcnorm;
374 <<
"HDCG: true residual ["<<rhs<<
"] is "<<true_residual
375 <<
" solution "<<xnorm
376 <<
" source "<<srcnorm
390 std::cout <<
GridLogMessage<<
"HDCG: mrhs fPcg starting"<<std::endl;
391 src[0].Grid()->Barrier();
392 int nrhs = src.size();
393 std::vector<RealD> f(nrhs);
394 std::vector<RealD> rtzp(nrhs);
395 std::vector<RealD> rtz(nrhs);
396 std::vector<RealD> a(nrhs);
397 std::vector<RealD> d(nrhs);
398 std::vector<RealD> b(nrhs);
399 std::vector<RealD> rptzp(nrhs);
405 std::vector<std::vector<Field> > p(nrhs);
for(
int r=0;r<nrhs;r++) p[r].resize(mmax,
grid);
406 std::vector<std::vector<Field> > mmp(nrhs);
for(
int r=0;r<nrhs;r++) mmp[r].resize(mmax,
grid);
407 std::vector<std::vector<RealD> > pAp(nrhs);
for(
int r=0;r<nrhs;r++) pAp[r].resize(mmax);
409 std::vector<Field> z(nrhs,
grid);
410 std::vector<Field> mp (nrhs,
grid);
411 std::vector<Field> r (nrhs,
grid);
412 std::vector<Field> mu (nrhs,
grid);
415 std::vector<RealD> src_nrm(nrhs);
416 for(
int rhs=0;rhs<nrhs;rhs++) {
417 src_nrm[rhs]=
norm2(src[rhs]);
418 assert(src_nrm[rhs]!=0.0);
420 std::vector<RealD> tn(nrhs);
428 for(
int rhs=0;rhs<nrhs;rhs++){
431 axpy (r[rhs], -1.0,mmp[rhs][0], src[rhs]);
440 std::vector<RealD> ssq(nrhs);
441 std::vector<RealD> rsq(nrhs);
442 std::vector<Field> pp(nrhs,
grid);
444 for(
int rhs=0;rhs<nrhs;rhs++){
447 ssq[rhs]=
norm2(src[rhs]);
467 std::vector<RealD> rn(nrhs);
470 int peri_k = k % mmax;
471 int peri_kp = (k+1) % mmax;
473 for(
int rhs=0;rhs<nrhs;rhs++){
476 d[rhs]=
PcgM3(p[rhs][peri_k],mmp[rhs][peri_k]);
478 a[rhs] = rtz[rhs]/d[rhs];
482 pAp[rhs][peri_k] = d[rhs];
484 axpy(x[rhs],a[rhs],p[rhs][peri_k],x[rhs]);
485 rn[rhs] =
axpy_norm(r[rhs],-a[rhs],mmp[rhs][peri_k],r[rhs]);
496 for(
int rhs=0;rhs<nrhs;rhs++){
503 p[rhs][peri_kp]=mu[rhs];
506 b[rhs] = (rtzp[rhs])/rtz[rhs];
508 int northog = (k>mmax-1)?(mmax-1):k;
509 for(
int back=0; back < northog; back++){
510 int peri_back = (k-back)%mmax;
512 RealD beta = -pbApk/pAp[rhs][peri_back];
513 axpy(p[rhs][peri_kp],beta,p[rhs][peri_back],p[rhs][peri_kp]);
520 std::cout<<
GridLogMessage<<
"HDCG:fPcg rhs "<<rhs<<
" k= "<<k<<
" residual = "<<rrn<<
"\n";
521 if ( rrn > max_rn ) max_rn = rrn;
529 std::cout<<
GridLogMessage<<
"HDCG: mrhs fPcg converged in "<<k<<
" iterations and "<<HDCGTimer.
Elapsed()<<std::endl;;
542 for(
int rhs=0;rhs<nrhs;rhs++){
545 axpy(tmp,-1.0,src[rhs],mmp[rhs][0]);
551 RealD true_residual = tmpnorm/srcnorm;
553 <<
"HDCG: true residual ["<<rhs<<
"] is "<<true_residual
554 <<
" solution "<<xnorm
555 <<
" source "<<srcnorm
565 for(
int rhs=0;rhs<nrhs;rhs++){
568 std::cout<<
GridLogMessage<<
"HDCG: non-converged solution "<<xnorm<<
" source "<<srcnorm<<std::endl;