Grid 0.7.0
MultiRHSDeflation.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: MultiRHSDeflation.h
6
7 Copyright (C) 2023
8
9Author: Peter Boyle <pboyle@bnl.gov>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#pragma once
29
31
32
33/* Need helper object for BLAS accelerated mrhs projection
34
35 i) MultiRHS Deflation
36
37 Import Evecs -> nev x vol x internal
38 Import vector of Lattice objects -> nrhs x vol x internal
39 => Cij (nrhs x Nev) via GEMM.
40 => Guess (nrhs x vol x internal) = C x evecs (via GEMM)
41 Export
42
43
44 ii) MultiRHS block projection
45
46 Import basis -> nblock x nbasis x (block x internal)
47 Import vector of fine lattice objects -> nblock x nrhs x (block x internal)
48
49 => coarse_(nrhs x nbasis )^block = via batched GEMM
50
51 iii) Alternate interface:
52 Import higher dim Lattice object-> vol x nrhs layout
53
54*/
55template<class Field>
57{
58public:
59
60 typedef typename Field::scalar_type scalar;
61 typedef typename Field::scalar_object scalar_object;
62
63 int nev;
64 std::vector<RealD> eval;
66 uint64_t vol;
67 uint64_t words;
68
69 deviceVector<scalar> BLAS_E; // nev x vol -- the eigenbasis (up to a 1/sqrt(lambda))
70 deviceVector<scalar> BLAS_R; // nrhs x vol -- the sources
71 deviceVector<scalar> BLAS_G; // nrhs x vol -- the guess
72 deviceVector<scalar> BLAS_C; // nrhs x nev -- the coefficients
73
76
77 void Deallocate(void)
78 {
79 nev=0;
80 grid=nullptr;
81 vol=0;
82 words=0;
83 BLAS_E.resize(0);
84 BLAS_R.resize(0);
85 BLAS_C.resize(0);
86 BLAS_G.resize(0);
87 }
88 void Allocate(int _nev,GridBase *_grid)
89 {
90 nev=_nev;
91 grid=_grid;
92 vol = grid->lSites();
93 words = sizeof(scalar_object)/sizeof(scalar);
94 eval.resize(nev);
95 BLAS_E.resize (vol * words * nev );
96 std::cout << GridLogMessage << " Allocate for "<<nev<<" eigenvectors and volume "<<vol<<std::endl;
97 }
98 void ImportEigenVector(Field &evec,RealD &_eval, int ev)
99 {
100 // std::cout << " ev " <<ev<<" eval "<<_eval<< std::endl;
101 assert(ev<eval.size());
102 eval[ev] = _eval;
103
104 int64_t offset = ev*vol*words;
107
108 }
109 void ImportEigenBasis(std::vector<Field> &evec,std::vector<RealD> &_eval)
110 {
111 ImportEigenBasis(evec,_eval,0,evec.size());
112 }
113 // Could use to import a batch of eigenvectors
114 void ImportEigenBasis(std::vector<Field> &evec,std::vector<RealD> &_eval, int _ev0, int _nev)
115 {
116 assert(_ev0+_nev<=evec.size());
117
118 Allocate(_nev,evec[0].Grid());
119
120 // Imports a sub-batch of eigenvectors, _ev0, ..., _ev0+_nev-1
121 for(int e=0;e<nev;e++){
122 std::cout << "Importing eigenvector "<<e<<" evalue "<<_eval[_ev0+e]<<std::endl;
123 ImportEigenVector(evec[_ev0+e],_eval[_ev0+e],e);
124 }
125 }
126 void DeflateSources(std::vector<Field> &source,std::vector<Field> & guess)
127 {
128 int nrhs = source.size();
129 assert(source.size()==guess.size());
130 assert(grid == guess[0].Grid());
131 conformable(guess[0],source[0]);
132
133 int64_t vw = vol * words;
134
135 RealD t0 = usecond();
136 BLAS_R.resize(nrhs * vw); // cost free if size doesn't change
137 BLAS_G.resize(nrhs * vw); // cost free if size doesn't change
138 BLAS_C.resize(nev * nrhs);// cost free if size doesn't change
139
141 // Copy in the multi-rhs sources
143 // for(int r=0;r<nrhs;r++){
144 // std::cout << " source["<<r<<"] = "<<norm2(source[r])<<std::endl;
145 // }
146 for(int r=0;r<nrhs;r++){
147 int64_t offset = r*vw;
148 autoView(v,source[r],AcceleratorRead);
150 }
151
152 /*
153 * in Fortran column major notation (cuBlas order)
154 *
155 * Exe = [e1(x)][..][en(x)]
156 *
157 * Rxr = [r1(x)][..][rm(x)]
158 *
159 * C_er = E^dag R
160 * C_er = C_er / lambda_e
161 * G_xr = Exe Cer
162 */
167
168 scalar * Eh = & BLAS_E[0];
169 scalar * Rh = & BLAS_R[0];
170 scalar * Ch = & BLAS_C[0];
171 scalar * Gh = & BLAS_G[0];
172
173 acceleratorPut(Ed[0],Eh);
174 acceleratorPut(Rd[0],Rh);
175 acceleratorPut(Cd[0],Ch);
176 acceleratorPut(Gd[0],Gh);
177
178 GridBLAS BLAS;
179
181 // C_er = E^dag R
184 nev,nrhs,vw,
185 scalar(1.0),
186 Ed,
187 Rd,
188 scalar(0.0), // wipe out C
189 Cd);
190 BLAS.synchronise();
191
192 assert(BLAS_C.size()==nev*nrhs);
193
194 std::vector<scalar> HOST_C(BLAS_C.size()); // nrhs . nev -- the coefficients
195 acceleratorCopyFromDevice(&BLAS_C[0],&HOST_C[0],BLAS_C.size()*sizeof(scalar));
196 grid->GlobalSumVector(&HOST_C[0],nev*nrhs);
197 for(int e=0;e<nev;e++){
198 RealD lam(1.0/eval[e]);
199 for(int r=0;r<nrhs;r++){
200 int off = e+nev*r;
201 HOST_C[off]=HOST_C[off] * lam;
202 // std::cout << "C["<<e<<"]["<<r<<"] ="<<HOST_C[off]<< " eval[e] "<<eval[e] <<std::endl;
203 }
204 }
205 acceleratorCopyToDevice(&HOST_C[0],&BLAS_C[0],BLAS_C.size()*sizeof(scalar));
206
207
209 // Guess G_xr = Exe Cer
212 vw,nrhs,nev,
213 scalar(1.0),
214 Ed, // x . nev
215 Cd, // nev . nrhs
216 scalar(0.0),
217 Gd);
218 BLAS.synchronise();
219
221 // Copy out the multirhs
223 for(int r=0;r<nrhs;r++){
224 int64_t offset = r*vw;
225 autoView(v,guess[r],AcceleratorWrite);
227 }
228 RealD t1 = usecond();
229 std::cout << GridLogMessage << "MultiRHSDeflation for "<<nrhs<<" sources with "<<nev<<" eigenvectors took " << (t1-t0)/1e3 <<" ms"<<std::endl;
230 }
231};
232
void acceleratorPut(T &dev, const T &host)
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
void acceleratorCopyDeviceToDevice(void *from, void *to, size_t bytes)
void acceleratorCopyFromDevice(void *from, void *to, size_t bytes)
std::vector< T, devAllocator< T > > deviceVector
@ GridBLAS_OP_N
Definition BatchedBlas.h:67
@ GridBLAS_OP_C
Definition BatchedBlas.h:67
void conformable(const Lattice< obj1 > &lhs, const Lattice< obj2 > &rhs)
#define autoView(l_v, l, mode)
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
double RealD
Definition Simd.h:61
double usecond(void)
Definition Timer.h:50
void synchronise(void)
void gemmBatched(int m, int n, int k, ComplexD alpha, deviceVector< ComplexD * > &Amk, deviceVector< ComplexD * > &Bkn, ComplexD beta, deviceVector< ComplexD * > &Cmn)
Field::scalar_object scalar_object
void Allocate(int _nev, GridBase *_grid)
deviceVector< scalar > BLAS_R
void ImportEigenVector(Field &evec, RealD &_eval, int ev)
deviceVector< scalar > BLAS_E
void DeflateSources(std::vector< Field > &source, std::vector< Field > &guess)
deviceVector< scalar > BLAS_C
Field::scalar_type scalar
std::vector< RealD > eval
void ImportEigenBasis(std::vector< Field > &evec, std::vector< RealD > &_eval, int _ev0, int _nev)
deviceVector< scalar > BLAS_G
void ImportEigenBasis(std::vector< Field > &evec, std::vector< RealD > &_eval)