52 typedef typename Field::scalar_type
scalar;
88 std::vector<scalar> tmp(blas.size());
90 for(int64_t s=0;s<blas.size();s++){
91 ss=ss+tmp[s]*
adj(tmp[s]);
130 int nvec = vecs.size();
131 typedef typename Field::vector_object vobj;
142 for(
int d=0 ; d<_ndimension;d++){
146 uint64_t sz = blas.size();
153 for(
int v=0;v<vecs.size();v++){
158 auto blasData_p = &blas[0];
159 auto fineData_p = &fineData[0];
164 const int Nsimd = vobj::Nsimd();
168 uint64_t lwords=
words;
174 for(
int lane=0;lane<Nsimd;lane++) {
182 Lexicographic::CoorFromIndex(coor_f,sf,fine_rdimensions);
184 for(
int d=0;d<_ndimension;d++) coor_b[d] = coor_f[d]%block_r[d];
185 for(
int d=0;d<_ndimension;d++) coor_c[d] = coor_f[d]/block_r[d];
189 Lexicographic::IndexFromCoor(coor_c,sc,coarse_rdimensions);
190 Lexicographic::IndexFromCoor(coor_b,sb,block_r);
197 int64_t site = (lane*osites + sc*bv)*nvec
218 typedef typename Field::vector_object vobj;
220 int nvec = vecs.size();
230 for(
int d=0 ; d<_ndimension;d++){
239 for(
int v=0;v<vecs.size();v++){
243 auto blasData_p = &blas[0];
244 auto fineData_p = &fineData[0];
247 uint64_t lwords =
words;
258 for(
int lane=0;lane<vobj::Nsimd();lane++) {
265 Lexicographic::CoorFromIndex(coor_f,sf,fine_rdimensions);
267 for(
int d=0;d<_ndimension;d++) coor_b[d] = coor_f[d]%block_r[d];
268 for(
int d=0;d<_ndimension;d++) coor_c[d] = coor_f[d]/block_r[d];
272 Lexicographic::IndexFromCoor(coor_c,sc,coarse_rdimensions);
273 Lexicographic::IndexFromCoor(coor_b,sb,block_r);
277 int64_t site = (lane*osites + sc*bv)*nvec
297 int nvec = vecs.size();
298 typedef typename vobj::scalar_object coarse_scalar_object;
306 uint64_t sz = blas.size();
310 for(
int v=0;v<vecs.size();v++){
315 auto blasData_p = &blas[0];
316 auto coarseData_p = &coarseData[0];
321 const int Nsimd = vobj::Nsimd();
322 uint64_t cwords=
sizeof(
typename vobj::scalar_object)/
sizeof(
scalar);
330 for(
int lane=0;lane<Nsimd;lane++) {
333 int64_t blas_site = (lane*osites + sc)*nvec*cwords + v*cwords;
335 coarse_scalar_object data =
extractLane(lane,coarseData[sc]);
337 coarse_scalar_object * ptr = (coarse_scalar_object *)&blasData_p[blas_site];
352 int nvec = vecs.size();
353 typedef typename vobj::scalar_object coarse_scalar_object;
360 uint64_t sz = blas.size();
365 for(
int v=0;v<vecs.size();v++){
370 auto blasData_p = &blas[0];
371 auto coarseData_p = &coarseData[0];
376 const int Nsimd = vobj::Nsimd();
377 uint64_t cwords=
sizeof(
typename vobj::scalar_object)/
sizeof(
scalar);
386 for(
int lane=0;lane<Nsimd;lane++) {
388 int64_t blas_site = (lane*osites + sc)*nvec*cwords + v*cwords;
389 coarse_scalar_object * ptr = (coarse_scalar_object *)&blasData_p[blas_site];
390 coarse_scalar_object data = *ptr;
409 int nrhs=fine.size();
410 int _nbasis =
sizeof(
typename cobj::scalar_object)/
sizeof(
scalar);
465 int nrhs=fine.size();
466 int _nbasis =
sizeof(
typename cobj::scalar_object)/
sizeof(
scalar);
void acceleratorPut(T &dev, const T &host)
accelerator_inline int acceleratorSIMTlane(int Nsimd)
void acceleratorMemSet(void *base, int value, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
void acceleratorCopyFromDevice(void *from, void *to, size_t bytes)
std::vector< T, devAllocator< T > > deviceVector
AcceleratorVector< int, MaxDims > Coordinate
Lattice< vobj > real(const Lattice< vobj > &lhs)
Lattice< vobj > adj(const Lattice< vobj > &lhs)
void subdivides(GridBase *coarse, GridBase *fine)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
void gemmBatched(int m, int n, int k, ComplexD alpha, deviceVector< ComplexD * > &Amk, deviceVector< ComplexD * > &Bkn, ComplexD beta, deviceVector< ComplexD * > &Cmn)
deviceVector< scalar > BLAS_C
void ImportCoarseGridVectors(std::vector< Lattice< vobj > > &vecs, deviceVector< scalar > &blas)
void Allocate(int _nbasis, GridBase *_fgrid, GridBase *_cgrid)
Field::scalar_type scalar
deviceVector< scalar > BLAS_F
void blockProject(std::vector< Field > &fine, std::vector< Lattice< cobj > > &coarse)
void ExportFineGridVectors(std::vector< Field > &vecs, deviceVector< scalar > &blas)
void ExportCoarseGridVectors(std::vector< Lattice< vobj > > &vecs, deviceVector< scalar > &blas)
void ImportFineGridVectors(std::vector< Field > &vecs, deviceVector< scalar > &blas)
void ImportBasis(std::vector< Field > &vecs)
RealD blasNorm2(deviceVector< scalar > &blas)
Field::scalar_object scalar_object
deviceVector< scalar > BLAS_V
void blockPromote(std::vector< Field > &fine, std::vector< Lattice< cobj > > &coarse)