44 for(
int j=0; j<k; ++j){
50template<
class VField,
class Matrix>
51void basisRotate(VField &basis,Matrix& Qt,
int j0,
int j1,
int k0,
int k1,
int Nm)
53 typedef decltype(basis[0]) Field;
58 typedef typename std::remove_reference<
decltype(h_basis_v[0][0])>
::type vobj;
59 typedef typename std::remove_reference<
decltype(Qt(0,0))>
::type Coeff_t;
63 for(
int k=0;k<basis.size();k++){
68 View *basis_vp = &d_basis_v[0];
74 uint64_t oSites =grid->
oSites();
75 uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot;
83 Coeff_t *Qt_p = & Qt_jv[0];
92 for(uint64_t s=0;s<oSites;s+=siteBlock){
95 int ssites=
MIN(siteBlock,oSites-s);
111 for(
int k=k0; k<k1; ++k){
126 for(
int k=0;k<basis.size();k++) h_basis_v[k].ViewClose();
131void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,
int j,
int k0,
int k1,
int Nm)
134 typedef typename Field::vector_object vobj;
137 result.Checkerboard() = basis[0].Checkerboard();
141 for(
int k=0;k<basis.size();k++){
148 double * Qt_j = & Qt_jv[0];
151 auto basis_vp=& d_basis_v[0];
155 auto B=coalescedRead(zzz);
156 for(int k=k0; k<k1; ++k){
157 B +=Qt_j[k] * coalescedRead(basis_vp[k][ss]);
161 for(
int k=0;k<basis.size();k++) h_basis_v[k].ViewClose();
167 int vlen = idx.size();
170 assert(vlen<=sort_vals.size());
171 assert(vlen<=_v.size());
173 for (
size_t i=0;i<vlen;i++) {
185 for (j=i;j<idx.size();j++)
189 assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
191 swap(_v[i],_v[idx[i]]);
192 std::swap(sort_vals[i],sort_vals[idx[i]]);
202 std::vector<int> idx(sort_vals.size());
203 std::iota(idx.begin(), idx.end(), 0);
206 std::sort(idx.begin(), idx.end(), [&sort_vals](
int i1,
int i2) {
207 return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
217 std::reverse(idx.begin(), idx.end());
225void basisDeflate(
const std::vector<Field> &_v,
const std::vector<RealD>&
eval,
const Field& src_orig,Field& result) {
227 assert(_v.size()==
eval.size());
228 int N = (int)_v.size();
229 for (
int i=0;i<N;i++) {
void acceleratorPut(T &dev, const T &host)
void acceleratorCopyToDevice(void *from, void *to, size_t bytes)
#define accelerator_for(iterator, num, nsimd,...)
std::vector< T, devAllocator< T > > deviceVector
std::vector< T, alignedAllocator< T > > hostVector
accelerator_inline sobj eval(const uint64_t ss, const sobj &arg)
void axpy(Lattice< vobj > &ret, sobj a, const Lattice< vobj > &x, const Lattice< vobj > &y)
void basisRotate(VField &basis, Matrix &Qt, int j0, int j1, int k0, int k1, int Nm)
void basisDeflate(const std::vector< Field > &_v, const std::vector< RealD > &eval, const Field &src_orig, Field &result)
void basisOrthogonalize(std::vector< Field > &basis, Field &w, int k)
std::vector< int > basisSortGetIndex(std::vector< RealD > &sort_vals)
void basisReorderInPlace(std::vector< Field > &_v, std::vector< RealD > &sort_vals, std::vector< int > &idx)
void basisSortInPlace(std::vector< Field > &_v, std::vector< RealD > &sort_vals, bool reverse)
void basisRotateJ(Field &result, std::vector< Field > &basis, Eigen::MatrixXd &Qt, int j, int k0, int k1, int Nm)
ComplexD innerProduct(const Lattice< vobj > &left, const Lattice< vobj > &right)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
accelerator_inline std::enable_if<!isGridTensor< T >::value, T >::type TensorRemove(T arg)
#define thread_for(i, num,...)