Grid 0.7.0
HISQSmearing.h
Go to the documentation of this file.
1/*************************************************************************************
2
3Grid physics library, www.github.com/paboyle/Grid
4
5Source file: ./lib/qcd/smearing/HISQSmearing.h
6
7Copyright (C) 2023
8
9Author: D. A. Clarke <clarke.davida@gmail.com>
10
11This program is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License along
22with this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25See the full license in the file "LICENSE" in the top level distribution
26directory
27*************************************************************************************/
28/*
29 @file HISQSmearing.h
30 @brief Declares classes related to HISQ smearing
31*/
32
33
34#pragma once
35#include <Grid/Grid.h>
38
39
41
42
43// TODO: find a way to fold this into the stencil header. need to access grid to get
44// Nd, since you don't want to inherit from QCD.h
46template<typename... Args>
47void appendShift(std::vector<Coordinate>& shifts, int dir, Args... args) {
48 Coordinate shift(Nd,0);
49 generalShift(shift, dir, args...);
50 // push_back creates an element at the end of shifts and
51 // assigns the data in the argument to it.
52 shifts.push_back(shift);
53}
54
55
57accelerator_inline int stencilIndex(int mu, int nu) {
58 // Nshifts depends on how you built the stencil
59 int Nshifts = 6;
60 return Nshifts*nu + Nd*Nshifts*mu;
61}
62
63
67 Real c_1; // 1 link
68 Real c_naik; // Naik term
69 Real c_3; // 3 link
70 Real c_5; // 5 link
71 Real c_7; // 7 link
72 Real c_lp; // 5 link Lepage
73 HISQSmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp)
74 : c_1(c1),
75 c_naik(cnaik),
76 c_3(c3),
77 c_5(c5),
78 c_7(c7),
79 c_lp(clp){}
80};
81
82
84template<class Gimpl>
85class Smear_HISQ : public Gimpl {
86
87private:
90
91public:
92
94 typedef typename Gimpl::GaugeField GF;
95 typedef typename Gimpl::GaugeLinkField LF;
96 typedef typename Gimpl::ComplexField CF;
97
98 // Don't allow default values here.
99 Smear_HISQ(GridCartesian* grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp)
100 : _grid(grid),
101 _linkTreatment(c1,cnaik,c3,c5,c7,clp) {
102 assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3");
103 assert(Nd == 4 && "HISQ smearing only defined for Nd==4");
104 }
105
106 // Allow to pass a pointer to a C-style, double array for MILC convenience
107 Smear_HISQ(GridCartesian* grid, double* coeff)
108 : _grid(grid),
109 _linkTreatment(coeff[0],coeff[1],coeff[2],coeff[3],coeff[4],coeff[5]) {
110 assert(Nc == 3 && "HISQ smearing currently implemented only for Nc==3");
111 assert(Nd == 4 && "HISQ smearing only defined for Nd==4");
112 }
113
115
116 // Intent: OUT--u_smr, u_naik
117 // IN--u_thin
118 void smear(GF& u_smr, GF& u_naik, GF& u_thin) const {
119
120 HISQSmearingParameters lt = this->_linkTreatment;
121 auto grid = this->_grid;
122
123 // Create a padded cell of extra padding depth=1 and fill the padding.
124 int depth = 1;
125 PaddedCell Ghost(depth,grid);
126 GF Ughost = Ghost.Exchange(u_thin);
127
128 // This is where auxiliary N-link fields and the final smear will be stored.
129 GF Ughost_fat(Ughost.Grid());
130 GF Ughost_3link(Ughost.Grid());
131 GF Ughost_5linkA(Ughost.Grid());
132 GF Ughost_5linkB(Ughost.Grid());
133
134 // mu-nu plane stencil. We allow mu==nu to make indexing the stencil easier,
135 // but these entries will not be used.
136 std::vector<Coordinate> shifts;
137 for(int mu=0;mu<Nd;mu++)
138 for(int nu=0;nu<Nd;nu++) {
139 appendShift(shifts,mu);
140 appendShift(shifts,nu);
142 appendShift(shifts,mu,Back(nu));
143 appendShift(shifts,Back(nu));
144 appendShift(shifts,Back(mu));
145 }
146
147 // A GeneralLocalStencil has two indices: a site and stencil index
148 GeneralLocalStencil gStencil(Ughost.Grid(),shifts);
149
150 // This is where contributions from the smearing get added together
151 Ughost_fat=Zero();
152
153 // This loop handles 3-, 5-, and 7-link constructs, minus Lepage and Naik.
154 for(int mu=0;mu<Nd;mu++) {
155
156 // TODO: This approach is slightly memory inefficient. It uses 25% extra memory
157 Ughost_3link =Zero();
158 Ughost_5linkA=Zero();
159 Ughost_5linkB=Zero();
160
161 // Create the accessors
162 autoView(U_v , Ughost , AcceleratorRead);
163 autoView(U_fat_v , Ughost_fat , AcceleratorWrite);
164 autoView(U_3link_v , Ughost_3link , AcceleratorWrite);
165 autoView(U_5linkA_v, Ughost_5linkA, AcceleratorWrite);
166 autoView(U_5linkB_v, Ughost_5linkB, AcceleratorWrite);
167
168 // We infer some types that will be needed in the calculation.
169 typedef decltype(gStencil.GetEntry(0,0)) stencilElement;
170 typedef decltype(coalescedReadGeneralPermute(U_v[0](0),gStencil.GetEntry(0,0)->_permute,Nd)) U3matrix;
171
172 int Nsites = U_v.size();
173 auto gStencil_v = gStencil.View(AcceleratorRead);
174
175 accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
176 stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
177 U3matrix U0, U1, U2, U3, U4, U5, W;
178 for(int nu=0;nu<Nd;nu++) {
179 if(nu==mu) continue;
180 int s = stencilIndex(mu,nu);
181
182 // The stencil gives us support points in the mu-nu plane that we will use to
183 // grab the links we need.
184 SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
185 SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
186 SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset;
187 SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
188 SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
189 SE5 = gStencil_v.GetEntry(s+5,site); int x_m_mu = SE5->_offset;
190
191 // When you're deciding whether to take an adjoint, the question is: how is the
192 // stored link oriented compared to the one you want? If I imagine myself travelling
193 // with the to-be-updated link, I have two possible, alternative 3-link paths I can
194 // take, one starting by going to the left, the other starting by going to the right.
195 U0 = coalescedReadGeneralPermute(U_v[x_p_mu ](nu),SE0->_permute,Nd);
196 U1 = coalescedReadGeneralPermute(U_v[x_p_nu ](mu),SE1->_permute,Nd);
197 U2 = coalescedReadGeneralPermute(U_v[x ](nu),SE2->_permute,Nd);
198 U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd);
199 U4 = coalescedReadGeneralPermute(U_v[x_m_nu ](mu),SE4->_permute,Nd);
200 U5 = coalescedReadGeneralPermute(U_v[x_m_nu ](nu),SE4->_permute,Nd);
201
202 // "left" "right"
203 W = U2*U1*adj(U0) + adj(U5)*U4*U3;
204
205 // Save 3-link construct for later and add to smeared field.
206 coalescedWrite(U_3link_v[x](nu), W);
207
208 // The index operator (x) returns the coalesced read on GPU. The view [] index returns
209 // a reference to the vector object. The [x](mu) returns a reference to the densely
210 // packed (contiguous in memory) mu-th element of the vector object. On CPU,
211 // coalescedRead/Write is the identity mapping assigning vector object to vector object.
212 // But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
213 coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
214 }
215 })
216
217 accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
218 stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
219 U3matrix U0, U1, U2, U3, U4, U5, W;
220 int sigmaIndex = 0;
221 for(int nu=0;nu<Nd;nu++) {
222 if(nu==mu) continue;
223 int s = stencilIndex(mu,nu);
224 for(int rho=0;rho<Nd;rho++) {
225 if (rho == mu || rho == nu) continue;
226
227 SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
228 SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
229 SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset;
230 SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
231 SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
232
233 U0 = coalescedReadGeneralPermute( U_v[x_p_mu ](nu ),SE0->_permute,Nd);
234 U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu ](rho),SE1->_permute,Nd);
235 U2 = coalescedReadGeneralPermute( U_v[x ](nu ),SE2->_permute,Nd);
236 U3 = coalescedReadGeneralPermute( U_v[x_p_mu_m_nu](nu ),SE3->_permute,Nd);
237 U4 = coalescedReadGeneralPermute(U_3link_v[x_m_nu ](rho),SE4->_permute,Nd);
238 U5 = coalescedReadGeneralPermute( U_v[x_m_nu ](nu ),SE4->_permute,Nd);
239
240 W = U2*U1*adj(U0) + adj(U5)*U4*U3;
241
242 if(sigmaIndex<3) {
243 coalescedWrite(U_5linkA_v[x](rho), W);
244 } else {
245 coalescedWrite(U_5linkB_v[x](rho), W);
246 }
247
248 coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W);
249 sigmaIndex++;
250 }
251 }
252 })
253
254 accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
255 stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
256 U3matrix U0, U1, U2, U3, U4, U5, W;
257 int sigmaIndex = 0;
258 for(int nu=0;nu<Nd;nu++) {
259 if(nu==mu) continue;
260 int s = stencilIndex(mu,nu);
261 for(int rho=0;rho<Nd;rho++) {
262 if (rho == mu || rho == nu) continue;
263
264 SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
265 SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
266 SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset;
267 SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
268 SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
269
270 U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd);
271 if(sigmaIndex<3) {
272 U1 = coalescedReadGeneralPermute(U_5linkB_v[x_p_nu](rho),SE1->_permute,Nd);
273 } else {
274 U1 = coalescedReadGeneralPermute(U_5linkA_v[x_p_nu](rho),SE1->_permute,Nd);
275 }
276 U2 = coalescedReadGeneralPermute(U_v[x](nu),SE2->_permute,Nd);
277 U3 = coalescedReadGeneralPermute(U_v[x_p_mu_m_nu](nu),SE3->_permute,Nd);
278 if(sigmaIndex<3) {
279 U4 = coalescedReadGeneralPermute(U_5linkB_v[x_m_nu](rho),SE4->_permute,Nd);
280 } else {
281 U4 = coalescedReadGeneralPermute(U_5linkA_v[x_m_nu](rho),SE4->_permute,Nd);
282 }
283 U5 = coalescedReadGeneralPermute(U_v[x_m_nu](nu),SE4->_permute,Nd);
284
285 W = U2*U1*adj(U0) + adj(U5)*U4*U3;
286
287 coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_7*W);
288 sigmaIndex++;
289 }
290 }
291 })
292
293 } // end mu loop
294
295 // c1, c3, c5, c7 construct contributions
296 u_smr = Ghost.Extract(Ughost_fat) + lt.c_1*u_thin;
297
298 // Load up U and V std::vectors to access thin and smeared links.
299 std::vector<LF> U(Nd, grid);
300 std::vector<LF> V(Nd, grid);
301 std::vector<LF> Vnaik(Nd, grid);
302 for (int mu = 0; mu < Nd; mu++) {
303 U[mu] = PeekIndex<LorentzIndex>(u_thin, mu);
304 V[mu] = PeekIndex<LorentzIndex>(u_smr, mu);
305 }
306
307 for(int mu=0;mu<Nd;mu++) {
308
309 // Naik
310 Vnaik[mu] = lt.c_naik*Gimpl::CovShiftForward(U[mu],mu,
311 Gimpl::CovShiftForward(U[mu],mu,
312 Gimpl::CovShiftIdentityForward(U[mu],mu)));
313
314 // LePage
315 for (int nu_h=1;nu_h<Nd;nu_h++) {
316 int nu=(mu+nu_h)%Nd;
317 // nu, nu, mu, Back(nu), Back(nu)
318 V[mu] = V[mu] + lt.c_lp*Gimpl::CovShiftForward(U[nu],nu,
319 Gimpl::CovShiftForward(U[nu],nu,
320 Gimpl::CovShiftForward(U[mu],mu,
321 Gimpl::CovShiftBackward(U[nu],nu,
322 Gimpl::CovShiftIdentityBackward(U[nu],nu)))))
323 // Back(nu), Back(nu), mu, nu, nu
324 + lt.c_lp*Gimpl::CovShiftBackward(U[nu],nu,
325 Gimpl::CovShiftBackward(U[nu],nu,
326 Gimpl::CovShiftForward(U[mu],mu,
327 Gimpl::CovShiftForward(U[nu],nu,
328 Gimpl::CovShiftIdentityForward(U[nu],nu)))));
329 }
330 }
331
332 // Put V back into u_smr.
333 for (int mu = 0; mu < Nd; mu++) {
334 PokeIndex<LorentzIndex>(u_smr , V[mu] , mu);
335 PokeIndex<LorentzIndex>(u_naik, Vnaik[mu], mu);
336 }
337 };
338
339
340 // Intent: OUT--u_proj
341 // IN--u_mu
342 void projectU3(GF& u_proj, GF& u_mu) const {
343
344 auto grid = this->_grid;
345
346 LF V(grid), Q(grid), sqrtQinv(grid), id_3(grid), diff(grid);
347 CF c0(grid), c1(grid), c2(grid), g0(grid), g1(grid), g2(grid), S(grid), R(grid), theta(grid),
348 u(grid), v(grid), w(grid), den(grid), f0(grid), f1(grid), f2(grid);
349
350 // Follow MILC 10.1103/PhysRevD.82.074501, eqs (B2-B3) and (C1-C8)
351 for (int mu = 0; mu < Nd; mu++) {
352 V = PeekIndex<LorentzIndex>(u_mu, mu);
353 Q = adj(V)*V;
354 c0 = real(trace(Q));
355 c1 = (1/2.)*real(trace(Q*Q));
356 c2 = (1/3.)*real(trace(Q*Q*Q));
357 S = (1/3.)*c1-(1/18.)*c0*c0;
358 if (norm2(S)<1e-28) {
359 g0 = (1/3.)*c0; g1 = g0; g2 = g1;
360 } else {
361 R = (1/2.)*c2-(1/3. )*c0*c1+(1/27.)*c0*c0*c0;
362 theta = acos(R*pow(S,-1.5));
363 g0 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta-2*M_PI/3.);
364 g1 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta );
365 g2 = (1/3.)*c0+2.*sqrt(S)*cos((1/3.)*theta+2*M_PI/3.);
366 }
367// if (fabs(Q.determinant()/(g0*g1*g2)-1.0) > 1e-5) { SVD }
368 u = sqrt(g0) + sqrt(g1) + sqrt(g2);
369 v = sqrt(g0*g1) + sqrt(g0*g2) + sqrt(g1*g2);
370 w = sqrt(g0*g1*g2);
371 den = w*(u*v-w);
372 f0 = (-w*(u*u+v)+u*v*v)/den;
373 f1 = (-w-u*u*u+2.*u*v)/den;
374 f2 = u/den;
375 id_3 = 1.;
376
377 sqrtQinv = f0*id_3 + f1*Q + f2*Q*Q;
378
379 PokeIndex<LorentzIndex>(u_proj, V*sqrtQinv, mu);
380 }
381 };
382
383
384// void derivative(const GaugeField& Gauge) const {
385// };
386};
387
388
#define accelerator_inline
#define accelerator_for(iterator, num, nsimd,...)
#define U0
Definition BGQQPX.h:105
#define U1
Definition BGQQPX.h:106
#define U2
Definition BGQQPX.h:107
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
int Back(const int dir)
signals that you want to go backwards in direction dir
void generalShift(Coordinate &shift, int dir)
shift one unit in direction dir
accelerator_inline Grid_simd2< S, V > trace(const Grid_simd2< S, V > &arg)
accelerator_inline Grid_simd< S, V > cos(const Grid_simd< S, V > &r)
accelerator_inline Grid_simd< S, V > acos(const Grid_simd< S, V > &r)
accelerator_inline Grid_simd< S, V > sqrt(const Grid_simd< S, V > &r)
accelerator_inline int stencilIndex(int mu, int nu)
figure out the stencil index from mu and nu
void appendShift(std::vector< Coordinate > &shifts, int dir, Args... args)
append arbitrary shift path to shifts
void PokeIndex(Lattice< vobj > &lhs, const Lattice< decltype(peekIndex< Index >(vobj(), 0))> &rhs, int i)
auto PeekIndex(const Lattice< vobj > &lhs, int i) -> Lattice< decltype(peekIndex< Index >(vobj(), i))>
Lattice< vobj > real(const Lattice< vobj > &lhs)
Lattice< vobj > adj(const Lattice< vobj > &lhs)
RealD norm2(const Lattice< vobj > &arg)
Lattice< obj > pow(const Lattice< obj > &rhs_i, RealD y)
#define autoView(l_v, l, mode)
@ AcceleratorRead
@ AcceleratorWrite
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
static constexpr int Nd
Definition QCD.h:52
static constexpr int Nc
Definition QCD.h:50
RealF Real
Definition Simd.h:65
accelerator_inline void coalescedWrite(vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition Tensor_SIMT.h:87
accelerator_inline vobj coalescedReadGeneralPermute(const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0)
Definition Tensor_SIMT.h:78
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
#define M_PI
Definition Zolotarev.cc:41
accelerator_inline GeneralStencilEntry * GetEntry(int point, int osite) const
View_type View(int mode) const
Lattice< vobj > Extract(const Lattice< vobj > &in) const
Definition PaddedCell.h:287
Lattice< vobj > Exchange(const Lattice< vobj > &in, const CshiftImplBase< vobj > &cshift=CshiftImplDefault< vobj >()) const
Definition PaddedCell.h:304
GridCartesian *const _grid
void smear(GF &u_smr, GF &u_naik, GF &u_thin) const
Gimpl::GaugeField GF
INHERIT_GIMPL_TYPES(Gimpl)
void projectU3(GF &u_proj, GF &u_mu) const
HISQSmearingParameters _linkTreatment
Gimpl::ComplexField CF
Smear_HISQ(GridCartesian *grid, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp)
Gimpl::GaugeLinkField LF
Smear_HISQ(GridCartesian *grid, double *coeff)
Definition Simd.h:194
structure holding the link treatment
HISQSmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp)