Grid 0.7.0
Tensor_SIMT.h File Reference
#include <string.h>

Go to the source code of this file.

Functions

 NAMESPACE_BEGIN (Grid)
template<class vobj>
accelerator_inline void exchangeSIMT (vobj &mp0, vobj &mp1, const vobj &vp0, const vobj &vp1, Integer type)
template<class vobj>
accelerator_inline vobj coalescedRead (const vobj &__restrict__ vec, int lane=0)
template<class vobj>
accelerator_inline vobj coalescedReadPermute (const vobj &__restrict__ vec, int ptype, int doperm, int lane=0)
template<class vobj>
accelerator_inline vobj coalescedReadGeneralPermute (const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0)
template<class vobj>
accelerator_inline void coalescedWrite (vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
template<class vobj>
accelerator_inline void coalescedWriteNonTemporal (vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
 NAMESPACE_END (Grid)

Function Documentation

◆ NAMESPACE_BEGIN()

NAMESPACE_BEGIN ( Grid )

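◆ exchangeSIMT()

template<class vobj>
accelerator_inline void exchangeSIMT ( vobj &mp0,
vobj &mp1,
const vobj &vp0,
const vobj &vp1,
Integer type )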

◆ coalescedRead()

template<class vobj>
accelerator_inline vobj coalescedRead ( const vobj &__restrict__ vec,
int lane = 0 )

Definition at line 61 of file Tensor_SIMT.h.

Referenced by axpby_ssp_pminus(), axpby_ssp_pplus(), basisRotate(), CartesianStencil< SiteSpinor, SiteSpinor, ImplParams >::CommsCopy(), SimpleCompressorGather< vobj, FaceGatherSimple >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::CompressExchange(), WilsonKernels< Impl >::DhopDirK(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DhopInternal(), StaggeredKernels< Impl >::DhopSiteGenericExt(), StaggeredKernels< Impl >::DhopSiteGenericInt(), StaggeredKernels< Impl >::DhopSiteHand(), StaggeredKernels< Impl >::DhopSiteHandExt(), StaggeredKernels< Impl >::DhopSiteHandInt(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DselfInternal(), exchangeSIMT(), Gather_plane_simple(), WilsonKernels< Impl >::GenericDhopSite(), WilsonKernels< Impl >::GenericDhopSiteDag(), WilsonKernels< Impl >::GenericDhopSiteDagExt(), WilsonKernels< Impl >::GenericDhopSiteDagInt(), WilsonKernels< Impl >::GenericDhopSiteExt(), WilsonKernels< Impl >::GenericDhopSiteInt(), WilsonKernels< Impl >::HandDhopSite(), WilsonKernels< Impl >::HandDhopSiteDag(), WilsonKernels< Impl >::HandDhopSiteDagExt(), WilsonKernels< Impl >::HandDhopSiteDagInt(), WilsonKernels< Impl >::HandDhopSiteExt(), WilsonKernels< Impl >::HandDhopSiteInt(), CoarsenedMatrix< Fobj, CComplex, nbasis >::M(), CayleyFermion5D< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5D(), MobiusEOFAFermion< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5Ddag(), MobiusEOFAFermion< Impl >::M5Ddag(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdagNonHermitian(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdirCalc(), CompactWilsonCloverHelpers< Impl >::MooeeKernel_gpu(), GeneralCoarsenedMatrix< Fobj, CComplex, nbasis >::Mult(), WilsonCloverHelpers< Impl >::multClover(), WilsonCloverHelpers< Impl >::multCloverField(), GparityWilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::multLink(), StaggeredImpl< vComplex, FundamentalRepresentation >::multLink(), WilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::multLink(), StaggeredImpl< vComplex, FundamentalRepresentation >::multLinkAdd(), WilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::multLinkField(), outerProduct(), and Scatter_plane_simple().

◆ coalescedReadPermute()

template<class vobj>
accelerator_inline vobj coalescedReadPermute ( const vobj &__restrict__ vec,
int ptype,
int doperm,
int lane = 0 )

◆ coalescedReadGeneralPermute()

template<class vobj>
accelerator_inline vobj coalescedReadGeneralPermute ( const vobj &__restrict__ vec,
int perm_mask,
int nd,
int lane = 0 )

◆ coalescedWrite()

template<class vobj>
accelerator_inline void coalescedWrite ( vobj &__restrict__ vec,
const vobj &__restrict__ extracted,
int lane = 0 )

Definition at line 87 of file Tensor_SIMT.h.

Referenced by acceleratorPickCheckerboard(), acceleratorSetCheckerboard(), ag5xpbg5y_ssp(), ag5xpby_ssp(), CompactWilsonCloverHelpers< Impl >::ApplyBoundaryMask(), A2Autils< FImpl >::AslashField(), axpbg5y_ssp(), axpby_norm_fast(), axpby_ssp(), axpby_ssp_pminus(), axpby_ssp_pplus(), basisRotate(), basisRotateJ(), blockSum(), CoarsenedMatrix< Fobj, CComplex, nbasis >::CoarsenOperator(), CartesianStencil< SiteSpinor, SiteSpinor, ImplParams >::CommsCopy(), SimpleCompressorGather< vobj, FaceGatherSimple >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::CompressExchange(), WilsonKernels< Impl >::DhopDirK(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DhopInternal(), StaggeredKernels< Impl >::DhopSiteGenericExt(), StaggeredKernels< Impl >::DhopSiteGenericInt(), StaggeredKernels< Impl >::DhopSiteHand(), StaggeredKernels< Impl >::DhopSiteHandExt(), StaggeredKernels< Impl >::DhopSiteHandInt(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DselfInternal(), exchangeSIMT(), G5C(), G5R5(), Gather_plane_simple(), WilsonKernels< Impl >::GenericDhopSite(), WilsonKernels< Impl >::GenericDhopSiteDag(), WilsonKernels< Impl >::GenericDhopSiteDagExt(), WilsonKernels< Impl >::GenericDhopSiteDagInt(), WilsonKernels< Impl >::GenericDhopSiteExt(), WilsonKernels< Impl >::GenericDhopSiteInt(), GparityWilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::InsertForce5D(), CoarsenedMatrix< Fobj, CComplex, nbasis >::M(), CayleyFermion5D< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5D(), MobiusEOFAFermion< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5Ddag(), MobiusEOFAFermion< Impl >::M5Ddag(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdagNonHermitian(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdirCalc(), A2Autils< FImpl >::MesonField(), CompactWilsonCloverHelpers< Impl >::MooeeKernel_gpu(), 
GeneralCoarsenedMatrix< Fobj, CComplex, nbasis >::Mult(), rankInnerProduct(), WilsonLoops< PeriodicGimplR >::RectStaplePaddedAll(), Scatter_plane_simple(), Smear_HISQ< Gimpl >::smear(), and WilsonLoops< PeriodicGimplR >::StaplePaddedAll().

◆ coalescedWriteNonTemporal()

template<class vobj>
accelerator_inline void coalescedWriteNonTemporal ( vobj &__restrict__ vec,
const vobj &__restrict__ extracted,
int lane = 0 )

Definition at line 92 of file Tensor_SIMT.h.

References vstream().

◆ NAMESPACE_END()

NAMESPACE_END ( Grid )