|
Grid 0.7.0
|
#include <string.h>
Go to the source code of this file.
Functions | |
| NAMESPACE_BEGIN (Grid) | |
| template<class vobj> | |
| accelerator_inline void | exchangeSIMT (vobj &mp0, vobj &mp1, const vobj &vp0, const vobj &vp1, Integer type) |
| template<class vobj> | |
| accelerator_inline vobj | coalescedRead (const vobj &__restrict__ vec, int lane=0) |
| template<class vobj> | |
| accelerator_inline vobj | coalescedReadPermute (const vobj &__restrict__ vec, int ptype, int doperm, int lane=0) |
| template<class vobj> | |
| accelerator_inline vobj | coalescedReadGeneralPermute (const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0) |
| template<class vobj> | |
| accelerator_inline void | coalescedWrite (vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0) |
| template<class vobj> | |
| accelerator_inline void | coalescedWriteNonTemporal (vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0) |
| NAMESPACE_END (Grid) | |
NAMESPACE_BEGIN (Grid)
accelerator_inline void exchangeSIMT (vobj &mp0, vobj &mp1, const vobj &vp0, const vobj &vp1, Integer type)
Definition at line 38 of file Tensor_SIMT.h.
References accelerator_inline, acceleratorSIMTlane(), coalescedRead(), and coalescedWrite().
Referenced by SimpleCompressorGather< vobj, FaceGatherSimple >::CompressExchange(), SimpleCompressorGather< vobj, FaceGatherSimple >::Exchange(), and WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::Exchange().
accelerator_inline vobj coalescedRead (const vobj &__restrict__ vec, int lane=0)
Definition at line 61 of file Tensor_SIMT.h.
Referenced by axpby_ssp_pminus(), axpby_ssp_pplus(), basisRotate(), CartesianStencil< SiteSpinor, SiteSpinor, ImplParams >::CommsCopy(), SimpleCompressorGather< vobj, FaceGatherSimple >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::CompressExchange(), WilsonKernels< Impl >::DhopDirK(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DhopInternal(), StaggeredKernels< Impl >::DhopSiteGenericExt(), StaggeredKernels< Impl >::DhopSiteGenericInt(), StaggeredKernels< Impl >::DhopSiteHand(), StaggeredKernels< Impl >::DhopSiteHandExt(), StaggeredKernels< Impl >::DhopSiteHandInt(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DselfInternal(), exchangeSIMT(), Gather_plane_simple(), WilsonKernels< Impl >::GenericDhopSite(), WilsonKernels< Impl >::GenericDhopSiteDag(), WilsonKernels< Impl >::GenericDhopSiteDagExt(), WilsonKernels< Impl >::GenericDhopSiteDagInt(), WilsonKernels< Impl >::GenericDhopSiteExt(), WilsonKernels< Impl >::GenericDhopSiteInt(), WilsonKernels< Impl >::HandDhopSite(), WilsonKernels< Impl >::HandDhopSiteDag(), WilsonKernels< Impl >::HandDhopSiteDagExt(), WilsonKernels< Impl >::HandDhopSiteDagInt(), WilsonKernels< Impl >::HandDhopSiteExt(), WilsonKernels< Impl >::HandDhopSiteInt(), CoarsenedMatrix< Fobj, CComplex, nbasis >::M(), CayleyFermion5D< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5D(), MobiusEOFAFermion< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5Ddag(), MobiusEOFAFermion< Impl >::M5Ddag(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdagNonHermitian(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdirCalc(), CompactWilsonCloverHelpers< Impl >::MooeeKernel_gpu(), GeneralCoarsenedMatrix< Fobj, CComplex, nbasis >::Mult(), WilsonCloverHelpers< Impl >::multClover(), WilsonCloverHelpers< Impl >::multCloverField(),
GparityWilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::multLink(), StaggeredImpl< vComplex, FundamentalRepresentation >::multLink(), WilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::multLink(), StaggeredImpl< vComplex, FundamentalRepresentation >::multLinkAdd(), WilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::multLinkField(), outerProduct(), and Scatter_plane_simple().
accelerator_inline vobj coalescedReadPermute (const vobj &__restrict__ vec, int ptype, int doperm, int lane=0)
Definition at line 66 of file Tensor_SIMT.h.
References permute(), and ptype.
Referenced by CoarsenedMatrix< Fobj, CComplex, nbasis >::M(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdagNonHermitian(), and CoarsenedMatrix< Fobj, CComplex, nbasis >::MdirCalc().
accelerator_inline vobj coalescedReadGeneralPermute (const vobj &__restrict__ vec, int perm_mask, int nd, int lane=0)
Definition at line 78 of file Tensor_SIMT.h.
References permute().
Referenced by GeneralCoarsenedMatrix< Fobj, CComplex, nbasis >::Mult(), and Smear_HISQ< Gimpl >::smear().
accelerator_inline void coalescedWrite (vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition at line 87 of file Tensor_SIMT.h.
Referenced by acceleratorPickCheckerboard(), acceleratorSetCheckerboard(), ag5xpbg5y_ssp(), ag5xpby_ssp(), CompactWilsonCloverHelpers< Impl >::ApplyBoundaryMask(), A2Autils< FImpl >::AslashField(), axpbg5y_ssp(), axpby_norm_fast(), axpby_ssp(), axpby_ssp_pminus(), axpby_ssp_pplus(), basisRotate(), basisRotateJ(), blockSum(), CoarsenedMatrix< Fobj, CComplex, nbasis >::CoarsenOperator(), CartesianStencil< SiteSpinor, SiteSpinor, ImplParams >::CommsCopy(), SimpleCompressorGather< vobj, FaceGatherSimple >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::Compress(), WilsonCompressorTemplate< HCS, HS, S, WilsonProjector >< SiteHalfCommSpinor, SiteHalfSpinor, SiteSpinor >::CompressExchange(), WilsonKernels< Impl >::DhopDirK(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DhopInternal(), StaggeredKernels< Impl >::DhopSiteGenericExt(), StaggeredKernels< Impl >::DhopSiteGenericInt(), StaggeredKernels< Impl >::DhopSiteHand(), StaggeredKernels< Impl >::DhopSiteHandExt(), StaggeredKernels< Impl >::DhopSiteHandInt(), CoarsenedMatrix< Fobj, CComplex, nbasis >::DselfInternal(), exchangeSIMT(), G5C(), G5R5(), Gather_plane_simple(), WilsonKernels< Impl >::GenericDhopSite(), WilsonKernels< Impl >::GenericDhopSiteDag(), WilsonKernels< Impl >::GenericDhopSiteDagExt(), WilsonKernels< Impl >::GenericDhopSiteDagInt(), WilsonKernels< Impl >::GenericDhopSiteExt(), WilsonKernels< Impl >::GenericDhopSiteInt(), GparityWilsonImpl< vComplex, FundamentalRepresentation, CoeffReal >::InsertForce5D(), CoarsenedMatrix< Fobj, CComplex, nbasis >::M(), CayleyFermion5D< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5D(), MobiusEOFAFermion< Impl >::M5D(), DomainWallEOFAFermion< Impl >::M5Ddag(), MobiusEOFAFermion< Impl >::M5Ddag(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdagNonHermitian(), CoarsenedMatrix< Fobj, CComplex, nbasis >::MdirCalc(), A2Autils< FImpl >::MesonField(), CompactWilsonCloverHelpers< Impl >::MooeeKernel_gpu(), 
GeneralCoarsenedMatrix< Fobj, CComplex, nbasis >::Mult(), rankInnerProduct(), WilsonLoops< PeriodicGimplR >::RectStaplePaddedAll(), Scatter_plane_simple(), Smear_HISQ< Gimpl >::smear(), and WilsonLoops< PeriodicGimplR >::StaplePaddedAll().
accelerator_inline void coalescedWriteNonTemporal (vobj &__restrict__ vec, const vobj &__restrict__ extracted, int lane=0)
Definition at line 92 of file Tensor_SIMT.h.
References vstream().
NAMESPACE_END (Grid)