Grid/dev/Lattice__local_8h_source.html

/*************************************************************************************


    Grid physics library, www.github.com/paboyle/Grid


    Source file: ./lib/lattice/Lattice_local.h


    Copyright (C) 2015


Author: Peter Boyle <paboyle@ph.ed.ac.uk>


    This program is free software; you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation; either version 2 of the License, or

    (at your option) any later version.


    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU General Public License for more details.


    You should have received a copy of the GNU General Public License along

    with this program; if not, write to the Free Software Foundation, Inc.,

    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


    See the full license in the file "LICENSE" in the top level distribution directory

*************************************************************************************/

/*  END LEGAL */

#ifndef GRID_LATTICE_LOCALREDUCTION_H

#define GRID_LATTICE_LOCALREDUCTION_H


// localInnerProduct, localNorm2, outerProduct


NAMESPACE_BEGIN(Grid);


// Locally reduced routines: nothing is summed over sites, each one returns a Lattice


// localNorm2
//
// For each index of the view of rhs, forms innerProduct(x,x) of the site
// object with itself and writes it to the same index of the result.
// The result is a new Lattice constructed on rhs.Grid() whose site type is
// vobj::tensor_reduced; no sum across sites is performed here.
template<class vobj>
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
{
  typedef Lattice<typename vobj::tensor_reduced> ReducedField;

  ReducedField result(rhs.Grid());

  autoView( in_v     , rhs   , AcceleratorRead);
  autoView( result_v , result, AcceleratorWrite);

  // in_v.size() is the iteration count, vobj::Nsimd() the nsimd argument.
  accelerator_for(site,in_v.size(),vobj::Nsimd(),{
    coalescedWrite(result_v[site],innerProduct(in_v(site),in_v(site)));
  });

  return result;
}


// localInnerProduct
//
// For each index of the view of rhs, forms innerProduct(l,r) of the lhs and
// rhs site objects at that index and writes it to the same index of the
// result. The result is a new Lattice constructed on rhs.Grid() whose site
// type is vobj::tensor_reduced; no sum across sites is performed here.
//
// NOTE(review): both the iteration count and the grid of the result are taken
// from rhs only; lhs is presumably required to live on the same grid, but
// nothing in this routine checks that -- confirm against callers.
template<class vobj>
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
{
  typedef Lattice<typename vobj::tensor_reduced> ReducedField;

  ReducedField result(rhs.Grid());

  autoView( left_v   , lhs   , AcceleratorRead);
  autoView( right_v  , rhs   , AcceleratorRead);
  autoView( result_v , result, AcceleratorWrite);

  // right_v.size() is the iteration count, vobj::Nsimd() the nsimd argument.
  accelerator_for(site,right_v.size(),vobj::Nsimd(),{
    coalescedWrite(result_v[site],innerProduct(left_v(site),right_v(site)));
  });

  return result;
}


// outerProduct Scalar x Scalar -> Scalar
//              Vector x Vector -> Matrix
//
// For each index of the view of rhs, assigns outerProduct(l,r) of the lhs and
// rhs site objects at that index to the same index of the result. The result
// is a new Lattice constructed on rhs.Grid(); its site type is whatever the
// site-level outerProduct(ll,rr) returns.
//
// Unlike the other routines in this file, the kernel is launched with an
// nsimd argument of 1 and accesses the views through operator[] with a plain
// assignment, rather than operator() / coalescedWrite (see FIXME below).
//
// NOTE(review): both the iteration count and the grid of the result are taken
// from rhs only; lhs is presumably required to live on the same grid, but
// nothing in this routine checks that -- confirm against callers.
template<class ll,class rr>
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(ll(),rr()))>
{
  // The former local typedefs of decltype(coalescedRead(ll())) and
  // decltype(coalescedRead(rr())) were never referenced and have been dropped.
  Lattice<decltype(outerProduct(ll(),rr()))> ret(rhs.Grid());
  autoView( lhs_v , lhs, AcceleratorRead);
  autoView( rhs_v , rhs, AcceleratorRead);
  autoView( ret_v , ret, AcceleratorWrite);
  accelerator_for(ss,rhs_v.size(),1,{
    // FIXME had issues with scalar version of outer
    // Use vector [] operator and don't read coalesce this loop
    ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]);
  });
  return ret;
}


NAMESPACE_END(Grid);

#endif

accelerator_for
#define accelerator_for(iterator, num, nsimd,...)
Definition Accelerator.h:609

localInnerProduct
auto localInnerProduct(const Lattice< vobj > &lhs, const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
Definition Lattice_local.h:56

outerProduct
auto outerProduct(const Lattice< ll > &lhs, const Lattice< rr > &rhs) -> Lattice< decltype(outerProduct(ll(), rr()))>
Definition Lattice_local.h:71

localNorm2
auto localNorm2(const Lattice< vobj > &rhs) -> Lattice< typename vobj::tensor_reduced >
Definition Lattice_local.h:43

autoView
#define autoView(l_v, l, mode)
Definition Lattice_view.h:119

AcceleratorRead
@ AcceleratorRead
Definition MemoryManager.h:66

AcceleratorWrite
@ AcceleratorWrite
Definition MemoryManager.h:67

NAMESPACE_BEGIN
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35

NAMESPACE_END
#define NAMESPACE_END(A)
Definition Namespace.h:36

coalescedRead
accelerator_inline vobj coalescedRead(const vobj &__restrict__ vec, int lane=0)
Definition Tensor_SIMT.h:61

Lattice
Definition Lattice_base.h:47

Grid
Definition Deflation.h:31