32#if defined(USE_MKL) || defined(GRID_SYCL)
33#include <fftw/fftw3.h>
// Primary FFTW wrapper template, parameterised on the scalar type.
// It declares no members of its own; the typedefs and static wrappers
// further down in this file are presumably supplied by per-precision
// specializations (their headers are not visible in this excerpt).
template <typename scalar>
struct FFTW {};
47 typedef fftw_complex FFTW_scalar;
48 typedef fftw_plan FFTW_plan;
50 static FFTW_plan fftw_plan_many_dft(
int rank,
const int *n,
int howmany,
51 FFTW_scalar *in,
const int *inembed,
52 int istride,
int idist,
53 FFTW_scalar *out,
const int *onembed,
54 int ostride,
int odist,
55 int sign,
unsigned flags) {
56 return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
59 static void fftw_flops(
const FFTW_plan p,
double *
add,
double *mul,
double *fmas){
60 ::fftw_flops(p,
add,mul,fmas);
63 inline static void fftw_execute_dft(
const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
64 ::fftw_execute_dft(p,in,out);
66 inline static void fftw_destroy_plan(
const FFTW_plan p) {
67 ::fftw_destroy_plan(p);
74 typedef fftwf_complex FFTW_scalar;
75 typedef fftwf_plan FFTW_plan;
77 static FFTW_plan fftw_plan_many_dft(
int rank,
const int *n,
int howmany,
78 FFTW_scalar *in,
const int *inembed,
79 int istride,
int idist,
80 FFTW_scalar *out,
const int *onembed,
81 int ostride,
int odist,
82 int sign,
unsigned flags) {
83 return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
86 static void fftw_flops(
const FFTW_plan p,
double *
add,
double *mul,
double *fmas){
87 ::fftwf_flops(p,
add,mul,fmas);
90 inline static void fftw_execute_dft(
const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
91 ::fftwf_execute_dft(p,in,out);
93 inline static void fftw_destroy_plan(
const FFTW_plan p) {
94 ::fftwf_destroy_plan(p);
// Transform-direction constants: -1 for forward, +1 for backward.
// NOTE(review): these look like local stand-ins for the macros of the same
// name normally provided by the FFTW header, for builds where that header
// is not included — the enclosing preprocessor branch is not visible in
// this excerpt, so confirm before relying on that.
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
131 Nd(grid->_ndimension),
153 for(
int d=0;d<
Nd;d++){
171 std::cerr <<
"FFTW is not compiled but is called"<<std::endl;
177 int L =
vgrid->_ldimensions[dim];
178 int G =
vgrid->_fdimensions[dim];
189 typedef typename vobj::scalar_object sobj;
190 typedef typename sobj::scalar_type scalar;
199 int Ncomp =
sizeof(sobj)/
sizeof(scalar);
201 for(
int d=0;d<dim;d++){
202 Nlow*=
vgrid->_ldimensions[d];
208 int odist,idist,istride,ostride;
210 istride = ostride = Ncomp*Nlow;
211 int *inembed = n, *onembed = n;
221 FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0];
222 FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[0];
243 sgrid->LocalIndexToLocalCoor(idx,cbuf);
244 peekLocalSite(s,r_v,cbuf);
245 cbuf[dim]+=((pc+p) % processors[dim])*L;
246 pokeLocalSite(s,p_v,cbuf);
250 result =
Cshift(result,dim,L);
262 if ( cbuf[dim] == 0 ) {
263 FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[idx];
264 FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[idx];
283 Coordinate clbuf(Nd), cgbuf(Nd);
285 sgrid->LocalIndexToLocalCoor(idx,clbuf);
287 cgbuf[dim] = clbuf[dim]+L*pc;
288 peekLocalSite(s,pgbuf_v,cgbuf);
289 pokeLocalSite(s,result_v,clbuf);
AcceleratorVector< int, MaxDims > Coordinate
auto Cshift(const Expression &expr, int dim, int shift) -> decltype(closure(expr))
void add(Lattice< obj1 > &ret, const Lattice< obj2 > &lhs, const Lattice< obj3 > &rhs)
Lattice< obj > div(const Lattice< obj > &rhs_i, Integer y)
#define autoView(l_v, l, mode)
#define NAMESPACE_BEGIN(A)
std::complex< RealF > ComplexF
std::complex< RealD > ComplexD
#define thread_for(i, num,...)
Coordinate processor_coor
void FFT_dim(Lattice< vobj > &result, const Lattice< vobj > &source, int dim, int sign)
static const int backward
void FFT_dim_mask(Lattice< vobj > &result, const Lattice< vobj > &source, Coordinate mask, int sign)
void FFT_all_dim(Lattice< vobj > &result, const Lattice< vobj > &source, int sign)
void LocalIndexToLocalCoor(int lidx, Coordinate &lcoor)
uint64_t useconds(void) const
GridBase * Grid(void) const