Grid 0.7.0
Communicator_base.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/communicator/Communicator_base.h
6
7 Copyright (C) 2015
8
9Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24
25 See the full license in the file "LICENSE" in the top level distribution directory
26*************************************************************************************/
27/* END LEGAL */
28#ifndef GRID_COMMUNICATOR_BASE_H
29#define GRID_COMMUNICATOR_BASE_H
30
32// Processor layout information
35
36#define NVLINK_GET
37
39
40extern bool Stencil_force_mpi ;
41
43
44public:
45
47 // Policies
52 static int nCommThreads;
53
55 // Communicator should know nothing of the physics grid, only processor grid.
57 int _Nprocessors; // How many in all
58 int _processor; // linear processor rank
59 unsigned long _ndimension;
60 Coordinate _shm_processors; // Which dimensions get relayed out over processors lanes.
61 Coordinate _processors; // Which dimensions get relayed out over processors lanes.
62 Coordinate _processor_coor; // linear processor coordinate
65 std::vector<Grid_MPI_Comm> communicator_halo;
66
68 // Must call in Grid startup
70 static void Init(int *argc, char ***argv);
71
73 // Constructors to sub-divide a parent communicator
74 // and default to comm world
76 CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank);
77 CartesianCommunicator(const Coordinate &pdimensions_in);
78 virtual ~CartesianCommunicator();
79
80private:
81
83 // Private initialise from an MPI communicator
84 // Can use after an MPI_Comm_split, but hidden from user so private
86 void InitFromMPICommunicator(const Coordinate &processors, Grid_MPI_Comm communicator_base);
87
88public:
89
90
92 // Wraps MPI_Cart routines, or implements equivalent on other impls
94 void ShiftedRanks(int dim,int shift,int & source, int & dest);
96 void ProcessorCoorFromRank(int rank,Coordinate &coor);
97
98 int Dimensions(void) ;
99 int IsBoss(void) ;
100 int BossRank(void) ;
101 int ThisRank(void) ;
102 const Coordinate & ThisProcessorCoor(void) ;
 // Accessor: the intra-node (shared-memory) processor decomposition, fixed at construction.
 103 const Coordinate & ShmGrid(void) { return _shm_processors; } ;
104 const Coordinate & ProcessorGrid(void) ;
105 int ProcessorCount(void) ;
106
108 // very VERY rarely (Log, serial RNG) we need world without a grid
110 static int RankWorld(void) ;
111 static void BroadcastWorld(int root,void* data, int bytes);
112 static void BarrierWorld(void);
113
115 // Reduction
117 void GlobalMax(RealD &);
118 void GlobalMax(RealF &);
119 void GlobalSum(RealF &);
120 void GlobalSumVector(RealF *,int N);
121 void GlobalSum(RealD &);
122 void GlobalSumVector(RealD *,int N);
123 void GlobalSum(uint32_t &);
124 void GlobalSum(uint64_t &);
125 void GlobalSumVector(uint64_t*,int N);
126 void GlobalSum(ComplexF &c);
127 void GlobalSumVector(ComplexF *c,int N);
128 void GlobalSum(ComplexD &c);
129 void GlobalSumVector(ComplexD *c,int N);
130 void GlobalXOR(uint32_t &);
131 void GlobalXOR(uint64_t &);
132
133 template<class obj> void GlobalSumP2P(obj &o)
134 {
135 std::vector<obj> column;
136 obj accum = o;
137 int source,dest;
138 for(int d=0;d<_ndimension;d++){
139 column.resize(_processors[d]);
140 column[0] = accum;
141 std::vector<MpiCommsRequest_t> list;
142 for(int p=1;p<_processors[d];p++){
143 ShiftedRanks(d,p,source,dest);
145 &column[0],
146 dest,
147 &column[p],
148 source,
149 sizeof(obj),d*100+p);
150
151 }
152 if (!list.empty()) // avoid triggering assert in comms == none
153 CommsComplete(list);
154 for(int p=1;p<_processors[d];p++){
155 accum = accum + column[p];
156 }
157 }
158 Broadcast(0,accum);
159 o=accum;
160 }
161
// Global sum of an arbitrary object composed of scalar_type words:
// view the object as a flat scalar array and reduce every word in place.
template<class obj> void GlobalSum(obj &o){
  using scalar_type = typename obj::scalar_type;
  constexpr int nwords = sizeof(obj)/sizeof(scalar_type);
  // Safe alias: obj is a plain aggregate of scalar_type words.
  GlobalSumVector(reinterpret_cast<scalar_type *>(&o), nwords);
}
168
170 // Face exchange, buffer swap in translational invariant way
172 void CommsComplete(std::vector<MpiCommsRequest_t> &list);
173 void SendToRecvFromBegin(std::vector<MpiCommsRequest_t> &list,
174 void *xmit,
175 int dest,
176 void *recv,
177 int from,
178 int bytes,int dir);
179
180 void SendToRecvFrom(void *xmit,
181 int xmit_to_rank,
182 void *recv,
183 int recv_from_rank,
184 int bytes);
185
186 int IsOffNode(int rank);
187 double StencilSendToRecvFrom(void *xmit,
188 int xmit_to_rank,int do_xmit,
189 void *recv,
190 int recv_from_rank,int do_recv,
191 int bytes,int dir);
192
193 double StencilSendToRecvFromPrepare(std::vector<CommsRequest_t> &list,
194 void *xmit,
195 int xmit_to_rank,int do_xmit,
196 void *recv,
197 int recv_from_rank,int do_recv,
198 int xbytes,int rbytes,int dir);
199
200 // Could do a PollHtoD and have a CommsMerge dependence
201 void StencilSendToRecvFromPollDtoH (std::vector<CommsRequest_t> &list);
202 void StencilSendToRecvFromPollIRecv(std::vector<CommsRequest_t> &list);
203
204 double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
205 void *xmit,void *xmit_comp,
206 int xmit_to_rank,int do_xmit,
207 void *recv,void *recv_comp,
208 int recv_from_rank,int do_recv,
209 int xbytes,int rbytes,int dir);
210
211
212 void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i);
213 void StencilBarrier(void);
214
216 // Barrier
218 void Barrier(void);
219
221 // Broadcast a buffer and composite larger
223 void Broadcast(int root,void* data, int bytes);
224
226 // All2All down one dimension
228 template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){
229 assert(dim>=0);
230 assert(dim<_ndimension);
231 assert(in.size()==out.size());
232 int numnode = _processors[dim];
233 uint64_t bytes=sizeof(T);
234 uint64_t words=in.size()/numnode;
235 assert(numnode * words == in.size());
236 assert(words < (1ULL<<31));
237 AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
238 }
239 void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
240 void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);
241
242 template<class obj> void Broadcast(int root,obj &data)
243 {
244 Broadcast(root,(void *)&data,sizeof(data));
245 }
246
247};
248
250
251#endif
bool Stencil_force_mpi
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
int Grid_MPI_Comm
std::complex< RealF > ComplexF
Definition Simd.h:78
float RealF
Definition Simd.h:60
std::complex< RealD > ComplexD
Definition Simd.h:79
double RealD
Definition Simd.h:61
double StencilSendToRecvFromPrepare(std::vector< CommsRequest_t > &list, void *xmit, int xmit_to_rank, int do_xmit, void *recv, int recv_from_rank, int do_recv, int xbytes, int rbytes, int dir)
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy)
void ProcessorCoorFromRank(int rank, Coordinate &coor)
CartesianCommunicator(const Coordinate &processors, const CartesianCommunicator &parent, int &srank)
void StencilSendToRecvFromComplete(std::vector< CommsRequest_t > &waitall, int i)
void Broadcast(int root, obj &data)
void Broadcast(int root, void *data, int bytes)
void StencilSendToRecvFromPollDtoH(std::vector< CommsRequest_t > &list)
static void Init(int *argc, char ***argv)
double StencilSendToRecvFrom(void *xmit, int xmit_to_rank, int do_xmit, void *recv, int recv_from_rank, int do_recv, int bytes, int dir)
void AllToAll(int dim, std::vector< T > &in, std::vector< T > &out)
void StencilSendToRecvFromPollIRecv(std::vector< CommsRequest_t > &list)
double StencilSendToRecvFromBegin(std::vector< CommsRequest_t > &list, void *xmit, void *xmit_comp, int xmit_to_rank, int do_xmit, void *recv, void *recv_comp, int recv_from_rank, int do_recv, int xbytes, int rbytes, int dir)
static CommunicatorPolicy_t CommunicatorPolicy
static void BarrierWorld(void)
const Coordinate & ThisProcessorCoor(void)
void CommsComplete(std::vector< MpiCommsRequest_t > &list)
void GlobalSumVector(RealF *, int N)
const Coordinate & ShmGrid(void)
void SendToRecvFrom(void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes)
int RankFromProcessorCoor(Coordinate &coor)
static Grid_MPI_Comm communicator_world
const Coordinate & ProcessorGrid(void)
static void BroadcastWorld(int root, void *data, int bytes)
void InitFromMPICommunicator(const Coordinate &processors, Grid_MPI_Comm communicator_base)
void SendToRecvFromBegin(std::vector< MpiCommsRequest_t > &list, void *xmit, int dest, void *recv, int from, int bytes, int dir)
void ShiftedRanks(int dim, int shift, int &source, int &dest)
std::vector< Grid_MPI_Comm > communicator_halo