Grid 0.7.0
Init.cc
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/Init.cc
6
7 Copyright (C) 2015
8
9Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
10Author: Peter Boyle <paboyle@ph.ed.ac.uk>
11Author: Peter Boyle <peterboyle@MacBook-Pro.local>
12Author: paboyle <paboyle@ph.ed.ac.uk>
13
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 2 of the License, or
17 (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License along
25 with this program; if not, write to the Free Software Foundation, Inc.,
26 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27
28 See the full license in the file "LICENSE" in the top level distribution directory
29*************************************************************************************/
30/* END LEGAL */
31/****************************************************************************/
32/* pab: Signal magic. Processor state dump is x86-64 specific */
33/****************************************************************************/
34#include <stdlib.h>
35#include <stdio.h>
36#include <stdint.h>
37#include <unistd.h>
38#include <sys/mman.h>
39#include <sys/stat.h>
40#include <sys/time.h>
41#include <signal.h>
42#include <iostream>
43#include <iterator>
44#include <algorithm>
45#include <iterator>
46#include <cstdlib>
47#include <memory>
48
49
50#include <Grid/Grid.h>
51
53
54#ifdef HAVE_UNWIND
55#include <libunwind.h>
56#endif
57
58#include <fenv.h>
59#ifdef __APPLE__
static int
feenableexcept (unsigned int excepts)
{
  // Apple replacement for the feenableexcept() call used by this file.
  // The control-word manipulation below is compiled out (it fails on
  // Apple M1), so the function currently does nothing and reports 0.
#if 0
  // Fails on Apple M1
  static fenv_t env;
  const unsigned int requested = excepts & FE_ALL_EXCEPT;

  if ( fegetenv (&env) ) return -1;

  // remember the previous masks
  const unsigned int previous = env.__control & FE_ALL_EXCEPT;

  // unmask
  env.__control &= ~requested;
  env.__mxcsr   &= ~(requested << 7);

  if ( fesetenv (&env) ) return -1;
  return (int) previous;
#endif
  return 0;
}
82#endif
83
84#ifndef HOST_NAME_MAX
85#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
86#endif
87
89
// Convenience functions to access the standard command-line-argument
// driven parallelism controls
96
100
102
103char *GridHostname(void)
104{
105 return hostname;
106}
109const Coordinate GridDefaultSimd(int dims,int nsimd)
110{
111 Coordinate layout(dims);
112 int nn=nsimd;
113 for(int d=dims-1;d>=0;d--){
114 if ( nn>=2) {
115 layout[d]=2;
116 nn/=2;
117 } else {
118 layout[d]=1;
119 }
120 }
121 assert(nn==1);
122 return layout;
123}
124
126// Command line parsing assist for stock controls
// Return the argument that follows the first occurrence of `option` in
// [begin,end). Returns an empty string when the option is absent or is the
// last argument.
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option)
{
  for (char **cur = begin; cur != end; ++cur) {
    if ( !(*cur == option) ) continue;

    // Only the first match is considered.
    char **value = cur + 1;
    if ( value == end ) break;
    return std::string(*value);
  }
  return std::string("");
}
// True when some argument in [begin,end) equals `option` exactly.
bool GridCmdOptionExists(char** begin, char** end, const std::string& option)
{
  for (char **cur = begin; cur != end; ++cur) {
    if ( *cur == option ) return true;
  }
  return false;
}
// Comma separated list
// Splits `str` at every ',' and stores the pieces in `vec`, replacing its
// previous contents. Empty pieces are kept, so an empty input yields one
// empty token.
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec)
{
  const std::string delimiter(",");

  vec.resize(0);

  size_t from = 0;
  for (;;) {
    const size_t hit = str.find(delimiter, from);
    if ( hit == std::string::npos ) break;
    vec.push_back(str.substr(from, hit - from));
    from = hit + delimiter.length();
  }
  // Whatever follows the last delimiter (possibly nothing) is the final token.
  vec.push_back(str.substr(from));
}
158
// Parse a list of integers such as "4.4.4.8" into `vec`, replacing its
// previous contents. After each integer a single punctuation character, if
// present, is skipped as a separator. Parsing stops at the first token that
// is not an integer.
template<class VectorInt>
void GridCmdOptionIntVector(const std::string &str,VectorInt & vec)
{
  vec.resize(0);
  std::stringstream input(str);
  for (int value; input >> value; ) {
    vec.push_back(value);
    const int lookahead = input.peek();
    if ( std::ispunct(lookahead) ) {
      input.ignore();
    }
  }
}
172
173template void GridCmdOptionIntVector(const std::string &str,std::vector<int> & vec);
174template void GridCmdOptionIntVector(const std::string &str,Coordinate & vec);
175
// Read one integer from the start of `str` into `val` using stream
// extraction.
void GridCmdOptionInt(std::string &str,int & val)
{
  std::stringstream parser(str);
  parser >> val;
}
182
// Read one floating-point number from the start of `str` into `val` using
// stream extraction.
void GridCmdOptionFloat(std::string &str,double & val)
{
  std::stringstream parser(str);
  parser >> val;
}
189
190void GridParseLayout(char **argv,int argc,
191 Coordinate &latt_c,
192 Coordinate &mpi_c)
193{
194 auto mpi =std::vector<int>({1,1,1,1});
195 auto latt=std::vector<int>({8,8,8,8});
196
198
199 std::string arg;
200 if( GridCmdOptionExists(argv,argv+argc,"--mpi") ){
201 arg = GridCmdOptionPayload(argv,argv+argc,"--mpi");
202 GridCmdOptionIntVector(arg,mpi);
203 }
204 if( GridCmdOptionExists(argv,argv+argc,"--grid") ){
205 arg= GridCmdOptionPayload(argv,argv+argc,"--grid");
206 GridCmdOptionIntVector(arg,latt);
207 }
208 if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
209 std::vector<int> ompthreads(0);
210#ifndef GRID_OMP
211 std::cout << GridLogWarning << "'--threads' option used but Grid was"
212 << " not compiled with thread support" << std::endl;
213#endif
214 arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
215 GridCmdOptionIntVector(arg,ompthreads);
216 assert(ompthreads.size()==1);
217 GridThread::SetThreads(ompthreads[0]);
218 }
219 if( GridCmdOptionExists(argv,argv+argc,"--accelerator-threads") ){
220 std::vector<int> gputhreads(0);
221 arg= GridCmdOptionPayload(argv,argv+argc,"--accelerator-threads");
222 GridCmdOptionIntVector(arg,gputhreads);
223 assert(gputhreads.size()==1);
224 acceleratorThreads(gputhreads[0]);
225 }
226
227 if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
228 int cores;
229 arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
230 GridCmdOptionInt(arg,cores);
232 }
233 // Copy back into coordinate format
234 int nd = mpi.size();
235 assert(latt.size()==nd);
236 latt_c.resize(nd);
237 mpi_c.resize(nd);
238 for(int d=0;d<nd;d++){
239 latt_c[d] = latt[d];
240 mpi_c[d] = mpi[d];
241 }
242}
243
// Format an indexable integer container as text: every element is written
// as an int followed by a single space (so the result ends with a space
// whenever the container is non-empty).
template<class VectorInt>
std::string GridCmdVectorIntToString(const VectorInt & vec_in){
  std::ostringstream text;
  const int count = vec_in.size();
  for(int idx=0;idx<count;idx++){
    const int element = vec_in[idx];
    text << element << " ";
  }
  return text.str();
}
253
254// Reinit guard
258
260// Reinit guard
262void GridBanner(void)
263{
264 std::cout <<std::endl;
265 std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
266 std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
267 std::cout << "__|_ | | | | | | | | | | | | _|__"<<std::endl;
268 std::cout << "__|_ _|__"<<std::endl;
269 std::cout << "__|_ GGGG RRRR III DDDD _|__"<<std::endl;
270 std::cout << "__|_ G R R I D D _|__"<<std::endl;
271 std::cout << "__|_ G R R I D D _|__"<<std::endl;
272 std::cout << "__|_ G GG RRRR I D D _|__"<<std::endl;
273 std::cout << "__|_ G G R R I D D _|__"<<std::endl;
274 std::cout << "__|_ GGGG R R III DDDD _|__"<<std::endl;
275 std::cout << "__|_ _|__"<<std::endl;
276 std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
277 std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
278 std::cout << " | | | | | | | | | | | | | | "<<std::endl;
279 std::cout << std::endl;
280 std::cout << std::endl;
281 std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
282 std::cout << std::endl;
283 std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
284 std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
285 std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
286 std::cout << "(at your option) any later version."<<std::endl;
287 std::cout << std::endl;
288 std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
289 std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
290 std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
291 std::cout << "GNU General Public License for more details."<<std::endl;
292 printHash();
293#ifdef GRID_BUILD_REF
294#define _GRID_BUILD_STR(x) #x
295#define GRID_BUILD_STR(x) _GRID_BUILD_STR(x)
296 std::cout << "Build " << GRID_BUILD_STR(GRID_BUILD_REF) << std::endl;
297#endif
298 std::cout << std::endl;
299 std::cout << std::setprecision(9);
300}
301
302//Some file local variables
303static int fileno_stdout;
304static int fileno_stderr;
305static int signal_delay;
// One mapped region: address range, size, file offset and a name.
struct dlRegion {
  uint64_t start;    // first address of the region
  uint64_t end;      // end address of the region
  uint64_t size;     // stored separately; not computed by this type
  uint64_t offset;   // offset of the region
  std::string name;  // name associated with the region
};
314std::vector<dlRegion> dlMap;
315
316void Grid_init(int *argc,char ***argv)
317{
318
319 assert(Grid_is_initialised == 0);
320
322
323 std::string arg;
324
  // Early initialisation necessities without rank knowledge
328 acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver
329
330 if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){
331 int MB;
332 arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
333 GridCmdOptionInt(arg,MB);
334 uint64_t MB64 = MB;
335 GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
336 }
337
338 if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-mpi") ){
339 int forcempi;
340 arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm-mpi");
341 GridCmdOptionInt(arg,forcempi);
342 Stencil_force_mpi = (bool)forcempi;
343 }
344
345 if( GridCmdOptionExists(*argv,*argv+*argc,"--device-mem") ){
346 int MB;
347 arg= GridCmdOptionPayload(*argv,*argv+*argc,"--device-mem");
348 GridCmdOptionInt(arg,MB);
349 uint64_t MB64 = MB;
350 MemoryManager::DeviceMaxBytes = MB64*1024LL*1024LL;
351 }
352
353 if( GridCmdOptionExists(*argv,*argv+*argc,"--hypercube") ){
354 int enable;
355 arg= GridCmdOptionPayload(*argv,*argv+*argc,"--hypercube");
356 GridCmdOptionInt(arg,enable);
358 }
359
360 if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){
362 }
363
364
365 if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
367 }
368 // Sleep n-seconds at end of handler
369 if( GridCmdOptionExists(*argv,*argv+*argc,"--signal-delay") ){
370 arg= GridCmdOptionPayload(*argv,*argv+*argc,"--signal-delay");
372 }
373 // periodic wakeup with stack trace printed
374 if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-heartbeat") ){
376 }
377 // periodic wakeup with empty handler (interrupts some system calls)
378 if( GridCmdOptionExists(*argv,*argv+*argc,"--heartbeat") ){
380 }
381
382#if defined(A64FX)
383 if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
384 std::cout << "Option --comms-overlap currently not supported on QPACE4. Exiting." << std::endl;
385 exit(EXIT_FAILURE);
386 }
387#endif
388
390 // Memory manager
393
395 // MPI initialisation
398
401 GridLogger::GlobalStopWatch.Reset();// Back to zero with synchronised clock
403
405 // Banner after MPI (unless GPU)
407 if ( CartesianCommunicator::RankWorld() == 0 ) {
408 GridBanner();
409 }
410
412 // Rank information can be used to control who logs
414 if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
416 } else {
417 FILE *fp;
418 std::ostringstream fname;
419 fname<<"Grid.stdout.";
421 fp=freopen(fname.str().c_str(),"w",stdout);
422 assert(fp!=(FILE *)NULL);
423
424 std::ostringstream ename;
425 ename<<"Grid.stderr.";
427 fp=freopen(ename.str().c_str(),"w",stderr);
428 assert(fp!=(FILE *)NULL);
429 }
430 fileno_stdout = fileno(stdout);
431 fileno_stderr = fileno(stderr) ;
432
434 // OK to use GridLogMessage etc from here on
436 std::cout << GridLogMessage << "================================================ "<<std::endl;
437 std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl;
438 std::cout << GridLogMessage << "================================================ "<<std::endl;
439 {
440 gethostname(hostname, HOST_NAME_MAX+1);
441 time_t mytime;
442 struct tm *info;
443 char buffer[80];
444 time(&mytime);
445 info = localtime(&mytime);
446 strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", info);
447 std::cout << GridLogMessage << "This rank is running on host "<< hostname<<" at local time "<<buffer<<std::endl;
448 }
449
451 // Reporting
453 std::cout << GridLogMessage << "Requested "<< GlobalSharedMemory::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
455 std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
456 }
457
459
460 if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-mem") ){
463 }
464
466 // LD.so space
468#ifndef __APPLE__
469 {
470 // Provides mapping of .so files
471 FILE *f = fopen("/proc/self/maps", "r");
472 if (f) {
473 char line[256];
474 while (fgets(line, sizeof(line), f)) {
475 if (strstr(line, "r-xp")) {
476 dlRegion region;
477 uint32_t major, minor, inode;
478 uint64_t start,end,offset;
479 char path[PATH_MAX];
480 sscanf(line,"%lx-%lx r-xp %lx %x:%x %d %s",
481 &start,&end,&offset,
482 &major,&minor,&inode,path);
483 region.start=start;
484 region.end =end;
485 region.offset=offset;
486 region.name = std::string(path);
487 region.size = region.end-region.start;
488 dlMap.push_back(region);
489 // std::cout << GridLogMessage<< line;
490 }
491 }
492 fclose(f);
493 }
494 if( GridCmdOptionExists(*argv,*argv+*argc,"--dylib-map") ){
495 std::cout << GridLogMessage << "================================================ "<<std::endl;
496 std::cout << GridLogMessage<< " Dynamic library map: " <<std::endl;
497 std::cout << GridLogMessage << "================================================ "<<std::endl;
498 for(int r=0;r<dlMap.size();r++){
499 auto region = dlMap[r];
500 std::cout << GridLogMessage<<" "<<region.name<<std::hex<<region.start<<"-"<<region.end<<" sz "<<region.size<<std::dec<<std::endl;
501 }
502 std::cout << GridLogMessage << "================================================ "<<std::endl;
503 }
504 }
505#endif
507 // Logging
509 std::vector<std::string> logstreams;
510 std::string defaultLog("Error,Warning,Message");
511 GridCmdOptionCSL(defaultLog,logstreams);
512 GridLogConfigure(logstreams);
513
514
515 if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
516 arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log");
517 GridCmdOptionCSL(arg,logstreams);
518 GridLogConfigure(logstreams);
519 }
520
522 // Help message
524
525 if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
526 std::cout<<GridLogMessage<<" --help : this message"<<std::endl;
527 std::cout<<GridLogMessage<<std::endl;
528 std::cout<<GridLogMessage<<"Geometry:"<<std::endl;
529 std::cout<<GridLogMessage<<std::endl;
530 std::cout<<GridLogMessage<<" --mpi n.n.n.n : default MPI decomposition"<<std::endl;
531 std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
532 std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
533 std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
534 std::cout<<GridLogMessage<<" --shm-mpi 0|1 : Force MPI usage under multi-rank per node "<<std::endl;
535 std::cout<<GridLogMessage<<" --shm-hugepages : use explicit huge pages in mmap call "<<std::endl;
536 std::cout<<GridLogMessage<<" --device-mem M : Size of device software cache for lattice fields (MB) "<<std::endl;
537 std::cout<<GridLogMessage<<std::endl;
538 std::cout<<GridLogMessage<<"Verbose:"<<std::endl;
539 std::cout<<GridLogMessage<<std::endl;
540 std::cout<<GridLogMessage<<" --log list : comma separated list from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
541 std::cout<<GridLogMessage<<" --notimestamp : suppress millisecond resolution stamps"<<std::endl;
542 std::cout<<GridLogMessage<<" --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
543 std::cout<<GridLogMessage<<"Debug:"<<std::endl;
544 std::cout<<GridLogMessage<<" --dylib-map : print dynamic library map, useful for interpreting signal backtraces "<<std::endl;
545 std::cout<<GridLogMessage<<" --heartbeat : periodic itimer wakeup (interrupts stuck system calls!) "<<std::endl;
546 std::cout<<GridLogMessage<<" --signal-delay n : pause for n seconds after signal handling (useful to get ALL nodes in stuck state) "<<std::endl;
547 std::cout<<GridLogMessage<<" --debug-stdout : print stdout from EVERY node to file Grid.stdout/err.rank "<<std::endl;
548 std::cout<<GridLogMessage<<" --debug-signals : catch sigsegv and print a blame report, handle SIGHUP with a backtrace to stderr"<<std::endl;
549 std::cout<<GridLogMessage<<" --debug-heartbeat : periodically report backtrace "<<std::endl;
550 std::cout<<GridLogMessage<<" --debug-mem : print Grid allocator activity"<<std::endl;
551 std::cout<<GridLogMessage<<std::endl;
552 std::cout<<GridLogMessage<<"Performance:"<<std::endl;
553 std::cout<<GridLogMessage<<std::endl;
554 std::cout<<GridLogMessage<<" --comms-overlap : Overlap comms with compute "<<std::endl;
555 std::cout<<GridLogMessage<<std::endl;
556 std::cout<<GridLogMessage<<" --dslash-generic: Wilson kernel for generic Nc"<<std::endl;
557 std::cout<<GridLogMessage<<" --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;
558 std::cout<<GridLogMessage<<" --dslash-asm : Wilson kernel for AVX512"<<std::endl;
559 std::cout<<GridLogMessage<<std::endl;
560 std::cout<<GridLogMessage<<std::endl;
561 exit(EXIT_SUCCESS);
562 }
563
565 // Debug and performance options
567
568 if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
571 }
572 if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
575 }
576 if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
579 }
580 if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
583 } else {
586 }
587
589 if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
591 } else {
593 }
594
595 GridParseLayout(*argv,*argc,
598
599 if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
600 std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
605 }
606
607 if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
608 std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
609 std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
610 std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
611 std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
612 std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
613 std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
614 std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
615 }
617}
618
619
621{
622 std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
623 std::cout<<GridLogMessage<<"******* Grid Finalize ******"<<std::endl;
624 std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
625
626#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT)
627 MPI_Barrier(MPI_COMM_WORLD);
628 MPI_Finalize();
630#endif
631#if defined (GRID_COMMS_SHMEM)
632 shmem_finalize();
633#endif
635}
636
638 std::cout << GridLogMessage << "Grid Layout\n";
639 std::cout << GridLogMessage << "\tGlobal lattice size : "<< GridCmdVectorIntToString(GridDefaultLatt()) << std::endl;
640 std::cout << GridLogMessage << "\tOpenMP threads : "<< GridThread::GetThreads() <<std::endl;
641 std::cout << GridLogMessage << "\tMPI tasks : "<< GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
642}
643
645#define SIGLOG(A) ::write(fileno_stderr,A,strlen(A));
646
647void sig_print_dig(uint32_t dig)
648{
649 const char *digits[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f" };
650 if ( dig>=0 && dig< 16){
651 SIGLOG(digits[dig]);
652 }
653}
654void sig_print_uint(uint32_t A)
655{
656 int dig;
657 int nz=0;
658#define DIGIT(DIV) dig = (A/DIV)%10 ; if(dig|nz) sig_print_dig(dig); nz = nz|dig;
659 DIGIT(1000000000); // Catches 4BN = 2^32
660 DIGIT(100000000);
661 DIGIT(10000000);
662 DIGIT(1000000);
663 DIGIT(100000);
664 DIGIT(10000);
665 DIGIT(1000);
666 DIGIT(100);
667 DIGIT(10);
668 DIGIT(1);
669 if (nz==0) SIGLOG("0");
670}
671void sig_print_hex(uint64_t A)
672{
673 int nz=0;
674 int dig;
675#define NIBBLE(A) dig = A ; if(dig|nz) sig_print_dig(dig); nz = nz|dig;
676 SIGLOG("0x");
677 NIBBLE((A>>(15*4))&0xF);
678 NIBBLE((A>>(14*4))&0xF);
679 NIBBLE((A>>(13*4))&0xF);
680 NIBBLE((A>>(12*4))&0xF);
681 NIBBLE((A>>(11*4))&0xF);
682 NIBBLE((A>>(10*4))&0xF);
683 NIBBLE((A>>(9*4))&0xF);
684 NIBBLE((A>>(8*4))&0xF);
685 NIBBLE((A>>(7*4))&0xF);
686 NIBBLE((A>>(6*4))&0xF);
687 NIBBLE((A>>(5*4))&0xF);
688 NIBBLE((A>>(4*4))&0xF);
689 NIBBLE((A>>(3*4))&0xF);
690 NIBBLE((A>>(2*4))&0xF);
691 NIBBLE((A>>4)&0xF);
692 sig_print_dig(A&0xF);
693}
694/*
695#ifdef __linux__
696#ifdef __x86_64__
697 ucontext_t * uc= (ucontext_t *)ptr;
698 struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
699 fprintf(stderr," instruction %llx\n",(unsigned long long)sc->rip);
700#endif
701#endif
702*/
703void Grid_generic_handler(int sig,siginfo_t *si,void * ptr)
704{
705 SIGLOG("Signal handler on host ");
707 SIGLOG(" process id ");
708 sig_print_uint((uint32_t)getpid());
709 SIGLOG("\n");
710 SIGLOG("FlightRecorder step ");
712 SIGLOG(" stage ");
714 SIGLOG("\n");
715 SIGLOG("Caught signal ");
716 sig_print_uint(si->si_signo);
717 SIGLOG("\n");
718 SIGLOG(" mem address ");
719 sig_print_hex((uint64_t)si->si_addr);
720 SIGLOG("\n");
721 SIGLOG(" code ");
722 sig_print_uint(si->si_code);
723 SIGLOG("\n");
724
725 ucontext_t *uc= (ucontext_t *)ptr;
726
727 SIGLOG("Backtrace:\n");
728#ifdef HAVE_UNWIND
729 // Debug cross check on offsets
730 // int symbols = backtrace(Grid_backtrace_buffer,_NBACKTRACE);
731 // backtrace_symbols_fd(Grid_backtrace_buffer,symbols,fileno_stderr);
732 unw_cursor_t cursor;
733 unw_word_t ip, off;
734 if (!unw_init_local(&cursor, uc) ) {
735
736 SIGLOG(" frame IP function\n");
737 int level = 0;
738 int ret = 0;
739 while(1) {
740 char name[128];
741 if (level >= _NBACKTRACE) return;
742
743 unw_get_reg(&cursor, UNW_REG_IP, &ip);
744
745 sig_print_uint(level); SIGLOG(" ");
746 sig_print_hex(ip); SIGLOG(" ");
747 for(int r=0;r<dlMap.size();r++){
748 if((ip>=dlMap[r].start) &&(ip<dlMap[r].end)){
749 SIGLOG(dlMap[r].name.c_str());
750 SIGLOG("+");
751 sig_print_hex((ip-dlMap[r].start));
752 break;
753 }
754 }
755 SIGLOG("\n");
756 Grid_backtrace_buffer[level]=(void *)ip;
757 level++;
758 ret = unw_step(&cursor);
759 if (ret <= 0) {
760 return;
761 }
762 }
763 }
764#else
765 // Known Asynch-Signal unsafe
766 int symbols = backtrace(Grid_backtrace_buffer,_NBACKTRACE);
767 backtrace_symbols_fd(Grid_backtrace_buffer,symbols,fileno_stderr);
768#endif
769}
770
771void Grid_heartbeat_signal_handler(int sig,siginfo_t *si,void * ptr)
772{
773 Grid_generic_handler(sig,si,ptr);
774 SIGLOG("\n");
775}
776void Grid_usr_signal_handler(int sig,siginfo_t *si,void * ptr)
777{
778 Grid_generic_handler(sig,si,ptr);
779 if (signal_delay) {
780 SIGLOG("Adding extra signal delay ");
782 SIGLOG(" s\n");
783 usleep( (uint64_t) signal_delay*1000LL*1000LL);
784 }
785 SIGLOG("\n");
786 return;
787}
788
789void Grid_fatal_signal_handler(int sig,siginfo_t *si,void * ptr)
790{
791 Grid_generic_handler(sig,si,ptr);
792 SIGLOG("\n");
793 exit(0);
794 return;
795};
// SA_SIGINFO-style handler that deliberately does nothing.
void Grid_empty_signal_handler(int sig,siginfo_t *si,void * ptr)
{
  //  SIGLOG("heartbeat signal handled\n");
  (void)sig;
  (void)si;
  (void)ptr;
}
802{
803 struct sigaction sa_ping;
804
805 sigemptyset (&sa_ping.sa_mask);
806 sa_ping.sa_sigaction= Grid_usr_signal_handler;
807 sa_ping.sa_flags = SA_SIGINFO;
808 sigaction(SIGALRM,&sa_ping,NULL);
809
810 // repeating 10s heartbeat
811 struct itimerval it_val;
812 it_val.it_value.tv_sec = 10;
813 it_val.it_value.tv_usec = 0;
814 it_val.it_interval = it_val.it_value;
815 setitimer(ITIMER_REAL, &it_val, NULL);
816}
818{
819 struct sigaction sa_ping;
820
821 sigemptyset (&sa_ping.sa_mask);
822 sa_ping.sa_sigaction= Grid_empty_signal_handler;
823 sa_ping.sa_flags = SA_SIGINFO;
824 sigaction(SIGALRM,&sa_ping,NULL);
825
826 // repeating 10s heartbeat
827 struct itimerval it_val;
828 it_val.it_value.tv_sec = 10;
829 it_val.it_value.tv_usec = 1000;
830 it_val.it_interval = it_val.it_value;
831 setitimer(ITIMER_REAL, &it_val, NULL);
832}
834{
835 BACKTRACEFP(stdout);
836 fflush(stdout);
837}
839{
840 struct sigaction sa;
841 sigemptyset (&sa.sa_mask);
842 sa.sa_sigaction= Grid_fatal_signal_handler;
843 sa.sa_flags = SA_SIGINFO;
844 sigaction(SIGTRAP,&sa,NULL);
845 sigaction(SIGILL,&sa,NULL);
846#ifndef GRID_SYCL
847 sigaction(SIGSEGV,&sa,NULL); // SYCL is using SIGSEGV
848 sigaction(SIGBUS,&sa,NULL);
849 feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
850 sigaction(SIGFPE,&sa,NULL);
851#endif
852
853 // Non terminating SIGHUP handler
854 struct sigaction sa_ping;
855 sigemptyset (&sa_ping.sa_mask);
856 sa_ping.sa_sigaction= Grid_usr_signal_handler;
857 sa_ping.sa_flags = SA_SIGINFO;
858 sigaction(SIGHUP,&sa_ping,NULL);
859
860 // atexit(Grid_exit_handler);
861}
862
864
void acceleratorInit(void)
uint32_t acceleratorThreads(void)
Definition Accelerator.cc:7
bool Stencil_force_mpi
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
Grid_simd< complex< float >, SIMD_Ftype > vComplexF
Grid_simd< float, SIMD_Ftype > vRealF
Grid_simd< complex< double >, SIMD_Dtype > vComplexD
Grid_simd< double, SIMD_Dtype > vRealD
bool GridCmdOptionExists(char **begin, char **end, const std::string &option)
Definition Init.cc:137
char hostname[HOST_NAME_MAX+1]
Definition Init.cc:101
void Grid_finalize(void)
Definition Init.cc:620
void Grid_heartbeat_signal_handler(int sig, siginfo_t *si, void *ptr)
Definition Init.cc:771
char * GridHostname(void)
Definition Init.cc:103
static Coordinate Grid_default_latt
Definition Init.cc:94
static int Grid_is_initialised
Definition Init.cc:257
void sig_print_dig(uint32_t dig)
Definition Init.cc:647
static MemoryStats dbgMemStats
Definition Init.cc:256
void Grid_exit_handler(void)
Definition Init.cc:833
void Grid_empty_signal_handler(int sig, siginfo_t *si, void *ptr)
Definition Init.cc:796
void GridCmdOptionFloat(std::string &str, double &val)
Definition Init.cc:183
void GridLogLayout()
Definition Init.cc:637
const Coordinate GridDefaultSimd(int dims, int nsimd)
Definition Init.cc:109
#define DIGIT(DIV)
void Grid_debug_handler_init(void)
Definition Init.cc:838
void Grid_init(int *argc, char ***argv)
Definition Init.cc:316
void GridCmdOptionInt(std::string &str, int &val)
Definition Init.cc:176
void GridBanner(void)
Definition Init.cc:262
void Grid_heartbeat(void)
Definition Init.cc:817
void Grid_debug_heartbeat(void)
Definition Init.cc:801
void sig_print_uint(uint32_t A)
Definition Init.cc:654
void sig_print_hex(uint64_t A)
Definition Init.cc:671
static int fileno_stderr
Definition Init.cc:304
static int signal_delay
Definition Init.cc:305
const Coordinate & GridDefaultLatt(void)
Definition Init.cc:107
void GridCmdOptionIntVector(const std::string &str, VectorInt &vec)
Definition Init.cc:160
static Coordinate Grid_default_mpi
Definition Init.cc:95
std::vector< dlRegion > dlMap
Definition Init.cc:314
const Coordinate & GridDefaultMpi(void)
Definition Init.cc:108
void Grid_fatal_signal_handler(int sig, siginfo_t *si, void *ptr)
Definition Init.cc:789
std::string GridCmdVectorIntToString(const VectorInt &vec_in)
Definition Init.cc:245
#define HOST_NAME_MAX
Definition Init.cc:85
void GridCmdOptionCSL(std::string str, std::vector< std::string > &vec)
Definition Init.cc:142
static int fileno_stdout
Definition Init.cc:303
void Grid_usr_signal_handler(int sig, siginfo_t *si, void *ptr)
Definition Init.cc:776
#define SIGLOG(A)
Definition Init.cc:645
void Grid_generic_handler(int sig, siginfo_t *si, void *ptr)
Definition Init.cc:703
void GridParseLayout(char **argv, int argc, Coordinate &latt_c, Coordinate &mpi_c)
Definition Init.cc:190
#define NIBBLE(A)
std::string GridCmdOptionPayload(char **begin, char **end, const std::string &option)
Definition Init.cc:128
void printHash(void)
void GridLogTimestamp(int on)
Definition Log.cc:57
void Grid_quiesce_nodes(void)
Definition Log.cc:109
void GridLogConfigure(std::vector< std::string > &logstreams)
Definition Log.cc:77
void Grid_unquiesce_nodes(void)
Definition Log.cc:122
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW")
#define BACKTRACEFP(fp)
Definition Log.h:250
void * Grid_backtrace_buffer[_NBACKTRACE]
Definition Init.cc:644
#define _NBACKTRACE
Definition Log.h:229
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
accelerator_inline void resize(size_type sz)
Definition Coordinate.h:54
static void Init(int *argc, char ***argv)
static void BarrierWorld(void)
static const char * StepName
static int32_t StepLoggingCounter
static int PrintEntireLog
static void SetLoggingMode(LoggingMode_t mode)
static int ChecksumCommsSend
static int ChecksumComms
static uint64_t MAX_MPI_SHM_BYTES
static int HPEhypercube
static void SetMaxThreads(void)
static int GetThreads(void)
static int _cores
static int _hyperthreads
static int _threads
static void SetCores(int cr)
static void SetThreads(int thr)
static accelerator_inline constexpr int Nsimd(void)
static GridStopWatch GlobalStopWatch
Definition Log.h:94
static void InitMessage(void)
static uint64_t DeviceMaxBytes
static void Init(void)
static MemoryStats * stats
Definition MemoryStats.h:45
static bool debug
Definition MemoryStats.h:46
uint64_t size
Definition Init.cc:310
uint64_t end
Definition Init.cc:309
uint64_t start
Definition Init.cc:308
uint64_t offset
Definition Init.cc:311
std::string name
Definition Init.cc:312