Grid 0.7.0
BinaryIO.h
Go to the documentation of this file.
1 /*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./lib/parallelIO/BinaryIO.h
6
7 Copyright (C) 2015
8
9 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
10 Author: Guido Cossu<guido.cossu@ed.ac.uk>
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License along
23 with this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25
26 See the full license in the file "LICENSE" in the top level distribution directory
27 *************************************************************************************/
28 /* END LEGAL */
29#pragma once
30
31#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
32#define USE_MPI_IO
33#else
34#undef USE_MPI_IO
35#endif
36
#ifdef HAVE_ENDIAN_H
#include <endian.h>
#endif

#include <arpa/inet.h>

#include <algorithm>
#include <cctype>
43
45
47// Byte reversal garbage
// Reverse the byte order of a 32-bit word.
inline uint32_t byte_reverse32(uint32_t f) {
  uint32_t r = 0;
  r |= (f >> 24) & 0x000000FFu;  // byte 3 -> byte 0
  r |= (f >>  8) & 0x0000FF00u;  // byte 2 -> byte 1
  r |= (f <<  8) & 0x00FF0000u;  // byte 1 -> byte 2
  r |= (f << 24) & 0xFF000000u;  // byte 0 -> byte 3
  return r;
}
// Reverse the byte order of a 64-bit word using the classic logarithmic
// swap: adjacent bytes, then 16-bit pairs, then the two 32-bit halves.
inline uint64_t byte_reverse64(uint64_t f) {
  f = ((f & 0x00FF00FF00FF00FFULL) <<  8) | ((f & 0xFF00FF00FF00FF00ULL) >>  8);
  f = ((f & 0x0000FFFF0000FFFFULL) << 16) | ((f & 0xFFFF0000FFFF0000ULL) >> 16);
  f = (f << 32) | (f >> 32);
  return f;
}
61
// 64-bit network-to-host conversion (a 64-bit analogue of ntohl).
#if BYTE_ORDER == BIG_ENDIAN
// Big-endian host: network order IS host order, nothing to do.
inline uint64_t Grid_ntohll(uint64_t A) { return A; }
#else
// Little-endian host: full byte reverse.
inline uint64_t Grid_ntohll(uint64_t A) {
  return byte_reverse64(A);
}
#endif
69
// A little helper: strip every whitespace character from `key` in place.
// Fix: ::isspace has undefined behaviour when handed a negative char value
// (bytes >= 0x80 on platforms where char is signed), so the character is
// cast to unsigned char before classification.
inline void removeWhitespace(std::string &key)
{
  key.erase(std::remove_if(key.begin(), key.end(),
                           [](unsigned char c) { return std::isspace(c) != 0; }),
            key.end());
}
75
77// Static class holding the parallel IO code
78// Could just use a namespace
80class BinaryIO {
81 public:
82 struct IoPerf
83 {
84 uint64_t size{0},time{0};
85 double mbytesPerSecond{0.};
86 };
87
90
92 // more byte manipulation helpers
94
95 template<class vobj> static inline void Uint32Checksum(Lattice<vobj> &lat,uint32_t &nersc_csum)
96 {
97 typedef typename vobj::scalar_object sobj;
98
99 GridBase *grid = lat.Grid();
100 uint64_t lsites = grid->lSites();
101
102 std::vector<sobj> scalardata(lsites);
103 unvectorizeToLexOrdArray(scalardata,lat);
104
105 NerscChecksum(grid,scalardata,nersc_csum);
106 }
107
108 template <class fobj>
109 static inline void NerscChecksum(GridBase *grid, std::vector<fobj> &fbuf, uint32_t &nersc_csum)
110 {
111 const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t);
112
113 uint64_t lsites = grid->lSites();
114 if (fbuf.size() == 1)
115 {
116 lsites = 1;
117 }
118
120 {
121 uint32_t nersc_csum_thr = 0;
122
123 thread_for_in_region( local_site, lsites,
124 {
125 uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
126 for (uint64_t j = 0; j < size32; j++)
127 {
128 nersc_csum_thr = nersc_csum_thr + site_buf[j];
129 }
130 });
131
133 {
134 nersc_csum += nersc_csum_thr;
135 }
136 }
137 }
138
139 template<class fobj> static inline void ScidacChecksum(GridBase *grid,std::vector<fobj> &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb)
140 {
141 int nd = grid->_ndimension;
142
143 uint64_t lsites =grid->lSites();
144 if (fbuf.size()==1) {
145 lsites=1;
146 }
147 Coordinate local_vol =grid->LocalDimensions();
148 Coordinate local_start =grid->LocalStarts();
149 Coordinate global_vol =grid->FullDimensions();
150
152 {
153 Coordinate coor(nd);
154 uint32_t scidac_csuma_thr=0;
155 uint32_t scidac_csumb_thr=0;
156 uint32_t site_crc=0;
157
158 thread_for_in_region( local_site, lsites,
159 {
160
161 uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
162
163 /*
164 * Scidac csum is rather more heavyweight
165 * FIXME -- 128^3 x 256 x 16 will overflow.
166 */
167
168 int64_t global_site;
169
170 Lexicographic::CoorFromIndex(coor,local_site,local_vol);
171
172 for(int d=0;d<nd;d++) {
173 coor[d] = coor[d]+local_start[d];
174 }
175
176 Lexicographic::IndexFromCoor(coor,global_site,global_vol);
177
178 uint64_t gsite29 = global_site%29;
179 uint64_t gsite31 = global_site%31;
180
181 site_crc = crc32(0,(unsigned char *)site_buf,sizeof(fobj));
182 // std::cout << "Site "<<local_site << " crc "<<std::hex<<site_crc<<std::dec<<std::endl;
183 // std::cout << "Site "<<local_site << std::hex<<site_buf[0] <<site_buf[1]<<std::dec <<std::endl;
184 scidac_csuma_thr ^= site_crc<<gsite29 | site_crc>>(32-gsite29);
185 scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
186 });
187
189 {
190 scidac_csuma^= scidac_csuma_thr;
191 scidac_csumb^= scidac_csumb_thr;
192 }
193 }
194 }
195
  // Network is big endian
  // Each endian conversion is an involution (a byte reverse, or a no-op), so
  // the host-to-file direction simply reuses the file-to-host routines.
  static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
  static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
  static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
  static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
201
  // Convert a buffer of big-endian (network order) 32-bit words to host
  // order, in place and thread-parallel. `bytes` is the buffer size in bytes.
  static inline void be32toh_v(void *file_object,uint64_t bytes)
  {
    uint32_t * f = (uint32_t *)file_object;
    uint64_t count = bytes/sizeof(uint32_t);
    thread_for( i, count, {
      f[i] = ntohl(f[i]);
    });
  }
  // LE must Swap and switch to host
  // Convert little-endian 32-bit words to host order in place: byte-reverse
  // each word (little-endian -> network order), then ntohl to host order.
  // Net effect: identity on a little-endian host, a byte reverse on big-endian.
  static inline void le32toh_v(void *file_object,uint64_t bytes)
  {
    uint32_t *fp = (uint32_t *)file_object;

    uint64_t count = bytes/sizeof(uint32_t);
    thread_for(i,count,{
      uint32_t f;
      f = fp[i];
      // got network order and the network to host
      f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
      fp[i] = ntohl(f);
    });
  }
224
  // BE is same as network
  // Convert big-endian 64-bit words to host order in place (no-op on a
  // big-endian host, byte reverse on little-endian).
  static inline void be64toh_v(void *file_object,uint64_t bytes)
  {
    uint64_t * f = (uint64_t *)file_object;
    uint64_t count = bytes/sizeof(uint64_t);
    thread_for( i, count, {
      f[i] = Grid_ntohll(f[i]);
    });
  }
234
  // LE must swap and switch;
  // Convert little-endian 64-bit words to host order in place: byte-reverse
  // each word (little-endian -> network order), then Grid_ntohll to host
  // order. Net effect: identity on LE hosts, a byte reverse on BE hosts.
  static inline void le64toh_v(void *file_object,uint64_t bytes)
  {
    uint64_t *fp = (uint64_t *)file_object;
    uint64_t count = bytes/sizeof(uint64_t);
    thread_for( i, count, {
      uint64_t f,g;
      f = fp[i];
      // got network order and the network to host
      // (reverse bytes within each 32-bit half, then swap the halves)
      g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
      g = g << 32;
      f = f >> 32;
      g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
      fp[i] = Grid_ntohll(g);
    });
  }
251
  //////////////////////////////////////////////////////////////////////////////
  // Real action:
  // Read or Write distributed lexico array of ANY object to a specific location in file
  //////////////////////////////////////////////////////////////////////////////

  // Control-flag bits for IOobject (combine with bitwise OR):
  static const int BINARYIO_MASTER_APPEND = 0x10; // single record at the end of the file
  static const int BINARYIO_UNORDERED     = 0x08; // not referenced in the visible code paths
  static const int BINARYIO_LEXICOGRAPHIC = 0x04; // global lexicographic layout (enables the MPI-IO path)
  static const int BINARYIO_READ          = 0x02;
  static const int BINARYIO_WRITE         = 0x01;
261
  // Collective read/write of a distributed lexicographic array of fobj.
  //   w        : dummy value whose type (float/double) selects the MPI word type
  //   grid     : communicator/geometry provider
  //   iodata   : local data; lSites() entries, or exactly 1 for MASTER_APPEND
  //   offset   : byte offset into the file; updated to the record end on write
  //   format   : "IEEE32BIG" | "IEEE32" | "IEEE64BIG" | "IEEE64"/"IEEE64LITTLE"
  //   control  : OR of BINARYIO_* flags
  //   nersc_csum / scidac_csuma / scidac_csumb : checksums of the record,
  //     reduced over ranks (sum / XOR) unless iodata.size()==1.
  // The SciDAC checksums are computed on the file (on-disk) byte order; the
  // NERSC checksum on the host byte order — see the ordering of the checksum
  // and endian-conversion calls in both branches below.
  template<class word,class fobj>
  static inline void IOobject(word w,
                              GridBase *grid,
                              std::vector<fobj> &iodata,
                              std::string file,
                              uint64_t& offset,
                              const std::string &format, int control,
                              uint32_t &nersc_csum,
                              uint32_t &scidac_csuma,
                              uint32_t &scidac_csumb)
  {
    grid->Barrier();
    GridStopWatch timer;
    GridStopWatch bstimer;

    nersc_csum=0;
    scidac_csuma=0;
    scidac_csumb=0;

    int ndim   = grid->Dimensions();
    int nrank  = grid->ProcessorCount();
    int myrank = grid->ThisRank();

    Coordinate psizes  = grid->ProcessorGrid();
    Coordinate pcoor   = grid->ThisProcessorCoor();
    Coordinate gLattice= grid->GlobalDimensions();
    Coordinate lLattice= grid->LocalDimensions();

    Coordinate lStart(ndim);
    Coordinate gStart(ndim);

    // Flatten the file
    uint64_t lsites = grid->lSites();
    if ( control & BINARYIO_MASTER_APPEND ) {
      assert(iodata.size()==1);
    } else {
      assert(lsites==iodata.size());
    }
    // This rank's origin within the global lattice
    for(int d=0;d<ndim;d++){
      gStart[d] = lLattice[d]*pcoor[d];
      lStart[d] = 0;
    }

#ifdef USE_MPI_IO
    std::vector<int> distribs(ndim,MPI_DISTRIBUTE_BLOCK);
    std::vector<int> dargs   (ndim,MPI_DISTRIBUTE_DFLT_DARG);
    MPI_Datatype mpiObject;
    MPI_Datatype fileArray;
    MPI_Datatype localArray;
    MPI_Datatype mpiword;
    MPI_Offset disp = offset;
    MPI_File fh ;
    MPI_Status status;
    int numword;

    // fobj expressed as a count of MPI float/double words
    if ( sizeof( word ) == sizeof(float ) ) {
      numword = sizeof(fobj)/sizeof(float);
      mpiword = MPI_FLOAT;
    } else {
      numword = sizeof(fobj)/sizeof(double);
      mpiword = MPI_DOUBLE;
    }

    //////////////////////////////////////////////////////////////////////////////
    // Sobj in MPI phrasing
    //////////////////////////////////////////////////////////////////////////////
    int ierr;
    ierr = MPI_Type_contiguous(numword,mpiword,&mpiObject);    assert(ierr==0);
    ierr = MPI_Type_commit(&mpiObject);

    //////////////////////////////////////////////////////////////////////////////
    // File global array data type
    //////////////////////////////////////////////////////////////////////////////
    ierr=MPI_Type_create_subarray(ndim,&gLattice[0],&lLattice[0],&gStart[0],MPI_ORDER_FORTRAN, mpiObject,&fileArray);    assert(ierr==0);
    ierr=MPI_Type_commit(&fileArray);    assert(ierr==0);

    //////////////////////////////////////////////////////////////////////////////
    // local lattice array
    //////////////////////////////////////////////////////////////////////////////
    ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray);    assert(ierr==0);
    ierr=MPI_Type_commit(&localArray);    assert(ierr==0);
#endif

    //////////////////////////////////////////////////////////////////////////////
    // Byte order
    //////////////////////////////////////////////////////////////////////////////
    int ieee32big = (format == std::string("IEEE32BIG"));
    int ieee32    = (format == std::string("IEEE32"));
    int ieee64big = (format == std::string("IEEE64BIG"));
    int ieee64    = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
    // NOTE(review): the single '|' below looks like a typo for '||', but it is
    // truth-preserving, and the following assert enforces exactly one format.
    assert(ieee64||ieee32|ieee64big||ieee32big);
    assert((ieee64+ieee32+ieee64big+ieee32big)==1);
    //////////////////////////////////////////////////////////////////////////////
    // Do the I/O
    //////////////////////////////////////////////////////////////////////////////
    if ( control & BINARYIO_READ ) {

      timer.Start();

      if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
#ifdef USE_MPI_IO
        // Parallel collective read via MPI-IO subarray view
        std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
        ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);    assert(ierr==0);
        ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);    assert(ierr==0);
        ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status);    assert(ierr==0);
        MPI_File_close(&fh);
        MPI_Type_free(&fileArray);
        MPI_Type_free(&localArray);
#else
        assert(0);  // lexicographic multi-rank read requires MPI-IO
#endif
      } else {
        // Serial C++ stream read: each rank seeks to its own slab
        std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
                  << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
        std::ifstream fin;
        fin.open(file, std::ios::binary | std::ios::in);
        if (control & BINARYIO_MASTER_APPEND)
        {
          // Single record stored at the very end of the file
          fin.seekg(-sizeof(fobj), fin.end);
        }
        else
        {
          fin.seekg(offset + myrank * lsites * sizeof(fobj));
        }
        fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj));
        assert(fin.fail() == 0);
        fin.close();
      }
      timer.Stop();

      grid->Barrier();

      bstimer.Start();
      // SciDAC checksum on the raw file-order bytes, then convert to host
      // order, then NERSC checksum on the host-order data.
      ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
      if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      if (ieee32)    le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      if (ieee64)    le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      NerscChecksum(grid,iodata,nersc_csum);
      bstimer.Stop();
    }

    if ( control & BINARYIO_WRITE ) {

      bstimer.Start();
      // NERSC checksum on host-order data, convert in place to file order,
      // then SciDAC checksum on the file-order bytes.
      // NB: iodata is left in FILE byte order after this block.
      NerscChecksum(grid,iodata,nersc_csum);
      if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      if (ieee32)    htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      if (ieee64)    htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
      ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
      bstimer.Stop();

      grid->Barrier();

      timer.Start();
      if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
#ifdef USE_MPI_IO
        // Parallel collective write via MPI-IO subarray view
        std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl;
        ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
        //   std::cout << GridLogMessage << "Checking for errors" << std::endl;
        if (ierr != MPI_SUCCESS)
        {
          // Report both the error class and the specific error, then abort
          char error_string[BUFSIZ];
          int length_of_error_string, error_class;

          MPI_Error_class(ierr, &error_class);
          MPI_Error_string(error_class, error_string, &length_of_error_string);
          fprintf(stderr, "%3d: %s\n", myrank, error_string);
          MPI_Error_string(ierr, error_string, &length_of_error_string);
          fprintf(stderr, "%3d: %s\n", myrank, error_string);
          MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
        }

        std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
        ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
        assert(ierr == 0);

        std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
        ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
        assert(ierr == 0);

        // Report the end-of-record byte offset back to the caller
        MPI_Offset os;
        MPI_File_get_position(fh, &os);
        MPI_File_get_byte_offset(fh, os, &disp);
        offset = disp;

        MPI_File_close(&fh);
        MPI_Type_free(&fileArray);
        MPI_Type_free(&localArray);
#else
        assert(0);  // lexicographic multi-rank write requires MPI-IO
#endif
      } else {

        // Serial C++ stream write: each rank seeks to its own slab
        std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
                  << iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;

        std::ofstream fout;
        fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
        try {
          if (offset) { // Must already exist and contain data
            fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
          } else {      // Allow create
            fout.open(file,std::ios::binary|std::ios::out);
          }
        } catch (const std::fstream::failure& exc) {
          std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
          std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
          //      std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
#ifdef USE_MPI_IO
          MPI_Abort(MPI_COMM_WORLD,1);
#else
          exit(1);
#endif
        }

        if ( control & BINARYIO_MASTER_APPEND )  {
          try {
            fout.seekp(0,fout.end);
          } catch (const std::fstream::failure& exc) {
            std::cout << "Exception in seeking file end " << file << std::endl;
          }
        } else {
          try {
            fout.seekp(offset+myrank*lsites*sizeof(fobj));
          } catch (const std::fstream::failure& exc) {
            std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl;
          }
        }

        try {
          fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
        }
        catch (const std::fstream::failure& exc) {
          std::cout << "Exception in writing file " << file << std::endl;
          std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
#ifdef USE_MPI_IO
          MPI_Abort(MPI_COMM_WORLD,1);
#else
          exit(1);
#endif
        }
        offset = fout.tellp();
        fout.close();
      }
      timer.Stop();
    }

    // Performance accounting for the transfer just completed
    lastPerf.size            = sizeof(fobj)*iodata.size()*nrank;
    lastPerf.time            = timer.useconds();
    lastPerf.mbytesPerSecond = lastPerf.size/1024./1024./(lastPerf.time/1.0e6);
    std::cout<<GridLogMessage<<"IOobject: ";
    if ( control & BINARYIO_READ) std::cout << " read ";
    else                          std::cout << " write ";
    uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
    std::cout<< lastPerf.size <<" bytes in "<< timer.Elapsed() <<" "
             << lastPerf.mbytesPerSecond <<" MB/s "<<std::endl;

    std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed()  <<std::endl;

    //////////////////////////////////////////////////////////////////////////////
    // Safety check
    //////////////////////////////////////////////////////////////////////////////
    // if the data size is 1 we do not want to sum over the MPI ranks
    if (iodata.size() != 1){
      grid->Barrier();
      grid->GlobalSum(nersc_csum);
      grid->GlobalXOR(scidac_csuma);
      grid->GlobalXOR(scidac_csumb);
      grid->Barrier();
    }
  }
536
  //////////////////////////////////////////////////////////////////////////////
  // Read a Lattice of object
  //////////////////////////////////////////////////////////////////////////////
  // IOobject fills a lexicographic buffer of file objects; `munge` converts
  // each file object to the scalar object; the result is re-vectorized into
  // Umu. Checksums of the file data are returned via the reference params.
  template<class vobj,class fobj,class munger>
  static inline void readLatticeObject(Lattice<vobj> &Umu,
                                       std::string file,
                                       munger munge,
                                       uint64_t offset,
                                       const std::string &format,
                                       uint32_t &nersc_csum,
                                       uint32_t &scidac_csuma,
                                       uint32_t &scidac_csumb,
                                       int control=BINARYIO_LEXICOGRAPHIC
                                       )
  {
    typedef typename vobj::scalar_object sobj;
    // IO word type (float/double) steers the MPI datatype inside IOobject
    typedef typename vobj::Realified::scalar_type word;    word w=0;

    GridBase *grid = Umu.Grid();
    uint64_t lsites = grid->lSites();

    std::vector<sobj> scalardata(lsites);
    std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here

    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|control,
             nersc_csum,scidac_csuma,scidac_csumb);

    GridStopWatch timer;
    timer.Start();

    // Site-by-site conversion from file format to scalar object
    thread_for(x,lsites, { munge(iodata[x], scalardata[x]); });

    vectorizeFromLexOrdArray(scalardata,Umu);
    grid->Barrier();

    timer.Stop();
    std::cout<<GridLogMessage<<"readLatticeObject: vectorize overhead "<<timer.Elapsed()  <<std::endl;
  }
575
  //////////////////////////////////////////////////////////////////////////////
  // Write a Lattice of object
  //////////////////////////////////////////////////////////////////////////////
  // Unvectorize Umu to a lexicographic scalar buffer, munge each site to the
  // file object, and write with IOobject. If latticeWriteMaxRetry >= 0 the
  // record is read back and its checksums compared; on mismatch the write is
  // retried up to latticeWriteMaxRetry times. A negative latticeWriteMaxRetry
  // disables verification (single unchecked write).
  template<class vobj,class fobj,class munger>
  static inline void writeLatticeObject(Lattice<vobj> &Umu,
                                        std::string file,
                                        munger munge,
                                        uint64_t offset,
                                        const std::string &format,
                                        uint32_t &nersc_csum,
                                        uint32_t &scidac_csuma,
                                        uint32_t &scidac_csumb,
                                        int control=BINARYIO_LEXICOGRAPHIC)
  {
    typedef typename vobj::scalar_object sobj;
    typedef typename vobj::Realified::scalar_type word;    word w=0;
    GridBase *grid = Umu.Grid();
    uint64_t lsites = grid->lSites(), offsetCopy = offset;
    int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
    bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);

    std::vector<sobj> scalardata(lsites);
    std::vector<fobj>     iodata(lsites); // Munge, checksum, byte order in here

    //////////////////////////////////////////////////////////////////////////////
    // Munge [ .e.g 3rd row recon ]
    //////////////////////////////////////////////////////////////////////////////
    GridStopWatch timer; timer.Start();
    unvectorizeToLexOrdArray(scalardata,Umu);

    thread_for(x, lsites, { munge(scalardata[x],iodata[x]); });

    grid->Barrier();
    timer.Stop();
    while (attemptsLeft >= 0)
    {
      grid->Barrier();
      IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|control,
               nersc_csum,scidac_csuma,scidac_csumb);
      if (checkWrite)
      {
        // Read the record back from the original offset and compare checksums
        std::vector<fobj> ckiodata(lsites);
        uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
        uint64_t ckoffset = offsetCopy;

        std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
        grid->Barrier();
        IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|control,
                 cknersc_csum,ckscidac_csuma,ckscidac_csumb);
        if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
        {
          std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
          offset = offsetCopy;
          // IOobject's write path byte-swapped iodata in place, so it must be
          // regenerated from scalardata before retrying.
          thread_for(x,lsites, { munge(scalardata[x],iodata[x]); });
        }
        else
        {
          std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
          break;
        }
      }
      attemptsLeft--;
    }

    std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed()  <<std::endl;
  }
643
  //////////////////////////////////////////////////////////////////////////////
  // Read a RNG;  use IOobject and lexico map to an array of state
  //////////////////////////////////////////////////////////////////////////////
  // Reads the per-site parallel RNG states (lexicographic record), then the
  // single serial RNG state stored as a MASTER_APPEND record at the end of
  // the file. Checksums of both records are combined (sum / XOR) on return.
  static inline void readRNG(GridSerialRNG &serial_rng,
                             GridParallelRNG &parallel_rng,
                             std::string file,
                             uint64_t offset,
                             uint32_t &nersc_csum,
                             uint32_t &scidac_csuma,
                             uint32_t &scidac_csumb)
  {
    typedef typename GridSerialRNG::RngStateType RngStateType;
    const int RngStateCount = GridSerialRNG::RngStateCount;
    typedef std::array<RngStateType,RngStateCount> RNGstate;
    typedef RngStateType word;    word w=0;

    // RNG state is stored as big-endian 32-bit words
    std::string format = "IEEE32BIG";

    GridBase *grid = parallel_rng.Grid();
    uint64_t gsites = grid->gSites();
    uint64_t lsites = grid->lSites();

    uint32_t nersc_csum_tmp   = 0;
    uint32_t scidac_csuma_tmp = 0;
    uint32_t scidac_csumb_tmp = 0;

    GridStopWatch timer;

    std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl;

    std::vector<RNGstate> iodata(lsites);
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
             nersc_csum,scidac_csuma,scidac_csumb);

    timer.Start();
    // Scatter each lexicographic site state into the matching generator
    thread_for(lidx,lsites,{  // FIX ME, suboptimal implementation
      std::vector<RngStateType> tmp(RngStateCount);
      std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
      Coordinate lcoor;
      grid->LocalIndexToLocalCoor(lidx, lcoor);
      int o_idx=grid->oIndex(lcoor);
      int i_idx=grid->iIndex(lcoor);
      int gidx=parallel_rng.generator_idx(o_idx,i_idx);
      parallel_rng.SetState(tmp,gidx);
    });
    timer.Stop();

    // Serial RNG state: single record at the end of the file
    iodata.resize(1);
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_MASTER_APPEND,
             nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);

    {
      std::vector<RngStateType> tmp(RngStateCount);
      std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin());
      serial_rng.SetState(tmp,0);
    }

    // Combine parallel- and serial-record checksums
    nersc_csum   = nersc_csum   + nersc_csum_tmp;
    scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
    scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;

    std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl;
    std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
    std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl;

    std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
  }
711
  // Write a RNG; lexico map to an array of state and use IOobject
  // Writes the per-site parallel RNG states as a lexicographic record, then
  // appends the single serial RNG state as a MASTER_APPEND record. Checksums
  // of both records are combined (sum / XOR) on return.
  static inline void writeRNG(GridSerialRNG &serial_rng,
                              GridParallelRNG &parallel_rng,
                              std::string file,
                              uint64_t offset,
                              uint32_t &nersc_csum,
                              uint32_t &scidac_csuma,
                              uint32_t &scidac_csumb)
  {
    typedef typename GridSerialRNG::RngStateType RngStateType;
    typedef RngStateType word; word w=0;
    const int RngStateCount = GridSerialRNG::RngStateCount;
    typedef std::array<RngStateType,RngStateCount> RNGstate;

    GridBase *grid = parallel_rng.Grid();
    uint64_t gsites = grid->gSites();
    uint64_t lsites = grid->lSites();

    // Filled in by the MASTER_APPEND IOobject call below
    uint32_t nersc_csum_tmp;
    uint32_t scidac_csuma_tmp;
    uint32_t scidac_csumb_tmp;

    GridStopWatch timer;
    // RNG state is stored as big-endian 32-bit words
    std::string format = "IEEE32BIG";

    std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl;

    timer.Start();
    // Gather each generator's state into a lexicographic buffer
    std::vector<RNGstate> iodata(lsites);
    thread_for(lidx,lsites,{
      std::vector<RngStateType> tmp(RngStateCount);
      Coordinate lcoor;
      grid->LocalIndexToLocalCoor(lidx, lcoor);
      int o_idx=grid->oIndex(lcoor);
      int i_idx=grid->iIndex(lcoor);
      int gidx=parallel_rng.generator_idx(o_idx,i_idx);
      parallel_rng.GetState(tmp,gidx);
      std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
    });
    timer.Stop();

    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
             nersc_csum,scidac_csuma,scidac_csumb);

    // Serial RNG state: single record appended at the end of the file
    iodata.resize(1);
    {
      std::vector<RngStateType> tmp(RngStateCount);
      serial_rng.GetState(tmp,0);
      std::copy(tmp.begin(),tmp.end(),iodata[0].begin());
    }
    IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND,
             nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);

    // Combine parallel- and serial-record checksums
    nersc_csum   = nersc_csum   + nersc_csum_tmp;
    scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
    scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;

    std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum << std::dec << std::endl;
    std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
    std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
    std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
  }
};
775
AcceleratorVector< int, MaxDims > Coordinate
Definition Coordinate.h:95
std::enable_if< isSIMDvectorized< vobj >::value &&!isSIMDvectorized< sobj >::value, void >::type vectorizeFromLexOrdArray(std::vector< sobj > &in, Lattice< vobj > &out)
std::enable_if< isSIMDvectorized< vobj >::value &&!isSIMDvectorized< sobj >::value, void >::type unvectorizeToLexOrdArray(std::vector< sobj > &out, const Lattice< vobj > &in)
GridLogger GridLogError(1, "Error", GridLogColours, "RED")
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE")
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL")
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
#define thread_critical
Definition Threads.h:73
#define thread_region
Definition Threads.h:72
#define thread_for(i, num,...)
Definition Threads.h:60
#define thread_for_in_region(i, num,...)
Definition Threads.h:68
static void htobe32_v(void *file_object, uint32_t bytes)
Definition BinaryIO.h:197
static void ScidacChecksum(GridBase *grid, std::vector< fobj > &fbuf, uint32_t &scidac_csuma, uint32_t &scidac_csumb)
Definition BinaryIO.h:139
static void le32toh_v(void *file_object, uint64_t bytes)
Definition BinaryIO.h:211
static void readRNG(GridSerialRNG &serial_rng, GridParallelRNG &parallel_rng, std::string file, uint64_t offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb)
Definition BinaryIO.h:647
static int latticeWriteMaxRetry
Definition BinaryIO.h:89
static void NerscChecksum(GridBase *grid, std::vector< fobj > &fbuf, uint32_t &nersc_csum)
Definition BinaryIO.h:109
static const int BINARYIO_UNORDERED
Definition BinaryIO.h:257
static const int BINARYIO_READ
Definition BinaryIO.h:259
static void IOobject(word w, GridBase *grid, std::vector< fobj > &iodata, std::string file, uint64_t &offset, const std::string &format, int control, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb)
Definition BinaryIO.h:263
static IoPerf lastPerf
Definition BinaryIO.h:88
static void be32toh_v(void *file_object, uint64_t bytes)
Definition BinaryIO.h:202
static void le64toh_v(void *file_object, uint64_t bytes)
Definition BinaryIO.h:236
static const int BINARYIO_MASTER_APPEND
Definition BinaryIO.h:256
static void writeRNG(GridSerialRNG &serial_rng, GridParallelRNG &parallel_rng, std::string file, uint64_t offset, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb)
Definition BinaryIO.h:714
static void writeLatticeObject(Lattice< vobj > &Umu, std::string file, munger munge, uint64_t offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb, int control=BINARYIO_LEXICOGRAPHIC)
Definition BinaryIO.h:580
static void Uint32Checksum(Lattice< vobj > &lat, uint32_t &nersc_csum)
Definition BinaryIO.h:95
static void be64toh_v(void *file_object, uint64_t bytes)
Definition BinaryIO.h:226
static void htobe64_v(void *file_object, uint32_t bytes)
Definition BinaryIO.h:198
static void htole32_v(void *file_object, uint32_t bytes)
Definition BinaryIO.h:199
static void readLatticeObject(Lattice< vobj > &Umu, std::string file, munger munge, uint64_t offset, const std::string &format, uint32_t &nersc_csum, uint32_t &scidac_csuma, uint32_t &scidac_csumb, int control=BINARYIO_LEXICOGRAPHIC)
Definition BinaryIO.h:541
static const int BINARYIO_WRITE
Definition BinaryIO.h:260
static const int BINARYIO_LEXICOGRAPHIC
Definition BinaryIO.h:258
static void htole64_v(void *file_object, uint32_t bytes)
Definition BinaryIO.h:200
const Coordinate & ThisProcessorCoor(void)
const Coordinate & ProcessorGrid(void)
void LocalIndexToLocalCoor(int lidx, Coordinate &lcoor)
const Coordinate & GlobalDimensions(void)
int64_t gSites(void) const
virtual int oIndex(Coordinate &coor)
int lSites(void) const
virtual int iIndex(Coordinate &lcoor)
const Coordinate & FullDimensions(void)
const Coordinate & LocalDimensions(void)
const Coordinate LocalStarts(void)
int generator_idx(int os, int is)
GridBase * Grid(void) const
void GetState(std::vector< RngStateType > &saved, RngEngine &eng)
void SetState(std::vector< RngStateType > &saved, RngEngine &eng)
void Start(void)
Definition Timer.h:92
GridTime Elapsed(void) const
Definition Timer.h:113
uint64_t useconds(void) const
Definition Timer.h:117
void Stop(void)
Definition Timer.h:99
GridBase * Grid(void) const
uint64_t Grid_ntohll(uint64_t A)
Definition BinaryIO.h:63
void removeWhitespace(std::string &key)
Definition BinaryIO.h:71
uint64_t byte_reverse64(uint64_t f)
Definition BinaryIO.h:53
uint32_t byte_reverse32(uint32_t f)
Definition BinaryIO.h:49
double mbytesPerSecond
Definition BinaryIO.h:85
uint64_t size
Definition BinaryIO.h:84
uint64_t time
Definition BinaryIO.h:84