Grid 0.7.0
Hdf5IO.h
Go to the documentation of this file.
1/*************************************************************************************
2
3 Grid physics library, www.github.com/paboyle/Grid
4
5 Source file: ./Grid/serialisation/Hdf5IO.h
6
7 Copyright (C) 2015
8
9 Author: Peter Boyle <paboyle@ed.ac.uk>
10 Author: Antonin Portelli <antonin.portelli@me.com>
11 Author: Guido Cossu <guido.cossu@ed.ac.uk>
12 Author: Michael Marshall <michael.marshall@ed.ac.uk>
13
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation; either version 2 of the License, or
17 (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License along
25 with this program; if not, write to the Free Software Foundation, Inc.,
26 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27
28 See the full license in the file "LICENSE" in the top level distribution directory
29 *************************************************************************************/
30/* END LEGAL */
31
32#ifndef GRID_SERIALISATION_HDF5_H
33#define GRID_SERIALISATION_HDF5_H
34
35#include <stack>
36#include <string>
37#include <list>
38#include <vector>
39#include <H5Cpp.h>
41#include "Hdf5Type.h"
42
43// default threshold above which datasets are used instead of attributes
44#ifndef HDF5_DEF_DATASET_THRES
45#define HDF5_DEF_DATASET_THRES 6u
46#endif
47
48// name guard for Grid metadata
49#define HDF5_GRID_GUARD "_Grid_"
50
51namespace Grid
52{
53 class Hdf5Writer: public Writer<Hdf5Writer>
54 {
55 public:
56 Hdf5Writer(const std::string &fileName);
57 virtual ~Hdf5Writer(void) = default;
58 void push(const std::string &s);
59 void pop(void);
60 void writeDefault(const std::string &s, const char *x);
61 template <typename U>
62 void writeDefault(const std::string &s, const U &x);
63 template <typename U>
64 void writeRagged(const std::string &s, const std::vector<U> &x);
65 template <typename U>
66 typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
67 writeDefault(const std::string &s, const std::vector<U> &x);
68 template <typename U>
69 typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
70 writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); }
71 template <typename U>
72 void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
73 H5NS::Group & getGroup(void);
74 private:
75 template <typename U>
76 void writeSingleAttribute(const U &x, const std::string &name,
77 const H5NS::DataType &type);
78 private:
79 std::string fileName_;
80 std::vector<std::string> path_;
81 H5NS::H5File file_;
82 H5NS::Group group_;
84 };
85
86 class Hdf5Reader: public Reader<Hdf5Reader>
87 {
88 public:
89 Hdf5Reader(const std::string &fileName, const bool readOnly = true);
90 virtual ~Hdf5Reader(void) = default;
91 bool push(const std::string &s);
92 void pop(void);
93 template <typename U>
94 void readDefault(const std::string &s, U &output);
95 template <typename U>
96 void readRagged(const std::string &s, std::vector<U> &x);
97 template <typename U>
98 typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
99 readDefault(const std::string &s, std::vector<U> &x);
100 template <typename U>
101 typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
102 readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); }
103 template <typename U>
104 void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
105 H5NS::Group & getGroup(void);
106 private:
107 template <typename U>
108 void readSingleAttribute(U &x, const std::string &name,
109 const H5NS::DataType &type);
110 private:
111 std::string fileName_;
112 std::vector<std::string> path_;
113 H5NS::H5File file_;
114 H5NS::Group group_;
115 unsigned int dataSetThres_;
116 };
117
118 // Writer template implementation ////////////////////////////////////////////
119 template <typename U>
120 void Hdf5Writer::writeSingleAttribute(const U &x, const std::string &name,
121 const H5NS::DataType &type)
122 {
123 H5NS::Attribute attribute;
124 hsize_t attrDim = 1;
125 H5NS::DataSpace attrSpace(1, &attrDim);
126
127 attribute = group_.createAttribute(name, type, attrSpace);
128 attribute.write(type, &x);
129 }
130
131 template <typename U>
132 void Hdf5Writer::writeDefault(const std::string &s, const U &x)
133 {
135 }
136
137 template <>
138 void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
139
140 template <typename U>
141 void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
142 {
143 // Hdf5 needs the dimensions as hsize_t
144 const int rank = static_cast<int>(Dimensions.size());
145 std::vector<hsize_t> dim(rank);
146 for(int i = 0; i < rank; i++)
147 dim[i] = Dimensions[i];
148 // write the entire dataset to file
149 H5NS::DataSpace dataSpace(rank, dim.data());
150
151 if (NumElements > dataSetThres_)
152 {
153 // Make sure 1) each dimension; and 2) chunk size is < 4GB
154 const hsize_t MaxElements = ( sizeof( U ) == 1 ) ? 0xffffffff : 0x100000000 / sizeof( U );
155 hsize_t ElementsPerChunk = 1;
156 bool bTooBig = false;
157 for( int i = rank - 1 ; i != -1 ; i-- ) {
158 auto &d = dim[i];
159 if( bTooBig )
160 d = 1; // Chunk size is already as big as can be - remaining dimensions = 1
161 else {
162 // If individual dimension too big, reduce by prime factors if possible
163 while( d > MaxElements && ( d & 1 ) == 0 )
164 d >>= 1;
165 const char ErrorMsg[] = " dimension > 4GB and not divisible by 2^n. "
166 "Hdf5IO chunk size will be inefficient. NB Serialisation is not intended for large datasets - please consider alternatives.";
167 if( d > MaxElements ) {
168 std::cout << GridLogWarning << "Individual" << ErrorMsg << std::endl;
169 hsize_t quotient = d / MaxElements;
170 if( d % MaxElements )
171 quotient++;
172 d /= quotient;
173 }
174 // Now make sure overall size is not too big
175 hsize_t OverflowCheck = ElementsPerChunk;
176 ElementsPerChunk *= d;
177 assert( OverflowCheck == ElementsPerChunk / d && "Product of dimensions overflowed hsize_t" );
178 // If product of dimensions too big, reduce by prime factors
179 while( ElementsPerChunk > MaxElements && ( ElementsPerChunk & 1 ) == 0 ) {
180 bTooBig = true;
181 d >>= 1;
182 ElementsPerChunk >>= 1;
183 }
184 if( ElementsPerChunk > MaxElements ) {
185 std::cout << GridLogWarning << "Product of" << ErrorMsg << std::endl;
186 hsize_t quotient = ElementsPerChunk / MaxElements;
187 if( ElementsPerChunk % MaxElements )
188 quotient++;
189 d /= quotient;
190 ElementsPerChunk /= quotient;
191 }
192 }
193 }
194 H5NS::DataSet dataSet;
195 H5NS::DSetCreatPropList plist;
196 plist.setChunk(rank, dim.data());
197 plist.setFletcher32();
198 dataSet = group_.createDataSet(s, Hdf5Type<U>::type(), dataSpace, plist);
199 dataSet.write(pDataRowMajor, Hdf5Type<U>::type());
200 }
201 else
202 {
203 H5NS::Attribute attribute;
204 attribute = group_.createAttribute(s, Hdf5Type<U>::type(), dataSpace);
205 attribute.write(Hdf5Type<U>::type(), pDataRowMajor);
206 }
207 }
208
209 template <typename U>
210 typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
211 Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
212 {
213 if (isRegularShape(x))
214 {
215 // alias to element type
216 using Scalar = typename is_flattenable<std::vector<U>>::type;
217
218 // flatten the vector and getting dimensions
219 Flatten<std::vector<U>> flat(x);
220 std::vector<size_t> dim;
221 const auto &flatx = flat.getFlatVector();
222 for (auto &d: flat.getDim())
223 dim.push_back(d);
224 writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size());
225 }
226 else
227 {
228 writeRagged(s, x);
229 }
230 }
231
232 template <typename U>
233 void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x)
234 {
235 push(s);
236 writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
238 for (hsize_t i = 0; i < x.size(); ++i)
239 {
240 write(s + "_" + std::to_string(i), x[i]);
241 }
242 pop();
243 }
244
245 // Reader template implementation ////////////////////////////////////////////
246 template <typename U>
247 void Hdf5Reader::readSingleAttribute(U &x, const std::string &name,
248 const H5NS::DataType &type)
249 {
250 H5NS::Attribute attribute;
251
252 attribute = group_.openAttribute(name);
253 attribute.read(type, &x);
254 }
255
256 template <typename U>
257 void Hdf5Reader::readDefault(const std::string &s, U &output)
258 {
260 }
261
262 template <>
263 void Hdf5Reader::readDefault(const std::string &s, std::string &x);
264
265 template <typename U>
266 void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
267 {
268 // alias to element type
269 using Scalar = typename is_flattenable<std::vector<U>>::type;
270
271 // read the dimensions
272 H5NS::DataSpace dataSpace;
273 std::vector<hsize_t> hdim;
274 hsize_t size = 1;
275
276 if (group_.attrExists(s))
277 {
278 dataSpace = group_.openAttribute(s).getSpace();
279 }
280 else
281 {
282 dataSpace = group_.openDataSet(s).getSpace();
283 }
284 hdim.resize(dataSpace.getSimpleExtentNdims());
285 dataSpace.getSimpleExtentDims(hdim.data());
286 for (auto &d: hdim)
287 {
288 dim.push_back(d);
289 size *= d;
290 }
291
292 // read the flat vector
293 buf.resize(size);
294
295 if (size > dataSetThres_)
296 {
297 H5NS::DataSet dataSet;
298
299 dataSet = group_.openDataSet(s);
300 dataSet.read(buf.data(), Hdf5Type<Scalar>::type());
301 }
302 else
303 {
304 H5NS::Attribute attribute;
305
306 attribute = group_.openAttribute(s);
307 attribute.read(Hdf5Type<Scalar>::type(), buf.data());
308 }
309 }
310
311 template <typename U>
312 typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
313 Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
314 {
315 if (H5Lexists (group_.getId(), s.c_str(), H5P_DEFAULT) > 0
316 && H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0)
317 {
318 readRagged(s, x);
319 }
320 else
321 {
322 // alias to element type
323 using Scalar = typename is_flattenable<std::vector<U>>::type;
324
325 std::vector<size_t> dim;
326 std::vector<Scalar> buf;
327 readMultiDim( s, buf, dim );
328
329 // reconstruct the multidimensional vector
330 Reconstruct<std::vector<U>> r(buf, dim);
331
332 x = r.getVector();
333 }
334 }
335
336 template <typename U>
337 void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x)
338 {
339 uint64_t size;
340
341 push(s);
342 readSingleAttribute(size, HDF5_GRID_GUARD "vector_size",
344 x.resize(size);
345 for (hsize_t i = 0; i < x.size(); ++i)
346 {
347 read(s + "_" + std::to_string(i), x[i]);
348 }
349 pop();
350 }
351}
352
353#endif
#define HDF5_GRID_GUARD
Definition Hdf5IO.h:49
#define HDF5_DEF_DATASET_THRES
Definition Hdf5IO.h:45
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW")
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
const std::vector< size_t > & getDim(void) const
const std::vector< Scalar > & getFlatVector(void) const
std::string fileName_
Definition Hdf5IO.h:111
unsigned int dataSetThres_
Definition Hdf5IO.h:115
H5NS::H5File file_
Definition Hdf5IO.h:113
void readMultiDim(const std::string &s, std::vector< U > &buf, std::vector< size_t > &dim)
Definition Hdf5IO.h:266
bool push(const std::string &s)
Definition Hdf5IO.cc:104
virtual ~Hdf5Reader(void)=default
void readSingleAttribute(U &x, const std::string &name, const H5NS::DataType &type)
Definition Hdf5IO.h:247
std::enable_if<!is_flattenable< std::vector< U > >::value >::type readDefault(const std::string &s, std::vector< U > &x)
Definition Hdf5IO.h:102
H5NS::Group group_
Definition Hdf5IO.h:114
std::vector< std::string > path_
Definition Hdf5IO.h:112
void readDefault(const std::string &s, U &output)
Definition Hdf5IO.h:257
void readRagged(const std::string &s, std::vector< U > &x)
Definition Hdf5IO.h:337
void pop(void)
Definition Hdf5IO.cc:112
H5NS::Group & getGroup(void)
Definition Hdf5IO.cc:143
Hdf5Reader(const std::string &fileName, const bool readOnly=true)
Definition Hdf5IO.cc:95
void pop(void)
Definition Hdf5IO.cc:55
const unsigned int dataSetThres_
Definition Hdf5IO.h:83
H5NS::Group & getGroup(void)
Definition Hdf5IO.cc:89
void writeRagged(const std::string &s, const std::vector< U > &x)
Definition Hdf5IO.h:233
void writeDefault(const std::string &s, const char *x)
Definition Hdf5IO.cc:82
std::enable_if<!is_flattenable< std::vector< U > >::value >::type writeDefault(const std::string &s, const std::vector< U > &x)
Definition Hdf5IO.h:70
std::string fileName_
Definition Hdf5IO.h:79
H5NS::H5File file_
Definition Hdf5IO.h:81
Hdf5Writer(const std::string &fileName)
Definition Hdf5IO.cc:40
std::vector< std::string > path_
Definition Hdf5IO.h:80
void writeSingleAttribute(const U &x, const std::string &name, const H5NS::DataType &type)
Definition Hdf5IO.h:120
H5NS::Group group_
Definition Hdf5IO.h:82
void push(const std::string &s)
Definition Hdf5IO.cc:49
virtual ~Hdf5Writer(void)=default
void writeMultiDim(const std::string &s, const std::vector< size_t > &Dimensions, const U *pDataRowMajor, size_t NumElements)
Definition Hdf5IO.h:141
std::enable_if< std::is_base_of< Serializable, U >::value, void >::type read(const std::string &s, U &output)
Definition BaseIO.h:393
const V & getVector(void) const
std::enable_if< std::is_base_of< Serializable, U >::value >::type write(const std::string &s, const U &output)
Definition BaseIO.h:260
bool isRegularShape(const T &t)