yoda is hosted by Hepforge, IPPP Durham
YODA - Yet more Objects for Data Analysis 2.1.0
H5Utils.h
Go to the documentation of this file.
1// -*- C++ -*-
2//
3// This file is part of YODA -- Yet more Objects for Data Analysis
4// Copyright (C) 2008-2023 The YODA collaboration (see AUTHORS for details)
5//
6#ifndef YODA_H5UTILS_H
7#define YODA_H5UTILS_H
8
11#ifdef WITH_HIGHFIVE
12#include <YODA/highfive/H5File.hpp>
13#else
14#include <highfive/H5File.hpp>
15#define YODA_H5 HighFive
16#endif
17
18#include <map>
19#include <memory>
20#include <string>
21#include <type_traits>
22#include <vector>
23
24using std::map;
25using std::string;
26using std::vector;
27
28namespace YODA {
29
34 template <typename T>
35 YODA_H5::DataSet H5DataSet(YODA_H5::File& h5file, const string& label,
36 vector<T>&& data, bool compress) {
37
38 YODA_H5::DataSetCreateProps props;
39 if (compress && data.size()) {
40 props.add(YODA_H5::Chunking(vector<hsize_t>{data.size()}));
41 props.add(YODA_H5::Deflate(9));
42 }
43 return h5file.createDataSet(label, std::move(data), props);
44 }
45
46
51 template <typename T>
52 YODA_H5::DataSet H5DataSet(YODA_H5::File& h5file, const string& label,
53 size_t nrows, size_t chunksize, bool compress) {
54
55 YODA_H5::DataSetCreateProps props;
56 if (compress) { // enable compression
57 props.add(YODA_H5::Chunking(vector<hsize_t>{1,chunksize}));
58 props.add(YODA_H5::Deflate(9));
59 }
60 // create the dataset
61 return h5file.createDataSet(label, YODA_H5::DataSpace({nrows,chunksize}),
62 YODA_H5::create_datatype<T>(), props);
63 }
64
67 public:
68
71
72 // @brief Constructor from existing DataSet
73 H5DataSetReader(const YODA_H5::File& h5file, const string& label)
74 : _nrows(0), _thisrow(0), _ncols(0), _thiscol(0), _ds(nullptr) {
75 _ds = std::make_unique<YODA_H5::DataSet>(h5file.getDataSet(label));
76 const auto& dims = _ds->getDimensions();
77 _nrows = dims.at(0);
78 if (dims.size() > 1) _ncols = dims.at(1);
79 }
80
82 void skip(size_t len) noexcept { _thisrow += len; }
83
85 template <typename T = size_t>
86 T next() noexcept {
87 T item;
88 _ds->select({_thisrow++}, {1}).read(item);
89 return item;
90 }
91
93 template <typename T>
94 vector<T> read() noexcept {
95 if (_nrows == 0) return vector<T>{};
96 vector<T> data; data.reserve(_nrows);
97 _ds->select({0}, {_nrows}).read(data);
98 return data;
99 }
100
102 template <typename T>
103 vector<T> read(size_t len) noexcept {
104 if (len == 0) return {};
105 vector<T> data; data.reserve(len);
106 _ds->select({_thisrow}, {len}).read(data);
107 _thisrow += len;
108 return data;
109 }
110
112 template <typename T>
113 vector<T> readSlice(size_t len) noexcept {
114 if (len == 0) return {};
115 vector<T> data; data.reserve(len);
116 while (len) {
117 vector<vector<T>> tmp;
118 size_t ncols = std::min(len, _ncols - _thiscol);
119 size_t nrows = 1+(ncols-1)/_ncols; // C-style ceil
120 _ds->select({_thisrow,_thiscol}, {nrows,ncols}).read(tmp);
121 for (size_t i=0; i < tmp.size(); ++i) {
122 data.insert(data.end(), std::make_move_iterator(std::begin(tmp[i])),
123 std::make_move_iterator(std::end(tmp[i])));
124 }
125 if ((_thiscol + ncols) == _ncols) ++_thisrow;
126 _thiscol = (_thiscol + ncols) % _ncols;
127 len -= ncols;
128 }
129 return data;
130 }
131
133 template <typename T>
134 vector<T> readAt(size_t row, size_t len) noexcept {
135 _thisrow = row;
136 return read<T>(len);
137 }
138
140 template <typename T = size_t>
141 vector<T> readAttribute(const string& label) const noexcept {
142 vector<T> data;
143 _ds->getAttribute(label).read(data);
144 return data;
145 }
146
147 private:
148
149 size_t _nrows, _thisrow, _ncols, _thiscol;
150
151 std::unique_ptr<YODA_H5::DataSet> _ds;
152
153 };
154
155
157 template <typename T>
159 public:
160
161 // Constructor for empty DataSet
162 H5DataSetWriter(YODA_H5::File& h5file, const string& label,
163 size_t datalen, size_t chunksize, bool compress)
164 : _ncols(chunksize), _thiscol(0), _nrows(1+(datalen-1)/chunksize), _thisrow(0),
165 _ds(H5DataSet<T>(h5file, label, _nrows, chunksize, compress)) { }
166
168 void writeSlice(vector<T>&& data) noexcept {
169
170 if (data.empty()) return;
171
172 size_t offset = 0, len = data.size();
173 const auto itr = data.cbegin();
174 while (len) {
175 vector<T> tmp;
176 size_t ncols = std::min(len, _ncols - _thiscol);
177 auto first = itr + offset;
178 auto last = first + ncols;
179 _ds.select({_thisrow,_thiscol},
180 {1,ncols}).write(vector<vector<T>>{{std::make_move_iterator(first),
181 std::make_move_iterator(last)}});
182 offset += ncols;
183 _thiscol += ncols;
184 if (_thiscol == _ncols) {
185 _thiscol = 0; ++_thisrow;
186 }
187 len -= ncols;
188 }
189 }
190
192 template <typename U>
193 void createAttribute(const string& label, vector<U>&& data) noexcept {
194 _ds.createAttribute(label, std::forward<vector<U>>(data));
195 }
196
197 size_t colPos() const noexcept { return _thiscol; }
198
199 size_t rowPos() const noexcept { return _thisrow; }
200
201 private:
202
203 size_t _ncols, _thiscol, _nrows, _thisrow;
204
205 YODA_H5::DataSet _ds;
206
207 };
208
209
210
211
214 public:
215
218
220 virtual ~EdgeHandlerBase() { }
221
222 virtual void writeToFile(const string&, YODA_H5::File&, bool compress) = 0;
223
224 };
225
226
227
228
230 template<typename T>
232 public:
233
235
236 // Constructor from a vector of edges
237 EdgeHandler(const vector<T>& edges)
238 : _edges(edges) { }
239
240 // Constructor from an rvalue vector of edges
241 EdgeHandler(vector<T>&& edges)
242 : _edges(std::move(edges)) { }
243
244 // Method to extend the vector of edges
245 void extend(vector<T>&& edges) {
246 _edges.insert(_edges.end(), std::make_move_iterator(std::begin(edges)),
247 std::make_move_iterator(std::end(edges)));
248 }
249
250 auto begin() const { return _edges.cbegin(); }
251
252 // Method to commit vector of edges to H5 @a file using @a label
253 void writeToFile(const string& label, YODA_H5::File& file, bool compress) {
254 (void)H5DataSet(file, label, std::move(_edges), compress);
255 }
256
257 private:
258
259 vector<T> _edges;
260 };
261
263 template <typename T>
264 using EdgeHandlerPtr = typename std::shared_ptr<EdgeHandler<T>>;
265
266 using EdgeHandlerBasePtr = typename std::shared_ptr<EdgeHandlerBase>;
267
268
269
270
273
274 public:
275
276 static constexpr size_t AO_META = 2;
277
279 H5FileManager(const YODA_H5::File& file)
280 : _index(-1), _cachepos(-1),
281 _labelindex(0), _h5file(file),
282 _aoinfo(file, "aoinfo"),
283 _layout(file, "sizeinfo"),
284 _content(file, "content"),
285 _annos(file, "annotations"),
286 _labels(H5DataSetReader(file, "labels").read<string>()) {
287
288 _meta = _aoinfo.readAttribute("meta");
289 if (_meta.size() < 2)
290 throw ReadError("No file metadata found!");
291
292 _annosizes = _layout.read<size_t>(_meta.at(1));
293 _datasizes = _layout.read<size_t>(_meta.at(1));
294 _labelsizes = _layout.read<size_t>(_layout.next());
295 }
296
298 size_t version() const {
299 return _meta.at(0);
300 }
301
303 size_t size() const {
304 return _meta.at(1);
305 }
306
308 bool next() {
309 ++_index;
310 if ((size_t)_index == size()) return false;
311
312 _aodims = _aoinfo.readAt<string>(AO_META*_index, AO_META);
313 if (_aodims.empty()) {
314 throw ReadError("No AO information found!");
315 }
316
317 _cachepos = -1;
318
319 return true;
320 }
321
323 const string& path() const {
324 return _aodims.at(0);
325 }
326
328 const string& type() const {
329 return _aodims.at(1);
330 }
331
333 vector<string> loadAnnotations() noexcept {
334 return _annos.readSlice<string>(_annosizes[_index]);
335 }
336
338 vector<double> loadContent() noexcept {
339 return _content.readSlice<double>(_datasizes[_index]);
340 }
341
343 void skipCommon() noexcept {
344 _annos.skip(_annosizes[_index]);
345 _content.skip(_datasizes[_index]);
346 }
347
349 vector<size_t> loadMasks() noexcept {
350 return _layout.read<size_t>(_layout.next());
351 }
352
354 void skipMasks() noexcept {
355 _layout.skip(_layout.next());
356 }
357
359 vector<string> loadSources() noexcept {
360
361 if (_cachepos < 0) { // fill label cache
362 _labelcache = _layout.read<size_t>(_labelsizes[_labelindex++]);
363 _cachepos = 0;
364 }
365
366 size_t len = _labelcache[_cachepos++];
367 if (len == 0) return {};
368
369 vector<size_t> indices(_labelcache.begin()+_cachepos, _labelcache.begin()+_cachepos+len);
370 vector<string> sources; sources.reserve(indices.size());
371 for (size_t idx : indices) {
372 sources.emplace_back(_labels[idx]);
373 }
374 _cachepos += len;
375 return sources;
376 }
377
379 void skipSources() noexcept {
380 _layout.skip(_labelsizes[_labelindex++]);
381 }
382
384 template <typename EdgeT>
385 vector<EdgeT> loadEdges() noexcept {
386
387 const string label = string("edges_") + TypeID<EdgeT>::name();
388 auto itr = datasets.find(label);
389 if (itr == datasets.end()) {
390 // put H5::DataSet into DataSet cache
391 datasets[label] = H5DataSetReader(_h5file, label);
392 itr = datasets.find(label);
393 }
394 return itr->second.read<EdgeT>(_layout.next());
395 }
396
398 template <typename EdgeT>
399 void skipEdges() noexcept {
400 const string label = string("edges_") + TypeID<EdgeT>::name();
401 auto itr = datasets.find(label);
402 if (itr == datasets.end()) {
403 // put H5::DataSet into DataSet cache
404 datasets[label] = H5DataSetReader(_h5file, label);
405 itr = datasets.find(label);
406 }
407 itr->second.skip(_layout.next());
408 }
409
410 private:
411
412 ssize_t _index, _cachepos;
413
414 size_t _labelindex;
415
416 const YODA_H5::File _h5file;
417
418 H5DataSetReader _aoinfo, _layout, _content, _annos;
419
420 vector<string> _labels, _aodims;
421
422 vector<size_t> _meta, _annosizes, _datasizes, _labelsizes, _labelcache;
423
424 map<string,H5DataSetReader> datasets;
425 };
426
427
428}
429
430#endif
Base wrapper around a vector of edges.
Definition H5Utils.h:213
virtual void writeToFile(const string &, YODA_H5::File &, bool compress)=0
virtual ~EdgeHandlerBase()
Default destructor.
Definition H5Utils.h:220
EdgeHandlerBase()
Default constructor.
Definition H5Utils.h:217
Specialised wrapper for a vector of type T.
Definition H5Utils.h:231
EdgeHandler(const vector< T > &edges)
Definition H5Utils.h:237
auto begin() const
Definition H5Utils.h:250
EdgeHandler(vector< T > &&edges)
Definition H5Utils.h:241
void extend(vector< T > &&edges)
Definition H5Utils.h:245
void writeToFile(const string &label, YODA_H5::File &file, bool compress)
Definition H5Utils.h:253
Helper class to extract information from YODA_H5::DataSets.
Definition H5Utils.h:66
vector< T > readSlice(size_t len) noexcept
Method to read a subset of the 1D dataset.
Definition H5Utils.h:113
vector< T > readAttribute(const string &label) const noexcept
Method to read an attribute decorated onto this dataset.
Definition H5Utils.h:141
H5DataSetReader()
Nullary constructor.
Definition H5Utils.h:70
T next() noexcept
Load next item and increment cursor.
Definition H5Utils.h:86
vector< T > readAt(size_t row, size_t len) noexcept
Method to read a subset of the 1D dataset starting from row row.
Definition H5Utils.h:134
H5DataSetReader(const YODA_H5::File &h5file, const string &label)
Definition H5Utils.h:73
void skip(size_t len) noexcept
Move internal cursor by len elements.
Definition H5Utils.h:82
vector< T > read() noexcept
Method to read and return the entire 1D dataset.
Definition H5Utils.h:94
vector< T > read(size_t len) noexcept
Method to read a subset of the 1D dataset.
Definition H5Utils.h:103
A helper class to deal with chunking of H5 datasets.
Definition H5Utils.h:158
size_t rowPos() const noexcept
Definition H5Utils.h:199
size_t colPos() const noexcept
Definition H5Utils.h:197
H5DataSetWriter(YODA_H5::File &h5file, const string &label, size_t datalen, size_t chunksize, bool compress)
Definition H5Utils.h:162
void createAttribute(const string &label, vector< U > &&data) noexcept
Method to decorate dataset with an attribute.
Definition H5Utils.h:193
void writeSlice(vector< T > &&data) noexcept
Method to write a slice that may span multiple rows/columns.
Definition H5Utils.h:168
Helper class to extract AO information from a H5 file.
Definition H5Utils.h:272
vector< string > loadSources() noexcept
Labels of error sources of current AO.
Definition H5Utils.h:359
void skipMasks() noexcept
Skips next set of masked indices of current AO.
Definition H5Utils.h:354
vector< size_t > loadMasks() noexcept
Indices of masked bins in current AO.
Definition H5Utils.h:349
const string & path() const
Path of current AO.
Definition H5Utils.h:323
H5FileManager(const YODA_H5::File &file)
Constructor.
Definition H5Utils.h:279
static constexpr size_t AO_META
Definition H5Utils.h:276
void skipCommon() noexcept
Skips next set of annotations and content of current AO.
Definition H5Utils.h:343
vector< EdgeT > loadEdges() noexcept
Returns next set of edges of type EdgeT.
Definition H5Utils.h:385
vector< string > loadAnnotations() noexcept
Serialized annotations of current AO.
Definition H5Utils.h:333
vector< double > loadContent() noexcept
Serialized content of current AO.
Definition H5Utils.h:338
size_t size() const
Number of AOs in this H5 file.
Definition H5Utils.h:303
size_t version() const
H5 YODA format version.
Definition H5Utils.h:298
bool next()
Loads next AO from file.
Definition H5Utils.h:308
void skipSources() noexcept
Skips next set of error sources of current AO.
Definition H5Utils.h:379
const string & type() const
Type of current AO.
Definition H5Utils.h:328
void skipEdges() noexcept
Skips next set of edges of type EdgeT.
Definition H5Utils.h:399
Error for file reading errors.
Definition Exceptions.h:72
Anonymous namespace to limit visibility.
typename std::shared_ptr< EdgeHandlerBase > EdgeHandlerBasePtr
Definition H5Utils.h:266
YODA_H5::DataSet H5DataSet(YODA_H5::File &h5file, const string &label, vector< T > &&data, bool compress)
Helper method to construct and fill a YODA_H5::DataSet.
Definition H5Utils.h:35
typename std::shared_ptr< EdgeHandler< T > > EdgeHandlerPtr
Convenience aliases.
Definition H5Utils.h:264
void write(const std::string &filename, const AnalysisObject &ao, int precision=-1)
Write out object ao to file filename.
Definition IO.h:19
void read(const std::string &filename, std::vector< AnalysisObject * > &aos, const std::string &match="", const std::string &unmatch="")
Read in a collection of objects objs from file filename.
Definition IO.h:85
static const char * name()