|
YODA - Yet more Objects for Data Analysis 2.1.0
|
Go to the documentation of this file.
12#include <YODA/highfive/H5File.hpp>
14#include <highfive/H5File.hpp>
15#define YODA_H5 HighFive
35 YODA_H5::DataSet H5DataSet(YODA_H5::File& h5file, const string& label,
36 vector<T>&& data, bool compress) {
38 YODA_H5::DataSetCreateProps props;
39 if (compress && data.size()) {
40 props.add(YODA_H5::Chunking(vector<hsize_t>{data.size()}));
41 props.add(YODA_H5::Deflate(9));
43 return h5file.createDataSet(label, std::move(data), props);
52 YODA_H5::DataSet H5DataSet(YODA_H5::File& h5file, const string& label,
53 size_t nrows, size_t chunksize, bool compress) {
55 YODA_H5::DataSetCreateProps props;
57 props.add(YODA_H5::Chunking(vector<hsize_t>{1,chunksize}));
58 props.add(YODA_H5::Deflate(9));
61 return h5file.createDataSet(label, YODA_H5::DataSpace({nrows,chunksize}),
62 YODA_H5::create_datatype<T>(), props);
74 : _nrows(0), _thisrow(0), _ncols(0), _thiscol(0), _ds(nullptr) {
75 _ds = std::make_unique<YODA_H5::DataSet>(h5file.getDataSet(label));
76 const auto& dims = _ds->getDimensions();
78 if (dims.size() > 1) _ncols = dims.at(1);
82 void skip( size_t len) noexcept { _thisrow += len; }
85 template < typename T = size_t>
88 _ds->select({_thisrow++}, {1}). read(item);
94 vector<T> read() noexcept {
95 if (_nrows == 0) return vector<T>{};
96 vector<T> data; data.reserve(_nrows);
97 _ds->select({0}, {_nrows}). read(data);
102 template < typename T>
103 vector<T> read( size_t len) noexcept {
104 if (len == 0) return {};
105 vector<T> data; data.reserve(len);
106 _ds->select({_thisrow}, {len}). read(data);
112 template < typename T>
114 if (len == 0) return {};
115 vector<T> data; data.reserve(len);
117 vector<vector<T>> tmp;
118 size_t ncols = std::min(len, _ncols - _thiscol);
119 size_t nrows = 1+(ncols-1)/_ncols;
120 _ds->select({_thisrow,_thiscol}, {nrows,ncols}). read(tmp);
121 for ( size_t i=0; i < tmp.size(); ++i) {
122 data.insert(data.end(), std::make_move_iterator(std::begin(tmp[i])),
123 std::make_move_iterator(std::end(tmp[i])));
125 if ((_thiscol + ncols) == _ncols) ++_thisrow;
126 _thiscol = (_thiscol + ncols) % _ncols;
133 template < typename T>
134 vector<T> readAt( size_t row, size_t len) noexcept {
140 template < typename T = size_t>
143 _ds->getAttribute(label).read(data);
149 size_t _nrows, _thisrow, _ncols, _thiscol;
151 std::unique_ptr<YODA_H5::DataSet> _ds;
157 template < typename T>
163 size_t datalen, size_t chunksize, bool compress)
164 : _ncols(chunksize), _thiscol(0), _nrows(1+(datalen-1)/chunksize), _thisrow(0),
165 _ds( H5DataSet<T>(h5file, label, _nrows, chunksize, compress)) { }
170 if (data.empty()) return;
172 size_t offset = 0, len = data.size();
173 const auto itr = data.cbegin();
176 size_t ncols = std::min(len, _ncols - _thiscol);
177 auto first = itr + offset;
178 auto last = first + ncols;
179 _ds.select({_thisrow,_thiscol},
180 {1,ncols}). write(vector<vector<T>>{{std::make_move_iterator(first),
181 std::make_move_iterator(last)}});
184 if (_thiscol == _ncols) {
185 _thiscol = 0; ++_thisrow;
192 template < typename U>
194 _ds.createAttribute(label, std::forward<vector<U>>(data));
197 size_t colPos() const noexcept { return _thiscol; }
199 size_t rowPos() const noexcept { return _thisrow; }
203 size_t _ncols, _thiscol, _nrows, _thisrow;
205 YODA_H5::DataSet _ds;
222 virtual void writeToFile( const string&, YODA_H5::File&, bool compress) = 0;
242 : _edges(std::move(edges)) { }
246 _edges.insert(_edges.end(), std::make_move_iterator(std::begin(edges)),
247 std::make_move_iterator(std::end(edges)));
250 auto begin() const { return _edges.cbegin(); }
253 void writeToFile( const string& label, YODA_H5::File& file, bool compress) {
254 (void) H5DataSet(file, label, std::move(_edges), compress);
263 template < typename T>
280 : _index(-1), _cachepos(-1),
281 _labelindex(0), _h5file(file),
282 _aoinfo(file, "aoinfo"),
283 _layout(file, "sizeinfo"),
284 _content(file, "content"),
285 _annos(file, "annotations"),
288 _meta = _aoinfo.readAttribute( "meta");
289 if (_meta.size() < 2)
290 throw ReadError( "No file metadata found!");
292 _annosizes = _layout.read< size_t>(_meta.at(1));
293 _datasizes = _layout.read< size_t>(_meta.at(1));
294 _labelsizes = _layout.read< size_t>(_layout.next());
310 if (( size_t)_index == size()) return false;
313 if (_aodims.empty()) {
314 throw ReadError( "No AO information found!");
324 return _aodims.at(0);
329 return _aodims.at(1);
334 return _annos.readSlice< string>(_annosizes[_index]);
339 return _content.readSlice< double>(_datasizes[_index]);
344 _annos.skip(_annosizes[_index]);
345 _content.skip(_datasizes[_index]);
350 return _layout.read< size_t>(_layout.next());
355 _layout.skip(_layout.next());
362 _labelcache = _layout.read< size_t>(_labelsizes[_labelindex++]);
366 size_t len = _labelcache[_cachepos++];
367 if (len == 0) return {};
369 vector<size_t> indices(_labelcache.begin()+_cachepos, _labelcache.begin()+_cachepos+len);
370 vector<string> sources; sources.reserve(indices.size());
371 for ( size_t idx : indices) {
372 sources.emplace_back(_labels[idx]);
380 _layout.skip(_labelsizes[_labelindex++]);
384 template < typename EdgeT>
388 auto itr = datasets.find(label);
389 if (itr == datasets.end()) {
392 itr = datasets.find(label);
394 return itr->second.read<EdgeT>(_layout.next());
398 template < typename EdgeT>
401 auto itr = datasets.find(label);
402 if (itr == datasets.end()) {
405 itr = datasets.find(label);
407 itr->second.skip(_layout.next());
412 ssize_t _index, _cachepos;
416 const YODA_H5::File _h5file;
420 vector<string> _labels, _aodims;
422 vector<size_t> _meta, _annosizes, _datasizes, _labelsizes, _labelcache;
424 map<string,H5DataSetReader> datasets;
Base wrapper around a vector of edges.
virtual void writeToFile(const string &, YODA_H5::File &, bool compress)=0
virtual ~EdgeHandlerBase() Default destructor.
EdgeHandlerBase() Default constructor.
Specialised wrapper for a vector of type T.
EdgeHandler(const vector< T > &edges)
EdgeHandler(vector< T > &&edges)
void extend(vector< T > &&edges)
void writeToFile(const string &label, YODA_H5::File &file, bool compress)
Helper class to extract information from YODA_H5::DataSets.
vector< T > readSlice(size_t len) noexcept Method to read a subset of len elements from the dataset, starting at the current row/column cursor.
vector< T > readAttribute(const string &label) const noexcept Method to read an attribute decorated onto this dataset.
H5DataSetReader() Nullary constructor.
T next() noexcept Load next item and increment cursor.
vector< T > readAt(size_t row, size_t len) noexcept Method to read a subset of len elements of the 1D dataset, starting at the given row.
H5DataSetReader(const YODA_H5::File &h5file, const string &label)
void skip(size_t len) noexcept Move internal cursor by len elements.
vector< T > read() noexcept Method to read and return the entire 1D dataset.
vector< T > read(size_t len) noexcept Method to read a subset of the 1D dataset.
A helper class to deal with chunking of H5 datasets.
size_t rowPos() const noexcept
size_t colPos() const noexcept
H5DataSetWriter(YODA_H5::File &h5file, const string &label, size_t datalen, size_t chunksize, bool compress)
void createAttribute(const string &label, vector< U > &&data) noexcept Method to decorate dataset with an attribute.
void writeSlice(vector< T > &&data) noexcept Method to write a slice that may span multiple rows/columns.
Helper class to extract AO information from a H5 file.
vector< string > loadSources() noexcept Labels of error sources of current AO.
void skipMasks() noexcept Skips next set of masked indices of current AO.
vector< size_t > loadMasks() noexcept Indices of masked bins in current AO.
const string & path() const Path of current AO.
H5FileManager(const YODA_H5::File &file) Constructor.
static constexpr size_t AO_META
void skipCommon() noexcept Skips next set of annotations and content of current AO.
vector< EdgeT > loadEdges() noexcept Returns next set of edges of type EdgeT.
vector< string > loadAnnotations() noexcept Serialized annotations of current AO.
vector< double > loadContent() noexcept Serialized content of current AO.
size_t size() const Number of AOs in this H5 file.
size_t version() const H5 YODA format version.
bool next() Loads next AO from file.
void skipSources() noexcept Skips next set of error sources of current AO.
const string & type() const Type of current AO.
void skipEdges() noexcept Skips next set of edges of type EdgeT.
Error for file reading errors.
Anonymous namespace to limit visibility.
typename std::shared_ptr< EdgeHandlerBase > EdgeHandlerBasePtr
YODA_H5::DataSet H5DataSet(YODA_H5::File &h5file, const string &label, vector< T > &&data, bool compress) Helper method to construct and fill a YODA_H5::DataSet.
typename std::shared_ptr< EdgeHandler< T > > EdgeHandlerPtr Convenience aliases.
void write(const std::string &filename, const AnalysisObject &ao, int precision=-1) Write out object ao to file filename.
void read(const std::string &filename, std::vector< AnalysisObject * > &aos, const std::string &match="", const std::string &unmatch="") Read in a collection of objects aos from file filename.
static const char * name()
|