yoda is hosted by Hepforge, IPPP Durham
YODA - Yet more Objects for Data Analysis 2.0.2
ReaderUtils.h
Go to the documentation of this file.
1// -*- C++ -*-
2//
3// This file is part of YODA -- Yet more Objects for Data Analysis
4// Copyright (C) 2008-2024 The YODA collaboration (see AUTHORS for details)
5//
6#ifndef YODA_READERUTILS_H
7#define YODA_READERUTILS_H
8
10#include "YODA/Counter.h"
11#include "YODA/Estimate0D.h"
12#include "YODA/Scatter.h"
13#include "YODA/Histo.h"
14#include "YODA/Profile.h"
15#include "YODA/BinnedEstimate.h"
16
17#include <iostream>
18#include <locale>
19#include <cstring>
20#include <regex>
21#include <cmath>
22
23#ifdef WITH_OSX
24#include <xlocale.h>
25#endif
26
27using std::string;
28using std::vector;
29
30namespace YODA {
31
33 namespace {
34
// Pattern matching one quoted token. As a regex (after C++ un-escaping) it reads
//   (["'])(?:(?=(\\?))\2.)*?\1
// i.e. an opening single or double quote (group 1), then lazily any sequence of
// characters where each character may be preceded by a backslash (so an escaped
// quote is consumed as a pair), up to the next occurrence of the same quote
// character that opened the token.
35 static const std::regex regex_string_pat("([\"\'])(?:(?=(\\\\?))\\2.)*?\\1");
36
37 }
38
39
41
43 class aistringstream {
44 public:
45 // Constructor from char*
46 aistringstream(const char* line=0) {
47 reset(line);
48 _set_locale();
49 }
50 // Constructor from std::string
51 aistringstream(const string& line) {
52 reset(line);
53 _set_locale();
54 }
55 ~aistringstream() {
56 _reset_locale();
57 }
58
59 bool peek(const std::string& s) const {
60 return s == std::string(_next, s.size());
61 }
62
63 // Re-init to new line as char*
64 void reset(const char* line=0) {
65 _next = const_cast<char*>(line);
66 _new_next = _next;
67 _error = false;
68 }
69 // Re-init to new line as std::string
70 void reset(const string& line) { reset(line.c_str()); }
71
72 // Tokenizing stream operator (forwards to specialisations)
73 template<class T>
74 aistringstream& operator >> (T& value) {
75 _get(value);
76 if (_new_next == _next) _error = true; // handy error condition behaviour!
77 _next = _new_next;
78 return *this;
79 }
80
81 // Allow use of operator>> in a while loop
82 operator bool() const { return !_error; }
83
84
85 private:
86
87 // Changes the thread-local locale to interpret numbers in the "C" locale
88 void _set_locale() {
89 _locale_set = newlocale(LC_NUMERIC_MASK, "C", NULL);
90 _locale_prev = uselocale(_locale_set);
91 if (!_locale_prev) {
92 throw ReadError(std::string("Error setting locale: ") + strerror(errno));
93 }
94 }
95 void _reset_locale() {
96 if (!uselocale(_locale_prev)) {
97 throw ReadError(std::string("Error setting locale: ") + strerror(errno));
98 }
99 freelocale(_locale_set);
100 }
101
102 void _get(double& x) { x = std::strtod(_next, &_new_next); }
103 void _get(float& x) { x = std::strtof(_next, &_new_next); }
104 void _get(int& i) { i = std::strtol(_next, &_new_next, 10); } // force base 10!
105 void _get(long& i) { i = std::strtol(_next, &_new_next, 10); } // force base 10!
106 void _get(unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); } // force base 10!
107 void _get(long unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); } // force base 10!
108 void _get(string& x) {
109 while (std::isspace(*_next) && _next[0] != '\0') _next += 1;
110 _new_next = _next;
111 while (!std::isspace(*_new_next) && _new_next[0] != '\0') _new_next += 1;
112 x = string(_next, _new_next-_next);
113 }
114
115 locale_t _locale_set, _locale_prev;
116 char *_next, *_new_next;
117 bool _error;
118 };
119
120
121 public:
122
125
// Virtual destructor for the polymorphic reader base.
127 virtual ~AOReaderBase() { }
128
// Consume one line of the textual record of the object currently being read.
// Implementations accumulate whatever they extract until assemble() is called.
129 virtual void parse(const string& line) = 0;
130
// Build an analysis object from the state accumulated by parse(), using @a path
// as its path. The specialisations in this file return a `new`-allocated object
// and reset their accumulated state.
// NOTE(review): presumably the caller takes ownership of the returned pointer -- confirm.
131 virtual AnalysisObject* assemble(const string& path = "") = 0;
132
133 template<typename T>
134 void extractVector(const std::string& line, std::vector<T>& vec) {
135 if constexpr (std::is_same<T, std::string>::value) {
136 string::const_iterator initpos( line.cbegin() );
137 const string::const_iterator finpos( line.cend() );
138 std::smatch m;
139 while ( std::regex_search(initpos, finpos, m, regex_string_pat) ) {
140 string label;
141 std::stringstream ss(m[0].str());
142 ss >> std::quoted(label); // removes outer quotes and de-escapes inner quotes
143 vec.push_back(label);
144 initpos = m.suffix().first;
145 }
146 }
147 else {
148 std::string content = line.substr(line.find(": [")+3);
149 content.pop_back(); // remove the "]" at the end
150 for (const std::string& item : Utils::split(content, ",")) {
151 aiss.reset(item);
152 T tmp;
153 aiss >> tmp;
154 vec.push_back(std::move(tmp));
155 }
156 }
157 }
158
159 protected:
160
161 aistringstream aiss;
162
163 };
164
165
166
167
168 template<class T>
169 class AOReader;
170
171 template<>
172 class AOReader<Counter> : public AOReaderBase {
173
174 Dbn0D dbn;
175
176 public:
177
178 void parse(const string& line) {
179 aiss.reset(line);
180 double sumw(0), sumw2(0), n(0);
181 aiss >> sumw >> sumw2 >> n;
182 dbn = Dbn0D(n, sumw, sumw2);
183 }
184
185 AnalysisObject* assemble(const string& path = "") {
186 auto* ao = new Counter(path);
187 ao->setDbn(dbn);
188 dbn = Dbn0D();
189 return ao;
190 }
191 };
192
193
194 template<>
196
197 Estimate0D est;
198 vector<string> sources;
199
200 void readErrors(std::map<string,std::pair<double,double>>& errors) {
201 string eDn, eUp;
202 for (size_t i = 0; i < sources.size(); ++i) {
203 aiss >> eDn >> eUp;
204 if (eDn != "---" && eUp != "---") {
205 errors[sources[i]] = { Utils::toDbl(eDn), Utils::toDbl(eUp) };
206 }
207 }
208 }
209
210 public:
211
212 void parse(const string& line) {
213 if (!line.rfind("ErrorLabels: ", 0)) { // parse error labels
214 extractVector<std::string>(line, sources);
215 return;
216 }
217 // parse content
218 aiss.reset(line);
219 double val(0);
220 aiss >> val;
221 std::map<string,std::pair<double,double>> errors;
222 readErrors(errors);
223 est = Estimate0D(val, errors);
224 }
225
226 AnalysisObject* assemble(const string& path = "") {
227
228 auto* ao = new Estimate0D(est, path);
229 est = Estimate0D();
230 sources.clear();
231 return ao;
232 }
233
234 };
235
236
237 template <size_t N>
238 class AOReader<ScatterND<N>> : public AOReaderBase {
239
240 vector<PointND<N>> points;
241
242 template<size_t I>
243 void readCoords(vector<double>& vals, vector<double>& errm, vector<double>& errp) {
244 if constexpr(I < N) {
245 double v(0), em(0), ep(0);
246 aiss >> v >> em >> ep;
247 vals[I] = v;
248 errm[I] = fabs(em);
249 errp[I] = fabs(ep);
250 readCoords<I+1>(vals, errm, errp);
251 }
252 }
253
254 public:
255
256 void parse(const string& line) {
257 aiss.reset(line);
258 vector<double> vals(N), errm(N), errp(N);
259 readCoords<0>(vals, errm, errp);
260 points.push_back(PointND<N>(vals, errm, errp));
261 }
262
263 AnalysisObject* assemble(const string& path = "") {
264 auto* ao = new ScatterND<N>();
265 ao->setPath(path);
266 ao->addPoints(points);
267 points.clear();
268 return ao;
269 }
270 };
271
272
// Reader specialisation for binned distributions. It accepts both the
// current format (explicit "Edges(A..." lines followed by one line per bin)
// and the YODA1-style legacy format (edges given per bin line, plus
// "Total"/"Underflow"/"Overflow" lines).
273 template <size_t DbnN, typename... AxisT>
274 class AOReader<BinnedDbn<DbnN, AxisT...>> : public AOReaderBase {
275
276 using BaseT = BinnedDbn<DbnN, AxisT...>;
277
// True if the edge type of axis I is a floating-point type (continuous axis)
278 template <size_t I>
279 using is_CAxis = typename std::is_floating_point<typename std::tuple_element_t<I, std::tuple<AxisT...>>>;
280
// Bin edges collected so far, one vector per axis
281 std::tuple<vector<AxisT> ...> edges;
// Distribution read from a legacy "Overflow" line (only filled for one axis)
282 Dbn<DbnN> yoda1Overflow;
// Distributions read so far, in the order their lines were parsed
283 vector<Dbn<DbnN>> dbns;
// Indices read from the "MaskedBins: " line
284 vector<size_t> maskedBins;
// Scratch space for the cross terms of the line being parsed
285 std::array<double,DbnN*(DbnN-1)/2> crossTerms;
// Set once a line containing "Total" has been seen
286 bool isYODA1 = false;
// Index of the axis that the next "Edges(A" line belongs to
287 size_t axisCheck = 0;
288
289
// Legacy format: read a (low, high) edge pair for every axis from the
// current line. The low edge is only stored if it is finite and no edge has
// been stored yet for that axis; the high edge is only stored if it is
// finite, the edge list is non-empty and it differs from the last stored edge.
// Throws BinningError for axes that are not floating-point.
290 template<size_t I>
291 void readEdges() { // YODA1 version for backwards compatibility
292 if constexpr(I < sizeof...(AxisT)) {
293 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
294 if constexpr (is_CAxis<I>::value) { // continuous case
295 EdgeT lo, hi;
296 aiss >> lo >> hi;
297 auto& curr_edges = std::get<I>(edges);
298 if (!std::isinf(lo)) {
299 if (curr_edges.empty()) curr_edges.push_back(lo);
300 }
301 if (!std::isinf(hi)) {
302 if (curr_edges.size() && curr_edges[ curr_edges.size() - 1 ] != hi) {
303 curr_edges.push_back(hi);
304 }
305 }
306 }
307 else { // discrete case
308 throw BinningError("Discrete axes are not supported in this YODA1-style legacy format.");
309 }
310 readEdges<I+1>();
311 }
312 }
313
// Current format: append the edge list on @a line to the axis whose index
// equals axisCheck; the compile-time recursion over I only serves to select
// that axis (and its edge type) at run time.
314 template<size_t I>
315 void readEdges(const std::string& line) { // YODA2 version
316 if constexpr(I < sizeof...(AxisT)) {
317 if (I == axisCheck) {
318 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
319 auto& curr_edges = std::get<I>(edges);
320 extractVector<EdgeT>(line, curr_edges);
321 }
322 readEdges<I+1>(line);
323 }
324 }
325
// Read DbnN+1 pairs of numbers from the current line into sumW[I], sumW2[I]
326 template<size_t I>
327 void readDbn(std::array<double,DbnN+1>& sumW, std::array<double,DbnN+1>& sumW2) {
328 if constexpr(I <= DbnN) {
329 double w(0), w2(0);
330 aiss >> w >> w2;
331 sumW[I] = w;
332 sumW2[I] = w2;
333 readDbn<I+1>(sumW, sumW2);
334 }
335 }
336
// Construct a new BaseT from the tuple elements selected by Is...,
// then apply the stored bin masks
337 template <class tupleT, size_t... Is>
338 BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
339 BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
340 rtn->maskBins(maskedBins);
341 return rtn;
342 }
343
// Convenience overload: use all sizeof...(AxisT)+1 tuple elements
// (assemble() passes the per-axis edge vectors followed by the path)
344 template <class tupleT>
345 BaseT* make_from_tuple(tupleT&& tuple) {
346 return make_from_tuple(std::forward<tupleT>(tuple),
347 std::make_index_sequence<sizeof...(AxisT)+1>{});
348 }
349
// Empty the edge vector of every axis
350 template<size_t I>
351 void clearEdges() {
352 if constexpr(I < sizeof...(AxisT)) {
353 std::get<I>(edges).clear();
354 clearEdges<I+1>();
355 }
356 }
357
358 public:
359
// Parse one line of the record. Lines are classified in this order:
//  - contains "Total": switch to legacy mode, nothing else is read;
//  - starts with "Edges(A": edge list for the next axis;
//  - starts with "MaskedBins: ": list of masked bin indices;
//  - anything else: a distribution line (optionally preceded by the
//    legacy "Underflow"/"Overflow" labels or the legacy per-bin edges).
360 void parse(const string& line) {
361 if (line.find("Total") != string::npos) {
362 isYODA1 = true;
363 return; // YODA1 backwards compatibility
364 }
365 if (!line.rfind("Edges(A", 0)) { // parse binning
366 readEdges<0>(line);
367 ++axisCheck;
368 return;
369 }
370 if (!line.rfind("MaskedBins: ", 0)) { // parse indices of masked bins
371 extractVector<size_t>(line, maskedBins);
372 return;
373 }
374 aiss.reset(line);
375 if (line.find("Underflow") != string::npos || line.find("Overflow") != string::npos) {
376 // This must be the YODA1-style format ...
377 if constexpr (sizeof...(AxisT) == 1) {
// Skip the two leading label tokens so the numbers come next
378 string tmp1, tmp2;
379 aiss >> tmp1 >> tmp2; // not needed
380 }
381 }
382 else if (isYODA1) readEdges<0>();
// From here on the stream is positioned at the distribution's numbers:
// the (w, w2) pairs, then the cross terms, then the number of entries
383 std::array<double,DbnN+1> sumW, sumW2;
384 readDbn<0>(sumW, sumW2);
385 for (size_t i = 0; i < crossTerms.size(); ++i) {
386 double tmp(0.);
387 aiss >> tmp;
388 crossTerms.at(i) = tmp;
389 }
390 double numEntries(0);
391 aiss >> numEntries;
// An "Overflow" line is kept aside (one axis only) instead of being
// appended to the list; every other line is appended in parsing order
392 if (line.find("Overflow") != string::npos) {
393 if constexpr (sizeof...(AxisT) == 1) {
394 if constexpr (DbnN < 2)
395 yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2);
396 else
397 yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2, crossTerms);
398 }
399 }
400 else {
401 if constexpr (DbnN < 2) {
402 dbns.emplace_back(numEntries, sumW, sumW2);
403 }
404 else {
405 dbns.emplace_back(numEntries, sumW, sumW2, crossTerms);
406 }
407 }
408 }
409
// Build a new BaseT from the collected edges and @a path, fill its bins
// with the parsed distributions, and reset all reader state.
410 AnalysisObject* assemble(const string& path = "") {
411
412 auto args = std::tuple_cat(edges, std::make_tuple(path));
413 BaseT* ao = make_from_tuple(std::move(args));
414
415 size_t global_index = 0;
416 if constexpr (sizeof...(AxisT) == 2) {
417 if (isYODA1) { // 2D objects had no under-/overflows in Y1
// Fill by (ix, iy) starting from 1, consuming the distributions in order
418 for (size_t ix = 1; ix < ao->numBinsAt(0)+1; ++ix) { //< visible bins only
419 for (size_t iy = 1; iy < ao->numBinsAt(1)+1; ++iy) { //< visible bins only
420 ao->bin(ix,iy).set(std::move(dbns[global_index++]));
421 }
422 }
423 }
424 }
// All other cases: distributions map onto consecutive global bin indices
425 if ( !(isYODA1 && sizeof...(AxisT) == 2) ) { //< still works for Y1-style 1D
426 for (auto&& d : dbns) {
427 ao->bin(global_index++).set(std::move(d));
428 }
429 }
430
431 if constexpr (sizeof...(AxisT) == 1) { // YODA1-style overflows
// The stored overflow goes into the bin following the last one filled above
432 if (isYODA1) ao->bin(global_index).set(yoda1Overflow);
433 yoda1Overflow = Dbn<DbnN>();
434 }
435
// Reset state so the reader can be reused for the next object
436 crossTerms.fill(0);
437 maskedBins.clear();
438 isYODA1 = false;
439 clearEdges<0>();
440 dbns.clear();
441 axisCheck = 0;
442 return ao;
443 }
444 };
445
446
447 template <typename... AxisT>
448 class AOReader<BinnedEstimate<AxisT...>> : public AOReaderBase {
449
450 using BaseT = BinnedEstimate<AxisT...>;
451
452 std::tuple<vector<AxisT> ...> edges;
453 vector<Estimate> estimates;
454 vector<size_t> maskedBins;
455 vector<string> sources;
456 size_t axisCheck = 0;
457
458
459 template<size_t I>
460 void readEdges(const std::string& line) {
461 if constexpr(I < sizeof...(AxisT)) {
462 if (I == axisCheck) {
463 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
464 auto& curr_edges = std::get<I>(edges);
465 extractVector<EdgeT>(line, curr_edges);
466 }
467 readEdges<I+1>(line);
468 }
469 }
470
471 void readErrors(std::map<string,std::pair<double,double>>& errors) {
472 string eDn, eUp;
473 for (const std::string& src : sources) {
474 aiss >> eDn >> eUp;
475 if (eDn != "---" && eUp != "---") {
476 errors[src] = { Utils::toDbl(eDn), Utils::toDbl(eUp) };
477 }
478 }
479 }
480
481 template <class tupleT, size_t... Is>
482 BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
483 BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
484 rtn->maskBins(maskedBins);
485 return rtn;
486 }
487
488 template <class tupleT>
489 BaseT* make_from_tuple(tupleT&& tuple) {
490 return make_from_tuple(std::forward<tupleT>(tuple),
491 std::make_index_sequence<sizeof...(AxisT)+1>{});
492 }
493
494 template<size_t I>
495 void clearEdges() {
496 if constexpr(I < sizeof...(AxisT)) {
497 std::get<I>(edges).clear();
498 clearEdges<I+1>();
499 }
500 }
501
502 public:
503
504 void parse(const string& line) {
505 if (!line.rfind("Edges(A", 0)) { // parse binning
506 readEdges<0>(line);
507 ++axisCheck;
508 return;
509 }
510 if (!line.rfind("MaskedBins: ", 0)) { // parse indices of masked bins
511 extractVector<size_t>(line, maskedBins);
512 return;
513 }
514 if (!line.rfind("ErrorLabels: ", 0)) { // parse error labels
515 extractVector<std::string>(line, sources);
516 return;
517 }
518 // parse bin content
519 aiss.reset(line);
520 double val(0);
521 aiss >> val;
522 std::map<string,std::pair<double,double>> errors;
523 readErrors(errors);
524 estimates.emplace_back(val, errors);
525 }
526
527 AnalysisObject* assemble(const string& path = "") {
528
529 auto args = std::tuple_cat(edges, std::make_tuple(path));
530 BaseT* ao = make_from_tuple(std::move(args));
531
532 size_t global_index = 0;
533 for (auto&& e : estimates) {
534 ao->bin(global_index++) = std::move(e);
535 }
536
537 clearEdges<0>();
538 sources.clear();
539 estimates.clear();
540 maskedBins.clear();
541 axisCheck = 0;
542 return ao;
543 }
544 };
545
546
547}
548
549#endif
AOReaderBase()
Default constructor.
virtual void parse(const string &line)=0
virtual ~AOReaderBase()
Default destructor.
aistringstream aiss
virtual AnalysisObject * assemble(const string &path="")=0
void extractVector(const std::string &line, std::vector< T > &vec)
AnalysisObject * assemble(const string &path="")
AnalysisObject * assemble(const string &path="")
void parse(const string &line)
AnalysisObject * assemble(const string &path="")
AnalysisObject * assemble(const string &path="")
void parse(const string &line)
void parse(const string &line)
AnalysisObject * assemble(const string &path="")
AnalysisObject is the base class for histograms and scatters.
User-facing BinnedDbn class in arbitrary dimension.
Definition BinnedDbn.h:50
Forward declaration.
BinT & bin(size_t idx) noexcept
Returns reference to the bin at idx.
void maskBins(const std::vector< size_t > &indicesToMask, const bool status=true) noexcept
Mask a range of bins.
size_t numBinsAt(const size_t axisN, const bool includeOverflows=false) const noexcept
Number of bins in the BinnedStorage.
Error for general binning problems.
Definition Exceptions.h:27
A weighted counter.
Definition Counter.h:26
Partial template specialisation for Dbn0D.
Definition Dbn.h:647
User-facing Dbn class inheriting from DbnBase.
Definition Dbn.h:637
An estimate in 0D.
Definition Estimate0D.h:24
Error for file reading errors.
Definition Exceptions.h:86
A generic data type which is just a collection of n-dim data points with errors.
Definition Scatter.h:154
Anonymous namespace to limit visibility.
Dbn< 0 > Dbn0D
User-friendly aliases.
Definition Dbn.h:775