yoda is hosted by Hepforge, IPPP Durham
YODA - Yet more Objects for Data Analysis 2.0.0
ReaderUtils.h
Go to the documentation of this file.
1// -*- C++ -*-
2//
3// This file is part of YODA -- Yet more Objects for Data Analysis
4// Copyright (C) 2008-2023 The YODA collaboration (see AUTHORS for details)
5//
6#ifndef YODA_READERUTILS_H
7#define YODA_READERUTILS_H
8
10#include "YODA/Counter.h"
11#include "YODA/Estimate0D.h"
12#include "YODA/Scatter.h"
13#include "YODA/Histo.h"
14#include "YODA/Profile.h"
15#include "YODA/BinnedEstimate.h"
16
17#include <iostream>
18#include <locale>
19#include <cstring>
20#include <regex>
21#include <cmath>
22
23#ifdef WITH_OSX
24#include <xlocale.h>
25#endif
26
27using std::string;
28using std::vector;
29
30namespace YODA {
31
/// Pattern matching one single- or double-quoted string. Backslash-escaped
/// characters (including an escaped quote character) are allowed between the quotes,
/// and the match ends at the first unescaped quote of the same kind as the opening one.
///
/// Declared as a C++17 inline variable instead of a static in an anonymous namespace:
/// this header defines inline functions that odr-use the pattern, so the pattern must be
/// the same entity in every translation unit, and it is then only constructed once
/// per program rather than once per translation unit.
inline const std::regex regex_string_pat("([\"\'])(?:(?=(\\\\?))\\2.)*?\\1");
38
39
41
43 class aistringstream {
44 public:
45 // Constructor from char*
46 aistringstream(const char* line=0) {
47 reset(line);
48 _set_locale();
49 }
50 // Constructor from std::string
51 aistringstream(const string& line) {
52 reset(line);
53 _set_locale();
54 }
55 ~aistringstream() {
56 _reset_locale();
57 }
58
59 bool peek(const std::string& s) const {
60 return s == std::string(_next, s.size());
61 }
62
63 // Re-init to new line as char*
64 void reset(const char* line=0) {
65 _next = const_cast<char*>(line);
66 _new_next = _next;
67 _error = false;
68 }
69 // Re-init to new line as std::string
70 void reset(const string& line) { reset(line.c_str()); }
71
72 // Tokenizing stream operator (forwards to specialisations)
73 template<class T>
74 aistringstream& operator >> (T& value) {
75 _get(value);
76 if (_new_next == _next) _error = true; // handy error condition behaviour!
77 _next = _new_next;
78 return *this;
79 }
80
81 // Allow use of operator>> in a while loop
82 operator bool() const { return !_error; }
83
84
85 private:
86
87 // Changes the thread-local locale to interpret numbers in the "C" locale
88 void _set_locale() {
89 _locale_set = newlocale(LC_NUMERIC_MASK, "C", NULL);
90 _locale_prev = uselocale(_locale_set);
91 if (!_locale_prev) {
92 throw ReadError(std::string("Error setting locale: ") + strerror(errno));
93 }
94 }
95 void _reset_locale() {
96 if (!uselocale(_locale_prev)) {
97 throw ReadError(std::string("Error setting locale: ") + strerror(errno));
98 }
99 freelocale(_locale_set);
100 }
101
102 void _get(double& x) { x = std::strtod(_next, &_new_next); }
103 void _get(float& x) { x = std::strtof(_next, &_new_next); }
104 void _get(int& i) { i = std::strtol(_next, &_new_next, 10); } // force base 10!
105 void _get(long& i) { i = std::strtol(_next, &_new_next, 10); } // force base 10!
106 void _get(unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); } // force base 10!
107 void _get(long unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); } // force base 10!
108 void _get(string& x) {
110 while (std::isspace(*_next)) _next += 1;
111 _new_next = _next;
112 while (!std::isspace(*_new_next)) _new_next += 1;
113 x = string(_next, _new_next-_next);
114 }
115
116 locale_t _locale_set, _locale_prev;
117 char *_next, *_new_next;
118 bool _error;
119 };
120
121
122 public:
123
126
128 virtual ~AOReaderBase() { }
129
130 virtual void parse(const string& line) = 0;
131
132 virtual AnalysisObject* assemble(const string& path = "") = 0;
133
134 template<typename T>
135 void extractVector(const std::string& line, std::vector<T>& vec) {
136 if constexpr (std::is_same<T, std::string>::value) {
137 string::const_iterator initpos( line.cbegin() );
138 const string::const_iterator finpos( line.cend() );
139 std::smatch m;
140 while ( std::regex_search(initpos, finpos, m, regex_string_pat) ) {
141 string label;
142 std::stringstream ss(m[0].str());
143 ss >> std::quoted(label); // removes outer quotes and de-escapes inner quotes
144 vec.push_back(label);
145 initpos = m.suffix().first;
146 }
147 }
148 else {
149 std::string content = line.substr(line.find(": [")+3);
150 content.pop_back(); // remove the "]" at the end
151 for (const std::string& item : Utils::split(content, ",")) {
152 aiss.reset(item);
153 T tmp;
154 aiss >> tmp;
155 vec.push_back(std::move(tmp));
156 }
157 }
158 }
159
160 protected:
161
162 aistringstream aiss;
163
164 };
165
166
167
168
169 template<class T>
170 class AOReader;
171
172 template<>
173 class AOReader<Counter> : public AOReaderBase {
174
175 Dbn0D dbn;
176
177 public:
178
179 void parse(const string& line) {
180 aiss.reset(line);
181 double sumw(0), sumw2(0), n(0);
182 aiss >> sumw >> sumw2 >> n;
183 dbn = Dbn0D(n, sumw, sumw2);
184 }
185
186 AnalysisObject* assemble(const string& path = "") {
187 auto* ao = new Counter(path);
188 ao->setDbn(dbn);
189 dbn = Dbn0D();
190 return ao;
191 }
192 };
193
194
195 template<>
197
198 Estimate0D est;
199 vector<string> sources;
200
201 void readErrors(std::map<string,std::pair<double,double>>& errors) {
202 string eDn, eUp;
203 for (size_t i = 0; i < sources.size(); ++i) {
204 aiss >> eDn >> eUp;
205 if (eDn != "---" && eUp != "---") {
206 errors[sources[i]] = { std::stod(eDn), std::stod(eUp) };
207 }
208 }
209 }
210
211 public:
212
213 void parse(const string& line) {
214 if (!line.rfind("ErrorLabels: ", 0)) { // parse error labels
215 extractVector<std::string>(line, sources);
216 return;
217 }
218 // parse content
219 aiss.reset(line);
220 double val(0);
221 aiss >> val;
222 std::map<string,std::pair<double,double>> errors;
223 readErrors(errors);
224 est = Estimate0D(val, errors);
225 }
226
227 AnalysisObject* assemble(const string& path = "") {
228
229 auto* ao = new Estimate0D(est, path);
230 est = Estimate0D();
231 sources.clear();
232 return ao;
233 }
234
235 };
236
237
238 template <size_t N>
239 class AOReader<ScatterND<N>> : public AOReaderBase {
240
241 vector<PointND<N>> points;
242
243 template<size_t I>
244 void readCoords(vector<double>& vals, vector<double>& errm, vector<double>& errp) {
245 if constexpr(I < N) {
246 double v(0), em(0), ep(0);
247 aiss >> v >> em >> ep;
248 vals[I] = v;
249 errm[I] = em;
250 errp[I] = ep;
251 readCoords<I+1>(vals, errm, errp);
252 }
253 }
254
255 public:
256
257 void parse(const string& line) {
258 aiss.reset(line);
259 vector<double> vals(N), errm(N), errp(N);
260 readCoords<0>(vals, errm, errp);
261 points.push_back(PointND<N>(vals, errm, errp));
262 }
263
264 AnalysisObject* assemble(const string& path = "") {
265 auto* ao = new ScatterND<N>();
266 ao->setPath(path);
267 ao->addPoints(points);
268 points.clear();
269 return ao;
270 }
271 };
272
273
274 template <size_t DbnN, typename... AxisT>
275 class AOReader<BinnedDbn<DbnN, AxisT...>> : public AOReaderBase {
276
277 using BaseT = BinnedDbn<DbnN, AxisT...>;
278
279 template <size_t I>
280 using is_CAxis = typename std::is_floating_point<typename std::tuple_element_t<I, std::tuple<AxisT...>>>;
281
282 std::tuple<vector<AxisT> ...> edges;
283 Dbn<DbnN> yoda1Overflow;
284 vector<Dbn<DbnN>> dbns;
285 vector<size_t> maskedBins;
286 std::array<double,DbnN*(DbnN-1)/2> crossTerms;
287 bool isYODA1 = false;
288 size_t axisCheck = 0;
289
290
291 template<size_t I>
292 void readEdges() { // YODA1 version for backwards compatibility
293 if constexpr(I < sizeof...(AxisT)) {
294 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
295 if constexpr (is_CAxis<I>::value) { // continuous case
296 EdgeT lo, hi;
297 aiss >> lo >> hi;
298 if constexpr (I == 0) {
299 if (isYODA1 && !std::isinf(lo)) {
300 auto& curr_edges = std::get<I>(edges);
301 if (curr_edges.empty()) curr_edges.push_back(lo);
302 }
303 }
304 if (!std::isinf(hi)) {
305 auto& curr_edges = std::get<I>(edges);
306 if (curr_edges.empty()) curr_edges.push_back(hi);
307 else if (curr_edges[ curr_edges.size() - 1 ] != hi) {
308 curr_edges.push_back(hi);
309 }
310 }
311 }
312 else { // discrete case
313 throw BinningError("Discrete axes are not supported in this YODA1-style legacy format.");
314 }
315 readEdges<I+1>();
316 }
317 }
318
319 template<size_t I>
320 void readEdges(const std::string& line) { // YODA2 version
321 if constexpr(I < sizeof...(AxisT)) {
322 if (I == axisCheck) {
323 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
324 auto& curr_edges = std::get<I>(edges);
325 extractVector<EdgeT>(line, curr_edges);
326 }
327 readEdges<I+1>(line);
328 }
329 }
330
331 template<size_t I>
332 void readDbn(std::array<double,DbnN+1>& sumW, std::array<double,DbnN+1>& sumW2) {
333 if constexpr(I <= DbnN) {
334 double w(0), w2(0);
335 aiss >> w >> w2;
336 sumW[I] = w;
337 sumW2[I] = w2;
338 readDbn<I+1>(sumW, sumW2);
339 }
340 }
341
342 template <class tupleT, size_t... Is>
343 BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
344 BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
345 rtn->maskBins(maskedBins);
346 return rtn;
347 }
348
349 template <class tupleT>
350 BaseT* make_from_tuple(tupleT&& tuple) {
351 return make_from_tuple(std::forward<tupleT>(tuple),
352 std::make_index_sequence<sizeof...(AxisT)+1>{});
353 }
354
355 template<size_t I>
356 void clearEdges() {
357 if constexpr(I < sizeof...(AxisT)) {
358 std::get<I>(edges).clear();
359 clearEdges<I+1>();
360 }
361 }
362
363 public:
364
365 void parse(const string& line) {
366 if (line.find("Total") != string::npos) {
367 isYODA1 = true;
368 return; // YODA1 backwards compatibility
369 }
370 if (!line.rfind("Edges(A", 0)) { // parse binning
371 readEdges<0>(line);
372 ++axisCheck;
373 return;
374 }
375 if (!line.rfind("MaskedBins: ", 0)) { // parse indices of masked bins
376 extractVector<size_t>(line, maskedBins);
377 return;
378 }
379 aiss.reset(line);
380 if (line.find("Underflow") != string::npos || line.find("Overflow") != string::npos) {
381 // This must be the YODA1-style format ...
382 if constexpr (sizeof...(AxisT) == 1) {
383 string tmp1, tmp2;
384 aiss >> tmp1 >> tmp2; // not needed
385 }
386 }
387 else if (isYODA1) readEdges<0>();
388 std::array<double,DbnN+1> sumW, sumW2;
389 readDbn<0>(sumW, sumW2);
390 for (size_t i = 0; i < crossTerms.size(); ++i) {
391 double tmp(0.);
392 aiss >> tmp;
393 crossTerms.at(i) = tmp;
394 }
395 double numEntries(0);
396 aiss >> numEntries;
397 if (line.find("Overflow") != string::npos) {
398 if constexpr (sizeof...(AxisT) == 1) {
399 if constexpr (DbnN < 2)
400 yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2);
401 else
402 yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2, crossTerms);
403 }
404 }
405 else {
406 if constexpr (DbnN < 2) {
407 dbns.emplace_back(numEntries, sumW, sumW2);
408 }
409 else {
410 dbns.emplace_back(numEntries, sumW, sumW2, crossTerms);
411 }
412 }
413 }
414
415 AnalysisObject* assemble(const string& path = "") {
416
417 auto args = std::tuple_cat(edges, std::make_tuple(path));
418 BaseT* ao = make_from_tuple(std::move(args));
419
420 size_t global_index = 0;
421 if (isYODA1 && sizeof...(AxisT) == 2) ++global_index; // no 2D overflow in Y1
422 for (auto&& d : dbns) {
423 ao->bin(global_index++).set(std::move(d));
424 }
425
426 if constexpr (sizeof...(AxisT) == 1) { // YODA1-style overflows
427 if (isYODA1) ao->bin(global_index).set(yoda1Overflow);
428 yoda1Overflow = Dbn<DbnN>();
429 }
430
431 crossTerms.fill(0);
432 maskedBins.clear();
433 isYODA1 = false;
434 clearEdges<0>();
435 dbns.clear();
436 axisCheck = 0;
437 return ao;
438 }
439 };
440
441
442 template <typename... AxisT>
443 class AOReader<BinnedEstimate<AxisT...>> : public AOReaderBase {
444
445 using BaseT = BinnedEstimate<AxisT...>;
446
447 std::tuple<vector<AxisT> ...> edges;
448 vector<Estimate> estimates;
449 vector<size_t> maskedBins;
450 vector<string> sources;
451 size_t axisCheck = 0;
452
453
454 template<size_t I>
455 void readEdges(const std::string& line) {
456 if constexpr(I < sizeof...(AxisT)) {
457 if (I == axisCheck) {
458 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
459 auto& curr_edges = std::get<I>(edges);
460 extractVector<EdgeT>(line, curr_edges);
461 }
462 readEdges<I+1>(line);
463 }
464 }
465
466 void readErrors(std::map<string,std::pair<double,double>>& errors) {
467 string eDn, eUp;
468 for (const std::string& src : sources) {
469 aiss >> eDn >> eUp;
470 if (eDn != "---" && eUp != "---") {
471 errors[src] = { std::stod(eDn), std::stod(eUp) };
472 }
473 }
474 }
475
476 template <class tupleT, size_t... Is>
477 BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
478 BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
479 rtn->maskBins(maskedBins);
480 return rtn;
481 }
482
483 template <class tupleT>
484 BaseT* make_from_tuple(tupleT&& tuple) {
485 return make_from_tuple(std::forward<tupleT>(tuple),
486 std::make_index_sequence<sizeof...(AxisT)+1>{});
487 }
488
489 template<size_t I>
490 void clearEdges() {
491 if constexpr(I < sizeof...(AxisT)) {
492 std::get<I>(edges).clear();
493 clearEdges<I+1>();
494 }
495 }
496
497 public:
498
499 void parse(const string& line) {
500 if (!line.rfind("Edges(A", 0)) { // parse binning
501 readEdges<0>(line);
502 ++axisCheck;
503 return;
504 }
505 if (!line.rfind("MaskedBins: ", 0)) { // parse indices of masked bins
506 extractVector<size_t>(line, maskedBins);
507 return;
508 }
509 if (!line.rfind("ErrorLabels: ", 0)) { // parse error labels
510 extractVector<std::string>(line, sources);
511 return;
512 }
513 // parse bin content
514 aiss.reset(line);
515 double val(0);
516 aiss >> val;
517 std::map<string,std::pair<double,double>> errors;
518 readErrors(errors);
519 estimates.emplace_back(val, errors);
520 }
521
522 AnalysisObject* assemble(const string& path = "") {
523
524 auto args = std::tuple_cat(edges, std::make_tuple(path));
525 BaseT* ao = make_from_tuple(std::move(args));
526
527 size_t global_index = 0;
528 for (auto&& e : estimates) {
529 ao->bin(global_index++) = std::move(e);
530 }
531
532 clearEdges<0>();
533 sources.clear();
534 estimates.clear();
535 maskedBins.clear();
536 axisCheck = 0;
537 return ao;
538 }
539 };
540
541
542}
543
544#endif
AOReaderBase()
Default constructor.
virtual void parse(const string &line)=0
virtual ~AOReaderBase()
Default destructor.
aistringstream aiss
virtual AnalysisObject * assemble(const string &path="")=0
void extractVector(const std::string &line, std::vector< T > &vec)
AnalysisObject * assemble(const string &path="")
AnalysisObject * assemble(const string &path="")
void parse(const string &line)
AnalysisObject * assemble(const string &path="")
AnalysisObject * assemble(const string &path="")
void parse(const string &line)
void parse(const string &line)
AnalysisObject * assemble(const string &path="")
AnalysisObject is the base class for histograms and scatters.
User-facing BinnedDbn class in arbitrary dimension.
Definition BinnedDbn.h:50
Forward declaration.
BinT & bin(size_t idx) noexcept
Returns reference to the bin at idx.
void maskBins(const std::vector< size_t > &indicesToMask, const bool status=true) noexcept
Mask a range of bins.
Error for general binning problems.
Definition Exceptions.h:27
A weighted counter.
Definition Counter.h:26
Partial template specialisation for Dbn0D.
Definition Dbn.h:647
User-facing Dbn class inheriting from DbnBase.
Definition Dbn.h:637
An estimate in 0D.
Definition Estimate0D.h:24
Error for file reading errors.
Definition Exceptions.h:86
A generic data type which is just a collection of n-dim data points with errors.
Definition Scatter.h:154
Anonymous namespace to limit visibility.
Dbn< 0 > Dbn0D
User-friendly aliases.
Definition Dbn.h:771