yoda is hosted by Hepforge, IPPP Durham
YODA - Yet more Objects for Data Analysis 2.0.2
MathUtils.h
Go to the documentation of this file.
1// -*- C++ -*-
2//
3// This file is part of YODA -- Yet more Objects for Data Analysis
4// Copyright (C) 2008-2024 The YODA collaboration (see AUTHORS for details)
5//
6#ifndef YODA_MathUtils_H
7#define YODA_MathUtils_H
8
10
11#include "YODA/Exceptions.h"
13
14#include <algorithm>
15#include <functional>
16#include <numeric>
17#include <cassert>
18#include <cfloat>
19#include <climits>
20#include <cmath>
21#include <functional>
22#include <iostream>
23#include <limits>
24#include <map>
25#include <numeric>
26#include <ostream>
27#include <sstream>
28#include <stdexcept>
29#include <string>
30#include <utility>
31#include <vector>
32
33namespace YODA {
34
35
38 const static double MAXDOUBLE = DBL_MAX; // was std::numeric_limits<double>::max(); -- warns in GCC5
39 const static double MAXINT = INT_MAX; // was std::numeric_limits<int>::max(); -- warns in GCC5
40
42 static const double PI = M_PI;
43
45 static const double TWOPI = 2*M_PI;
46
48 static const double HALFPI = M_PI_2;
49
51 enum Sign { MINUS = -1, ZERO = 0, PLUS = 1 };
52
53
55
56
61 template <typename NUM>
62 inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
63 isZero(NUM val, double tolerance=1e-8) {
64 return fabs(val) < tolerance;
65 }
66
71 template <typename NUM>
72 inline typename std::enable_if_t<std::is_integral_v<NUM>, bool>
73 isZero(NUM val, double = 1e-5) {
74 return val==0;
75 }
76
78 template <typename NUM>
79 inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
80 isNaN(NUM val) { return std::isnan(val); }
81
83 template <typename NUM>
84 inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
85 notNaN(NUM val) { return !std::isnan(val); }
86
92 template <typename N1, typename N2>
93 inline typename std::enable_if_t<
94 std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2> &&
95 (std::is_floating_point_v<N1> || std::is_floating_point_v<N2>), bool>
96 fuzzyEquals(N1 a, N2 b, double tolerance=1e-5) {
97 const double absavg = (std::abs(a) + std::abs(b))/2.0;
98 const double absdiff = std::abs(a - b);
99 const bool rtn = (isZero(a) && isZero(b)) || absdiff < tolerance*absavg;
100 return rtn;
101 }
102
107 template <typename N1, typename N2>
108 inline typename std::enable_if_t<
109 std::is_integral_v<N1> && std::is_integral_v<N2>, bool>
110 fuzzyEquals(N1 a, N2 b, double) { //< NB. unused tolerance parameter for ints, still needs a default value!
111 return a == b;
112 }
113
115 static std::function<bool(const double, const double)> fuzzyEqComp =
116 [](const double& lhs, const double& rhs) { return fuzzyEquals(lhs, rhs); };
117
118
122 template <typename N1, typename N2>
123 inline typename std::enable_if_t<
124 std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
125 fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5) {
126 return a > b || fuzzyEquals(a, b, tolerance);
127 }
128
132 template <typename N1, typename N2>
133 inline typename std::enable_if_t<
134 std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
135 fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5) {
136 return a < b || fuzzyEquals(a, b, tolerance);
137 }
138
140 inline double approx(double a, int n = 5) {
141 double roundTo = pow(10.0,n);
142 a *= roundTo;
143 a = floor(a);
144 return a/roundTo;
145 }
147
148
150
151
156 enum RangeBoundary { OPEN=0, SOFT=0, CLOSED=1, HARD=1 };
157
158
163 template<typename NUM>
164 inline bool inRange(NUM value, NUM low, NUM high,
165 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
166 if (lowbound == OPEN && highbound == OPEN) {
167 return (value > low && value < high);
168 } else if (lowbound == OPEN && highbound == CLOSED) {
169 return (value > low && value <= high);
170 } else if (lowbound == CLOSED && highbound == OPEN) {
171 return (value >= low && value < high);
172 } else { // if (lowbound == CLOSED && highbound == CLOSED) {
173 return (value >= low && value <= high);
174 }
175 }
176
178 template<typename NUM>
179 inline bool inRange(NUM value, std::pair<NUM, NUM> lowhigh,
180 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
181 return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);
182 }
183
184
189 inline bool inRange(int value, int low, int high,
190 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=CLOSED) {
191 if (lowbound == OPEN && highbound == OPEN) {
192 return (value > low && value < high);
193 } else if (lowbound == OPEN && highbound == CLOSED) {
194 return (value > low && value <= high);
195 } else if (lowbound == CLOSED && highbound == OPEN) {
196 return (value >= low && value < high);
197 } else { // if (lowbound == CLOSED && highbound == CLOSED) {
198 return (value >= low && value <= high);
199 }
200 }
201
203 inline bool inRange(int value, std::pair<int, int> lowhigh,
204 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
205 return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);
206 }
207
209
210
212
213
215 template <typename NUM>
216 inline NUM sqr(NUM a) {
217 return a*a;
218 }
219
221 template <typename Num>
222 inline Num add_quad(Num a, Num b) {
223 return sqrt(a*a + b*b);
224 }
225
227 template <typename Num>
228 inline Num add_quad(Num a, Num b, Num c) {
229 return sqrt(a*a + b*b + c*c);
230 }
231
233 inline int sign(double val) {
234 if (isZero(val)) return ZERO;
235 const int valsign = (val > 0) ? PLUS : MINUS;
236 return valsign;
237 }
238
240 inline int sign(int val) {
241 if (val == 0) return ZERO;
242 return (val > 0) ? PLUS : MINUS;
243 }
244
246 inline int sign(long val) {
247 if (val == 0) return ZERO;
248 return (val > 0) ? PLUS : MINUS;
249 }
250
252
253
255
256
261 inline std::vector<double> linspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
262 if (xmax < xmin) throw RangeError("xmax should not be smaller than xmin!");
263 if (nbins == 0) throw RangeError("Requested number of bins is 0!");
264 std::vector<double> rtn;
265 const double interval = (xmax-xmin)/static_cast<double>(nbins);
266 for (size_t i = 0; i < nbins; ++i) {
267 rtn.push_back(xmin + i*interval);
268 }
269 assert(rtn.size() == nbins);
270 if (include_end) rtn.push_back(xmax); // exact xmax, not result of n * interval
271 return rtn;
272 }
273
274
280 inline std::vector<double> logspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
281 if (xmax < xmin) throw RangeError("xmax should not be smaller than xmin!");
282 if (xmin < 0) throw RangeError("xmin should not be negative!");
283 if (nbins == 0) throw RangeError("Requested number of bins is 0!");
284 const double logxmin = std::log(xmin);
285 const double logxmax = std::log(xmax);
286 const std::vector<double> logvals = linspace(nbins, logxmin, logxmax);
287 assert(logvals.size() == nbins+1);
288 std::vector<double> rtn; rtn.reserve(logvals.size());
289 rtn.push_back(xmin);
290 for (size_t i = 1; i < logvals.size()-1; ++i) {
291 rtn.push_back(std::exp(logvals[i]));
292 }
293 assert(rtn.size() == nbins);
294 if (include_end) rtn.push_back(xmax);
295 return rtn;
296 }
297
298
300 //inline std::vector<double> fspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn) {
301
302
322 inline std::vector<double> pdfspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn, size_t nsample=10000) {
323 const double dx = (xmax-xmin)/(double)nsample;
324 const std::vector<double> xs = linspace(nsample, xmin, xmax);
325 std::vector<double> ys(0, nsample);
326 auto posfn = [&](double x){return std::max(fn(x), 0.0);};
327 std::transform(xs.begin(), xs.end(), ys.begin(), posfn);
328 std::vector<double> areas; areas.reserve(nsample);
329 double areasum = 0;
330 for (size_t i = 0; i < ys.size()-1; ++i) {
331 const double area = (ys[i] + ys[i+1])*dx/2.0;
332 areas[i] = area;
333 areasum += area;
334 }
335 const double df = areasum/(double)nbins;
336 std::vector<double> xedges{xmin}; xedges.reserve(nbins+1);
337 double fsum = 0;
338 for (size_t i = 0; i < nsample-1; ++i) {
339 fsum += areas[i];
340 if (fsum > df) {
341 fsum = 0;
342 xedges.push_back(xs[i+1]);
343 }
344 }
345 xedges.push_back(xmax);
346 assert(xedges.size() == nbins+1);
347 return xedges;
348 }
349
350
354 template <typename NUM>
355 inline int index_between(const NUM& val, const std::vector<NUM>& binedges) {
356 if (!inRange(val, binedges.front(), binedges.back())) return -1; //< Out of histo range
357 int index = -1;
358 for (size_t i = 1; i < binedges.size(); ++i) {
359 if (val < binedges[i]) {
360 index = i-1;
361 break;
362 }
363 }
364 assert(inRange(index, -1, binedges.size()-1));
365 return index;
366 }
367
369
370
372
373
375 inline double effNumEntries(const double sumW, const double sumW2) {
376 if (isZero(sumW2)) return 0;
377 return sqr(sumW) / sumW2;
378 }
379
381 inline double effNumEntries(const std::vector<double>& weights) {
382 double sumW = 0.0, sumW2 = 0.0;
383 for (size_t i = 0; i < weights.size(); ++i) {
384 sumW += weights[i];
385 sumW2 += sqr(weights[i]);
386 }
387 return effNumEntries(sumW, sumW2);
388 }
389
391 inline double mean(const std::vector<int>& sample) {
392 double mean = 0.0;
393 for (size_t i=0; i<sample.size(); ++i) {
394 mean += sample[i];
395 }
396 return mean/sample.size();
397 }
398
400 inline double mean(const double sumWX, const double sumW) {
401 return sumW? sumWX / sumW : std::numeric_limits<double>::quiet_NaN();
402 }
403
405 inline double mean(const std::vector<double>& sample,
406 const std::vector<double>& weights) {
407 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
408 double sumWX = 0., sumW = 0.;
409 for (size_t i = 0; i < sample.size(); ++i) {
410 sumW += weights[i];
411 sumWX += weights[i]*sample[i];
412 }
413 return mean(sumWX, sumW);
414 }
415
421 inline double variance(const double sumWX, const double sumW,
422 const double sumWX2, const double sumW2) {
423 const double num = sumWX2*sumW - sqr(sumWX);
424 const double den = sqr(sumW) - sumW2;
428 // if (fabs(num) < 1e-10 && fabs(den) < 1e-10) {
429 // return std::numeric_limits<double>::quiet_NaN();
430 // }
435 return den? fabs(num/den): std::numeric_limits<double>::quiet_NaN();
436 }
437
439 inline double variance(const std::vector<double>& sample,
440 const std::vector<double>& weights) {
441 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
442 if (fuzzyLessEquals(effNumEntries(weights), 1.0)) {
443 //throw LowStatsError("Requested variance of a distribution with only one effective entry");
444 return std::numeric_limits<double>::quiet_NaN();
445 }
446 double sumWX = 0., sumW = 0.;
447 double sumWX2 = 0., sumW2 = 0.;
448 for (size_t i = 0; i < sample.size(); ++i) {
449 sumW += weights[i];
450 sumWX += weights[i]*sample[i];
451 sumW2 += sqr(weights[i]);
452 sumWX2 += weights[i]*sqr(sample[i]);
453 }
454 return variance(sumWX, sumW, sumWX2, sumW2);
455 }
456
458 inline double stdDev(const double sumWX, const double sumW,
459 const double sumWX2, const double sumW2) {
460 return std::sqrt(variance(sumWX, sumW, sumWX2, sumW2));
461 }
462
464 inline double stdDev(const std::vector<double>& sample,
465 const std::vector<double>& weights) {
466 return std::sqrt(variance(sample, weights));
467 }
468
470 inline double stdErr(const double sumWX, const double sumW,
471 const double sumWX2, const double sumW2) {
472 const double effN = effNumEntries(sumW, sumW2);
473 if (effN == 0) return std::numeric_limits<double>::quiet_NaN();
474 const double var = variance(sumWX, sumW, sumWX2, sumW2);
475 return std::sqrt(var / effN);
476 }
477
479 inline double stdErr(const std::vector<double>& sample,
480 const std::vector<double>& weights) {
481 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
482 const double effN = effNumEntries(weights);
483 if (effN == 0) return std::numeric_limits<double>::quiet_NaN();
484 const double var = variance(sample, weights);
485 return std::sqrt(var / effN);
486 }
487
489 inline double RMS(const double sumWX2, const double sumW, const double sumW2) {
490 // Weighted RMS defined as
491 // rms = sqrt(sum{w x^2} / sum{w})
492 const double effN = effNumEntries(sumW, sumW2);
493 if (effN == 0) return std::numeric_limits<double>::quiet_NaN();
494 const double meanSq = sumWX2 / sumW;
495 return std::sqrt(meanSq);
496 }
497
499 inline double RMS(const std::vector<double>& sample,
500 const std::vector<double>& weights) {
501 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
502 double sumWX2 = 0., sumW = 0., sumW2 = 0.;
503 for (size_t i = 0; i < sample.size(); ++i) {
504 sumW += weights[i];
505 sumW2 += sqr(weights[i]);
506 sumWX2 += weights[i]*sqr(sample[i]);
507 }
508 return RMS(sumWX2, sumW, sumW2);
509 }
510
512 inline double covariance(const std::vector<int>& sample1, const std::vector<int>& sample2) {
513 const double mean1 = mean(sample1);
514 const double mean2 = mean(sample2);
515 const size_t N = sample1.size();
516 double cov = 0.0;
517 for (size_t i = 0; i < N; i++) {
518 const double cov_i = (sample1[i] - mean1)*(sample2[i] - mean2);
519 cov += cov_i;
520 }
521 if (N > 1) return cov/(N-1);
522 else return 0.0;
523 }
524
525
527 inline double correlation(const std::vector<int>& sample1, const std::vector<int>& sample2) {
528 const double cov = covariance(sample1, sample2);
529 const double var1 = covariance(sample1, sample1);
530 const double var2 = covariance(sample2, sample2);
531 const double correlation = cov/sqrt(var1*var2);
532 const double corr_strength = correlation*sqrt(var2/var1);
533 return corr_strength;
534 }
535
536
543 inline double naiveChi2(const std::vector<double>& sample1, const std::vector<double>& sample2,
544 const std::vector<double>& s1errors = std::vector<double>{},
545 const std::vector<double>& s2errors = std::vector<double>{}) {
546 if (sample1.size() != sample2.size()) {
547 throw RangeError("Inputs should have equal length!");
548 }
549 if (s1errors.size() && sample1.size() != s1errors.size()) {
550 throw RangeError("Inputs should have equal length!");
551 }
552 if (s2errors.size() && sample2.size() != s2errors.size()) {
553 throw RangeError("Inputs should have equal length!");
554 }
555 const size_t N = sample1.size();
556 double chi2 = 0.0;
557 for (size_t i = 0; i < N; ++i) {
558 double temp = sqr(sample1[i] - sample2[i]);
559 if (s1errors.size()) {
560 temp /= sqr(s1errors[i]) + sqr(s2errors[i]);
561 }
562 chi2 += temp;
563 }
564 return chi2;
565 }
566
573 inline double naiveChi2reduced(const std::vector<double>& sample1, const std::vector<double>& sample2,
574 const std::vector<double>& s1errors = std::vector<double>{},
575 const std::vector<double>& s2errors = std::vector<double>{}) {
576 if (sample1.empty()) throw RangeError("Inputs should not have 0 length!");
577 return naiveChi2(sample1, sample2, s1errors, s2errors)/sample1.size();
578 }
579
581
582
583}
584
585#endif
Error for e.g. use of invalid bin ranges.
Definition Exceptions.h:34
Anonymous namespace to limit visibility.
double stdErr(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted standard error of a sample.
Definition MathUtils.h:470
static const double HALFPI
A pre-defined value of .
Definition MathUtils.h:48
double RMS(const double sumWX2, const double sumW, const double sumW2)
Calculate the weighted RMS of a sample.
Definition MathUtils.h:489
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two floating point numbers for <= with a degree of fuzziness.
Definition MathUtils.h:135
std::vector< double > logspace(size_t nbins, double xmin, double xmax, bool include_end=true)
Make a list of nbins + 1 values uniformly spaced in log(x) between xmin and xmax inclusive.
Definition MathUtils.h:280
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two numbers for >= with a degree of fuzziness.
Definition MathUtils.h:125
int index_between(const NUM &val, const std::vector< NUM > &binedges)
Return the bin index of the given value, val, given a vector of bin edges.
Definition MathUtils.h:355
std::enable_if_t< std::is_floating_point_v< NUM >, bool > isNaN(NUM val)
Check if a number is NaN.
Definition MathUtils.h:80
std::vector< double > linspace(size_t nbins, double xmin, double xmax, bool include_end=true)
Make a list of nbins + 1 values uniformly spaced between xmin and xmax inclusive.
Definition MathUtils.h:261
double covariance(const std::vector< int > &sample1, const std::vector< int > &sample2)
Calculate the covariance (variance) between two samples.
Definition MathUtils.h:512
RangeBoundary
Definition MathUtils.h:156
@ CLOSED
Definition MathUtils.h:156
@ OPEN
Definition MathUtils.h:156
@ HARD
Definition MathUtils.h:156
@ SOFT
Definition MathUtils.h:156
double naiveChi2(const std::vector< double > &sample1, const std::vector< double > &sample2, const std::vector< double > &s1errors=std::vector< double >{}, const std::vector< double > &s2errors=std::vector< double >{})
Calculate the error-weighted chi2 statistic between two samples.
Definition MathUtils.h:543
double approx(double a, int n=5)
Returns a number floored at the nth decimal place.
Definition MathUtils.h:140
double correlation(const std::vector< int > &sample1, const std::vector< int > &sample2)
Calculate the correlation strength between two samples.
Definition MathUtils.h:527
static const double TWOPI
A pre-defined value of .
Definition MathUtils.h:45
NUM sqr(NUM a)
Named number-type squaring operation.
Definition MathUtils.h:216
static const double PI
A pre-defined value of .
Definition MathUtils.h:42
double stdDev(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted standard deviation of a sample.
Definition MathUtils.h:458
Num add_quad(Num a, Num b)
Named number-type addition in quadrature operation.
Definition MathUtils.h:222
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 > &&(std::is_floating_point_v< N1 >||std::is_floating_point_v< N2 >), bool > fuzzyEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two numbers for equality with a degree of fuzziness.
Definition MathUtils.h:96
std::enable_if_t< std::is_floating_point_v< NUM >, bool > notNaN(NUM val)
Check if a number is non-NaN.
Definition MathUtils.h:85
double naiveChi2reduced(const std::vector< double > &sample1, const std::vector< double > &sample2, const std::vector< double > &s1errors=std::vector< double >{}, const std::vector< double > &s2errors=std::vector< double >{})
Calculate the error-weighted reduced chi2 statistic between two samples.
Definition MathUtils.h:573
int sign(double val)
Find the sign of a number.
Definition MathUtils.h:233
static std::function< bool(const double, const double)> fuzzyEqComp
Comparator wrapper to use with STL algorithms, e.g. std::equal etc.
Definition MathUtils.h:115
static const double MAXDOUBLE
Definition MathUtils.h:38
std::enable_if_t< std::is_floating_point_v< NUM >, bool > isZero(NUM val, double tolerance=1e-8)
Compare a number to zero.
Definition MathUtils.h:63
static const double MAXINT
Definition MathUtils.h:39
double variance(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted variance of a sample.
Definition MathUtils.h:421
std::vector< double > pdfspace(size_t nbins, double xmin, double xmax, std::function< double(double)> &fn, size_t nsample=10000)
Make a list of nbins + 1 values spaced with density ~ f(x) between xmin and end inclusive.
Definition MathUtils.h:322
double mean(const std::vector< int > &sample)
Calculate the mean of a sample.
Definition MathUtils.h:391
double effNumEntries(const double sumW, const double sumW2)
Calculate the effective number of entries of a sample.
Definition MathUtils.h:375
Sign
Enum for signs of numbers.
Definition MathUtils.h:51
@ ZERO
Definition MathUtils.h:51
@ PLUS
Definition MathUtils.h:51
@ MINUS
Definition MathUtils.h:51
bool inRange(NUM value, NUM low, NUM high, RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN)
Determine if value is in the range low to high, for floating point numbers.
Definition MathUtils.h:164