yoda is hosted by Hepforge, IPPP Durham
YODA - Yet more Objects for Data Analysis 2.0.3
MathUtils.h
Go to the documentation of this file.
1// -*- C++ -*-
2//
3// This file is part of YODA -- Yet more Objects for Data Analysis
4// Copyright (C) 2008-2024 The YODA collaboration (see AUTHORS for details)
5//
6#ifndef YODA_MathUtils_H
7#define YODA_MathUtils_H
8
10
11#include "YODA/Exceptions.h"
13
14#include <algorithm>
15#include <functional>
16#include <numeric>
17#include <cassert>
18#include <cfloat>
19#include <climits>
20#include <cmath>
21#include <functional>
22#include <iostream>
23#include <limits>
24#include <map>
25#include <numeric>
26#include <ostream>
27#include <sstream>
28#include <stdexcept>
29#include <string>
30#include <utility>
31#include <vector>
32
33namespace YODA {
34
35
38 const static double MAXDOUBLE = DBL_MAX; // was std::numeric_limits<double>::max(); -- warns in GCC5
39 const static double MAXINT = INT_MAX; // was std::numeric_limits<int>::max(); -- warns in GCC5
40
42 static const double PI = M_PI;
43
45 static const double TWOPI = 2*M_PI;
46
48 static const double HALFPI = M_PI_2;
49
51 enum Sign { MINUS = -1, ZERO = 0, PLUS = 1 };
52
53
55
56
61 template <typename NUM>
62 inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
63 isZero(NUM val, double tolerance=1e-5) {
64 return fabs(val) < tolerance;
65 }
66
71 template <typename NUM>
72 inline typename std::enable_if_t<std::is_integral_v<NUM>, bool>
73 isZero(NUM val, double = 1e-5) {
74 return val==0;
75 }
76
78 template <typename NUM>
79 inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
80 isNaN(NUM val) { return std::isnan(val); }
81
83 template <typename NUM>
84 inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
85 notNaN(NUM val) { return !std::isnan(val); }
86
92 template <typename N1, typename N2>
93 inline typename std::enable_if_t<
94 std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2> &&
95 (std::is_floating_point_v<N1> || std::is_floating_point_v<N2>), bool>
96 fuzzyEquals(N1 a, N2 b, double tolerance=1e-5) {
97 const double absavg = (std::abs(a) + std::abs(b))/2.0;
98 const double absdiff = std::abs(a - b);
99 const bool rtn = (isZero(a) && isZero(b)) || absdiff < tolerance*absavg;
100 return rtn;
101 }
102
107 template <typename N1, typename N2>
108 inline typename std::enable_if_t<
109 std::is_integral_v<N1> && std::is_integral_v<N2>, bool>
110 fuzzyEquals(N1 a, N2 b, double) { //< NB. unused tolerance parameter for ints, still needs a default value!
111 return a == b;
112 }
113
115 static std::function<bool(const double, const double)> fuzzyEqComp =
116 [](const double& lhs, const double& rhs) { return fuzzyEquals(lhs, rhs); };
117
118
122 template <typename N1, typename N2>
123 inline typename std::enable_if_t<
124 std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
125 fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5) {
126 return a > b || fuzzyEquals(a, b, tolerance);
127 }
128
132 template <typename N1, typename N2>
133 inline typename std::enable_if_t<
134 std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
135 fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5) {
136 return a < b || fuzzyEquals(a, b, tolerance);
137 }
138
140 inline double approx(double a, int n = 5) {
141 double roundTo = pow(10.0,n);
142 a *= roundTo;
143 a = floor(a);
144 return a/roundTo;
145 }
147
148
150
151
156 enum RangeBoundary { OPEN=0, SOFT=0, CLOSED=1, HARD=1 };
157
158
163 template<typename NUM>
164 inline bool inRange(NUM value, NUM low, NUM high,
165 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
166 if (lowbound == OPEN && highbound == OPEN) {
167 return (value > low && value < high);
168 } else if (lowbound == OPEN && highbound == CLOSED) {
169 return (value > low && value <= high);
170 } else if (lowbound == CLOSED && highbound == OPEN) {
171 return (value >= low && value < high);
172 } else { // if (lowbound == CLOSED && highbound == CLOSED) {
173 return (value >= low && value <= high);
174 }
175 }
176
178 template<typename NUM>
179 inline bool inRange(NUM value, std::pair<NUM, NUM> lowhigh,
180 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
181 return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);
182 }
183
184
189 inline bool inRange(int value, int low, int high,
190 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=CLOSED) {
191 if (lowbound == OPEN && highbound == OPEN) {
192 return (value > low && value < high);
193 } else if (lowbound == OPEN && highbound == CLOSED) {
194 return (value > low && value <= high);
195 } else if (lowbound == CLOSED && highbound == OPEN) {
196 return (value >= low && value < high);
197 } else { // if (lowbound == CLOSED && highbound == CLOSED) {
198 return (value >= low && value <= high);
199 }
200 }
201
203 inline bool inRange(int value, std::pair<int, int> lowhigh,
204 RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
205 return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);
206 }
207
209
210
212
213
215 template <typename NUM>
216 inline NUM sqr(NUM a) {
217 return a*a;
218 }
219
221 template <typename... Num>
222 inline std::enable_if_t<std::conjunction_v<std::is_arithmetic<Num>...>, double>
223 add_quad(Num ... vals) {
224 return sqrt((0.0 + ... + sqr(vals)));
225 }
226
228 inline int sign(double val) {
229 if (isZero(val)) return ZERO;
230 const int valsign = (val > 0) ? PLUS : MINUS;
231 return valsign;
232 }
233
235 inline int sign(int val) {
236 if (val == 0) return ZERO;
237 return (val > 0) ? PLUS : MINUS;
238 }
239
241 inline int sign(long val) {
242 if (val == 0) return ZERO;
243 return (val > 0) ? PLUS : MINUS;
244 }
245
247 inline double subtract(double a, double b, double tolerance = 1e-5) {
248 if (fuzzyEquals(a,b,tolerance)) return 0.;
249 return a - b;
250 }
251
253 inline double add(double a, double b, double tolerance = 1e-5) {
254 return subtract(a,-b,tolerance);
255 }
256
258
259
261
262
267 inline std::vector<double> linspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
268 if (xmax < xmin) throw RangeError("xmax should not be smaller than xmin!");
269 if (nbins == 0) throw RangeError("Requested number of bins is 0!");
270 std::vector<double> rtn;
271 const double interval = (xmax-xmin)/static_cast<double>(nbins);
272 for (size_t i = 0; i < nbins; ++i) {
273 rtn.push_back(xmin + i*interval);
274 }
275 assert(rtn.size() == nbins);
276 if (include_end) rtn.push_back(xmax); // exact xmax, not result of n * interval
277 return rtn;
278 }
279
280
286 inline std::vector<double> logspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
287 if (xmax < xmin) throw RangeError("xmax should not be smaller than xmin!");
288 if (xmin < 0) throw RangeError("xmin should not be negative!");
289 if (nbins == 0) throw RangeError("Requested number of bins is 0!");
290 const double logxmin = std::log(xmin);
291 const double logxmax = std::log(xmax);
292 const std::vector<double> logvals = linspace(nbins, logxmin, logxmax);
293 assert(logvals.size() == nbins+1);
294 std::vector<double> rtn; rtn.reserve(logvals.size());
295 rtn.push_back(xmin);
296 for (size_t i = 1; i < logvals.size()-1; ++i) {
297 rtn.push_back(std::exp(logvals[i]));
298 }
299 assert(rtn.size() == nbins);
300 if (include_end) rtn.push_back(xmax);
301 return rtn;
302 }
303
304
306 //inline std::vector<double> fspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn) {
307
308
328 inline std::vector<double> pdfspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn, size_t nsample=10000) {
329 const double dx = (xmax-xmin)/(double)nsample;
330 const std::vector<double> xs = linspace(nsample, xmin, xmax);
331 std::vector<double> ys(0, nsample);
332 auto posfn = [&](double x){return std::max(fn(x), 0.0);};
333 std::transform(xs.begin(), xs.end(), ys.begin(), posfn);
334 std::vector<double> areas; areas.reserve(nsample);
335 double areasum = 0;
336 for (size_t i = 0; i < ys.size()-1; ++i) {
337 const double area = (ys[i] + ys[i+1])*dx/2.0;
338 areas[i] = area;
339 areasum += area;
340 }
341 const double df = areasum/(double)nbins;
342 std::vector<double> xedges{xmin}; xedges.reserve(nbins+1);
343 double fsum = 0;
344 for (size_t i = 0; i < nsample-1; ++i) {
345 fsum += areas[i];
346 if (fsum > df) {
347 fsum = 0;
348 xedges.push_back(xs[i+1]);
349 }
350 }
351 xedges.push_back(xmax);
352 assert(xedges.size() == nbins+1);
353 return xedges;
354 }
355
356
360 template <typename NUM>
361 inline int index_between(const NUM& val, const std::vector<NUM>& binedges) {
362 if (!inRange(val, binedges.front(), binedges.back())) return -1; //< Out of histo range
363 int index = -1;
364 for (size_t i = 1; i < binedges.size(); ++i) {
365 if (val < binedges[i]) {
366 index = i-1;
367 break;
368 }
369 }
370 assert(inRange(index, -1, binedges.size()-1));
371 return index;
372 }
373
375
376
378
379
381 inline double effNumEntries(const double sumW, const double sumW2) {
382 if (isZero(sumW2)) return 0;
383 return sqr(sumW) / sumW2;
384 }
385
387 inline double effNumEntries(const std::vector<double>& weights) {
388 double sumW = 0.0, sumW2 = 0.0;
389 for (size_t i = 0; i < weights.size(); ++i) {
390 sumW += weights[i];
391 sumW2 += sqr(weights[i]);
392 }
393 return effNumEntries(sumW, sumW2);
394 }
395
397 inline double mean(const std::vector<int>& sample) {
398 double mean = 0.0;
399 for (size_t i=0; i<sample.size(); ++i) {
400 mean += sample[i];
401 }
402 return mean/sample.size();
403 }
404
406 inline double mean(const double sumWX, const double sumW) {
407 return sumW? sumWX / sumW : std::numeric_limits<double>::quiet_NaN();
408 }
409
411 inline double mean(const std::vector<double>& sample,
412 const std::vector<double>& weights) {
413 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
414 double sumWX = 0., sumW = 0.;
415 for (size_t i = 0; i < sample.size(); ++i) {
416 sumW += weights[i];
417 sumWX += weights[i]*sample[i];
418 }
419 return mean(sumWX, sumW);
420 }
421
427 inline double variance(const double sumWX, const double sumW,
428 const double sumWX2, const double sumW2) {
429 const double num = subtract(sumWX2*sumW, sqr(sumWX));
430 const double den = subtract(sqr(sumW), sumW2);
434 // if (fabs(num) < 1e-10 && fabs(den) < 1e-10) {
435 // return std::numeric_limits<double>::quiet_NaN();
436 // }
441 return den? fabs(num/den): std::numeric_limits<double>::quiet_NaN();
442 }
443
445 inline double variance(const std::vector<double>& sample,
446 const std::vector<double>& weights) {
447 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
448 if (fuzzyLessEquals(effNumEntries(weights), 1.0)) {
449 //throw LowStatsError("Requested variance of a distribution with only one effective entry");
450 return std::numeric_limits<double>::quiet_NaN();
451 }
452 double sumWX = 0., sumW = 0.;
453 double sumWX2 = 0., sumW2 = 0.;
454 for (size_t i = 0; i < sample.size(); ++i) {
455 sumW += weights[i];
456 sumWX += weights[i]*sample[i];
457 sumW2 += sqr(weights[i]);
458 sumWX2 += weights[i]*sqr(sample[i]);
459 }
460 return variance(sumWX, sumW, sumWX2, sumW2);
461 }
462
464 inline double stdDev(const double sumWX, const double sumW,
465 const double sumWX2, const double sumW2) {
466 return std::sqrt(variance(sumWX, sumW, sumWX2, sumW2));
467 }
468
470 inline double stdDev(const std::vector<double>& sample,
471 const std::vector<double>& weights) {
472 return std::sqrt(variance(sample, weights));
473 }
474
476 inline double stdErr(const double sumWX, const double sumW,
477 const double sumWX2, const double sumW2) {
478 const double effN = effNumEntries(sumW, sumW2);
479 if (effN == 0) return std::numeric_limits<double>::quiet_NaN();
480 const double var = variance(sumWX, sumW, sumWX2, sumW2);
481 return std::sqrt(var / effN);
482 }
483
485 inline double stdErr(const std::vector<double>& sample,
486 const std::vector<double>& weights) {
487 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
488 const double effN = effNumEntries(weights);
489 if (effN == 0) return std::numeric_limits<double>::quiet_NaN();
490 const double var = variance(sample, weights);
491 return std::sqrt(var / effN);
492 }
493
495 inline double RMS(const double sumWX2, const double sumW, const double sumW2) {
496 // Weighted RMS defined as
497 // rms = sqrt(sum{w x^2} / sum{w})
498 const double effN = effNumEntries(sumW, sumW2);
499 if (effN == 0) return std::numeric_limits<double>::quiet_NaN();
500 const double meanSq = sumWX2 / sumW;
501 return std::sqrt(meanSq);
502 }
503
505 inline double RMS(const std::vector<double>& sample,
506 const std::vector<double>& weights) {
507 if (sample.size() != weights.size()) throw RangeError("Inputs should have equal length!");
508 double sumWX2 = 0., sumW = 0., sumW2 = 0.;
509 for (size_t i = 0; i < sample.size(); ++i) {
510 sumW += weights[i];
511 sumW2 += sqr(weights[i]);
512 sumWX2 += weights[i]*sqr(sample[i]);
513 }
514 return RMS(sumWX2, sumW, sumW2);
515 }
516
518 inline double covariance(const std::vector<int>& sample1, const std::vector<int>& sample2) {
519 const double mean1 = mean(sample1);
520 const double mean2 = mean(sample2);
521 const size_t N = sample1.size();
522 double cov = 0.0;
523 for (size_t i = 0; i < N; i++) {
524 const double cov_i = (sample1[i] - mean1)*(sample2[i] - mean2);
525 cov += cov_i;
526 }
527 if (N > 1) return cov/(N-1);
528 else return 0.0;
529 }
530
531
533 inline double correlation(const std::vector<int>& sample1, const std::vector<int>& sample2) {
534 const double cov = covariance(sample1, sample2);
535 const double var1 = covariance(sample1, sample1);
536 const double var2 = covariance(sample2, sample2);
537 const double correlation = cov/sqrt(var1*var2);
538 const double corr_strength = correlation*sqrt(var2/var1);
539 return corr_strength;
540 }
541
542
549 inline double naiveChi2(const std::vector<double>& sample1, const std::vector<double>& sample2,
550 const std::vector<double>& s1errors = std::vector<double>{},
551 const std::vector<double>& s2errors = std::vector<double>{}) {
552 if (sample1.size() != sample2.size()) {
553 throw RangeError("Inputs should have equal length!");
554 }
555 if (s1errors.size() && sample1.size() != s1errors.size()) {
556 throw RangeError("Inputs should have equal length!");
557 }
558 if (s2errors.size() && sample2.size() != s2errors.size()) {
559 throw RangeError("Inputs should have equal length!");
560 }
561 const size_t N = sample1.size();
562 double chi2 = 0.0;
563 for (size_t i = 0; i < N; ++i) {
564 double temp = sqr(sample1[i] - sample2[i]);
565 if (s1errors.size()) {
566 temp /= sqr(s1errors[i]) + sqr(s2errors[i]);
567 }
568 chi2 += temp;
569 }
570 return chi2;
571 }
572
579 inline double naiveChi2reduced(const std::vector<double>& sample1, const std::vector<double>& sample2,
580 const std::vector<double>& s1errors = std::vector<double>{},
581 const std::vector<double>& s2errors = std::vector<double>{}) {
582 if (sample1.empty()) throw RangeError("Inputs should not have 0 length!");
583 return naiveChi2(sample1, sample2, s1errors, s2errors)/sample1.size();
584 }
585
587
588
589}
590
591#endif
Error for e.g. use of invalid bin ranges.
Definition Exceptions.h:34
Anonymous namespace to limit visibility.
double stdErr(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted standard error of a sample.
Definition MathUtils.h:476
static const double HALFPI
A pre-defined value of .
Definition MathUtils.h:48
double RMS(const double sumWX2, const double sumW, const double sumW2)
Calculate the weighted RMS of a sample.
Definition MathUtils.h:495
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two floating point numbers for <= with a degree of fuzziness.
Definition MathUtils.h:135
std::vector< double > logspace(size_t nbins, double xmin, double xmax, bool include_end=true)
Make a list of nbins + 1 values uniformly spaced in log(x) between xmin and xmax inclusive.
Definition MathUtils.h:286
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two numbers for >= with a degree of fuzziness.
Definition MathUtils.h:125
int index_between(const NUM &val, const std::vector< NUM > &binedges)
Return the bin index of the given value, val, given a vector of bin edges.
Definition MathUtils.h:361
std::enable_if_t< std::is_floating_point_v< NUM >, bool > isNaN(NUM val)
Check if a number is NaN.
Definition MathUtils.h:80
std::vector< double > linspace(size_t nbins, double xmin, double xmax, bool include_end=true)
Make a list of nbins + 1 values uniformly spaced between xmin and xmax inclusive.
Definition MathUtils.h:267
double covariance(const std::vector< int > &sample1, const std::vector< int > &sample2)
Calculate the covariance (variance) between two samples.
Definition MathUtils.h:518
RangeBoundary
Definition MathUtils.h:156
@ CLOSED
Definition MathUtils.h:156
@ OPEN
Definition MathUtils.h:156
@ HARD
Definition MathUtils.h:156
@ SOFT
Definition MathUtils.h:156
double naiveChi2(const std::vector< double > &sample1, const std::vector< double > &sample2, const std::vector< double > &s1errors=std::vector< double >{}, const std::vector< double > &s2errors=std::vector< double >{})
Calculate the error-weighted chi2 statistic between two samples.
Definition MathUtils.h:549
double approx(double a, int n=5)
Returns a number floored at the nth decimal place.
Definition MathUtils.h:140
double correlation(const std::vector< int > &sample1, const std::vector< int > &sample2)
Calculate the correlation strength between two samples.
Definition MathUtils.h:533
std::enable_if_t< std::conjunction_v< std::is_arithmetic< Num >... >, double > add_quad(Num ... vals)
Named number-type addition in quadrature operation.
Definition MathUtils.h:223
static const double TWOPI
A pre-defined value of .
Definition MathUtils.h:45
NUM sqr(NUM a)
Named number-type squaring operation.
Definition MathUtils.h:216
static const double PI
A pre-defined value of .
Definition MathUtils.h:42
double stdDev(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted standard deviation of a sample.
Definition MathUtils.h:464
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 > &&(std::is_floating_point_v< N1 >||std::is_floating_point_v< N2 >), bool > fuzzyEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two numbers for equality with a degree of fuzziness.
Definition MathUtils.h:96
std::enable_if_t< std::is_floating_point_v< NUM >, bool > notNaN(NUM val)
Check if a number is non-NaN.
Definition MathUtils.h:85
double naiveChi2reduced(const std::vector< double > &sample1, const std::vector< double > &sample2, const std::vector< double > &s1errors=std::vector< double >{}, const std::vector< double > &s2errors=std::vector< double >{})
Calculate the error-weighted reduced chi2 statistic between two samples.
Definition MathUtils.h:579
BinnedEstimate< AxisT... > add(const BinnedDbn< DbnN, AxisT... > &dbn, const BinnedEstimate< AxisT... > &est)
Calculate the addition of a BinnedDbn with a BinnedEstimate.
Definition BinnedDbn.h:1285
int sign(double val)
Find the sign of a number.
Definition MathUtils.h:228
BinnedEstimate< AxisT... > subtract(const BinnedDbn< DbnN, AxisT... > &dbn, const BinnedEstimate< AxisT... > &est)
Calculate the subtraction of a BinnedEstimate from a BinnedDbn.
Definition BinnedDbn.h:1317
static std::function< bool(const double, const double)> fuzzyEqComp
Comparator wrapper to use with STL algorithms, e.g. std::equal etc.
Definition MathUtils.h:115
static const double MAXDOUBLE
Definition MathUtils.h:38
static const double MAXINT
Definition MathUtils.h:39
double variance(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted variance of a sample.
Definition MathUtils.h:427
std::vector< double > pdfspace(size_t nbins, double xmin, double xmax, std::function< double(double)> &fn, size_t nsample=10000)
Make a list of nbins + 1 values spaced with density ~ f(x) between xmin and end inclusive.
Definition MathUtils.h:328
std::enable_if_t< std::is_floating_point_v< NUM >, bool > isZero(NUM val, double tolerance=1e-5)
Compare a number to zero.
Definition MathUtils.h:63
double mean(const std::vector< int > &sample)
Calculate the mean of a sample.
Definition MathUtils.h:397
double effNumEntries(const double sumW, const double sumW2)
Calculate the effective number of entries of a sample.
Definition MathUtils.h:381
Sign
Enum for signs of numbers.
Definition MathUtils.h:51
@ ZERO
Definition MathUtils.h:51
@ PLUS
Definition MathUtils.h:51
@ MINUS
Definition MathUtils.h:51
bool inRange(NUM value, NUM low, NUM high, RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN)
Determine if value is in the range low to high, for floating point numbers.
Definition MathUtils.h:164