doxy/MathUtils_8h_source.html

// -*- C++ -*-

//

// This file is part of YODA -- Yet more Objects for Data Analysis

// Copyright (C) 2008-2025 The YODA collaboration (see AUTHORS for details)

//

#ifndef YODA_MathUtils_H

#define YODA_MathUtils_H


#include "YODA/Exceptions.h"

#include "YODA/Config/BuildConfig.h"


#include <algorithm>

#include <functional>

#include <numeric>

#include <cassert>

#include <cfloat>

#include <climits>

#include <cmath>

#include <functional>

#include <iostream>

#include <limits>

#include <map>

#include <numeric>

#include <ostream>

#include <sstream>

#include <stdexcept>

#include <string>

#include <utility>

#include <vector>


namespace YODA {


  /// Largest finite double value, taken from the C macro DBL_MAX.
  const static double MAXDOUBLE = DBL_MAX; // was std::numeric_limits<double>::max(); -- warns in GCC5

  /// Value of INT_MAX. Note that it is stored as a double, not as an int.
  const static double MAXINT = INT_MAX; // was std::numeric_limits<int>::max(); -- warns in GCC5


  /// A pre-defined value of pi (from the M_PI macro).
  static const double PI = M_PI;


  /// A pre-defined value of 2*pi.
  static const double TWOPI = 2*M_PI;


  /// A pre-defined value of pi/2 (from the M_PI_2 macro).
  static const double HALFPI = M_PI_2;


  /// Enum for signs of numbers: the enumerator values are -1, 0 and +1.
  enum Sign { MINUS = -1, ZERO = 0, PLUS = 1 };


  /// @brief Compare a floating-point number to zero within an absolute tolerance.
  ///
  /// @param val        Value to test.
  /// @param tolerance  Absolute threshold that the magnitude of @a val must be strictly below.
  /// @return true if |val| < tolerance. A NaN never compares as zero.
  template <typename NUM>
  inline std::enable_if_t<std::is_floating_point_v<NUM>, bool>
  isZero(NUM val, double tolerance=1e-5) {
    const auto magnitude = std::fabs(val);
    return magnitude < tolerance;
  }


  /// @brief Compare an integral number to zero.
  ///
  /// The second (tolerance) argument exists only so that integer and
  /// floating-point callers can share one call signature; it is ignored.
  ///
  /// @param val  Value to test.
  /// @return true only if @a val is exactly zero.
  template <typename NUM>
  inline std::enable_if_t<std::is_integral_v<NUM>, bool>
  isZero(NUM val, double = 1e-5) {
    const bool nonzero = (val != 0);
    return !nonzero;
  }


  /// @brief Check if a floating-point number is NaN.
  ///
  /// @param val  Value to test.
  /// @return true if std::isnan reports @a val as not-a-number.
  template <typename NUM>
  inline std::enable_if_t<std::is_floating_point_v<NUM>, bool>
  isNaN(NUM val) {
    const bool nan = std::isnan(val);
    return nan;
  }


  /// @brief Check if a floating-point number is not NaN.
  ///
  /// @param val  Value to test.
  /// @return true unless std::isnan reports @a val as not-a-number
  ///         (infinities therefore count as "not NaN").
  template <typename NUM>
  inline std::enable_if_t<std::is_floating_point_v<NUM>, bool>
  notNaN(NUM val) {
    if (std::isnan(val)) return false;
    return true;
  }


  /// @brief Compare two numbers for equality with a degree of fuzziness.
  ///
  /// Selected when both arguments are arithmetic and at least one is floating point.
  /// The values are equal if both pass isZero() with its default tolerance, or if
  /// their absolute difference is strictly below @a tolerance times the mean of
  /// their absolute values (i.e. @a tolerance is a relative tolerance).
  ///
  /// @param a,b        Values to compare.
  /// @param tolerance  Relative tolerance on the difference.
  /// @return true if the two values are fuzzily equal.
  template <typename N1, typename N2>
  inline std::enable_if_t<
    std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2> &&
    (std::is_floating_point_v<N1> || std::is_floating_point_v<N2>), bool>
  fuzzyEquals(N1 a, N2 b, double tolerance=1e-5) {
    // Two near-zero values match regardless of their relative difference
    if (isZero(a) && isZero(b)) return true;
    const double meanMagnitude = (std::abs(a) + std::abs(b))/2.0;
    const double separation = std::abs(a - b);
    return separation < tolerance*meanMagnitude;
  }


  /// @brief Compare two integral numbers for equality.
  ///
  /// Integer counterpart of the floating-point fuzzyEquals: the comparison is
  /// exact. The tolerance argument is accepted only so that integer and
  /// floating-point callers share one signature; it is ignored. It carries a
  /// default value so that the two-argument call fuzzyEquals(a, b) also works
  /// for integers.
  ///
  /// @param a,b  Values to compare.
  /// @return true if @a a == @a b.
  template <typename N1, typename N2>
  inline std::enable_if_t<
    std::is_integral_v<N1> && std::is_integral_v<N2>, bool>
  fuzzyEquals(N1 a, N2 b, double = 1e-5) { //< NB. unused tolerance parameter for ints, defaulted to match the floating-point overload
    return a == b;
  }


  /// Comparator wrapper around the two-argument fuzzyEquals call (so fuzzyEquals'
  /// default tolerance applies), packaged as a std::function taking two doubles.
  /// Declared static at namespace scope, so it has internal linkage.
  static std::function<bool(const double, const double)> fuzzyEqComp =
    [](const double& lhs, const double& rhs) { return fuzzyEquals(lhs, rhs); };


  /// @brief Compare two numbers for >= with a degree of fuzziness.
  ///
  /// @param a,b        Values to compare.
  /// @param tolerance  Forwarded to fuzzyEquals.
  /// @return true if @a a is strictly greater than @a b, or if the two are
  ///         fuzzily equal according to fuzzyEquals.
  template <typename N1, typename N2>
  inline std::enable_if_t<
    std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
  fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5) {
    if (a > b) return true;
    return fuzzyEquals(a, b, tolerance);
  }


  /// @brief Compare two numbers for <= with a degree of fuzziness.
  ///
  /// @param a,b        Values to compare.
  /// @param tolerance  Forwarded to fuzzyEquals.
  /// @return true if @a a is strictly less than @a b, or if the two are
  ///         fuzzily equal according to fuzzyEquals.
  template <typename N1, typename N2>
  inline std::enable_if_t<
    std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
  fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5) {
    if (a < b) return true;
    return fuzzyEquals(a, b, tolerance);
  }


  /// @brief Return a number floored at the nth decimal place.
  ///
  /// The value is scaled by 10^n, floored, and scaled back, so negative
  /// numbers move away from zero (e.g. -1.5 with n = 0 gives -2).
  ///
  /// @param a  Value to floor.
  /// @param n  Number of decimal places to keep.
  /// @return floor(a * 10^n) / 10^n.
  inline double approx(double a, int n = 5) {
    const double scale = std::pow(10.0, n);
    const double flooredScaled = std::floor(a * scale);
    return flooredScaled / scale;
  }


  /// Flags whether an interval boundary is open or closed.
  ///
  /// OPEN and SOFT share the value 0, and CLOSED and HARD share the value 1,
  /// so the names within each pair are interchangeable. In inRange() an OPEN
  /// boundary uses a strict comparison and a CLOSED one an inclusive comparison.
  enum RangeBoundary { OPEN=0, SOFT=0, CLOSED=1, HARD=1 };


  /// @brief Determine if @a value is in the range @a low to @a high.
  ///
  /// By default the interval is closed at the low end and open at the high
  /// end, i.e. [low, high).
  ///
  /// @param value      Value to test.
  /// @param low,high   Interval limits.
  /// @param lowbound   OPEN for a strict lower comparison (>), otherwise >=.
  /// @param highbound  OPEN for a strict upper comparison (<), otherwise <=.
  /// @return true if @a value satisfies both boundary comparisons.
  template<typename NUM>
  inline bool inRange(NUM value, NUM low, NUM high,
                      RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
    // Lower edge first: fail fast if the value is below the interval
    const bool aboveLow = (lowbound == OPEN) ? (value > low) : (value >= low);
    if (!aboveLow) return false;
    // Then the upper edge
    return (highbound == OPEN) ? (value < high) : (value <= high);
  }


  /// @brief Alternative version of inRange which accepts a pair for the range limits.
  ///
  /// @param value      Value to test.
  /// @param lowhigh    Interval limits as (low, high).
  /// @param lowbound   Boundary type at the low end (default CLOSED).
  /// @param highbound  Boundary type at the high end (default OPEN).
  /// @return Result of the three-argument inRange on the unpacked limits.
  template<typename NUM>
  inline bool inRange(NUM value, std::pair<NUM, NUM> lowhigh,
                      RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
    const NUM lowerLimit = lowhigh.first;
    const NUM upperLimit = lowhigh.second;
    return inRange(value, lowerLimit, upperLimit, lowbound, highbound);
  }


  /// @brief Determine if @a value is in the range @a low to @a high, for integer types.
  ///
  /// Unlike the templated version, the default interval here is closed at
  /// both ends, i.e. [low, high].
  ///
  /// @param value      Value to test.
  /// @param low,high   Interval limits.
  /// @param lowbound   OPEN for a strict lower comparison (>), otherwise >=.
  /// @param highbound  OPEN for a strict upper comparison (<), otherwise <=.
  /// @return true if @a value satisfies both boundary comparisons.
  inline bool inRange(int value, int low, int high,
                      RangeBoundary lowbound=CLOSED, RangeBoundary highbound=CLOSED) {
    const bool aboveLow = (lowbound == OPEN) ? (value > low) : (value >= low);
    if (!aboveLow) return false;
    return (highbound == OPEN) ? (value < high) : (value <= high);
  }


  inline bool inRange(int value, std::pair<int, int> lowhigh,

                      RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {

    return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);

  }


  /// @brief Named number-type squaring operation.
  ///
  /// @param a  Value to square.
  /// @return @a a multiplied by itself, in the same type as the argument.
  template <typename NUM>
  inline NUM sqr(NUM a) {
    const NUM squared = a*a;
    return squared;
  }


  /// @brief Named number-type addition in quadrature operation.
  ///
  /// Accepts any number of arithmetic arguments; each is squared with sqr()
  /// and the squares are summed left-to-right starting from 0.0.
  ///
  /// @param vals  Values to combine.
  /// @return Square root of the sum of squares.
  template <typename... Num>
  inline std::enable_if_t<std::conjunction_v<std::is_arithmetic<Num>...>, double>
  add_quad(Num ... vals) {
    const double sumOfSquares = (0.0 + ... + sqr(vals));
    return std::sqrt(sumOfSquares);
  }


  /// @brief Find the sign of a floating-point number.
  ///
  /// Zero is decided by isZero() with its default tolerance, so small
  /// non-zero values are also reported as ZERO.
  ///
  /// @param val  Value to classify.
  /// @return ZERO, PLUS or MINUS (as an int).
  inline int sign(double val) {
    if (isZero(val)) return ZERO;
    if (val > 0) return PLUS;
    return MINUS;
  }


  /// @brief Find the sign of an int.
  ///
  /// @param val  Value to classify.
  /// @return PLUS for positive, MINUS for negative, ZERO for exactly zero (as an int).
  inline int sign(int val) {
    if (val > 0) return PLUS;
    if (val < 0) return MINUS;
    return ZERO;
  }


  /// @brief Find the sign of a long.
  ///
  /// @param val  Value to classify.
  /// @return PLUS for positive, MINUS for negative, ZERO for exactly zero (as an int).
  inline int sign(long val) {
    if (val > 0) return PLUS;
    if (val < 0) return MINUS;
    return ZERO;
  }


  /// @brief Subtract two numbers, snapping the result to zero when they are fuzzily equal.
  ///
  /// @param a,b        Operands of a - b.
  /// @param tolerance  Tolerance passed to fuzzyEquals.
  /// @return Exactly 0 if fuzzyEquals(a, b, tolerance) holds, otherwise a - b.
  inline double subtract(double a, double b, double tolerance = 1e-5) {
    return fuzzyEquals(a,b,tolerance) ? 0. : a - b;
  }


  /// @brief Add two numbers, snapping the result to zero when they fuzzily cancel.
  ///
  /// Implemented as subtract(a, -b, tolerance), so the result is exactly 0
  /// when @a a and -@a b are fuzzily equal.
  ///
  /// @param a,b        Operands of a + b.
  /// @param tolerance  Tolerance passed on to subtract.
  /// @return The (possibly zero-snapped) sum.
  inline double add(double a, double b, double tolerance = 1e-5) {
    const double negated = -b;
    return subtract(a, negated, tolerance);
  }


  /// @brief Make a list of @a nbins + 1 values uniformly spaced between @a xmin and @a xmax inclusive.
  ///
  /// @param nbins        Number of intervals; must be non-zero.
  /// @param xmin,xmax    Range limits; @a xmax must not be smaller than @a xmin.
  /// @param include_end  If false, the final value (@a xmax) is omitted and
  ///                     only @a nbins values are returned.
  /// @return The edge values in increasing order.
  /// @throws RangeError if @a xmax < @a xmin or @a nbins is zero.
  inline std::vector<double> linspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
    if (xmax < xmin)  throw RangeError("xmax should not be smaller than xmin!");
    if (nbins == 0)   throw RangeError("Requested number of bins is 0!");
    const double step = (xmax-xmin)/static_cast<double>(nbins);
    std::vector<double> edges;
    size_t k = 0;
    while (k < nbins) {
      edges.push_back(xmin + k*step);
      ++k;
    }
    assert(edges.size() == nbins);
    // Append the exact xmax rather than the rounded result of nbins * step
    if (include_end) edges.push_back(xmax);
    return edges;
  }


  /// @brief Make a list of @a nbins + 1 values uniformly spaced in log(x) between @a xmin and @a xmax inclusive.
  ///
  /// The first and last values are the exact @a xmin and @a xmax; interior
  /// values are exp() of a linspace() over [log(xmin), log(xmax)].
  ///
  /// @param nbins        Number of intervals; must be non-zero.
  /// @param xmin,xmax    Range limits; @a xmin must be non-negative and not above @a xmax.
  /// @param include_end  If false, the final value (@a xmax) is omitted.
  /// @return The edge values.
  /// @throws RangeError if @a xmax < @a xmin, @a xmin is negative, @a nbins is
  ///         zero, or @a xmin is zero while more than one bin is requested.
  inline std::vector<double> logspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
    if (xmax < xmin)  throw RangeError("xmax should not be smaller than xmin!");
    if (xmin < 0)     throw RangeError("xmin should not be negative!");
    if (nbins == 0)   throw RangeError("Requested number of bins is 0!");
    // log(0) is -infinity, which would turn every interior edge into NaN.
    // A single bin has no interior edges, so that case remains allowed.
    if (xmin == 0 && nbins > 1) {
      throw RangeError("xmin should not be zero for more than one log-spaced bin!");
    }
    const double logxmin = std::log(xmin);
    const double logxmax = std::log(xmax);
    const std::vector<double> logvals = linspace(nbins, logxmin, logxmax);
    assert(logvals.size() == nbins+1);
    std::vector<double> rtn; rtn.reserve(logvals.size());
    // Use the exact end values rather than exp(log(x)) round trips
    rtn.push_back(xmin);
    for (size_t i = 1; i < logvals.size()-1; ++i) {
      rtn.push_back(std::exp(logvals[i]));
    }
    assert(rtn.size() == nbins);
    if (include_end) rtn.push_back(xmax);
    return rtn;
  }


  //inline std::vector<double> fspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn) {


  inline std::vector<double> pdfspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn, size_t nsample=10000) {

    const double dx = (xmax-xmin)/(double)nsample;

    const std::vector<double> xs = linspace(nsample, xmin, xmax);

    std::vector<double> ys(0, nsample);

    auto posfn = [&](double x){return std::max(fn(x), 0.0);};

    std::transform(xs.begin(), xs.end(), ys.begin(), posfn);

    std::vector<double> areas; areas.reserve(nsample);

    double areasum = 0;

    for (size_t i = 0; i < ys.size()-1; ++i) {

      const double area = (ys[i] + ys[i+1])*dx/2.0;

      areas[i] = area;

      areasum += area;

    }

    const double df = areasum/(double)nbins;

    std::vector<double> xedges{xmin}; xedges.reserve(nbins+1);

    double fsum = 0;

    for (size_t i = 0; i < nsample-1; ++i) {

      fsum += areas[i];

      if (fsum > df) {

        fsum = 0;

        xedges.push_back(xs[i+1]);

      }

    }

    xedges.push_back(xmax);

    assert(xedges.size() == nbins+1);

    return xedges;

  }


  /// @brief Return the bin index of the given value, @a val, given a vector of bin edges.
  ///
  /// The edges are scanned in order and the index of the first bin whose upper
  /// edge is above @a val is returned. NOTE(review): this presumably expects
  /// @a binedges to be sorted in increasing order -- confirm with callers.
  ///
  /// @param val       Value to locate.
  /// @param binedges  Bin edges; N+1 edges describe N bins.
  /// @return The zero-based bin index, or -1 if @a binedges is empty or @a val
  ///         lies outside [binedges.front(), binedges.back()).
  template <typename NUM>
  inline int index_between(const NUM& val, const std::vector<NUM>& binedges) {
    if (binedges.empty()) return -1; //< No edges at all: front()/back() would be undefined
    if (!inRange(val, binedges.front(), binedges.back())) return -1; //< Out of histo range
    int index = -1;
    for (size_t i = 1; i < binedges.size(); ++i) {
      if (val < binedges[i]) {
        index = static_cast<int>(i) - 1;
        break;
      }
    }
    assert(inRange(index, -1, binedges.size()-1));
    return index;
  }


  /// @brief Calculate the effective number of entries of a sample.
  ///
  /// @param sumW   Sum of weights.
  /// @param sumW2  Sum of squared weights.
  /// @return sumW^2 / sumW2, or 0 if @a sumW2 passes isZero().
  inline double effNumEntries(const double sumW, const double sumW2) {
    return isZero(sumW2) ? 0.0 : sqr(sumW) / sumW2;
  }


  /// @brief Calculate the effective number of entries of a sample, from its weights.
  ///
  /// Accumulates the sum of weights and the sum of squared weights, then
  /// defers to the two-argument effNumEntries.
  ///
  /// @param weights  Per-entry weights.
  /// @return The effective number of entries.
  inline double effNumEntries(const std::vector<double>& weights) {
    double totalW = 0.0;
    double totalW2 = 0.0;
    for (const double w : weights) {
      totalW += w;
      totalW2 += sqr(w);
    }
    return effNumEntries(totalW, totalW2);
  }


  /// @brief Calculate the (unweighted) mean of an integer sample.
  ///
  /// @param sample  Values to average.
  /// @return Sum of the values, accumulated in double precision, divided by
  ///         the sample size. An empty sample gives 0/0, i.e. NaN.
  inline double mean(const std::vector<int>& sample) {
    const double total = std::accumulate(sample.begin(), sample.end(), 0.0);
    return total/sample.size();
  }


  /// @brief Calculate the weighted mean of a sample from its sums.
  ///
  /// @param sumWX  Sum of weight * value.
  /// @param sumW   Sum of weights.
  /// @return sumWX / sumW, or a quiet NaN if @a sumW is zero.
  inline double mean(const double sumWX, const double sumW) {
    if (sumW == 0) return std::numeric_limits<double>::quiet_NaN();
    return sumWX / sumW;
  }


  /// @brief Calculate the weighted mean of a sample.
  ///
  /// @param sample   Sample values.
  /// @param weights  Per-value weights; must be the same length as @a sample.
  /// @return The weighted mean, as computed by mean(sumWX, sumW).
  /// @throws RangeError if the two inputs differ in length.
  inline double mean(const std::vector<double>& sample,
                     const std::vector<double>& weights) {
    if (sample.size() != weights.size())  throw RangeError("Inputs should have equal length!");
    double totalW = 0.;
    double totalWX = 0.;
    const size_t n = sample.size();
    for (size_t k = 0; k < n; ++k) {
      const double w = weights[k];
      totalW  += w;
      totalWX += w*sample[k];
    }
    return mean(totalWX, totalW);
  }


  /// @brief Calculate the weighted variance of a sample from its sums.
  ///
  /// Computes |(sumWX2*sumW - sumWX^2) / (sumW^2 - sumW2)|, where both
  /// differences go through subtract() and so snap to zero when their
  /// operands are fuzzily equal.
  ///
  /// @param sumWX   Sum of weight * value.
  /// @param sumW    Sum of weights.
  /// @param sumWX2  Sum of weight * value^2.
  /// @param sumW2   Sum of squared weights.
  /// @return The variance, or a quiet NaN if the denominator is zero.
  inline double variance(const double sumWX, const double sumW,
                         const double sumWX2, const double sumW2) {
    const double numerator = subtract(sumWX2*sumW, sqr(sumWX));
    const double denominator = subtract(sqr(sumW), sumW2);
    if (denominator == 0) return std::numeric_limits<double>::quiet_NaN();
    return std::fabs(numerator/denominator);
  }


  /// @brief Calculate the weighted variance of a sample.
  ///
  /// @param sample   Sample values.
  /// @param weights  Per-value weights; must be the same length as @a sample.
  /// @return The variance from the four-argument variance(), or a quiet NaN
  ///         (rather than an exception) when the effective number of entries
  ///         is fuzzily <= 1.
  /// @throws RangeError if the two inputs differ in length.
  inline double variance(const std::vector<double>& sample,
                         const std::vector<double>& weights) {
    if (sample.size() != weights.size())  throw RangeError("Inputs should have equal length!");
    // A single effective entry has no defined spread
    if (fuzzyLessEquals(effNumEntries(weights), 1.0)) {
      return std::numeric_limits<double>::quiet_NaN();
    }
    double totalW = 0., totalWX = 0.;
    double totalW2 = 0., totalWX2 = 0.;
    const size_t n = sample.size();
    for (size_t k = 0; k < n; ++k) {
      const double w = weights[k];
      const double x = sample[k];
      totalW   += w;
      totalWX  += w*x;
      totalW2  += sqr(w);
      totalWX2 += w*sqr(x);
    }
    return variance(totalWX, totalW, totalWX2, totalW2);
  }


  /// @brief Calculate the weighted standard deviation of a sample from its sums.
  ///
  /// @param sumWX   Sum of weight * value.
  /// @param sumW    Sum of weights.
  /// @param sumWX2  Sum of weight * value^2.
  /// @param sumW2   Sum of squared weights.
  /// @return Square root of variance(sumWX, sumW, sumWX2, sumW2).
  inline double stdDev(const double sumWX, const double sumW,
                       const double sumWX2, const double sumW2) {
    const double var = variance(sumWX, sumW, sumWX2, sumW2);
    return std::sqrt(var);
  }


  /// @brief Calculate the weighted standard deviation of a sample.
  ///
  /// @param sample   Sample values.
  /// @param weights  Per-value weights.
  /// @return Square root of variance(sample, weights).
  inline double stdDev(const std::vector<double>& sample,
                       const std::vector<double>& weights) {
    const double var = variance(sample, weights);
    return std::sqrt(var);
  }


  /// @brief Calculate the weighted standard error of a sample from its sums.
  ///
  /// @param sumWX   Sum of weight * value.
  /// @param sumW    Sum of weights.
  /// @param sumWX2  Sum of weight * value^2.
  /// @param sumW2   Sum of squared weights.
  /// @return sqrt(variance / effective number of entries), or a quiet NaN if
  ///         the effective number of entries is zero.
  inline double stdErr(const double sumWX, const double sumW,
                       const double sumWX2, const double sumW2) {
    const double nEff = effNumEntries(sumW, sumW2);
    if (nEff == 0)  return std::numeric_limits<double>::quiet_NaN();
    return std::sqrt(variance(sumWX, sumW, sumWX2, sumW2) / nEff);
  }


  /// @brief Calculate the weighted standard error of a sample.
  ///
  /// @param sample   Sample values.
  /// @param weights  Per-value weights; must be the same length as @a sample.
  /// @return sqrt(variance / effective number of entries), or a quiet NaN if
  ///         the effective number of entries is zero.
  /// @throws RangeError if the two inputs differ in length.
  inline double stdErr(const std::vector<double>& sample,
                        const std::vector<double>& weights) {
    if (sample.size() != weights.size())  throw RangeError("Inputs should have equal length!");
    const double nEff = effNumEntries(weights);
    if (nEff == 0)  return std::numeric_limits<double>::quiet_NaN();
    return std::sqrt(variance(sample, weights) / nEff);
  }


  /// @brief Calculate the weighted RMS of a sample from its sums.
  ///
  /// The weighted RMS is defined as sqrt(sum{w x^2} / sum{w}).
  ///
  /// @param sumWX2  Sum of weight * value^2.
  /// @param sumW    Sum of weights.
  /// @param sumW2   Sum of squared weights (only used for the effective-entries check).
  /// @return The RMS, or a quiet NaN if the effective number of entries is zero.
  inline double RMS(const double sumWX2, const double sumW, const double sumW2) {
    if (effNumEntries(sumW, sumW2) == 0) {
      return std::numeric_limits<double>::quiet_NaN();
    }
    return std::sqrt(sumWX2 / sumW);
  }


  /// @brief Calculate the weighted RMS of a sample.
  ///
  /// @param sample   Sample values.
  /// @param weights  Per-value weights; must be the same length as @a sample.
  /// @return The RMS as computed by the three-argument RMS().
  /// @throws RangeError if the two inputs differ in length.
  inline double RMS(const std::vector<double>& sample,
                    const std::vector<double>& weights) {
    if (sample.size() != weights.size())  throw RangeError("Inputs should have equal length!");
    double totalWX2 = 0.;
    double totalW = 0.;
    double totalW2 = 0.;
    const size_t n = sample.size();
    for (size_t k = 0; k < n; ++k) {
      const double w = weights[k];
      totalW   += w;
      totalW2  += sqr(w);
      totalWX2 += w*sqr(sample[k]);
    }
    return RMS(totalWX2, totalW, totalW2);
  }


  inline double covariance(const std::vector<int>& sample1, const std::vector<int>& sample2) {

    const double mean1 = mean(sample1);

    const double mean2 = mean(sample2);

    const size_t N = sample1.size();

    double cov = 0.0;

    for (size_t i = 0; i < N; i++) {

      const double cov_i = (sample1[i] - mean1)*(sample2[i] - mean2);

      cov += cov_i;

    }

    if (N > 1) return cov/(N-1);

    else return 0.0;

  }


  /// @brief Calculate the correlation strength between two samples.
  ///
  /// The value returned is cov/sqrt(var1*var2) multiplied by sqrt(var2/var1),
  /// where the variances are the covariance of each sample with itself.
  ///
  /// @param sample1,sample2  Samples to correlate.
  /// @return The correlation strength.
  inline double correlation(const std::vector<int>& sample1, const std::vector<int>& sample2) {
    const double cov12 = covariance(sample1, sample2);
    const double variance1 = covariance(sample1, sample1);
    const double variance2 = covariance(sample2, sample2);
    const double coefficient = cov12/std::sqrt(variance1*variance2);
    return coefficient*std::sqrt(variance2/variance1);
  }


  /// @brief Calculate the error-weighted chi2 statistic between two samples.
  ///
  /// Each squared difference is divided by the sum in quadrature of whichever
  /// error vectors were supplied. If neither is given the plain sum of squared
  /// differences is returned. Either error vector may be supplied without the
  /// other; an omitted (empty) one contributes nothing to the denominator.
  ///
  /// @param sample1,sample2  Values to compare; must be of equal length.
  /// @param s1errors         Optional errors on @a sample1 (empty, or same length).
  /// @param s2errors         Optional errors on @a sample2 (empty, or same length).
  /// @return The chi2 sum over all entries.
  /// @throws RangeError if the samples, or a non-empty error vector and its
  ///         sample, differ in length.
  inline double naiveChi2(const std::vector<double>& sample1, const std::vector<double>& sample2,
                     const std::vector<double>& s1errors = std::vector<double>{},
                     const std::vector<double>& s2errors = std::vector<double>{}) {
    if (sample1.size() != sample2.size()) {
      throw RangeError("Inputs should have equal length!");
    }
    if (s1errors.size() && sample1.size() != s1errors.size()) {
      throw RangeError("Inputs should have equal length!");
    }
    if (s2errors.size() && sample2.size() != s2errors.size()) {
      throw RangeError("Inputs should have equal length!");
    }
    const bool haveErrs1 = !s1errors.empty();
    const bool haveErrs2 = !s2errors.empty();
    const size_t N = sample1.size();
    double chi2 = 0.0;
    for (size_t i = 0; i < N; ++i) {
      double temp = sqr(sample1[i] - sample2[i]);
      if (haveErrs1 || haveErrs2) {
        // Only index the error vectors that were actually provided
        double err2 = 0.0;
        if (haveErrs1) err2 += sqr(s1errors[i]);
        if (haveErrs2) err2 += sqr(s2errors[i]);
        temp /= err2;
      }
      chi2 += temp;
    }
    return chi2;
  }


  /// @brief Calculate the error-weighted reduced chi2 statistic between two samples.
  ///
  /// The chi2 from naiveChi2() is divided by the number of entries in @a sample1.
  ///
  /// @param sample1,sample2  Values to compare.
  /// @param s1errors         Optional errors on @a sample1.
  /// @param s2errors         Optional errors on @a sample2.
  /// @return chi2 divided by the sample size.
  /// @throws RangeError if @a sample1 is empty, or from naiveChi2().
  inline double naiveChi2reduced(const std::vector<double>& sample1, const std::vector<double>& sample2,
                        const std::vector<double>& s1errors = std::vector<double>{},
                        const std::vector<double>& s2errors = std::vector<double>{}) {
    if (sample1.empty()) throw RangeError("Inputs should not have 0 length!");
    const double chi2 = naiveChi2(sample1, sample2, s1errors, s2errors);
    return chi2/sample1.size();
  }


}


#endif

BuildConfig.h

Exceptions.h

YODA::RangeError
Error for e.g. use of invalid bin ranges.
Definition Exceptions.h:34

YODA
Anonymous namespace to limit visibility.
Definition AnalysisObject.h:22

YODA::stdErr
double stdErr(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted standard error of a sample.
Definition MathUtils.h:476

YODA::HALFPI
static const double HALFPI
A pre-defined value of $\pi/2$.
Definition MathUtils.h:48

YODA::RMS
double RMS(const double sumWX2, const double sumW, const double sumW2)
Calculate the weighted RMS of a sample.
Definition MathUtils.h:495

YODA::fuzzyLessEquals
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two floating point numbers for <= with a degree of fuzziness.
Definition MathUtils.h:135

YODA::logspace
std::vector< double > logspace(size_t nbins, double xmin, double xmax, bool include_end=true)
Make a list of nbins + 1 values uniformly spaced in log(x) between xmin and xmax inclusive.
Definition MathUtils.h:286

YODA::fuzzyGtrEquals
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two numbers for >= with a degree of fuzziness.
Definition MathUtils.h:125

YODA::index_between
int index_between(const NUM &val, const std::vector< NUM > &binedges)
Return the bin index of the given value, val, given a vector of bin edges.
Definition MathUtils.h:361

YODA::isNaN
std::enable_if_t< std::is_floating_point_v< NUM >, bool > isNaN(NUM val)
Check if a number is NaN.
Definition MathUtils.h:80

YODA::linspace
std::vector< double > linspace(size_t nbins, double xmin, double xmax, bool include_end=true)
Make a list of nbins + 1 values uniformly spaced between xmin and xmax inclusive.
Definition MathUtils.h:267

YODA::covariance
double covariance(const std::vector< int > &sample1, const std::vector< int > &sample2)
Calculate the covariance (variance) between two samples.
Definition MathUtils.h:518

YODA::RangeBoundary
RangeBoundary
Definition MathUtils.h:156

YODA::CLOSED
@ CLOSED
Definition MathUtils.h:156

YODA::OPEN
@ OPEN
Definition MathUtils.h:156

YODA::HARD
@ HARD
Definition MathUtils.h:156

YODA::SOFT
@ SOFT
Definition MathUtils.h:156

YODA::naiveChi2
double naiveChi2(const std::vector< double > &sample1, const std::vector< double > &sample2, const std::vector< double > &s1errors=std::vector< double >{}, const std::vector< double > &s2errors=std::vector< double >{})
Calculate the error-weighted chi2 statistic between two samples.
Definition MathUtils.h:549

YODA::approx
double approx(double a, int n=5)
Returns a number floored at the nth decimal place.
Definition MathUtils.h:140

YODA::correlation
double correlation(const std::vector< int > &sample1, const std::vector< int > &sample2)
Calculate the correlation strength between two samples.
Definition MathUtils.h:533

YODA::add_quad
std::enable_if_t< std::conjunction_v< std::is_arithmetic< Num >... >, double > add_quad(Num ... vals)
Named number-type addition in quadrature operation.
Definition MathUtils.h:223

YODA::TWOPI
static const double TWOPI
A pre-defined value of $2\pi$.
Definition MathUtils.h:45

YODA::sqr
NUM sqr(NUM a)
Named number-type squaring operation.
Definition MathUtils.h:216

YODA::PI
static const double PI
A pre-defined value of $\pi$.
Definition MathUtils.h:42

YODA::stdDev
double stdDev(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted standard deviation of a sample.
Definition MathUtils.h:464

YODA::fuzzyEquals
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 > &&(std::is_floating_point_v< N1 >||std::is_floating_point_v< N2 >), bool > fuzzyEquals(N1 a, N2 b, double tolerance=1e-5)
Compare two numbers for equality with a degree of fuzziness.
Definition MathUtils.h:96

YODA::notNaN
std::enable_if_t< std::is_floating_point_v< NUM >, bool > notNaN(NUM val)
Check if a number is non-NaN.
Definition MathUtils.h:85

YODA::naiveChi2reduced
double naiveChi2reduced(const std::vector< double > &sample1, const std::vector< double > &sample2, const std::vector< double > &s1errors=std::vector< double >{}, const std::vector< double > &s2errors=std::vector< double >{})
Calculate the error-weighted reduced chi2 statistic between two samples.
Definition MathUtils.h:579

YODA::add
BinnedEstimate< AxisT... > add(const BinnedDbn< DbnN, AxisT... > &dbn, const BinnedEstimate< AxisT... > &est)
Calculate the addition of a BinnedDbn with a BinnedEstimate.
Definition BinnedDbn.h:1347

YODA::sign
int sign(double val)
Find the sign of a number.
Definition MathUtils.h:228

YODA::subtract
BinnedEstimate< AxisT... > subtract(const BinnedDbn< DbnN, AxisT... > &dbn, const BinnedEstimate< AxisT... > &est)
Calculate the subtraction of a BinnedEstimate from a BinnedDbn.
Definition BinnedDbn.h:1379

YODA::fuzzyEqComp
static std::function< bool(const double, const double)> fuzzyEqComp
Comparator wrapper to use with STL algorithms, e.g. std::equal etc.
Definition MathUtils.h:115

YODA::MAXDOUBLE
static const double MAXDOUBLE
Definition MathUtils.h:38

YODA::MAXINT
static const double MAXINT
Definition MathUtils.h:39

YODA::variance
double variance(const double sumWX, const double sumW, const double sumWX2, const double sumW2)
Calculate the weighted variance of a sample.
Definition MathUtils.h:427

YODA::pdfspace
std::vector< double > pdfspace(size_t nbins, double xmin, double xmax, std::function< double(double)> &fn, size_t nsample=10000)
Make a list of nbins + 1 values spaced with density ~ f(x) between xmin and end inclusive.
Definition MathUtils.h:328

YODA::isZero
std::enable_if_t< std::is_floating_point_v< NUM >, bool > isZero(NUM val, double tolerance=1e-5)
Compare a number to zero.
Definition MathUtils.h:63

YODA::mean
double mean(const std::vector< int > &sample)
Calculate the mean of a sample.
Definition MathUtils.h:397

YODA::effNumEntries
double effNumEntries(const double sumW, const double sumW2)
Calculate the effective number of entries of a sample.
Definition MathUtils.h:381

YODA::Sign
Sign
Enum for signs of numbers.
Definition MathUtils.h:51

YODA::ZERO
@ ZERO
Definition MathUtils.h:51

YODA::PLUS
@ PLUS
Definition MathUtils.h:51

YODA::MINUS
@ MINUS
Definition MathUtils.h:51

YODA::inRange
bool inRange(NUM value, NUM low, NUM high, RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN)
Determine if value is in the range low to high, for floating point numbers.
Definition MathUtils.h:164