|
YODA - Yet more Objects for Data Analysis 2.0.2
|
Go to the documentation of this file.
6#ifndef YODA_BINNEDAXIS_H
7#define YODA_BINNEDAXIS_H
31 namespace YODAConcepts {
49 using index_sig = size_t (T::*)(const typename T::EdgeT&) const;
50 using edge_sig = typename T::EdgeT (T::*)(const size_t) const;
51 using edges_sig = const std::vector<std::reference_wrapper<const typename T::EdgeT>> (T::*)() const noexcept;
52 using size_sig = size_t (T::*)() const noexcept;
54 using shared_edges_sig = std::vector<typename T::EdgeT> (T::*)(const T&) const noexcept;
57 std::is_same< index_sig, decltype(&T::index)>,
58 std::is_same< edge_sig, decltype(&T::edge)>,
60 std::is_same< size_sig, decltype(&T::size)>,
71 template < typename EdgeT>
72 using isCAxis = std::enable_if_t<std::is_floating_point<EdgeT>::value>;
76 struct hasWidth : std::false_type {};
79 struct hasWidth<std::string> : std::true_type {};
83 template < typename T, typename>
90 template < typename T, typename = void>
106 Axis( const std::vector<T>& edges);
111 Axis(std::vector<T>&& edges);
116 Axis(std::initializer_list<T>&& edges);
125 if ( this != &other) _edges = other._edges;
130 if ( this != &other) _edges = std::move(other._edges);
139 void _renderYODA(std::ostream& os) const noexcept {
141 for ( size_t i = 0; i < _edges.size(); ++i) {
143 if constexpr(std::is_same<T, std::string>::value) {
144 os << std::quoted(_edges[i]);
158 if constexpr (hasWidth<EdgeT>::value) {
159 auto it = std::max_element(_edges.begin(), _edges.end(),
160 []( const auto& a, const auto& b) {
161 return a.size() < b.size();
163 maxwidth = (*it).size();
173 size_t index( const T& x) const;
176 EdgeT edge( const size_t i) const;
179 std::vector<EdgeT> edges() const noexcept;
188 size_t size() const noexcept;
191 size_t numBins(const bool includeOverflows = false) const noexcept;
194 bool hasSameEdges(const Axis<T>& other) const noexcept;
199 std::vector<T> sharedEdges(const Axis<T>& other) const noexcept;
203 bool isSubsetEdges(const Axis<T>& other) const noexcept;
211 void fillEdges(std::vector<EdgeT>&& edges) noexcept;
216 std::vector<T> _edges;
222 template <typename T, typename U>
223 Axis<T, U>:: Axis(const std::vector<T>& edges) : Axis(std::vector<T>(edges)) {
227 "Axis<T> should implement Axis concept.");
230 template < typename T, typename U>
232 fillEdges(std::move(edges));
235 template < typename T, typename U>
237 fillEdges(std::vector<T>{edges});
240 template < typename T, typename U>
242 auto it = std::find(this->_edges.begin(), this->_edges.end(), x);
243 if (it == this->_edges.end()) return 0;
244 return it - this->_edges.begin() + 1;
247 template < typename T, typename U>
249 if (this->_edges.empty()) {
252 if (!i || i > this->_edges.size()) {
253 throw RangeError( "Invalid index, must be in range 1.." + std::to_string(this->_edges.size()));
255 return this->_edges.at(i-1);
258 template < typename T, typename U>
264 template < typename T, typename U>
266 return numBins( true);
270 template < typename T, typename U>
272 return _edges. size() + (includeOverflows? 1 : 0);
275 template < typename T, typename U>
277 return _edges. size() == other._edges.size() &&
278 std::equal(_edges.begin(), _edges.end(), other._edges.begin());
281 template < typename T, typename U>
283 std::vector<EdgeT> res;
284 const auto& otherBegin = other._edges. begin();
285 const auto& otherEnd = other._edges.end();
286 for ( const T& edge : this->_edges) {
287 if (std::find(otherBegin, otherEnd, edge) != otherEnd)
288 res.emplace_back(std::move(edge));
293 template < typename T, typename U>
295 if (_edges.size() == other._edges.size()) return hasSameEdges(other);
297 std::vector<T> edgesLhs(edges());
298 std::vector<T> edgesRhs(other.edges());
300 std::sort(edgesLhs.begin(), edgesLhs.end());
301 std::sort(edgesRhs.begin(), edgesRhs.end());
304 return std::includes(edgesLhs.begin(), edgesLhs.end(),
305 edgesRhs.begin(), edgesRhs.end());
308 template < typename T, typename U>
310 for ( auto& edge : edges) {
311 if (std::find(this->_edges.begin(),
312 this->_edges.end(), edge) == this->_edges.end())
313 _edges.emplace_back(std::move(edge));
325 template < typename T>
343 _maskedBins(std::move(other._maskedBins)),
344 _edges(std::move(other._edges)) {}
349 Axis(std::vector<EdgeT> edges);
354 Axis(std::initializer_list<T>&& edges);
359 Axis(std::vector<std::pair<EdgeT, EdgeT>> binsEdges);
361 Axis( size_t nBins, EdgeT lower, EdgeT upper);
364 if ( this != &other) {
366 _maskedBins = other._maskedBins;
367 _edges = other._edges;
373 if ( this != &other) {
374 _est = std::move(other._est);
375 _maskedBins = std::move(other._maskedBins);
376 _edges = std::move(other._edges);
410 size_t index( const EdgeT& x) const;
413 size_t size() const noexcept;
416 size_t numBins(const bool includeOverflows = false) const noexcept;
419 EdgeT edge(const size_t i) const;
422 std::vector<EdgeT> edges() const noexcept;
442 std::vector<size_t> maskedBins() const noexcept { return _maskedBins; }
454 void _renderYODA(std::ostream& os) const noexcept {
456 size_t nEdges = _edges.size() - 2;
457 for ( size_t i = 0; i < nEdges; ++i) {
477 void mergeBins(std::pair<size_t, size_t>);
484 std::vector<T> widths( const bool includeOverflows = false) const noexcept {
487 size_t offset = includeOverflows? 1 : 3;
488 std::vector<T> ret(_edges.size()-offset);
489 size_t start = 1 + !includeOverflows;
490 size_t end = _edges.size() - !includeOverflows;
491 for ( size_t i = start; i < end; ++i) {
492 ret[i-start] = _edges[i] - _edges[i-1];
499 return _edges[binNum+1] - _edges[binNum];
504 return _edges[binNum+1];
510 return _edges[binNum];
517 return std::numeric_limits<EdgeT>::min();
518 if (binNum == (numBins( true) - 1))
519 return std::numeric_limits<EdgeT>::max();
521 EdgeT minVal = min(binNum);
522 return (max(binNum) - minVal)/2 + minVal;
529 void _updateEdges(std::vector<EdgeT>&& edges) noexcept;
533 void _setEstimator() noexcept;
538 ssize_t _linsearch_forward( size_t istart, double x, size_t nmax) const noexcept;
543 ssize_t _linsearch_backward( size_t istart, double x, size_t nmax) const noexcept;
546 size_t _bisect( double x, size_t imin, size_t imax) const noexcept;
552 std::vector< size_t> _maskedBins;
555 std::vector<T> _edges;
559 template <typename T>
566 "Axis<T> should implement Axis concept.");
569 _edges = other._edges;
570 _maskedBins = other._maskedBins;
573 template < typename T>
576 throw(std::logic_error( "Upper bound should be larger than lower."));
577 _edges.resize(nBins+1+2);
578 double step = (upper - lower) / nBins;
580 _edges[0] = -std::numeric_limits<double>::infinity();
584 for( size_t i = 2; i < _edges.size()-1; i++) {
585 _edges[i] = _edges[i-1] + step;
588 _edges[_edges.size()-1] = std::numeric_limits<double>::infinity();
593 template < typename T>
595 if (binsEdges.empty()) throw BinningError( "Expected at least one discrete edge");
597 std::sort(binsEdges.begin(), binsEdges.end(), []( auto &left, auto &right) {
598 return left.first < right.first;
601 _edges.resize(binsEdges.size()*2+2);
603 _edges[0] = -std::numeric_limits<double>::infinity();
616 for ( const auto& pair : binsEdges) {
618 if (i == 1 && std::isinf(pair.first) && pair.first < 0) {
619 _edges[i++] = pair.second;
623 _edges[i++] = pair.second;
626 if (i != 1 && pair.first > _edges[i-1]) {
628 _maskedBins.emplace_back(i-1);
630 _edges[i++] = pair.first;
631 _edges[i++] = pair.second;
634 _edges[i] = std::numeric_limits<double>::infinity();
641 template < typename T>
643 std::sort(edges.begin(), edges.end());
644 edges.erase( std::unique(edges.begin(), edges.end()), edges.end() );
646 _updateEdges(std::move(edges));
651 template < typename T>
653 std::vector<T> edges{edgeList};
654 std::sort(edges.begin(), edges.end());
655 edges.erase( std::unique(edges.begin(), edges.end()), edges.end() );
657 _updateEdges(std::move(edges));
662 template < typename T>
664 if (_edges.size() <= 2) throw BinningError( "Axis initialised without specifying edges");
666 if (std::isinf(x)) { return x < 0? 0 : _edges.size() - 2; }
668 size_t index = std::min(_est->estindex(x), _edges.size()-2);
670 if (x >= this->_edges[index] && x < this->_edges[index+1]) return index;
673 if (x > this->_edges[index]) {
674 const ssize_t newindex = _linsearch_forward(index, x, SEARCH_SIZElc);
675 index = (newindex > 0) ? newindex : _bisect(x, index, this->_edges.size()-1);
676 } else if (x < this->_edges[index]) {
677 const ssize_t newindex = _linsearch_backward(index, x, SEARCH_SIZElc);
678 index = (newindex > 0) ? newindex : _bisect(x, 0, index+1);
681 assert(x >= this->_edges[index] && (x < this->_edges[index+1] || std::isinf(x)));
685 template < typename T>
687 return numBins( true);
690 template < typename T>
692 if (_edges.size() < 3) return includeOverflows? 1 : 0;
693 return this->_edges. size() - (includeOverflows? 1 : 3);
696 template < typename T>
698 return this->_edges.at(i);
701 template < typename T>
706 template < typename T>
708 if (this->numBins( true) != other.numBins( true)) return false;
709 for ( size_t i = 1; i < this->numBins( true) - 1; i++) {
711 if (! fuzzyEquals(edge(i), other.edge(i))) return false;
716 template < typename T>
718 std::vector<T> intersection;
721 if (_edges.size() > 2 && other._edges.size() > 2) {
722 std::set_intersection(std::next(_edges.begin()), std::prev(_edges.end()),
723 std::next(other._edges.begin()), std::prev(other._edges.end()),
724 std::back_inserter(intersection));
728 rtn.reserve(intersection.size()+2);
730 rtn.emplace_back(-std::numeric_limits< Axis<T, isCAxis<T>>::EdgeT>::infinity());
731 rtn.insert(std::next(rtn.begin()),
732 std::make_move_iterator(intersection.begin()),
733 std::make_move_iterator(intersection.end()));
734 rtn.emplace_back(std::numeric_limits< Axis<T, isCAxis<T>>::EdgeT>::infinity());
739 template < typename T>
741 if (_edges.size() == other._edges.size()) return hasSameEdges(other);
744 if (_edges.size() > 2 && other._edges.size() > 2) {
746 return std::includes(std::next(_edges.begin()), std::prev(_edges.end()),
747 std::next(other._edges.begin()), std::prev(other._edges.end()));
755 template < typename T>
757 if (_edges.size() <= 2) throw BinningError( "Axis initialised without specifying edges");
758 if (mergeRange.second < mergeRange.first) throw RangeError( "Upper index comes before lower index");
759 if (mergeRange.second >= numBins( true)) throw RangeError( "Upper index exceeds last visible bin");
760 _edges.erase(_edges.begin()+mergeRange.first+1, _edges.begin() + mergeRange.second + 1);
763 std::vector<size_t> toRemove;
764 size_t mrange = mergeRange.second - mergeRange.first;
765 for ( size_t i = 0; i < _maskedBins.size(); ++i) {
766 if (_maskedBins[i] > mergeRange.second) _maskedBins[i] -= mrange;
767 else if (_maskedBins[i] >= mergeRange.first) toRemove.push_back(_maskedBins[i]);
769 if (toRemove.size()) {
771 std::remove_if(_maskedBins.begin(), _maskedBins.end(), [&]( const auto& idx) {
772 return std::find(toRemove.begin(), toRemove.end(), idx) != toRemove.end();
773 }), _maskedBins.end());
778 template < typename T>
784 _edges.emplace_back(-std::numeric_limits< Axis<T, isCAxis<T>>::EdgeT>::infinity());
785 _edges.insert(std::next(_edges.begin()),
786 std::make_move_iterator(edges.begin()),
787 std::make_move_iterator(edges.end()));
788 _edges.emplace_back(std::numeric_limits< Axis<T, isCAxis<T>>::EdgeT>::infinity());
791 template < typename T>
792 void Axis<T, isCAxis<T>>::_setEstimator() noexcept {
793 if (_edges.empty()) {
794 _est = std::make_shared<LinBinEstimator>(0, 0, 1);
795 } else if (_edges.front() <= 0.0) {
796 _est = std::make_shared<LinBinEstimator>(_edges.size()-1, _edges.front(), _edges.back());
798 LinBinEstimator linEst(_edges.size()-1, _edges.front(), _edges.back());
799 LogBinEstimator logEst(_edges.size()-1, _edges.front(), _edges.back());
802 double logsum = 0, linsum = 0;
803 for ( size_t i = 0; i < _edges.size(); i++) {
804 logsum += logEst(_edges[i]) - i;
805 linsum += linEst(_edges[i]) - i;
807 const double log_avg = logsum / _edges.size();
808 const double lin_avg = linsum / _edges.size();
813 if (log_avg < lin_avg) {
814 _est = std::make_shared<LogBinEstimator>(logEst);
816 _est = std::make_shared<LinBinEstimator>(linEst);
822 template < typename T>
823 ssize_t Axis<T, isCAxis<T>>::_linsearch_forward( size_t istart, double x, size_t nmax) const noexcept {
824 assert(x >= this->_edges[istart]);
825 for ( size_t i = 0; i < nmax; i++) {
826 const size_t j = istart + i + 1;
827 if (j > this->_edges.size()-1) return -1;
828 if (x < this->_edges[j]) {
829 assert(x >= this->_edges[j-1] && (x < this->_edges[j] || std::isinf(x)));
836 template < typename T>
837 ssize_t Axis<T, isCAxis<T>>::_linsearch_backward( size_t istart, double x, size_t nmax) const noexcept {
838 assert(x < this->_edges[istart]);
839 for ( size_t i = 0; i < nmax; i++) {
840 const int j = istart - i - 1;
841 if (j < 0) return -1;
842 if (x >= this->_edges[j]) {
843 assert(x >= this->_edges[j] && (x < this->_edges[j+1] || std::isinf(x)));
850 template < typename T>
851 size_t Axis<T, isCAxis<T>>::_bisect( double x, size_t imin, size_t imax) const noexcept {
852 size_t len = imax - imin;
853 while (len >= BISECT_LINEAR_THRESHOLDlc) {
854 const size_t half = len >> 1;
855 const size_t imid = imin + half;
856 if (x >= this->_edges[imid]) {
857 if (x < this->_edges[imid+1]) return imid;
864 assert(x >= this->_edges[imin] && (x < this->_edges[imax] || std::isinf(x)));
865 return _linsearch_forward(imin, x, BISECT_LINEAR_THRESHOLDlc);
std::string type() const noexcept Returns a string representation of EdgeT.
std::vector< T > ContainerT
Axis(Axis< T, CAxisT > &&other)
Axis() Nullary constructor for unique pointers etc.
EdgeT mid(size_t binNum) const noexcept Mid of bin side on this axis.
EdgeT width(size_t binNum) const noexcept Width of bin side on this axis.
const_iterator end() const Returns the const end iterator for the edges container.
std::vector< T > widths(const bool includeOverflows=false) const noexcept
typename ContainerT::const_iterator const_iterator
Axis & operator=(const Axis &other)
std::vector< size_t > maskedBins() const noexcept Returns the masked indices.
EdgeT min(size_t binNum) const noexcept Min of bin side on this axis.
Axis & operator=(Axis &&other)
EdgeT max(size_t binNum) const noexcept Max of bin side on this axis.
Discrete axis with edges of non-floating-point-type.
std::string type() const noexcept Returns a string representation of EdgeT.
Axis & operator=(const Axis &other)
std::vector< T > ContainerT
size_t size() const noexcept Returns number of edges + 1 for this Axis.
const_iterator begin() const Returns the const begin iterator for the edges container.
Axis() Nullary constructor for unique pointers etc.
const_iterator end() const Returns the const end iterator for the edges container.
Axis(const Axis< T > &other) Copy constructs Axis.
int maxEdgeWidth() const noexcept
Axis & operator=(Axis &&other)
Axis(Axis< T > &&other) Move constructs Axis.
typename ContainerT::const_iterator const_iterator
Error for general binning problems.
Error for e.g. use of invalid bin ranges.
Anonymous namespace to limit visibility.
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 >, bool > fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5) Compare two numbers for >= with a degree of fuzziness.
const size_t SEARCH_SIZElc
std::enable_if_t< std::is_arithmetic_v< N1 > &&std::is_arithmetic_v< N2 > &&(std::is_floating_point_v< N1 >||std::is_floating_point_v< N2 >), bool > fuzzyEquals(N1 a, N2 b, double tolerance=1e-5) Compare two numbers for equality with a degree of fuzziness.
const size_t BISECT_LINEAR_THRESHOLDlc
Returns the type ID as a character sequence.
const std::vector< std::reference_wrapper< const typename T::EdgeT > >(T::*)() const noexcept edges_sig
typename T::EdgeT(T::*)(const size_t) const edge_sig
size_t(T::*)(const typename T::EdgeT &) const index_sig Function signatures.
bool(T::*)(const T &) const noexcept same_edges_sig
std::vector< typename T::EdgeT >(T::*)(const T &) const noexcept shared_edges_sig
size_t(T::*)() const noexcept size_sig
|