diff --git a/CMakeLists.txt b/CMakeLists.txt index 73fcc539a5d704f129bb780b9b66d293ef3b4579..5bad596a13f59716282e78313b2e60f10a43f2a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ SET (${PROJECT_NAME}_HEADERS include/hpp/statistics/bin.hh include/hpp/statistics/success-bin.hh include/hpp/statistics/operators.hh + include/hpp/statistics/distribution.hh ) # Add dependency toward hpp-model library in pkg-config file. diff --git a/include/hpp/statistics/distribution.hh b/include/hpp/statistics/distribution.hh new file mode 100644 index 0000000000000000000000000000000000000000..b75f450bdb6fb3ff5f888998d0ea6f67b97f2b92 --- /dev/null +++ b/include/hpp/statistics/distribution.hh @@ -0,0 +1,124 @@ +// Copyright (c) 2014, LAAS-CNRS +// Authors: Joseph Mirabel (joseph.mirabel@laas.fr) +// +// This file is part of hpp-statistics. +// hpp-statistics is free software: you can redistribute it +// and/or modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation, either version +// 3 of the License, or (at your option) any later version. +// +// hpp-statistics is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Lesser Public License for more details. You should have +// received a copy of the GNU Lesser General Public License along with +// hpp-statistics. If not, see <http://www.gnu.org/licenses/>. + +#ifndef HPP_STATISTICS_DISTRIBUTION_HH +# define HPP_STATISTICS_DISTRIBUTION_HH + +# include <vector> +# include <stdlib.h> + +namespace hpp { + namespace statistics { + template < typename Value_t > + class DiscreteDistribution + { + public: + typedef double Proba_t; + typedef unsigned int Weight_t; + typedef typename std::pair < Weight_t, Value_t> ProbaTPair; + typedef typename std::vector < ProbaTPair >::iterator iterator; + typedef typename std::vector < ProbaTPair >::const_iterator const_iterator; + + DiscreteDistribution () : values_(), cumulative_weights_() {} + + Value_t operator() () const { + assert (values_.size() > 0); + Weight_t r = rand() % cumulative_weights_.back(); + return operator() (r); + } + Value_t operator() (const Proba_t& p) const { + return operator() ((Weight_t)(p * cumulative_weights_.back ())); + } + Value_t operator() (const Weight_t& w) const { + assert (values_.size() > 0); + return values_[dichotomy (w % cumulative_weights_.back())].second; + } + + /// Update the distribution. + /// If v is already in the set, then update its weight. + /// Otherwise, insert it. + void insert (Value_t v, Weight_t w = 1) { + iterator itv = values_.begin (); + WeightsIterator itcumul = cumulative_weights_.begin(); + Weight_t c = 0; + while (itv != values_.end()) { + if (itv->second == v) + break; + c = *itcumul; + itv++; itcumul++; + } + if (itv == values_.end ()) { + values_.push_back (ProbaTPair(w,v)); + cumulative_weights_.push_back (c + w); + return; + } + itv->first = w; + while (itv != values_.end()) { + c = c + itv->first; + *itcumul = c; + itv++; itcumul++; + } + } + + /// Return the probabilities. + std::vector < Proba_t > probabilities () const { + std::vector < Proba_t > proba (values_.size()); + Proba_t total = cumulative_weights_.back(); + for (size_t i = 0; i < values_.size (); i++) + proba[i] = values_[i].first / total; + return proba; + } + + /// \name Iterators + /// Iterate on the values. + /// \{ + iterator begin () { + return values_.begin (); + } + const_iterator begin () const { + return values_.begin (); + } + iterator end () { + return values_.end (); + } + const_iterator end () const { + return values_.end (); + } + /// \} + private: + size_t dichotomy (const Weight_t& r) const { + size_t l = 0, h = values_.size () - 1, m; + while (l != h) { + m = (l + h) / 2; + if (cumulative_weights_[m] < r) + l = m; + else if (cumulative_weights_[m] > r) + h = m; + else + return m; + } + return l; + } + + std::vector < ProbaTPair > values_; + std::vector < Weight_t > cumulative_weights_; + + typedef std::vector < Weight_t >::iterator WeightsIterator; + }; + } // namespace statistics +} // namespace hpp + +#endif // HPP_STATISTICS_DISTRIBUTION_HH diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4c27c4abf68b6a1e89f1c94afa0698f9ca390efc..a6d99391ee370b40ca0680b572b426040c37df36 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -39,3 +39,4 @@ MACRO(ADD_TESTCASE NAME GENERATED) ENDMACRO(ADD_TESTCASE) ADD_TESTCASE (test-successstatistics FALSE) +ADD_TESTCASE (test-distribution FALSE) diff --git a/tests/test-distribution.cc b/tests/test-distribution.cc new file mode 100644 index 0000000000000000000000000000000000000000..cb8e780095a51aee3ec70b707e6e9fc8ec650af9 --- /dev/null +++ b/tests/test-distribution.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2014, LAAS-CNRS +// Authors: Joseph Mirabel (joseph.mirabel@laas.fr) +// +// This file is part of hpp-statistics. +// hpp-statistics is free software: you can redistribute it +// and/or modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation, either version +// 3 of the License, or (at your option) any later version. +// +// hpp-statistics is distributed in the hope that it will be +// useful, but WITHOUT ANY WARRANTY; without even the implied warranty +// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Lesser Public License for more details. You should have +// received a copy of the GNU Lesser General Public License along with +// hpp-statistics. If not, see <http://www.gnu.org/licenses/>. + +#include <iostream> +#include <cfloat> +#include <time.h> + +#include "hpp/statistics/distribution.hh" + +int main () +{ + /* initialize random seed: */ + srand (time(NULL)); + + std::cout.precision(15); + + using namespace hpp::statistics; + typedef DiscreteDistribution <double> Distrib; + Distrib dd; + const int nbValues = 4; + const double values[] = {1.0, 3.0, 2.0, 4.5}; + const Distrib::Weight_t weight[] = + {1, 23, 99, 6}; + Distrib::Weight_t total_weight = 0; + dd.insert (values[0], 9.0); + for (int i = 0; i < nbValues; i++) { + dd.insert (values[i], weight[i]); + total_weight += weight[i]; + } + + std::vector < Distrib::Proba_t > p = dd.probabilities (); + + for (size_t i = 0; i < p.size (); i++) + if (p[i] - (double)(weight[i] / (double)total_weight) > DBL_EPSILON + || p[i] - (double)(weight[i] / (double)total_weight) > DBL_EPSILON) { + std::cout << "p[" << i << "] = " << p[i] << std::endl + << "weight[" << i << "] = " << weight[i] << std::endl + << "Total weight = " << total_weight << std::endl + << "Difference = " << p[i] - (double)(weight[i] / (double)total_weight) << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +}