PageRenderTime 35ms CodeModel.GetById 15ms app.highlight 15ms RepoModel.GetById 1ms app.codeStats 0ms

/Src/Dependencies/Boost/boost/accumulators/statistics/weighted_p_square_cumulative_distribution.hpp

http://hadesmem.googlecode.com/
C++ Header | 262 lines | 172 code | 36 blank | 54 comment | 22 complexity | cbdf8de635285b6d3db9ced6d74981ee MD5 | raw file
  1///////////////////////////////////////////////////////////////////////////////
  2// weighted_p_square_cumulative_distribution.hpp
  3//
  4//  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
  5//  Software License, Version 1.0. (See accompanying file
  6//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7
  8#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006
  9#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006
 10
 11#include <vector>
 12#include <functional>
 13#include <boost/parameter/keyword.hpp>
 14#include <boost/mpl/placeholders.hpp>
 15#include <boost/range.hpp>
 16#include <boost/accumulators/framework/accumulator_base.hpp>
 17#include <boost/accumulators/framework/extractor.hpp>
 18#include <boost/accumulators/numeric/functional.hpp>
 19#include <boost/accumulators/framework/parameters/sample.hpp>
 20#include <boost/accumulators/statistics_fwd.hpp>
 21#include <boost/accumulators/statistics/count.hpp>
 22#include <boost/accumulators/statistics/sum.hpp>
 23#include <boost/accumulators/statistics/p_square_cumulative_distribution.hpp> // for named parameter p_square_cumulative_distribution_num_cells
 24
 25namespace boost { namespace accumulators
 26{
 27
 28namespace impl
 29{
 30    ///////////////////////////////////////////////////////////////////////////////
 31    // weighted_p_square_cumulative_distribution_impl
 32    //  cumulative distribution calculation (as histogram)
 33    /**
 34        @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm for weighted samples
 35
 36        A histogram of the sample cumulative distribution is computed dynamically without storing samples
 37        based on the \f$ P^2 \f$ algorithm for weighted samples. The returned histogram has a specifiable
 38        amount (num_cells) equiprobable (and not equal-sized) cells.
 39
 40        Note that applying importance sampling results in regions to be more and other regions to be less
 41        accurately estimated than without importance sampling, i.e., with unweighted samples.
 42
 43        For further details, see
 44
 45        R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and
 46        histograms without storing observations, Communications of the ACM,
 47        Volume 28 (October), Number 10, 1985, p. 1076-1085.
 48
 49        @param p_square_cumulative_distribution_num_cells
 50    */
 51    template<typename Sample, typename Weight>
 52    struct weighted_p_square_cumulative_distribution_impl
 53      : accumulator_base
 54    {
 55        typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
 56        typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type;
 57        typedef std::vector<std::pair<float_type, float_type> > histogram_type;
 58        typedef std::vector<float_type> array_type;
 59        // for boost::result_of
 60        typedef iterator_range<typename histogram_type::iterator> result_type;
 61
 62        template<typename Args>
 63        weighted_p_square_cumulative_distribution_impl(Args const &args)
 64          : num_cells(args[p_square_cumulative_distribution_num_cells])
 65          , heights(num_cells + 1)
 66          , actual_positions(num_cells + 1)
 67          , desired_positions(num_cells + 1)
 68          , histogram(num_cells + 1)
 69          , is_dirty(true)
 70        {
 71        }
 72
 73        template<typename Args>
 74        void operator ()(Args const &args)
 75        {
 76            this->is_dirty = true;
 77
 78            std::size_t cnt = count(args);
 79            std::size_t sample_cell = 1; // k
 80            std::size_t b = this->num_cells;
 81
 82            // accumulate num_cells + 1 first samples
 83            if (cnt <= b + 1)
 84            {
 85                this->heights[cnt - 1] = args[sample];
 86                this->actual_positions[cnt - 1] = args[weight];
 87
 88                // complete the initialization of heights by sorting
 89                if (cnt == b + 1)
 90                {
 91                    //std::sort(this->heights.begin(), this->heights.end());
 92
 93                    // TODO: we need to sort the initial samples (in heights) in ascending order and
 94                    // sort their weights (in actual_positions) the same way. The following lines do
 95                    // it, but there must be a better and more efficient way of doing this.
 96                    typename array_type::iterator it_begin, it_end, it_min;
 97
 98                    it_begin = this->heights.begin();
 99                    it_end   = this->heights.end();
100
101                    std::size_t pos = 0;
102
103                    while (it_begin != it_end)
104                    {
105                        it_min = std::min_element(it_begin, it_end);
106                        std::size_t d = std::distance(it_begin, it_min);
107                        std::swap(*it_begin, *it_min);
108                        std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
109                        ++it_begin;
110                        ++pos;
111                    }
112
113                    // calculate correct initial actual positions
114                    for (std::size_t i = 1; i < b; ++i)
115                    {
116                        this->actual_positions[i] += this->actual_positions[i - 1];
117                    }
118                }
119            }
120            else
121            {
122                // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
123                if (args[sample] < this->heights[0])
124                {
125                    this->heights[0] = args[sample];
126                    this->actual_positions[0] = args[weight];
127                    sample_cell = 1;
128                }
129                else if (this->heights[b] <= args[sample])
130                {
131                    this->heights[b] = args[sample];
132                    sample_cell = b;
133                }
134                else
135                {
136                    typename array_type::iterator it;
137                    it = std::upper_bound(
138                        this->heights.begin()
139                      , this->heights.end()
140                      , args[sample]
141                    );
142
143                    sample_cell = std::distance(this->heights.begin(), it);
144                }
145
146                // increment positions of markers above sample_cell
147                for (std::size_t i = sample_cell; i < b + 1; ++i)
148                {
149                    this->actual_positions[i] += args[weight];
150                }
151
152                // determine desired marker positions
153                for (std::size_t i = 1; i < b + 1; ++i)
154                {
155                    this->desired_positions[i] = this->actual_positions[0]
156                                               + numeric::average((i-1) * (sum_of_weights(args) - this->actual_positions[0]), b);
157                }
158
159                // adjust heights of markers 2 to num_cells if necessary
160                for (std::size_t i = 1; i < b; ++i)
161                {
162                    // offset to desire position
163                    float_type d = this->desired_positions[i] - this->actual_positions[i];
164
165                    // offset to next position
166                    float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
167
168                    // offset to previous position
169                    float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
170
171                    // height ds
172                    float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
173                    float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
174
175                    if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) )
176                    {
177                        short sign_d = static_cast<short>(d / std::abs(d));
178
179                        // try adjusting heights[i] using p-squared formula
180                        float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm );
181
182                        if ( this->heights[i - 1] < h && h < this->heights[i + 1] )
183                        {
184                            this->heights[i] = h;
185                        }
186                        else
187                        {
188                            // use linear formula
189                            if (d>0)
190                            {
191                                this->heights[i] += hp;
192                            }
193                            if (d<0)
194                            {
195                                this->heights[i] -= hm;
196                            }
197                        }
198                        this->actual_positions[i] += sign_d;
199                    }
200                }
201            }
202        }
203
204        template<typename Args>
205        result_type result(Args const &args) const
206        {
207            if (this->is_dirty)
208            {
209                this->is_dirty = false;
210
211                // creates a vector of std::pair where each pair i holds
212                // the values heights[i] (x-axis of histogram) and
213                // actual_positions[i] / sum_of_weights (y-axis of histogram)
214
215                for (std::size_t i = 0; i < this->histogram.size(); ++i)
216                {
217                    this->histogram[i] = std::make_pair(this->heights[i], numeric::average(this->actual_positions[i], sum_of_weights(args)));
218                }
219            }
220
221            return make_iterator_range(this->histogram);
222        }
223
224    private:
225        std::size_t num_cells;            // number of cells b
226        array_type  heights;              // q_i
227        array_type  actual_positions;     // n_i
228        array_type  desired_positions;    // n'_i
229        mutable histogram_type histogram; // histogram
230        mutable bool is_dirty;
231    };
232
233} // namespace detail
234
235///////////////////////////////////////////////////////////////////////////////
236// tag::weighted_p_square_cumulative_distribution
237//
238namespace tag
239{
240    struct weighted_p_square_cumulative_distribution
241      : depends_on<count, sum_of_weights>
242      , p_square_cumulative_distribution_num_cells
243    {
244        typedef accumulators::impl::weighted_p_square_cumulative_distribution_impl<mpl::_1, mpl::_2> impl;
245    };
246}
247
248///////////////////////////////////////////////////////////////////////////////
249// extract::weighted_p_square_cumulative_distribution
250//
251namespace extract
252{
253    extractor<tag::weighted_p_square_cumulative_distribution> const weighted_p_square_cumulative_distribution = {};
254
255    BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_cumulative_distribution)
256}
257
258using extract::weighted_p_square_cumulative_distribution;
259
260}} // namespace boost::accumulators
261
262#endif