PageRenderTime 1192ms CodeModel.GetById 887ms app.highlight 18ms RepoModel.GetById 269ms app.codeStats 0ms

/apiary/tools/stattools.py

https://bitbucket.org/lindenlab/apiary/
Python | 173 lines | 84 code | 22 blank | 67 comment | 16 complexity | 9eb6107e9be5cf18cfb50dd563592968 MD5 | raw file
  1#
  2# $LicenseInfo:firstyear=2010&license=mit$
  3# 
  4# Copyright (c) 2010, Linden Research, Inc.
  5# 
  6# Permission is hereby granted, free of charge, to any person obtaining a copy
  7# of this software and associated documentation files (the "Software"), to deal
  8# in the Software without restriction, including without limitation the rights
  9# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10# copies of the Software, and to permit persons to whom the Software is
 11# furnished to do so, subject to the following conditions:
 12# 
 13# The above copyright notice and this permission notice shall be included in
 14# all copies or substantial portions of the Software.
 15# 
 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22# THE SOFTWARE.
 23# $/LicenseInfo$
 24#
 25
 26"""Simple statistics gathering tools
 27
Classes:
    StatValue - accumulate statistics about a value
    StatWindow - accumulate statistics about the most recent samples of a value
 30
 31Exceptions:
 32    StatErrorNoSamples
 33
 34"""
 35
 36import math
 37
 38class StatErrorNoSamples(Exception):
 39    """Raised when requesting a statistic on a value with no samples"""
 40    pass
 41
 42class StatValue(object):
 43    """Accumulate statistics on a value
 44    
 45    Methods:
 46        sample - sample the variable
 47        count - return the number of samples
 48        min - return the minimum sample
 49        max - return the maximum sample
 50        average - return the average of samples
 51        stddev - return the standard deviation of samples
 52        format - return a string suitable for output
 53    
 54    """
 55    
 56    def __init__(self):
 57        self.reset()
 58    
 59    def _must_have_samples(self):
 60        if self._n == 0:
 61            raise StatErrorNoSamples()
 62    
 63    def reset(self):
 64        self._n = 0
 65        self._min = None
 66        self._max = None
 67        self._sum = 0.0
 68        self._sumsq = 0.0
 69
 70    def sample(self, x):
 71        """Sample the variable"""
 72        self._n += 1
 73        if self._min is None or self._min > x:
 74            self._min = x
 75        if self._max is None or self._max < x:
 76            self._max = x
 77        self._sum += x
 78        self._sumsq += x*x
 79    
 80    def count(self):
 81        """Return the number of samples"""
 82        return self._n
 83    
 84    def min(self):
 85        """Return the minimum sample"""
 86        self._must_have_samples()
 87        return self._min
 88    
 89    def max(self):
 90        """Return the maximum sample"""
 91        self._must_have_samples()
 92        return self._max
 93    
 94    def average(self):
 95        """Return the average of samples"""
 96        self._must_have_samples()
 97        return self._sum / self._n
 98    
 99    def stddev(self):
100        """Return the average of samples"""
101        self._must_have_samples()
102        avg = self._sum / self._n
103        return math.sqrt(self._sumsq / self._n - avg*avg)
104    
105    def format(self, count_fmt="%6d", value_fmt="%12f"):
106        """Return a string suitable for output
107        
108        The format will be five columns: count, min, avg., max, std.dev.,
109        with some amount of punctuation separating them. For example:
110
111            n=   3:  16.00,  42.63,  83.80, sd= 29.53        
112        
113        The numeric formats of the first column can be controlled by supplying
114        a format string as the count_fmt argument. The format of the remaining
115        columns is governed by the value_fmt argument. See the default values
116        for examples.
117        
118        """
119        
120        if self._n == 0:
121            return 'n=' + (count_fmt % 0) + ':'
122            
123        fmt = ("n=%s: %s, %s, %s, sd=%s" % 
124                (count_fmt, value_fmt, value_fmt, value_fmt, value_fmt))
125        return (fmt %
126            (self._n, self._min, self.average(), self._max, self.stddev()))
127
128
class StatWindow(StatValue):
    """Accumulate statistics on the most recent samples of a value

    Works like StatValue, except that the individual samples are retained
    and the oldest ones are forgotten as new ones arrive, so the statistics
    cover at most the last `window` samples.

    Methods (in addition to those of StatValue):
        setwindow - change the window size

    """

    def __init__(self, window):
        StatValue.__init__(self) # will call self.reset()
        self._window = window

    def reset(self):
        """Discard all samples; the window size is left unchanged"""
        StatValue.reset(self)
        self._samples = []

    def _reduce_to(self, count):
        """Forget the oldest samples so that at most `count` remain"""
        count_to_drop = self._n - count
        if count_to_drop <= 0:
            return
        kept = self._samples[count_to_drop:]
        # Rebuild the accumulators from the retained samples instead of
        # subtracting the dropped ones.  Subtraction lets floating point
        # error build up, and when a large sample leaves the window it
        # cancels away whatever the smaller samples had contributed: with
        # a window of 3, sampling 1e17, 1.0, 1.0, 1.0 used to report an
        # average of 1/3 for a window holding three 1.0 samples.
        self._samples = kept
        self._n = len(kept)
        self._sum = sum(kept, 0.0)
        self._sumsq = sum((x * x for x in kept), 0.0)
        if kept:
            self._min = min(kept)
            self._max = max(kept)
        else:
            self._min = None
            self._max = None

    def setwindow(self, window):
        """Change the window size, forgetting the oldest excess samples"""
        self._window = window
        self._reduce_to(window)

    def sample(self, x):
        """Sample the variable, first forgetting samples that no longer fit"""
        # Make room for the new sample before recording it.
        self._reduce_to(self._window - 1)
        self._samples.append(x)
        StatValue.sample(self, x)