/historical/stattools.py
Python | 173 lines | 84 code | 22 blank | 67 comment | 16 complexity | 9eb6107e9be5cf18cfb50dd563592968 MD5 | raw file
#
# $LicenseInfo:firstyear=2010&license=mit$
#
# Copyright (c) 2010, Linden Research, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

"""Simple statistics gathering tools

Classes:
    StatValue - accumulate statistics about a value
    StatWindow - accumulate statistics about the most recent samples of a value

Exceptions:
    StatErrorNoSamples

"""

import math


class StatErrorNoSamples(Exception):
    """Raised when requesting a statistic on a value with no samples"""
    pass


class StatValue(object):
    """Accumulate statistics on a value

    Only running totals (count, min, max, sum, sum of squares) are kept;
    the individual samples are not stored.

    Methods:
        reset   - discard all samples
        sample  - sample the variable
        count   - return the number of samples
        min     - return the minimum sample
        max     - return the maximum sample
        average - return the average of samples
        stddev  - return the standard deviation of samples
        format  - return a string suitable for output

    min, max, average and stddev raise StatErrorNoSamples when no sample
    has been recorded since construction or the last reset.

    """

    def __init__(self):
        self.reset()

    def _must_have_samples(self):
        """Raise StatErrorNoSamples if nothing has been sampled yet"""
        if self._n == 0:
            raise StatErrorNoSamples()

    def reset(self):
        """Discard all samples and return to the initial, empty state"""
        self._n = 0
        self._min = None
        self._max = None
        self._sum = 0.0
        self._sumsq = 0.0

    def sample(self, x):
        """Sample the variable"""
        self._n += 1
        if self._min is None or self._min > x:
            self._min = x
        if self._max is None or self._max < x:
            self._max = x
        self._sum += x
        self._sumsq += x * x

    def count(self):
        """Return the number of samples"""
        return self._n

    def min(self):
        """Return the minimum sample"""
        self._must_have_samples()
        return self._min

    def max(self):
        """Return the maximum sample"""
        self._must_have_samples()
        return self._max

    def average(self):
        """Return the average of samples"""
        self._must_have_samples()
        return self._sum / self._n

    def stddev(self):
        """Return the standard deviation of samples

        This is the population standard deviation (the divisor is the
        number of samples), computed from the running sum and sum of squares.

        """
        self._must_have_samples()
        avg = self._sum / self._n
        variance = self._sumsq / self._n - avg * avg
        # Floating-point rounding can leave a mathematically zero variance
        # slightly negative (e.g. several identical samples), which would
        # make math.sqrt raise ValueError.  Clamp it to zero instead.
        if variance < 0.0:
            variance = 0.0
        return math.sqrt(variance)

    def format(self, count_fmt="%6d", value_fmt="%12f"):
        """Return a string suitable for output

        The format will be five columns: count, min, avg., max, std.dev.,
        with some amount of punctuation separating them.  For example
        (exact column widths depend on the format strings):

            n=     3:  16.00,  42.63,  83.80, sd=  29.53

        The numeric format of the first column can be controlled by supplying
        a format string as the count_fmt argument.  The format of the
        remaining columns is governed by the value_fmt argument.  See the
        default values for examples.

        When there are no samples only the count column is produced.

        """

        if self._n == 0:
            return 'n=' + (count_fmt % 0) + ':'

        fmt = ("n=%s: %s, %s, %s, sd=%s" %
               (count_fmt, value_fmt, value_fmt, value_fmt, value_fmt))
        return (fmt %
                (self._n, self._min, self.average(), self._max, self.stddev()))


class StatWindow(StatValue):
    """Accumulate statistics on the most recent samples of a value

    Behaves like StatValue, but keeps the individual samples so that the
    oldest ones can be discarded once more than `window` have been recorded.

    Methods (in addition to those of StatValue):
        setwindow - change the window size, dropping the oldest samples
                    if more than the new size are currently held

    """

    def __init__(self, window):
        StatValue.__init__(self)    # will call self.reset()
        self._window = window

    def reset(self):
        """Discard all samples; the window size is left unchanged"""
        StatValue.reset(self)
        self._samples = []

    def _reduce_to(self, count):
        """Drop the oldest samples so that at most `count` remain"""
        count_to_drop = self._n - count
        if count_to_drop <= 0:
            return
        kept = self._samples[count_to_drop:]
        if count_to_drop >= count:
            # faster to just replay the remaining samples
            self.reset()
            for x in kept:
                # Use the base-class accumulator directly: the overridden
                # sample() would call back into _reduce_to for every value.
                StatValue.sample(self, x)
            self._samples = kept
        else:
            # faster to undo the dropped samples
            dropped_min = False
            dropped_max = False
            for y in self._samples[:count_to_drop]:
                if y == self._min:
                    dropped_min = True
                if y == self._max:
                    dropped_max = True
                self._sum -= y
                self._sumsq -= y * y
            self._n -= count_to_drop
            self._samples = kept
            # An extreme only needs recomputing if a sample equal to it
            # was among those dropped.
            if dropped_min:
                self._min = min(self._samples)
            if dropped_max:
                self._max = max(self._samples)

    def setwindow(self, window):
        """Set the window size, discarding the oldest excess samples"""
        self._window = window
        self._reduce_to(window)

    def sample(self, x):
        """Sample the variable, first making room for it in the window"""
        self._reduce_to(self._window - 1)
        self._samples.append(x)
        StatValue.sample(self, x)