PageRenderTime 29ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/apiary/tools/stattools.py

https://bitbucket.org/lindenlab/apiary/
Python | 173 lines | 84 code | 22 blank | 67 comment | 15 complexity | 9eb6107e9be5cf18cfb50dd563592968 MD5 | raw file
  1. #
  2. # $LicenseInfo:firstyear=2010&license=mit$
  3. #
  4. # Copyright (c) 2010, Linden Research, Inc.
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining a copy
  7. # of this software and associated documentation files (the "Software"), to deal
  8. # in the Software without restriction, including without limitation the rights
  9. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. # copies of the Software, and to permit persons to whom the Software is
  11. # furnished to do so, subject to the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be included in
  14. # all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. # THE SOFTWARE.
  23. # $/LicenseInfo$
  24. #
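"""Simple statistics gathering tools

Classes:
    StatValue - accumulate statistics about a value
    StatWindow - accumulate statistics over a window of the most recent samples

Exceptions:
    StatErrorNoSamples - raised when a statistic is requested with no samples
"""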
  31. import math
  32. class StatErrorNoSamples(Exception):
  33. """Raised when requesting a statistic on a value with no samples"""
  34. pass
  35. class StatValue(object):
  36. """Accumulate statistics on a value
  37. Methods:
  38. sample - sample the variable
  39. count - return the number of samples
  40. min - return the minimum sample
  41. max - return the maximum sample
  42. average - return the average of samples
  43. stddev - return the standard deviation of samples
  44. format - return a string suitable for output
  45. """
  46. def __init__(self):
  47. self.reset()
  48. def _must_have_samples(self):
  49. if self._n == 0:
  50. raise StatErrorNoSamples()
  51. def reset(self):
  52. self._n = 0
  53. self._min = None
  54. self._max = None
  55. self._sum = 0.0
  56. self._sumsq = 0.0
  57. def sample(self, x):
  58. """Sample the variable"""
  59. self._n += 1
  60. if self._min is None or self._min > x:
  61. self._min = x
  62. if self._max is None or self._max < x:
  63. self._max = x
  64. self._sum += x
  65. self._sumsq += x*x
  66. def count(self):
  67. """Return the number of samples"""
  68. return self._n
  69. def min(self):
  70. """Return the minimum sample"""
  71. self._must_have_samples()
  72. return self._min
  73. def max(self):
  74. """Return the maximum sample"""
  75. self._must_have_samples()
  76. return self._max
  77. def average(self):
  78. """Return the average of samples"""
  79. self._must_have_samples()
  80. return self._sum / self._n
  81. def stddev(self):
  82. """Return the average of samples"""
  83. self._must_have_samples()
  84. avg = self._sum / self._n
  85. return math.sqrt(self._sumsq / self._n - avg*avg)
  86. def format(self, count_fmt="%6d", value_fmt="%12f"):
  87. """Return a string suitable for output
  88. The format will be five columns: count, min, avg., max, std.dev.,
  89. with some amount of punctuation separating them. For example:
  90. n= 3: 16.00, 42.63, 83.80, sd= 29.53
  91. The numeric formats of the first column can be controlled by supplying
  92. a format string as the count_fmt argument. The format of the remaining
  93. columns is governed by the value_fmt argument. See the default values
  94. for examples.
  95. """
  96. if self._n == 0:
  97. return 'n=' + (count_fmt % 0) + ':'
  98. fmt = ("n=%s: %s, %s, %s, sd=%s" %
  99. (count_fmt, value_fmt, value_fmt, value_fmt, value_fmt))
  100. return (fmt %
  101. (self._n, self._min, self.average(), self._max, self.stddev()))
  102. class StatWindow(StatValue):
  103. def __init__(self, window):
  104. StatValue.__init__(self) # will call self.reset()
  105. self._window = window
  106. def reset(self):
  107. StatValue.reset(self)
  108. self._samples = []
  109. def _reduce_to(self, count):
  110. count_to_drop = self._n - count
  111. if count_to_drop <= 0:
  112. return
  113. if count_to_drop >= count:
  114. # faster to just replay the remaining samples
  115. samples = self._samples[count_to_drop:]
  116. self.reset()
  117. for x in samples:
  118. self.sample(x)
  119. else:
  120. # faster to undo the dropped samples
  121. dropped_min = False
  122. dropped_max = False
  123. for y in self._samples[:count_to_drop]:
  124. if y == self._min:
  125. dropped_min = True
  126. if y == self._max:
  127. dropped_max = True
  128. self._sum -= y
  129. self._sumsq -= y*y
  130. self._n -= count_to_drop
  131. self._samples = self._samples[count_to_drop:]
  132. if dropped_min:
  133. self._min = min(self._samples)
  134. if dropped_max:
  135. self._max = max(self._samples)
  136. def setwindow(self, window):
  137. self._window = window
  138. self._reduce_to(window)
  139. def sample(self, x):
  140. self._reduce_to(self._window - 1)
  141. self._samples.append(x)
  142. StatValue.sample(self, x)