stattools.py - This Python code implements a simple statist…

/apiary/tools/stattools.py

https://bitbucket.org/lindenlab/apiary/ · Python · 173 lines · 84 code · 22 blank · 67 comment · 15 complexity · 9eb6107e9be5cf18cfb50dd563592968 MD5 · raw file


#
# $LicenseInfo:firstyear=2010&license=mit$
# 
# Copyright (c) 2010, Linden Research, Inc.
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# $/LicenseInfo$
#

"""Simple statistics gathering tools

Classes:
    StatValue - accumulate statistics about a value

Exceptions:
    StatErrorNoSamples

"""

import math

class StatErrorNoSamples(Exception):
    """Raised when requesting a statistic on a value with no samples"""
    pass

class StatValue(object):
    """Accumulate statistics on a value
    
    Methods:
        sample - sample the variable
        count - return the number of samples
        min - return the minimum sample
        max - return the maximum sample
        average - return the average of samples
        stddev - return the standard deviation of samples
        format - return a string suitable for output
    
    """
    
    def __init__(self):
        self.reset()
    
    def _must_have_samples(self):
        if self._n == 0:
            raise StatErrorNoSamples()
    
    def reset(self):
        self._n = 0
        self._min = None
        self._max = None
        self._sum = 0.0
        self._sumsq = 0.0

    def sample(self, x):
        """Sample the variable"""
        self._n += 1
        if self._min is None or self._min > x:
            self._min = x
        if self._max is None or self._max < x:
            self._max = x
        self._sum += x
        self._sumsq += x*x
    
    def count(self):
        """Return the number of samples"""
        return self._n
    
    def min(self):
        """Return the minimum sample"""
        self._must_have_samples()
        return self._min
    
    def max(self):
        """Return the maximum sample"""
        self._must_have_samples()
        return self._max
    
    def average(self):
        """Return the average of samples"""
        self._must_have_samples()
        return self._sum / self._n
    
    def stddev(self):
        """Return the average of samples"""
        self._must_have_samples()
        avg = self._sum / self._n
        return math.sqrt(self._sumsq / self._n - avg*avg)
    
    def format(self, count_fmt="%6d", value_fmt="%12f"):
        """Return a string suitable for output
        
        The format will be five columns: count, min, avg., max, std.dev.,
        with some amount of punctuation separating them. For example:

            n=   3:  16.00,  42.63,  83.80, sd= 29.53        
        
        The numeric formats of the first column can be controlled by supplying
        a format string as the count_fmt argument. The format of the remaining
        columns is governed by the value_fmt argument. See the default values
        for examples.
        
        """
        
        if self._n == 0:
            return 'n=' + (count_fmt % 0) + ':'
            
        fmt = ("n=%s: %s, %s, %s, sd=%s" % 
                (count_fmt, value_fmt, value_fmt, value_fmt, value_fmt))
        return (fmt %
            (self._n, self._min, self.average(), self._max, self.stddev()))


class StatWindow(StatValue):
    def __init__(self, window):
        StatValue.__init__(self) # will call self.reset()
        self._window = window
    
    def reset(self):
        StatValue.reset(self)
        self._samples = []

    def _reduce_to(self, count):
        count_to_drop = self._n - count
        if count_to_drop <= 0:
            return
        if count_to_drop >= count:
            # faster to just replay the remaining samples
            samples = self._samples[count_to_drop:]
            self.reset()
            for x in samples:
                self.sample(x)
        else:
            # faster to undo the dropped samples
            dropped_min = False
            dropped_max = False
            for y in self._samples[:count_to_drop]:
                if y == self._min:
                    dropped_min = True
                if y == self._max:
                    dropped_max = True
                self._sum -= y
                self._sumsq -= y*y
            self._n -= count_to_drop
            self._samples = self._samples[count_to_drop:]
            if dropped_min:
                self._min = min(self._samples)
            if dropped_max:
                self._max = max(self._samples)
                        
    def setwindow(self, window):
        self._window = window
        self._reduce_to(window)
            
    def sample(self, x):
        self._reduce_to(self._window - 1)
        self._samples.append(x)
        StatValue.sample(self, x)

Summary ✨

This Python code implements a simple statistics gathering tool. It provides classes for accumulating statistics on values, including calculating mean, standard deviation, and minimum/maximum values. The StatWindow class allows for time-based sampling of values, reducing the number of samples to a specified window size. The output is a formatted string displaying the count, min, average, max, and standard deviation of the sampled values.

Alerts (4)

'def' Ensure functions have docstrings for documentation
63 134 166 170