/util/stats/db.py
Python | 436 lines | 400 code | 8 blank | 28 comment | 12 complexity | 0dd6a918bf06fa836831f6a4a4adcce8 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
- # Copyright (c) 2003-2004 The Regents of The University of Michigan
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are
- # met: redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer;
- # redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution;
- # neither the name of the copyright holders nor the names of its
- # contributors may be used to endorse or promote products derived from
- # this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # Authors: Nathan Binkert
- import MySQLdb, re, string
- def statcmp(a, b):
- v1 = a.split('.')
- v2 = b.split('.')
- last = min(len(v1), len(v2)) - 1
- for i,j in zip(v1[0:last], v2[0:last]):
- if i != j:
- return cmp(i, j)
- # Special compare for last element.
- if len(v1) == len(v2):
- return cmp(v1[last], v2[last])
- else:
- return cmp(len(v1), len(v2))
- class RunData:
- def __init__(self, row):
- self.run = int(row[0])
- self.name = row[1]
- self.user = row[2]
- self.project = row[3]
- class SubData:
- def __init__(self, row):
- self.stat = int(row[0])
- self.x = int(row[1])
- self.y = int(row[2])
- self.name = row[3]
- self.descr = row[4]
- class Data:
- def __init__(self, row):
- if len(row) != 5:
- raise 'stat db error'
- self.stat = int(row[0])
- self.run = int(row[1])
- self.x = int(row[2])
- self.y = int(row[3])
- self.data = float(row[4])
- def __repr__(self):
- return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
- self.run, self.x, self.y, self.data)
- class StatData(object):
- def __init__(self, row):
- self.stat = int(row[0])
- self.name = row[1]
- self.desc = row[2]
- self.type = row[3]
- self.prereq = int(row[5])
- self.precision = int(row[6])
- import flags
- self.flags = 0
- if int(row[4]): self.flags |= flags.printable
- if int(row[7]): self.flags |= flags.nozero
- if int(row[8]): self.flags |= flags.nonan
- if int(row[9]): self.flags |= flags.total
- if int(row[10]): self.flags |= flags.pdf
- if int(row[11]): self.flags |= flags.cdf
- if self.type == 'DIST' or self.type == 'VECTORDIST':
- self.min = float(row[12])
- self.max = float(row[13])
- self.bktsize = float(row[14])
- self.size = int(row[15])
- if self.type == 'FORMULA':
- self.formula = self.db.allFormulas[self.stat]
- class Node(object):
- def __init__(self, name):
- self.name = name
- def __str__(self):
- return self.name
- class Result(object):
- def __init__(self, x, y):
- self.data = {}
- self.x = x
- self.y = y
- def __contains__(self, run):
- return run in self.data
- def __getitem__(self, run):
- if run not in self.data:
- self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ]
- return self.data[run]
- class Database(object):
- def __init__(self):
- self.host = 'zizzer.pool'
- self.user = ''
- self.passwd = ''
- self.db = 'm5stats'
- self.cursor = None
- self.allStats = []
- self.allStatIds = {}
- self.allStatNames = {}
- self.allSubData = {}
- self.allRuns = []
- self.allRunIds = {}
- self.allRunNames = {}
- self.allFormulas = {}
- self.stattop = {}
- self.statdict = {}
- self.statlist = []
- self.mode = 'sum';
- self.runs = None
- self.ticks = None
- self.method = 'sum'
- self._method = type(self).sum
- def get(self, job, stat, system=None):
- run = self.allRunNames.get(str(job), None)
- if run is None:
- return None
- from info import ProxyError, scalar, vector, value, values, total, len
- if system is None and hasattr(job, 'system'):
- system = job.system
- if system is not None:
- stat.system = self[system]
- try:
- if scalar(stat):
- return value(stat, run.run)
- if vector(stat):
- return values(stat, run.run)
- except ProxyError:
- return None
- return None
- def query(self, sql):
- self.cursor.execute(sql)
- def update_dict(self, dict):
- dict.update(self.stattop)
- def append(self, stat):
- statname = re.sub(':', '__', stat.name)
- path = string.split(statname, '.')
- pathtop = path[0]
- fullname = ''
- x = self
- while len(path) > 1:
- name = path.pop(0)
- if not x.__dict__.has_key(name):
- x.__dict__[name] = Node(fullname + name)
- x = x.__dict__[name]
- fullname = '%s%s.' % (fullname, name)
- name = path.pop(0)
- x.__dict__[name] = stat
- self.stattop[pathtop] = self.__dict__[pathtop]
- self.statdict[statname] = stat
- self.statlist.append(statname)
- def connect(self):
- # connect
- self.thedb = MySQLdb.connect(db=self.db,
- host=self.host,
- user=self.user,
- passwd=self.passwd)
- # create a cursor
- self.cursor = self.thedb.cursor()
- self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
- from runs''')
- for result in self.cursor.fetchall():
- run = RunData(result);
- self.allRuns.append(run)
- self.allRunIds[run.run] = run
- self.allRunNames[run.name] = run
- self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
- for result in self.cursor.fetchall():
- subdata = SubData(result)
- if self.allSubData.has_key(subdata.stat):
- self.allSubData[subdata.stat].append(subdata)
- else:
- self.allSubData[subdata.stat] = [ subdata ]
- self.query('select * from formulas')
- for id,formula in self.cursor.fetchall():
- self.allFormulas[int(id)] = formula.tostring()
- StatData.db = self
- self.query('select * from stats')
- import info
- for result in self.cursor.fetchall():
- stat = info.NewStat(self, StatData(result))
- self.append(stat)
- self.allStats.append(stat)
- self.allStatIds[stat.stat] = stat
- self.allStatNames[stat.name] = stat
- # Name: listruns
- # Desc: Prints all runs matching a given user, if no argument
- # is given all runs are returned
- def listRuns(self, user=None):
- print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
- print '-' * 62
- for run in self.allRuns:
- if user == None or user == run.user:
- print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
- # Name: listTicks
- # Desc: Prints all samples for a given run
- def listTicks(self, runs=None):
- print "tick"
- print "----------------------------------------"
- sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
- if runs != None:
- first = True
- for run in runs:
- if first:
- # sql += ' where'
- first = False
- else:
- sql += ' or'
- sql += ' dt_run=%s' % run.run
- sql += ')'
- self.query(sql)
- for r in self.cursor.fetchall():
- print r[0]
- # Name: retTicks
- # Desc: Prints all samples for a given run
- def retTicks(self, runs=None):
- sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
- if runs != None:
- first = True
- for run in runs:
- if first:
- first = False
- else:
- sql += ' or'
- sql += ' dt_run=%s' % run.run
- sql += ')'
- self.query(sql)
- ret = []
- for r in self.cursor.fetchall():
- ret.append(r[0])
- return ret
- # Name: liststats
- # Desc: Prints all statistics that appear in the database,
- # the optional argument is a regular expression that can
- # be used to prune the result set
- def listStats(self, regex=None):
- print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
- print '-' * 80
- rx = None
- if regex != None:
- rx = re.compile(regex)
- stats = [ stat.name for stat in self.allStats ]
- stats.sort(statcmp)
- for stat in stats:
- stat = self.allStatNames[stat]
- if rx == None or rx.match(stat.name):
- print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
- # Name: liststats
- # Desc: Prints all statistics that appear in the database,
- # the optional argument is a regular expression that can
- # be used to prune the result set
- def listFormulas(self, regex=None):
- print '%-60s %s' % ('formula name', 'formula')
- print '-' * 80
- rx = None
- if regex != None:
- rx = re.compile(regex)
- stats = [ stat.name for stat in self.allStats ]
- stats.sort(statcmp)
- for stat in stats:
- stat = self.allStatNames[stat]
- if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
- print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
- def getStat(self, stats):
- if type(stats) is not list:
- stats = [ stats ]
- ret = []
- for stat in stats:
- if type(stat) is int:
- ret.append(self.allStatIds[stat])
- if type(stat) is str:
- rx = re.compile(stat)
- for stat in self.allStats:
- if rx.match(stat.name):
- ret.append(stat)
- return ret
- #########################################
- # get the data
- #
- def query(self, op, stat, ticks, group=False):
- sql = 'select '
- sql += 'dt_stat as stat, '
- sql += 'dt_run as run, '
- sql += 'dt_x as x, '
- sql += 'dt_y as y, '
- if group:
- sql += 'dt_tick as tick, '
- sql += '%s(dt_data) as data ' % op
- sql += 'from data '
- sql += 'where '
- if isinstance(stat, list):
- val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
- sql += ' (%s)' % val
- else:
- sql += ' dt_stat=%d' % stat.stat
- if self.runs != None and len(self.runs):
- val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
- sql += ' and (%s)' % val
- if ticks != None and len(ticks):
- val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
- sql += ' and (%s)' % val
- sql += ' group by dt_stat,dt_run,dt_x,dt_y'
- if group:
- sql += ',dt_tick'
- return sql
- # Name: sum
- # Desc: given a run, a stat and an array of samples, total the samples
- def sum(self, *args, **kwargs):
- return self.query('sum', *args, **kwargs)
- # Name: avg
- # Desc: given a run, a stat and an array of samples, average the samples
- def avg(self, stat, ticks):
- return self.query('avg', *args, **kwargs)
- # Name: stdev
- # Desc: given a run, a stat and an array of samples, get the standard
- # deviation
- def stdev(self, stat, ticks):
- return self.query('stddev', *args, **kwargs)
- def __setattr__(self, attr, value):
- super(Database, self).__setattr__(attr, value)
- if attr != 'method':
- return
- if value == 'sum':
- self._method = self.sum
- elif value == 'avg':
- self._method = self.avg
- elif value == 'stdev':
- self._method = self.stdev
- else:
- raise AttributeError, "can only set get to: sum | avg | stdev"
- def data(self, stat, ticks=None):
- if ticks is None:
- ticks = self.ticks
- sql = self._method(self, stat, ticks)
- self.query(sql)
- runs = {}
- xmax = 0
- ymax = 0
- for x in self.cursor.fetchall():
- data = Data(x)
- if not runs.has_key(data.run):
- runs[data.run] = {}
- if not runs[data.run].has_key(data.x):
- runs[data.run][data.x] = {}
- xmax = max(xmax, data.x)
- ymax = max(ymax, data.y)
- runs[data.run][data.x][data.y] = data.data
- results = Result(xmax + 1, ymax + 1)
- for run,data in runs.iteritems():
- result = results[run]
- for x,ydata in data.iteritems():
- for y,data in ydata.iteritems():
- result[x][y] = data
- return results
- def __getitem__(self, key):
- return self.stattop[key]