PageRenderTime 49ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/util/stats/db.py

https://bitbucket.org/musleh123/gem5_cetus
Python | 436 lines | 400 code | 8 blank | 28 comment | 12 complexity | 0dd6a918bf06fa836831f6a4a4adcce8 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. # Copyright (c) 2003-2004 The Regents of The University of Michigan
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met: redistributions of source code must retain the above copyright
  7. # notice, this list of conditions and the following disclaimer;
  8. # redistributions in binary form must reproduce the above copyright
  9. # notice, this list of conditions and the following disclaimer in the
  10. # documentation and/or other materials provided with the distribution;
  11. # neither the name of the copyright holders nor the names of its
  12. # contributors may be used to endorse or promote products derived from
  13. # this software without specific prior written permission.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. #
  27. # Authors: Nathan Binkert
  28. import MySQLdb, re, string
  29. def statcmp(a, b):
  30. v1 = a.split('.')
  31. v2 = b.split('.')
  32. last = min(len(v1), len(v2)) - 1
  33. for i,j in zip(v1[0:last], v2[0:last]):
  34. if i != j:
  35. return cmp(i, j)
  36. # Special compare for last element.
  37. if len(v1) == len(v2):
  38. return cmp(v1[last], v2[last])
  39. else:
  40. return cmp(len(v1), len(v2))
  41. class RunData:
  42. def __init__(self, row):
  43. self.run = int(row[0])
  44. self.name = row[1]
  45. self.user = row[2]
  46. self.project = row[3]
  47. class SubData:
  48. def __init__(self, row):
  49. self.stat = int(row[0])
  50. self.x = int(row[1])
  51. self.y = int(row[2])
  52. self.name = row[3]
  53. self.descr = row[4]
  54. class Data:
  55. def __init__(self, row):
  56. if len(row) != 5:
  57. raise 'stat db error'
  58. self.stat = int(row[0])
  59. self.run = int(row[1])
  60. self.x = int(row[2])
  61. self.y = int(row[3])
  62. self.data = float(row[4])
  63. def __repr__(self):
  64. return '''Data(['%d', '%d', '%d', '%d', '%f'])''' % ( self.stat,
  65. self.run, self.x, self.y, self.data)
  66. class StatData(object):
  67. def __init__(self, row):
  68. self.stat = int(row[0])
  69. self.name = row[1]
  70. self.desc = row[2]
  71. self.type = row[3]
  72. self.prereq = int(row[5])
  73. self.precision = int(row[6])
  74. import flags
  75. self.flags = 0
  76. if int(row[4]): self.flags |= flags.printable
  77. if int(row[7]): self.flags |= flags.nozero
  78. if int(row[8]): self.flags |= flags.nonan
  79. if int(row[9]): self.flags |= flags.total
  80. if int(row[10]): self.flags |= flags.pdf
  81. if int(row[11]): self.flags |= flags.cdf
  82. if self.type == 'DIST' or self.type == 'VECTORDIST':
  83. self.min = float(row[12])
  84. self.max = float(row[13])
  85. self.bktsize = float(row[14])
  86. self.size = int(row[15])
  87. if self.type == 'FORMULA':
  88. self.formula = self.db.allFormulas[self.stat]
  89. class Node(object):
  90. def __init__(self, name):
  91. self.name = name
  92. def __str__(self):
  93. return self.name
  94. class Result(object):
  95. def __init__(self, x, y):
  96. self.data = {}
  97. self.x = x
  98. self.y = y
  99. def __contains__(self, run):
  100. return run in self.data
  101. def __getitem__(self, run):
  102. if run not in self.data:
  103. self.data[run] = [ [ 0.0 ] * self.y for i in xrange(self.x) ]
  104. return self.data[run]
  105. class Database(object):
  106. def __init__(self):
  107. self.host = 'zizzer.pool'
  108. self.user = ''
  109. self.passwd = ''
  110. self.db = 'm5stats'
  111. self.cursor = None
  112. self.allStats = []
  113. self.allStatIds = {}
  114. self.allStatNames = {}
  115. self.allSubData = {}
  116. self.allRuns = []
  117. self.allRunIds = {}
  118. self.allRunNames = {}
  119. self.allFormulas = {}
  120. self.stattop = {}
  121. self.statdict = {}
  122. self.statlist = []
  123. self.mode = 'sum';
  124. self.runs = None
  125. self.ticks = None
  126. self.method = 'sum'
  127. self._method = type(self).sum
  128. def get(self, job, stat, system=None):
  129. run = self.allRunNames.get(str(job), None)
  130. if run is None:
  131. return None
  132. from info import ProxyError, scalar, vector, value, values, total, len
  133. if system is None and hasattr(job, 'system'):
  134. system = job.system
  135. if system is not None:
  136. stat.system = self[system]
  137. try:
  138. if scalar(stat):
  139. return value(stat, run.run)
  140. if vector(stat):
  141. return values(stat, run.run)
  142. except ProxyError:
  143. return None
  144. return None
  145. def query(self, sql):
  146. self.cursor.execute(sql)
  147. def update_dict(self, dict):
  148. dict.update(self.stattop)
  149. def append(self, stat):
  150. statname = re.sub(':', '__', stat.name)
  151. path = string.split(statname, '.')
  152. pathtop = path[0]
  153. fullname = ''
  154. x = self
  155. while len(path) > 1:
  156. name = path.pop(0)
  157. if not x.__dict__.has_key(name):
  158. x.__dict__[name] = Node(fullname + name)
  159. x = x.__dict__[name]
  160. fullname = '%s%s.' % (fullname, name)
  161. name = path.pop(0)
  162. x.__dict__[name] = stat
  163. self.stattop[pathtop] = self.__dict__[pathtop]
  164. self.statdict[statname] = stat
  165. self.statlist.append(statname)
  166. def connect(self):
  167. # connect
  168. self.thedb = MySQLdb.connect(db=self.db,
  169. host=self.host,
  170. user=self.user,
  171. passwd=self.passwd)
  172. # create a cursor
  173. self.cursor = self.thedb.cursor()
  174. self.query('''select rn_id,rn_name,rn_sample,rn_user,rn_project
  175. from runs''')
  176. for result in self.cursor.fetchall():
  177. run = RunData(result);
  178. self.allRuns.append(run)
  179. self.allRunIds[run.run] = run
  180. self.allRunNames[run.name] = run
  181. self.query('select sd_stat,sd_x,sd_y,sd_name,sd_descr from subdata')
  182. for result in self.cursor.fetchall():
  183. subdata = SubData(result)
  184. if self.allSubData.has_key(subdata.stat):
  185. self.allSubData[subdata.stat].append(subdata)
  186. else:
  187. self.allSubData[subdata.stat] = [ subdata ]
  188. self.query('select * from formulas')
  189. for id,formula in self.cursor.fetchall():
  190. self.allFormulas[int(id)] = formula.tostring()
  191. StatData.db = self
  192. self.query('select * from stats')
  193. import info
  194. for result in self.cursor.fetchall():
  195. stat = info.NewStat(self, StatData(result))
  196. self.append(stat)
  197. self.allStats.append(stat)
  198. self.allStatIds[stat.stat] = stat
  199. self.allStatNames[stat.name] = stat
  200. # Name: listruns
  201. # Desc: Prints all runs matching a given user, if no argument
  202. # is given all runs are returned
  203. def listRuns(self, user=None):
  204. print '%-40s %-10s %-5s' % ('run name', 'user', 'id')
  205. print '-' * 62
  206. for run in self.allRuns:
  207. if user == None or user == run.user:
  208. print '%-40s %-10s %-10d' % (run.name, run.user, run.run)
  209. # Name: listTicks
  210. # Desc: Prints all samples for a given run
  211. def listTicks(self, runs=None):
  212. print "tick"
  213. print "----------------------------------------"
  214. sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
  215. if runs != None:
  216. first = True
  217. for run in runs:
  218. if first:
  219. # sql += ' where'
  220. first = False
  221. else:
  222. sql += ' or'
  223. sql += ' dt_run=%s' % run.run
  224. sql += ')'
  225. self.query(sql)
  226. for r in self.cursor.fetchall():
  227. print r[0]
  228. # Name: retTicks
  229. # Desc: Prints all samples for a given run
  230. def retTicks(self, runs=None):
  231. sql = 'select distinct dt_tick from data where dt_stat=1180 and ('
  232. if runs != None:
  233. first = True
  234. for run in runs:
  235. if first:
  236. first = False
  237. else:
  238. sql += ' or'
  239. sql += ' dt_run=%s' % run.run
  240. sql += ')'
  241. self.query(sql)
  242. ret = []
  243. for r in self.cursor.fetchall():
  244. ret.append(r[0])
  245. return ret
  246. # Name: liststats
  247. # Desc: Prints all statistics that appear in the database,
  248. # the optional argument is a regular expression that can
  249. # be used to prune the result set
  250. def listStats(self, regex=None):
  251. print '%-60s %-8s %-10s' % ('stat name', 'id', 'type')
  252. print '-' * 80
  253. rx = None
  254. if regex != None:
  255. rx = re.compile(regex)
  256. stats = [ stat.name for stat in self.allStats ]
  257. stats.sort(statcmp)
  258. for stat in stats:
  259. stat = self.allStatNames[stat]
  260. if rx == None or rx.match(stat.name):
  261. print '%-60s %-8s %-10s' % (stat.name, stat.stat, stat.type)
  262. # Name: liststats
  263. # Desc: Prints all statistics that appear in the database,
  264. # the optional argument is a regular expression that can
  265. # be used to prune the result set
  266. def listFormulas(self, regex=None):
  267. print '%-60s %s' % ('formula name', 'formula')
  268. print '-' * 80
  269. rx = None
  270. if regex != None:
  271. rx = re.compile(regex)
  272. stats = [ stat.name for stat in self.allStats ]
  273. stats.sort(statcmp)
  274. for stat in stats:
  275. stat = self.allStatNames[stat]
  276. if stat.type == 'FORMULA' and (rx == None or rx.match(stat.name)):
  277. print '%-60s %s' % (stat.name, self.allFormulas[stat.stat])
  278. def getStat(self, stats):
  279. if type(stats) is not list:
  280. stats = [ stats ]
  281. ret = []
  282. for stat in stats:
  283. if type(stat) is int:
  284. ret.append(self.allStatIds[stat])
  285. if type(stat) is str:
  286. rx = re.compile(stat)
  287. for stat in self.allStats:
  288. if rx.match(stat.name):
  289. ret.append(stat)
  290. return ret
  291. #########################################
  292. # get the data
  293. #
  294. def query(self, op, stat, ticks, group=False):
  295. sql = 'select '
  296. sql += 'dt_stat as stat, '
  297. sql += 'dt_run as run, '
  298. sql += 'dt_x as x, '
  299. sql += 'dt_y as y, '
  300. if group:
  301. sql += 'dt_tick as tick, '
  302. sql += '%s(dt_data) as data ' % op
  303. sql += 'from data '
  304. sql += 'where '
  305. if isinstance(stat, list):
  306. val = ' or '.join([ 'dt_stat=%d' % s.stat for s in stat ])
  307. sql += ' (%s)' % val
  308. else:
  309. sql += ' dt_stat=%d' % stat.stat
  310. if self.runs != None and len(self.runs):
  311. val = ' or '.join([ 'dt_run=%d' % r for r in self.runs ])
  312. sql += ' and (%s)' % val
  313. if ticks != None and len(ticks):
  314. val = ' or '.join([ 'dt_tick=%d' % s for s in ticks ])
  315. sql += ' and (%s)' % val
  316. sql += ' group by dt_stat,dt_run,dt_x,dt_y'
  317. if group:
  318. sql += ',dt_tick'
  319. return sql
  320. # Name: sum
  321. # Desc: given a run, a stat and an array of samples, total the samples
  322. def sum(self, *args, **kwargs):
  323. return self.query('sum', *args, **kwargs)
  324. # Name: avg
  325. # Desc: given a run, a stat and an array of samples, average the samples
  326. def avg(self, stat, ticks):
  327. return self.query('avg', *args, **kwargs)
  328. # Name: stdev
  329. # Desc: given a run, a stat and an array of samples, get the standard
  330. # deviation
  331. def stdev(self, stat, ticks):
  332. return self.query('stddev', *args, **kwargs)
  333. def __setattr__(self, attr, value):
  334. super(Database, self).__setattr__(attr, value)
  335. if attr != 'method':
  336. return
  337. if value == 'sum':
  338. self._method = self.sum
  339. elif value == 'avg':
  340. self._method = self.avg
  341. elif value == 'stdev':
  342. self._method = self.stdev
  343. else:
  344. raise AttributeError, "can only set get to: sum | avg | stdev"
  345. def data(self, stat, ticks=None):
  346. if ticks is None:
  347. ticks = self.ticks
  348. sql = self._method(self, stat, ticks)
  349. self.query(sql)
  350. runs = {}
  351. xmax = 0
  352. ymax = 0
  353. for x in self.cursor.fetchall():
  354. data = Data(x)
  355. if not runs.has_key(data.run):
  356. runs[data.run] = {}
  357. if not runs[data.run].has_key(data.x):
  358. runs[data.run][data.x] = {}
  359. xmax = max(xmax, data.x)
  360. ymax = max(ymax, data.y)
  361. runs[data.run][data.x][data.y] = data.data
  362. results = Result(xmax + 1, ymax + 1)
  363. for run,data in runs.iteritems():
  364. result = results[run]
  365. for x,ydata in data.iteritems():
  366. for y,data in ydata.iteritems():
  367. result[x][y] = data
  368. return results
  369. def __getitem__(self, key):
  370. return self.stattop[key]