PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/tool/logparser.py

https://bitbucket.org/quangquach/pypy
Python | 438 lines | 424 code | 3 blank | 11 comment | 22 complexity | 82fe94463ed2ee79f88e87e1647c87ec MD5 | raw file
  1. #! /usr/bin/env python
  2. """
  3. Syntax:
  4. python logparser.py <action> <logfilename> <output> <options...>
  5. Actions:
  6. draw-time draw a timeline image of the log (format PNG by default)
  7. print-summary print a summary of the log
  8. """
  9. import autopath
  10. import sys, re
  11. from pypy.rlib.debug import DebugLog
  12. from pypy.tool import progressbar
  13. def parse_log_file(filename, verbose=True):
  14. f = open(filename, 'r')
  15. if f.read(2) == 'BZ':
  16. f.close()
  17. import bz2
  18. f = bz2.BZ2File(filename, 'r')
  19. else:
  20. f.seek(0)
  21. lines = f.readlines()
  22. f.close()
  23. #
  24. return parse_log(lines, verbose=verbose)
  25. def parse_log(lines, verbose=False):
  26. color = "(?:\x1b.*?m)?"
  27. r_start = re.compile(color + r"\[([0-9a-fA-F]+)\] \{([\w-]+)" + color + "$")
  28. r_stop = re.compile(color + r"\[([0-9a-fA-F]+)\] ([\w-]+)\}" + color + "$")
  29. lasttime = 0
  30. log = DebugLog()
  31. time_decrase = False
  32. performance_log = True
  33. nested = 0
  34. #
  35. if verbose and sys.stdout.isatty():
  36. progress = progressbar.ProgressBar(color='green')
  37. counter = 0
  38. else:
  39. progress = None
  40. single_percent = len(lines) / 100
  41. if verbose:
  42. vnext = 0
  43. else:
  44. vnext = -1
  45. for i, line in enumerate(lines):
  46. if i == vnext:
  47. if progress is not None:
  48. progress.render(counter)
  49. counter += 1
  50. vnext += single_percent
  51. else:
  52. sys.stderr.write('%d%%..' % int(100.0*i/len(lines)))
  53. vnext += 500000
  54. line = line.rstrip()
  55. match = r_start.match(line)
  56. if match:
  57. record = log.debug_start
  58. nested += 1
  59. else:
  60. match = r_stop.match(line)
  61. if match:
  62. record = log.debug_stop
  63. nested -= 1
  64. else:
  65. log.debug_print(line)
  66. performance_log = performance_log and nested == 0
  67. continue
  68. time = int(int(match.group(1), 16))
  69. time_decrase = time_decrase or time < lasttime
  70. lasttime = time
  71. record(match.group(2), time=int(match.group(1), 16))
  72. if verbose:
  73. sys.stderr.write('loaded\n')
  74. if performance_log and time_decrase:
  75. print ("The time decreases! The log file may have been"
  76. " produced on a multi-CPU machine and the process"
  77. " moved between CPUs.")
  78. return log
  79. def extract_category(log, catprefix='', toplevel=False):
  80. got = []
  81. resulttext = []
  82. for entry in log:
  83. if entry[0] == 'debug_print':
  84. resulttext.append(entry[1])
  85. elif len(entry) == 4:
  86. got.extend(extract_category(
  87. entry[3], catprefix, toplevel=entry[0].startswith(catprefix)))
  88. else:
  89. resulttext.append('... LOG TRUCATED ...')
  90. if toplevel:
  91. resulttext.append('')
  92. got.insert(0, '\n'.join(resulttext))
  93. return got
  94. def print_log(log):
  95. for entry in log:
  96. if entry[0] == 'debug_print':
  97. print entry[1]
  98. else:
  99. print "{%s" % entry[0]
  100. if len(entry)>3:
  101. print_log(entry[3])
  102. print "%s}" % entry[0]
  103. def kill_category(log, catprefix=''):
  104. newlog = []
  105. for entry in log:
  106. if not entry[0].startswith(catprefix):
  107. if len(entry) > 3:
  108. newlog.append(entry[:3] +
  109. (kill_category(entry[3], catprefix),))
  110. else:
  111. newlog.append(entry)
  112. return newlog
  113. def getsubcategories(log):
  114. return [entry for entry in log if entry[0] != 'debug_print']
  115. def gettimebounds(log):
  116. # returns (mintime, maxtime)
  117. maincats = getsubcategories(log)
  118. return (maincats[0][1], maincats[-1][2])
  119. def gettotaltimes(log):
  120. # returns a dict {'label' or None: totaltime}
  121. def rectime(category1, timestart1, timestop1, subcats):
  122. substartstop = []
  123. for entry in getsubcategories(subcats):
  124. rectime(*entry)
  125. substartstop.append(entry[1:3]) # (start, stop)
  126. # compute the total time for category1 as the part of the
  127. # interval [timestart1, timestop1] that is not covered by
  128. # any interval from one of the subcats.
  129. mytime = 0
  130. substartstop.sort()
  131. for substart, substop in substartstop:
  132. if substart >= timestop1:
  133. break
  134. if substart > timestart1:
  135. mytime += substart - timestart1
  136. if timestart1 < substop:
  137. timestart1 = substop
  138. if timestart1 < timestop1:
  139. mytime += timestop1 - timestart1
  140. #
  141. try:
  142. result[category1] += mytime
  143. except KeyError:
  144. result[category1] = mytime
  145. #
  146. result = {}
  147. timestart0, timestop0 = gettimebounds(log)
  148. rectime(None, timestart0, timestop0, log)
  149. return result
  150. # ____________________________________________________________
  151. COLORS = {
  152. None: (248, 248, 248),
  153. '': (160, 160, 160),
  154. 'gc-': (224, 0, 0),
  155. 'gc-minor': (192, 0, 16),
  156. 'gc-collect': (255, 0, 0),
  157. 'jit-': (0, 224, 0),
  158. 'jit-running': (192, 255, 160),
  159. 'jit-tracing': (0, 255, 0),
  160. 'jit-optimize': (160, 255, 0),
  161. 'jit-backend': (0, 255, 144),
  162. 'jit-blackhole': (0, 160, 0),
  163. }
  164. SUMMARY = {
  165. None: 'normal execution',
  166. '': 'other',
  167. 'gc-': 'gc',
  168. 'jit-': 'jit',
  169. 'jit-running': 'jit-running',
  170. }
  171. def getcolor(category):
  172. while category not in COLORS:
  173. category = category[:-1]
  174. return COLORS[category]
  175. def getlightercolor((r, g, b)):
  176. return ((r*2+255)//3, (g*2+255)//3, (b*2+255)//3)
  177. def getdarkercolor((r, g, b)):
  178. return (r*2//3, g*2//3, b*2//3)
  179. def getlabel(text, _cache={}):
  180. try:
  181. return _cache[text]
  182. except KeyError:
  183. pass
  184. from PIL import Image, ImageDraw
  185. if None not in _cache:
  186. image = Image.new("RGBA", (1, 1), (0, 0, 0, 0))
  187. draw = ImageDraw.Draw(image)
  188. _cache[None] = draw
  189. else:
  190. draw = _cache[None]
  191. sx, sy = draw.textsize(text)
  192. texthoriz = Image.new("RGBA", (sx, sy), (0, 0, 0, 0))
  193. ImageDraw.Draw(texthoriz).text((0, 0), text, fill=(0, 0, 0))
  194. textvert = texthoriz.rotate(90)
  195. _cache[text] = sx, sy, texthoriz, textvert
  196. return _cache[text]
  197. def bevelrect(draw, (x1, y1, x2, y2), color):
  198. if x2 <= x1:
  199. x2 = x1 + 1 # minimal width
  200. elif x2 >= x1 + 4:
  201. draw.line((x1, y1+1, x1, y2-1), fill=getlightercolor(color))
  202. x1 += 1
  203. x2 -= 1
  204. draw.line((x2, y1+1, x2, y2-1), fill=getdarkercolor(color))
  205. draw.line((x1, y1, x2-1, y1), fill=getlightercolor(color))
  206. y1 += 1
  207. y2 -= 1
  208. draw.line((x1, y2, x2-1, y2), fill=getdarkercolor(color))
  209. draw.rectangle((x1, y1, x2-1, y2-1), fill=color)
  210. # ----------
  211. def get_timeline_image(log, width, height):
  212. from PIL import Image, ImageDraw
  213. timestart0, timestop0 = gettimebounds(log)
  214. assert timestop0 > timestart0
  215. timefactor = float(width) / (timestop0 - timestart0)
  216. #
  217. def recdraw(sublist, subheight):
  218. firstx1 = None
  219. for category1, timestart1, timestop1, subcats in sublist:
  220. x1 = int((timestart1 - timestart0) * timefactor)
  221. x2 = int((timestop1 - timestart0) * timefactor)
  222. y1 = (height - subheight) / 2
  223. y2 = y1 + subheight
  224. y1 = int(y1)
  225. y2 = int(y2)
  226. color = getcolor(category1)
  227. if firstx1 is None:
  228. firstx1 = x1
  229. bevelrect(draw, (x1, y1, x2, y2), color)
  230. subcats = getsubcategories(subcats)
  231. if subcats:
  232. x2 = recdraw(subcats, subheight * 0.94) - 1
  233. sx, sy, texthoriz, textvert = getlabel(category1)
  234. if sx <= x2-x1-8:
  235. image.paste(texthoriz, (x1+5, y1+5), texthoriz)
  236. elif sy <= x2-x1-2:
  237. image.paste(textvert, (x1+1, y1+5), textvert)
  238. return firstx1
  239. #
  240. image = Image.new("RGBA", (width, height), (255, 255, 255, 0))
  241. draw = ImageDraw.Draw(image)
  242. recdraw(getsubcategories(log), height)
  243. return image
  244. # ----------
  245. def render_histogram(times, time0, labels, width, barheight):
  246. # Render a histogram showing horizontal time bars are given by the
  247. # 'times' dictionary. Each entry has the label specified by 'labels',
  248. # or by default the key used in 'times'.
  249. from PIL import Image, ImageDraw
  250. times = [(time, key) for (key, time) in times.items()]
  251. times.sort()
  252. times.reverse()
  253. images = []
  254. for time, key in times:
  255. fraction = float(time) / time0
  256. if fraction < 0.01:
  257. break
  258. color = getcolor(key)
  259. image = Image.new("RGBA", (width, barheight), (255, 255, 255, 0))
  260. draw = ImageDraw.Draw(image)
  261. x2 = int(fraction * width)
  262. bevelrect(draw, (0, 0, x2, barheight), color)
  263. # draw the labels "x%" and "key"
  264. percent = "%.1f%%" % (100.0 * fraction,)
  265. s1x, s1y, textpercent, vtextpercent = getlabel(percent)
  266. s2x, _, textlabel, _ = getlabel(labels.get(key, key))
  267. t1x = 5
  268. if t1x + s1x >= x2 - 3:
  269. if t1x + s1y < x2 - 3:
  270. textpercent = vtextpercent
  271. s1x = s1y
  272. else:
  273. t1x = x2 + 6
  274. t2x = t1x + s1x + 12
  275. if t2x + s2x >= x2 - 3:
  276. t2x = max(t2x, x2 + 8)
  277. image.paste(textpercent, (t1x, 5), textpercent)
  278. image.paste(textlabel, (t2x, 5), textlabel)
  279. images.append(image)
  280. if not images:
  281. return None
  282. return combine(images, spacing=0, border=1, horizontal=False)
  283. def get_timesummary_single_image(totaltimes, totaltime0, componentdict,
  284. width, barheight):
  285. # Compress the totaltimes dict so that its only entries left are
  286. # from componentdict. We do that by taking the times assigned to
  287. # subkeys in totaltimes and adding them to the superkeys specified
  288. # in componentdict.
  289. totaltimes = totaltimes.copy()
  290. for key, value in totaltimes.items():
  291. if key in componentdict:
  292. continue
  293. del totaltimes[key]
  294. if key is not None:
  295. while key not in componentdict:
  296. key = key[:-1]
  297. try:
  298. totaltimes[key] += value
  299. except KeyError:
  300. totaltimes[key] = value
  301. return render_histogram(totaltimes, totaltime0, componentdict,
  302. width, barheight)
  303. def get_timesummary_image(log, summarywidth, summarybarheight):
  304. timestart0, timestop0 = gettimebounds(log)
  305. totaltime0 = timestop0 - timestart0
  306. totaltimes = gettotaltimes(log)
  307. spacing = 50
  308. width = (summarywidth - spacing) // 2
  309. img1 = get_timesummary_single_image(totaltimes, totaltime0, SUMMARY,
  310. width, summarybarheight)
  311. if None in totaltimes:
  312. del totaltimes[None]
  313. img2 = render_histogram(totaltimes, totaltime0, {},
  314. width, summarybarheight)
  315. if img2 is None:
  316. return img1
  317. return combine([img1, img2], spacing=spacing, horizontal=True)
  318. # ----------
  319. def combine(imagelist, spacing=50, border=0, horizontal=False):
  320. if len(imagelist) <= 1 and not border:
  321. return imagelist[0]
  322. from PIL import Image, ImageDraw
  323. wlist = [image.size[0] for image in imagelist]
  324. hlist = [image.size[1] for image in imagelist]
  325. if horizontal:
  326. w = sum(wlist) + spacing*(len(imagelist)-1)
  327. h = max(hlist)
  328. else:
  329. w = max(wlist)
  330. h = sum(hlist) + spacing*(len(imagelist)-1)
  331. w += 2*border
  332. h += 2*border
  333. bigimage = Image.new("RGBA", (w, h), (255, 255, 255, 0))
  334. if border:
  335. draw = ImageDraw.Draw(bigimage)
  336. draw.rectangle((0, 0, w-1, border-1), fill=(0, 0, 0))
  337. draw.rectangle((0, h-border, w-1, h-1), fill=(0, 0, 0))
  338. draw.rectangle((0, 0, border-1, h-1), fill=(0, 0, 0))
  339. draw.rectangle((w-1, 0, w-border, h-1), fill=(0, 0, 0))
  340. x = border
  341. y = border
  342. for image in imagelist:
  343. bigimage.paste(image, (x, y))
  344. if horizontal:
  345. x += image.size[0] + spacing
  346. else:
  347. y += image.size[1] + spacing
  348. return bigimage
  349. def draw_timeline_image(log, output=None, mainwidth=3000, mainheight=150,
  350. summarywidth=850, summarybarheight=40):
  351. mainwidth = int(mainwidth)
  352. mainheight = int(mainheight)
  353. summarywidth = int(summarywidth)
  354. summarybarheight = int(summarybarheight)
  355. images = []
  356. if mainwidth > 0 and mainheight > 0:
  357. images.append(get_timeline_image(log, mainwidth, mainheight))
  358. if summarywidth > 0 and summarybarheight > 0:
  359. images.append(get_timesummary_image(log, summarywidth,
  360. summarybarheight))
  361. image = combine(images, horizontal=False)
  362. if output is None:
  363. image.save(sys.stdout, format='png')
  364. else:
  365. image.save(output)
  366. def print_summary(log, out):
  367. totaltimes = gettotaltimes(log)
  368. if out == '-':
  369. outfile = sys.stdout
  370. else:
  371. outfile = open(out, "w")
  372. l = totaltimes.items()
  373. l.sort(cmp=lambda a, b: cmp(b[1], a[1]))
  374. total = sum([b for a, b in l])
  375. for a, b in l:
  376. if a is None:
  377. a = 'interpret'
  378. s = " " * (50 - len(a))
  379. print >>outfile, a, s, str(b*100/total) + "%"
  380. if out != '-':
  381. outfile.close()
  382. # ____________________________________________________________
  383. ACTIONS = {
  384. 'draw-time': (draw_timeline_image, ['output=',
  385. 'mainwidth=', 'mainheight=',
  386. 'summarywidth=', 'summarybarheight=',
  387. ]),
  388. 'print-summary': (print_summary, []),
  389. }
  390. if __name__ == '__main__':
  391. import getopt
  392. if len(sys.argv) < 3:
  393. print __doc__
  394. sys.exit(2)
  395. action = sys.argv[1]
  396. func, longopts = ACTIONS[action]
  397. options, args = getopt.gnu_getopt(sys.argv[2:], '', longopts)
  398. if len(args) != 2:
  399. print __doc__
  400. sys.exit(2)
  401. kwds = {}
  402. for name, value in options:
  403. assert name.startswith('--')
  404. kwds[name[2:]] = value
  405. log = parse_log_file(args[0])
  406. func(log, args[1], **kwds)