PageRenderTime 40ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/jit/backend/x86/tool/viewcode.py

https://bitbucket.org/quangquach/pypy
Python | 451 lines | 417 code | 17 blank | 17 comment | 28 complexity | afed1fd1f8219287c2939f724a89c3b8 MD5 | raw file
  1. #! /usr/bin/env python
  2. """
  3. Viewer for the output of compiled programs generating code.
  4. Use on the log files created with 'PYPYLOG=jit-backend-dump:log'.
  5. Try:
  6. ./viewcode.py --text log # text only disassembly
  7. ./viewcode.py log # also includes a pygame viewer
  8. """
  9. import new
  10. import operator
  11. import os
  12. import py
  13. import re
  14. import sys
  15. import subprocess
  16. from bisect import bisect_left
# Create our own numbered scratch directory.
# don't use pypy.tool.udir here to avoid removing old usessions which
# might still contain interesting executables
udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
# Scratch file that machine_code_dump() fills with the raw bytes it then
# hands to objdump.
tmpfile = str(udir.join('dump.tmp'))

# hack hack
# Register a stand-in module under the name 'pypy.tool.udir' that exposes
# the scratch directory created above, both in sys.modules and as an
# attribute of the pypy.tool package, so that a later import of that name
# finds this stand-in.
import pypy.tool
mod = new.module('pypy.tool.udir')
mod.udir = udir
sys.modules['pypy.tool.udir'] = mod
pypy.tool.udir = mod

# ____________________________________________________________
# Some support code from Psyco.  There is more over there,
# I am porting it in a lazy fashion...  See py-utils/xam.py

if sys.platform == "win32":
    pass   # lots more in Psyco
  32. def find_objdump():
  33. exe = ('objdump', 'gobjdump')
  34. path = os.environ['PATH'].split(os.pathsep)
  35. for e in exe:
  36. for p in path:
  37. path_to = os.path.join(p, e)
  38. if not os.path.exists(path_to):
  39. continue
  40. return e
  41. raise AssertionError('(g)objdump was not found in PATH')
  42. def machine_code_dump(data, originaddr, backend_name, label_list=None):
  43. objdump_backend_option = {
  44. 'x86': 'i386',
  45. 'x86_32': 'i386',
  46. 'x86_64': 'x86-64',
  47. 'i386': 'i386',
  48. }
  49. cmd = find_objdump()
  50. objdump = ('%(command)s -M %(backend)s -b binary -m i386 '
  51. '--disassembler-options=intel-mnemonics '
  52. '--adjust-vma=%(origin)d -D %(file)s')
  53. #
  54. f = open(tmpfile, 'wb')
  55. f.write(data)
  56. f.close()
  57. p = subprocess.Popen(objdump % {
  58. 'command': cmd,
  59. 'file': tmpfile,
  60. 'origin': originaddr,
  61. 'backend': objdump_backend_option[backend_name],
  62. }, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  63. stdout, stderr = p.communicate()
  64. assert not p.returncode, ('Encountered an error running objdump: %s' %
  65. stderr)
  66. # drop some objdump cruft
  67. lines = stdout.splitlines(True)[6:] # drop some objdump cruft
  68. return format_code_dump_with_labels(originaddr, lines, label_list)
  69. def format_code_dump_with_labels(originaddr, lines, label_list):
  70. from pypy.rlib.rarithmetic import r_uint
  71. if not label_list:
  72. label_list = []
  73. originaddr = r_uint(originaddr)
  74. itlines = iter(lines)
  75. yield itlines.next() # don't process the first line
  76. for lbl_start, lbl_name in label_list:
  77. for line in itlines:
  78. addr, _ = line.split(':', 1)
  79. addr = int(addr, 16)
  80. if addr >= originaddr+lbl_start:
  81. yield '\n'
  82. if lbl_name is None:
  83. yield '--end of the loop--\n'
  84. else:
  85. yield str(lbl_name) + '\n'
  86. yield line
  87. break
  88. yield line
  89. # yield all the remaining lines
  90. for line in itlines:
  91. yield line
  92. def load_symbols(filename):
  93. # the program that lists symbols, and the output it gives
  94. symbollister = 'nm %s'
  95. re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
  96. #
  97. print 'loading symbols from %s...' % (filename,)
  98. symbols = {}
  99. p = subprocess.Popen(symbollister % filename, shell=True,
  100. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  101. stdout, stderr = p.communicate()
  102. assert not p.returncode, ('Encountered an error running nm: %s' %
  103. stderr)
  104. for line in stdout.splitlines(True):
  105. match = re_symbolentry.match(line)
  106. if match:
  107. addr = long(match.group(1), 16)
  108. name = match.group(2)
  109. if name.startswith('pypy_g_'):
  110. name = '\xb7' + name[7:]
  111. symbols[addr] = name
  112. print '%d symbols found' % (len(symbols),)
  113. return symbols
  114. re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]+)')
  115. re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
  116. def lineaddresses(line):
  117. result = []
  118. i = 0
  119. while 1:
  120. match = re_addr.search(line, i)
  121. if not match:
  122. break
  123. i = match.end()
  124. addr = long(match.group(1), 16)
  125. result.append(addr)
  126. return result
  127. # ____________________________________________________________
  128. class CodeRange(object):
  129. fallthrough = False
  130. def __init__(self, world, addr, data):
  131. self.world = world
  132. self.addr = addr
  133. self.data = data
  134. def __repr__(self):
  135. return '<CodeRange %s length %d>' % (hex(self.addr), len(self.data))
  136. def touches(self, other):
  137. return (self .addr < other.addr + len(other.data) and
  138. other.addr < self .addr + len(self.data))
  139. def update_from_old(self, other):
  140. if other.addr < self.addr:
  141. delta = self.addr - other.addr
  142. assert delta <= len(other.data)
  143. self.addr -= delta
  144. self.data = other.data[:delta] + self.data
  145. self_end = self .addr + len(self .data)
  146. other_end = other.addr + len(other.data)
  147. if other_end > self_end:
  148. extra = other_end - self_end
  149. assert extra <= len(other.data)
  150. self.data += other.data[-extra:]
  151. def cmpop(op):
  152. def _cmp(self, other):
  153. if not isinstance(other, CodeRange):
  154. return NotImplemented
  155. return op((self.addr, self.data), (other.addr, other.data))
  156. return _cmp
  157. __lt__ = cmpop(operator.lt)
  158. __le__ = cmpop(operator.le)
  159. __eq__ = cmpop(operator.eq)
  160. __ne__ = cmpop(operator.ne)
  161. __gt__ = cmpop(operator.gt)
  162. __ge__ = cmpop(operator.ge)
  163. del cmpop
  164. def disassemble(self):
  165. if not hasattr(self, 'text'):
  166. lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
  167. lines = list(lines)
  168. # instead of adding symbol names in the dumps we could
  169. # also make the 0xNNNNNNNN addresses be red and show the
  170. # symbol name when the mouse is over them
  171. logentries = self.world.logentries
  172. symbols = self.world.symbols
  173. for i, line in enumerate(lines):
  174. match = re_lineaddr.match(line)
  175. if match:
  176. addr = long(match.group(1), 16)
  177. logentry = logentries.get(addr)
  178. if logentry:
  179. lines[i] = '\n%s\n%s' % (logentry, lines[i])
  180. for addr in lineaddresses(line):
  181. sym = symbols.get(addr)
  182. if sym:
  183. lines[i] = '%s\t%s\n' % (lines[i].rstrip(), sym)
  184. self.text = ''.join(lines)
  185. return self.text
  186. def findjumps(self):
  187. text = self.disassemble()
  188. lines = text.splitlines()
  189. line = ''
  190. for i, line in enumerate(lines):
  191. if '\tj' not in line: # poor heuristic to recognize lines that
  192. continue # could be jump instructions
  193. addrs = list(lineaddresses(line))
  194. if not addrs:
  195. continue
  196. addr = addrs[-1]
  197. final = '\tjmp' in line
  198. yield i, addr, final
  199. if self.fallthrough and '\tret' not in line:
  200. yield len(lines), self.addr + len(self.data), True
  201. class World(object):
  202. def __init__(self):
  203. self.ranges = []
  204. self.labeltargets = {}
  205. self.jumps = {}
  206. self.symbols = {}
  207. self.logentries = {}
  208. self.backend_name = None
  209. self.executable_name = None
  210. def parse(self, f, textonly=True):
  211. for line in f:
  212. if line.startswith('BACKEND '):
  213. self.backend_name = line.split(' ')[1].strip()
  214. elif line.startswith('CODE_DUMP '):
  215. pieces = line.split()
  216. assert pieces[1].startswith('@')
  217. assert pieces[2].startswith('+')
  218. if len(pieces) == 3:
  219. continue # empty line
  220. baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
  221. offset = int(pieces[2][1:])
  222. addr = baseaddr + offset
  223. data = pieces[3].replace(':', '').decode('hex')
  224. coderange = CodeRange(self, addr, data)
  225. i = bisect_left(self.ranges, coderange)
  226. j = i
  227. while i>0 and coderange.touches(self.ranges[i-1]):
  228. coderange.update_from_old(self.ranges[i-1])
  229. i -= 1
  230. while j<len(self.ranges) and coderange.touches(self.ranges[j]):
  231. coderange.update_from_old(self.ranges[j])
  232. j += 1
  233. self.ranges[i:j] = [coderange]
  234. elif line.startswith('LOG '):
  235. pieces = line.split(None, 3)
  236. assert pieces[1].startswith('@')
  237. assert pieces[2].startswith('+')
  238. baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
  239. offset = int(pieces[2][1:])
  240. addr = baseaddr + offset
  241. self.logentries[addr] = pieces[3]
  242. elif line.startswith('SYS_EXECUTABLE '):
  243. filename = line[len('SYS_EXECUTABLE '):].strip()
  244. if filename != self.executable_name and filename != '??':
  245. self.symbols.update(load_symbols(filename))
  246. self.executable_name = filename
  247. def find_cross_references(self):
  248. # find cross-references between blocks
  249. fnext = 0.1
  250. for i, r in enumerate(self.ranges):
  251. for lineno, targetaddr, _ in r.findjumps():
  252. self.labeltargets[targetaddr] = True
  253. if i % 100 == 99:
  254. f = float(i) / len(self.ranges)
  255. if f >= fnext:
  256. sys.stderr.write("%d%%" % int(f*100.0))
  257. fnext += 0.1
  258. sys.stderr.write(".")
  259. sys.stderr.write("100%")
  260. # split blocks at labeltargets
  261. t = self.labeltargets
  262. #print t
  263. for r in self.ranges:
  264. #print r.addr, r.addr + len(r.data)
  265. for i in range(r.addr + 1, r.addr + len(r.data)):
  266. if i in t:
  267. #print i
  268. ofs = i - r.addr
  269. self.ranges.append(CodeRange(self, i, r.data[ofs:]))
  270. r.data = r.data[:ofs]
  271. r.fallthrough = True
  272. try:
  273. del r.text
  274. except AttributeError:
  275. pass
  276. break
  277. # hack hack hacked
  278. sys.stderr.write("\n")
  279. def show(self, showtext=True, showgraph=True):
  280. if showgraph:
  281. g1 = Graph('codedump')
  282. self.ranges.sort()
  283. for r in self.ranges:
  284. disassembled = r.disassemble()
  285. if showtext:
  286. print disassembled
  287. if showgraph:
  288. text, width = tab2columns(disassembled)
  289. text = '0x%x\n\n%s' % (r.addr, text)
  290. g1.emit_node('N_%x' % r.addr, shape="box", label=text,
  291. width=str(width*0.1125))
  292. for lineno, targetaddr, final in r.findjumps():
  293. if final:
  294. color = "black"
  295. else:
  296. color = "red"
  297. g1.emit_edge('N_%x' % r.addr, 'N_%x' % targetaddr,
  298. color=color)
  299. sys.stdout.flush()
  300. if showgraph:
  301. g1.display()
  302. def showtextonly(self):
  303. self.ranges.sort()
  304. for r in self.ranges:
  305. disassembled = r.disassemble()
  306. print disassembled
  307. del r.text
  308. def tab2columns(text):
  309. lines = text.split('\n')
  310. columnwidth = []
  311. for line in lines:
  312. columns = line.split('\t')[:-1]
  313. while len(columnwidth) < len(columns):
  314. columnwidth.append(0)
  315. for i, s in enumerate(columns):
  316. width = len(s.strip())
  317. if not s.endswith(':'):
  318. width += 2
  319. columnwidth[i] = max(columnwidth[i], width)
  320. columnwidth.append(1)
  321. result = []
  322. for line in lines:
  323. columns = line.split('\t')
  324. text = []
  325. for width, s in zip(columnwidth, columns):
  326. text.append(s.strip().ljust(width))
  327. result.append(' '.join(text))
  328. lengths = [len(line) for line in result]
  329. lengths.append(1)
  330. totalwidth = max(lengths)
  331. return '\\l'.join(result), totalwidth
  332. # ____________________________________________________________
  333. # XXX pasted from
  334. # http://codespeak.net/svn/user/arigo/hack/misc/graphlib.py
  335. # but needs to be a bit more subtle later
  336. from pypy.translator.tool.make_dot import DotGen
  337. from dotviewer.graphclient import display_page
  338. class Graph(DotGen):
  339. def highlight(self, word, text, linked_to=None):
  340. if not hasattr(self, '_links'):
  341. self._links = {}
  342. self._links_to = {}
  343. self._links[word] = text
  344. if linked_to:
  345. self._links_to[word] = linked_to
  346. def display(self):
  347. "Display a graph page locally."
  348. display_page(_Page(self))
  349. class NoGraph(Exception):
  350. pass
  351. class _Page:
  352. def __init__(self, graph_builder):
  353. if callable(graph_builder):
  354. graph = graph_builder()
  355. else:
  356. graph = graph_builder
  357. if graph is None:
  358. raise NoGraph
  359. self.graph_builder = graph_builder
  360. def content(self):
  361. return _PageContent(self.graph_builder)
  362. class _PageContent:
  363. fixedfont = True
  364. def __init__(self, graph_builder):
  365. if callable(graph_builder):
  366. graph = graph_builder()
  367. else:
  368. graph = graph_builder
  369. assert graph is not None
  370. self.graph_builder = graph_builder
  371. self.graph = graph
  372. self.links = getattr(graph, '_links', {})
  373. if not hasattr(graph, '_source'):
  374. graph._source = graph.generate(target=None)
  375. self.source = graph._source
  376. def followlink(self, link):
  377. try:
  378. return _Page(self.graph._links_to[link])
  379. except NoGraph:
  380. return _Page(self.graph_builder)
  381. # ____________________________________________________________
  382. if __name__ == '__main__':
  383. if '--text' in sys.argv:
  384. sys.argv.remove('--text')
  385. showgraph = False
  386. else:
  387. showgraph = True
  388. if len(sys.argv) != 2:
  389. print >> sys.stderr, __doc__
  390. sys.exit(2)
  391. #
  392. import cStringIO
  393. from pypy.tool import logparser
  394. log1 = logparser.parse_log_file(sys.argv[1])
  395. text1 = logparser.extract_category(log1, catprefix='jit-backend-dump')
  396. f = cStringIO.StringIO()
  397. f.writelines(text1)
  398. f.seek(0)
  399. del log1, text1
  400. #
  401. world = World()
  402. world.parse(f)
  403. if showgraph:
  404. world.find_cross_references()
  405. world.show(showtext=True)
  406. else:
  407. world.showtextonly()