PageRenderTime 52ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/rpython/jit/backend/tool/viewcode.py

https://bitbucket.org/pypy/pypy/
Python | 481 lines | 453 code | 14 blank | 14 comment | 29 complexity | 92ec0d4e813fa6b20469d853100510e3 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. #! /usr/bin/env python
  2. """
  3. Viewer for the output of compiled programs generating code.
  4. Use on the log files created with 'PYPYLOG=jit-backend-dump:log'.
  5. Try:
  6. ./viewcode.py --text log # text only disassembly
  7. ./viewcode.py log # also includes a pygame viewer
  8. """
  9. import new
  10. import operator
  11. import os
  12. import py
  13. import re
  14. import sys
  15. import subprocess
  16. from bisect import bisect_left
  17. # ____________________________________________________________
  18. # Some support code from Psyco. There is more over there,
  19. # I am porting it in a lazy fashion... See py-utils/xam.py
# Placeholder branch: nothing is done on Windows here.  The Psyco code this
# section was ported from has Windows-specific support that was not copied.
if sys.platform == "win32":
    pass # lots more in Psyco
  22. class ObjdumpNotFound(Exception):
  23. pass
  24. def find_objdump():
  25. exe = ('objdump', 'gobjdump')
  26. path = os.environ['PATH'].split(os.pathsep)
  27. for e in exe:
  28. for p in path:
  29. path_to = os.path.join(p, e)
  30. if not os.path.exists(path_to):
  31. continue
  32. return e
  33. raise ObjdumpNotFound('(g)objdump was not found in PATH')
  34. def machine_code_dump(data, originaddr, backend_name, label_list=None):
  35. objdump_machine_option = {
  36. 'x86': 'i386',
  37. 'x86-without-sse2': 'i386',
  38. 'x86_32': 'i386',
  39. 'x86_64': 'i386:x86-64',
  40. 'x86-64': 'i386:x86-64',
  41. 'x86-64-sse4': 'i386:x86-64',
  42. 'i386': 'i386',
  43. 'arm': 'arm',
  44. 'arm_32': 'arm',
  45. 'ppc' : 'powerpc:common64',
  46. 'ppc-64' : 'powerpc:common64',
  47. 's390x': 's390:64-bit',
  48. }
  49. machine_endianness = {
  50. # default value: 'little'
  51. 'ppc' : sys.byteorder, # i.e. same as the running machine...
  52. 'ppc-64' : sys.byteorder, # i.e. same as the running machine...
  53. 's390x' : sys.byteorder, # i.e. same as the running machine...
  54. }
  55. cmd = find_objdump()
  56. objdump = ('%(command)s -b binary -m %(machine)s '
  57. '--endian=%(endianness)s '
  58. '--disassembler-options=intel-mnemonics '
  59. '--adjust-vma=%(origin)d -D %(file)s')
  60. #
  61. f = open(tmpfile, 'wb')
  62. f.write(data)
  63. f.close()
  64. p = subprocess.Popen(objdump % {
  65. 'command': cmd,
  66. 'file': tmpfile,
  67. 'origin': originaddr,
  68. 'machine': objdump_machine_option[backend_name],
  69. 'endianness': machine_endianness.get(backend_name, 'little'),
  70. }, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  71. stdout, stderr = p.communicate()
  72. assert not p.returncode, ('Encountered an error running objdump: %s' %
  73. stderr)
  74. # drop some objdump cruft
  75. lines = stdout.splitlines(True)[6:] # drop some objdump cruft
  76. return format_code_dump_with_labels(originaddr, lines, label_list)
  77. def format_code_dump_with_labels(originaddr, lines, label_list):
  78. from rpython.rlib.rarithmetic import r_uint
  79. if not label_list:
  80. label_list = []
  81. originaddr = r_uint(originaddr)
  82. itlines = iter(lines)
  83. yield itlines.next() # don't process the first line
  84. for lbl_start, lbl_name in label_list:
  85. for line in itlines:
  86. addr, _ = line.split(':', 1)
  87. addr = int(addr, 16)
  88. if addr >= originaddr+lbl_start:
  89. yield '\n'
  90. if lbl_name is None:
  91. yield '--end of the loop--\n'
  92. else:
  93. yield str(lbl_name) + '\n'
  94. yield line
  95. break
  96. yield line
  97. # yield all the remaining lines
  98. for line in itlines:
  99. yield line
  100. def load_symbols(filename):
  101. # the program that lists symbols, and the output it gives
  102. symbollister = 'nm %s'
  103. re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
  104. #
  105. print 'loading symbols from %s...' % (filename,)
  106. symbols = {}
  107. p = subprocess.Popen(symbollister % filename, shell=True,
  108. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  109. stdout, stderr = p.communicate()
  110. assert not p.returncode, ('Encountered an error running nm: %s' %
  111. stderr)
  112. for line in stdout.splitlines(True):
  113. match = re_symbolentry.match(line)
  114. if match:
  115. addr = long(match.group(1), 16)
  116. name = match.group(2)
  117. if name.startswith('pypy_g_'):
  118. name = '\xb7' + name[7:]
  119. symbols[addr] = name
  120. print '%d symbols found' % (len(symbols),)
  121. return symbols
  122. re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]+)')
  123. re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
  124. def lineaddresses(line):
  125. result = []
  126. i = 0
  127. while 1:
  128. match = re_addr.search(line, i)
  129. if not match:
  130. break
  131. i = match.end()
  132. addr = long(match.group(1), 16)
  133. result.append(addr)
  134. return result
  135. # ____________________________________________________________
  136. class CodeRange(object):
  137. fallthrough = False
  138. def __init__(self, world, addr, data):
  139. self.world = world
  140. self.addr = addr
  141. self.data = data
  142. def __repr__(self):
  143. return '<CodeRange %s length %d>' % (hex(self.addr), len(self.data))
  144. def touches(self, other):
  145. return (self .addr < other.addr + len(other.data) and
  146. other.addr < self .addr + len(self.data))
  147. def update_from_old(self, other):
  148. if other.addr < self.addr:
  149. delta = self.addr - other.addr
  150. assert delta <= len(other.data)
  151. self.addr -= delta
  152. self.data = other.data[:delta] + self.data
  153. self_end = self .addr + len(self .data)
  154. other_end = other.addr + len(other.data)
  155. if other_end > self_end:
  156. extra = other_end - self_end
  157. assert extra <= len(other.data)
  158. self.data += other.data[-extra:]
  159. def cmpop(op):
  160. def _cmp(self, other):
  161. if not isinstance(other, CodeRange):
  162. return NotImplemented
  163. return op((self.addr, self.data), (other.addr, other.data))
  164. return _cmp
  165. __lt__ = cmpop(operator.lt)
  166. __le__ = cmpop(operator.le)
  167. __eq__ = cmpop(operator.eq)
  168. __ne__ = cmpop(operator.ne)
  169. __gt__ = cmpop(operator.gt)
  170. __ge__ = cmpop(operator.ge)
  171. del cmpop
  172. def disassemble(self):
  173. if not hasattr(self, 'text'):
  174. lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
  175. lines = list(lines)
  176. # instead of adding symbol names in the dumps we could
  177. # also make the 0xNNNNNNNN addresses be red and show the
  178. # symbol name when the mouse is over them
  179. logentries = self.world.logentries
  180. symbols = self.world.symbols
  181. for i, line in enumerate(lines):
  182. match = re_lineaddr.match(line)
  183. if match:
  184. addr = long(match.group(1), 16)
  185. logentry = logentries.get(addr)
  186. if logentry:
  187. lines[i] = '\n%s\n%s' % (logentry, lines[i])
  188. for addr in lineaddresses(line):
  189. sym = symbols.get(addr)
  190. if sym:
  191. lines[i] = '%s\t%s\n' % (lines[i].rstrip(), sym)
  192. self.text = ''.join(lines)
  193. return self.text
  194. def findjumps(self):
  195. text = self.disassemble()
  196. lines = text.splitlines()
  197. line = ''
  198. for i, line in enumerate(lines):
  199. if '\tj' not in line: # poor heuristic to recognize lines that
  200. continue # could be jump instructions
  201. addrs = list(lineaddresses(line))
  202. if not addrs:
  203. continue
  204. addr = addrs[-1]
  205. final = '\tjmp' in line
  206. yield i, addr, final
  207. if self.fallthrough and '\tret' not in line:
  208. yield len(lines), self.addr + len(self.data), True
  209. class World(object):
  210. def __init__(self):
  211. self.ranges = []
  212. self.labeltargets = {}
  213. self.jumps = {}
  214. self.symbols = {}
  215. self.logentries = {}
  216. self.backend_name = None
  217. self.executable_name = None
  218. def parse(self, f, textonly=True):
  219. for line in f:
  220. if line.startswith('BACKEND '):
  221. self.backend_name = line.split(' ')[1].strip()
  222. elif line.startswith('CODE_DUMP '):
  223. pieces = line.split()
  224. assert pieces[1].startswith('@')
  225. assert pieces[2].startswith('+')
  226. if len(pieces) == 3:
  227. continue # empty line
  228. baseaddr = long(pieces[1][1:], 16)
  229. if baseaddr < 0:
  230. baseaddr += (2 * sys.maxint + 2)
  231. offset = int(pieces[2][1:])
  232. addr = baseaddr + offset
  233. data = pieces[3].replace(':', '').decode('hex')
  234. coderange = CodeRange(self, addr, data)
  235. i = bisect_left(self.ranges, coderange)
  236. j = i
  237. while i>0 and coderange.touches(self.ranges[i-1]):
  238. coderange.update_from_old(self.ranges[i-1])
  239. i -= 1
  240. while j<len(self.ranges) and coderange.touches(self.ranges[j]):
  241. coderange.update_from_old(self.ranges[j])
  242. j += 1
  243. self.ranges[i:j] = [coderange]
  244. elif line.startswith('LOG '):
  245. pieces = line.split(None, 3)
  246. assert pieces[1].startswith('@')
  247. assert pieces[2].startswith('+')
  248. baseaddr = long(pieces[1][1:], 16)
  249. if baseaddr < 0:
  250. baseaddr += (2 * sys.maxint + 2)
  251. offset = int(pieces[2][1:])
  252. addr = baseaddr + offset
  253. self.logentries[addr] = pieces[3]
  254. elif line.startswith('SYS_EXECUTABLE '):
  255. filename = line[len('SYS_EXECUTABLE '):].strip()
  256. if filename != self.executable_name and filename != '??':
  257. try:
  258. self.symbols.update(load_symbols(filename))
  259. except Exception as e:
  260. print e
  261. self.executable_name = filename
  262. def find_cross_references(self):
  263. # find cross-references between blocks
  264. fnext = 0.1
  265. for i, r in enumerate(self.ranges):
  266. for lineno, targetaddr, _ in r.findjumps():
  267. self.labeltargets[targetaddr] = True
  268. if i % 100 == 99:
  269. f = float(i) / len(self.ranges)
  270. if f >= fnext:
  271. sys.stderr.write("%d%%" % int(f*100.0))
  272. fnext += 0.1
  273. sys.stderr.write(".")
  274. sys.stderr.write("100%")
  275. # split blocks at labeltargets
  276. t = self.labeltargets
  277. #print t
  278. for r in self.ranges:
  279. #print r.addr, r.addr + len(r.data)
  280. for i in range(r.addr + 1, r.addr + len(r.data)):
  281. if i in t:
  282. #print i
  283. ofs = i - r.addr
  284. self.ranges.append(CodeRange(self, i, r.data[ofs:]))
  285. r.data = r.data[:ofs]
  286. r.fallthrough = True
  287. try:
  288. del r.text
  289. except AttributeError:
  290. pass
  291. break
  292. # hack hack hacked
  293. sys.stderr.write("\n")
  294. def show(self, showtext=True, showgraph=True):
  295. if showgraph:
  296. g1 = Graph('codedump')
  297. self.ranges.sort()
  298. for r in self.ranges:
  299. disassembled = r.disassemble()
  300. if showtext:
  301. print disassembled
  302. if showgraph:
  303. text, width = tab2columns(disassembled)
  304. text = '0x%x\n\n%s' % (r.addr, text)
  305. g1.emit_node('N_%x' % r.addr, shape="box", label=text,
  306. width=str(width*0.1125))
  307. for lineno, targetaddr, final in r.findjumps():
  308. if final:
  309. color = "black"
  310. else:
  311. color = "red"
  312. g1.emit_edge('N_%x' % r.addr, 'N_%x' % targetaddr,
  313. color=color)
  314. sys.stdout.flush()
  315. if showgraph:
  316. g1.display()
  317. def showtextonly(self):
  318. self.ranges.sort()
  319. for r in self.ranges:
  320. disassembled = r.disassemble()
  321. print disassembled
  322. del r.text
  323. def tab2columns(text):
  324. lines = text.split('\n')
  325. columnwidth = []
  326. for line in lines:
  327. columns = line.split('\t')[:-1]
  328. while len(columnwidth) < len(columns):
  329. columnwidth.append(0)
  330. for i, s in enumerate(columns):
  331. width = len(s.strip())
  332. if not s.endswith(':'):
  333. width += 2
  334. columnwidth[i] = max(columnwidth[i], width)
  335. columnwidth.append(1)
  336. result = []
  337. for line in lines:
  338. columns = line.split('\t')
  339. text = []
  340. for width, s in zip(columnwidth, columns):
  341. text.append(s.strip().ljust(width))
  342. result.append(' '.join(text))
  343. lengths = [len(line) for line in result]
  344. lengths.append(1)
  345. totalwidth = max(lengths)
  346. return '\\l'.join(result), totalwidth
  347. # ____________________________________________________________
  348. # XXX pasted from
  349. # http://codespeak.net/svn/user/arigo/hack/misc/graphlib.py
  350. # but needs to be a bit more subtle later
  351. from rpython.translator.tool.make_dot import DotGen
  352. from dotviewer.graphclient import display_page
  353. class Graph(DotGen):
  354. def highlight(self, word, text, linked_to=None):
  355. if not hasattr(self, '_links'):
  356. self._links = {}
  357. self._links_to = {}
  358. self._links[word] = text
  359. if linked_to:
  360. self._links_to[word] = linked_to
  361. def display(self):
  362. "Display a graph page locally."
  363. display_page(_Page(self))
  364. class NoGraph(Exception):
  365. pass
  366. class _Page:
  367. def __init__(self, graph_builder):
  368. if callable(graph_builder):
  369. graph = graph_builder()
  370. else:
  371. graph = graph_builder
  372. if graph is None:
  373. raise NoGraph
  374. self.graph_builder = graph_builder
  375. def content(self):
  376. return _PageContent(self.graph_builder)
  377. class _PageContent:
  378. fixedfont = True
  379. def __init__(self, graph_builder):
  380. if callable(graph_builder):
  381. graph = graph_builder()
  382. else:
  383. graph = graph_builder
  384. assert graph is not None
  385. self.graph_builder = graph_builder
  386. self.graph = graph
  387. self.links = getattr(graph, '_links', {})
  388. if not hasattr(graph, '_source'):
  389. graph._source = graph.generate(target=None)
  390. self.source = graph._source
  391. def followlink(self, link):
  392. try:
  393. return _Page(self.graph._links_to[link])
  394. except NoGraph:
  395. return _Page(self.graph_builder)
  396. # ____________________________________________________________
# Script entry point.  In both branches the module-level name 'tmpfile' is
# defined; machine_code_dump() writes the bytes to disassemble there.
if __name__ == '__main__':
    # don't use rpython.tool.udir here to avoid removing old usessions which
    # might still contain interesting executables
    udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
    tmpfile = str(udir.join('dump.tmp'))
    # hack hack
    # Install a stand-in 'rpython.tool.udir' module whose 'udir' is the
    # directory created above, so that importing that module name from now
    # on gives this object.
    import rpython.tool
    mod = new.module('rpython.tool.udir')
    mod.udir = udir
    sys.modules['rpython.tool.udir'] = mod
    rpython.tool.udir = mod
    # '--text' selects the text-only output; it is removed from sys.argv so
    # that the log file name is the only remaining argument
    if '--text' in sys.argv:
        sys.argv.remove('--text')
        showgraph = False
    else:
        showgraph = True
    # exactly one argument (the log file) is expected; otherwise print the
    # module docstring as usage text and exit with status 2
    if len(sys.argv) != 2:
        print >> sys.stderr, __doc__
        sys.exit(2)
    #
    # Extract the 'jit-backend-dump' sections of the log and present them
    # to World.parse() as an in-memory file.
    import cStringIO
    from rpython.tool import logparser
    log1 = logparser.parse_log_file(sys.argv[1])
    text1 = logparser.extract_category(log1, catprefix='jit-backend-dump')
    f = cStringIO.StringIO()
    f.writelines(text1)
    f.seek(0)
    # the parsed log is not needed any more once copied into 'f'
    del log1, text1
    #
    world = World()
    world.parse(f)
    if showgraph:
        # the graph view needs the ranges split at jump targets first
        world.find_cross_references()
        world.show(showtext=True)
    else:
        world.showtextonly()
else:
    # imported as a module: use the regular rpython udir for the dump file
    from rpython.tool.udir import udir
    tmpfile = str(udir.join('dump.tmp'))
  429. world.find_cross_references()
  430. world.show(showtext=True)
  431. else:
  432. world.showtextonly()
  433. else:
  434. from rpython.tool.udir import udir
  435. tmpfile = str(udir.join('dump.tmp'))