PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/jit/backend/tool/viewcode.py

https://bitbucket.org/kcr/pypy
Python | 454 lines | 420 code | 17 blank | 17 comment | 30 complexity | d81ca8b5d9cf1ac2bc6c5c7dc626b8f0 MD5 | raw file
Possible License(s): Apache-2.0
  1. #! /usr/bin/env python
  2. """
  3. Viewer for the output of compiled programs generating code.
  4. Use on the log files created with 'PYPYLOG=jit-backend-dump:log'.
  5. Try:
  6. ./viewcode.py --text log # text only disassembly
  7. ./viewcode.py log # also includes a pygame viewer
  8. """
  9. import new
  10. import operator
  11. import os
  12. import py
  13. import re
  14. import sys
  15. import subprocess
  16. from bisect import bisect_left
  17. # don't use rpython.tool.udir here to avoid removing old usessions which
  18. # might still contain interesting executables
  19. udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
  20. tmpfile = str(udir.join('dump.tmp'))
  21. # hack hack
  22. import rpython.tool
  23. mod = new.module('rpython.tool.udir')
  24. mod.udir = udir
  25. sys.modules['rpython.tool.udir'] = mod
  26. rpython.tool.udir = mod
  27. # ____________________________________________________________
  28. # Some support code from Psyco. There is more over there,
  29. # I am porting it in a lazy fashion... See py-utils/xam.py
  30. if sys.platform == "win32":
  31. pass # lots more in Psyco
  32. def find_objdump():
  33. exe = ('objdump', 'gobjdump')
  34. path = os.environ['PATH'].split(os.pathsep)
  35. for e in exe:
  36. for p in path:
  37. path_to = os.path.join(p, e)
  38. if not os.path.exists(path_to):
  39. continue
  40. return e
  41. raise AssertionError('(g)objdump was not found in PATH')
  42. def machine_code_dump(data, originaddr, backend_name, label_list=None):
  43. objdump_backend_option = {
  44. 'x86': 'i386',
  45. 'x86_32': 'i386',
  46. 'x86_64': 'x86-64',
  47. 'i386': 'i386',
  48. 'arm': 'arm',
  49. 'arm_32': 'arm',
  50. }
  51. cmd = find_objdump()
  52. objdump = ('%(command)s -M %(backend)s -b binary -m %(machine)s '
  53. '--disassembler-options=intel-mnemonics '
  54. '--adjust-vma=%(origin)d -D %(file)s')
  55. #
  56. f = open(tmpfile, 'wb')
  57. f.write(data)
  58. f.close()
  59. p = subprocess.Popen(objdump % {
  60. 'command': cmd,
  61. 'file': tmpfile,
  62. 'origin': originaddr,
  63. 'backend': objdump_backend_option[backend_name],
  64. 'machine': 'i386' if not backend_name.startswith('arm') else 'arm',
  65. }, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  66. stdout, stderr = p.communicate()
  67. assert not p.returncode, ('Encountered an error running objdump: %s' %
  68. stderr)
  69. # drop some objdump cruft
  70. lines = stdout.splitlines(True)[6:] # drop some objdump cruft
  71. return format_code_dump_with_labels(originaddr, lines, label_list)
  72. def format_code_dump_with_labels(originaddr, lines, label_list):
  73. from rpython.rlib.rarithmetic import r_uint
  74. if not label_list:
  75. label_list = []
  76. originaddr = r_uint(originaddr)
  77. itlines = iter(lines)
  78. yield itlines.next() # don't process the first line
  79. for lbl_start, lbl_name in label_list:
  80. for line in itlines:
  81. addr, _ = line.split(':', 1)
  82. addr = int(addr, 16)
  83. if addr >= originaddr+lbl_start:
  84. yield '\n'
  85. if lbl_name is None:
  86. yield '--end of the loop--\n'
  87. else:
  88. yield str(lbl_name) + '\n'
  89. yield line
  90. break
  91. yield line
  92. # yield all the remaining lines
  93. for line in itlines:
  94. yield line
  95. def load_symbols(filename):
  96. # the program that lists symbols, and the output it gives
  97. symbollister = 'nm %s'
  98. re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
  99. #
  100. print 'loading symbols from %s...' % (filename,)
  101. symbols = {}
  102. p = subprocess.Popen(symbollister % filename, shell=True,
  103. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  104. stdout, stderr = p.communicate()
  105. assert not p.returncode, ('Encountered an error running nm: %s' %
  106. stderr)
  107. for line in stdout.splitlines(True):
  108. match = re_symbolentry.match(line)
  109. if match:
  110. addr = long(match.group(1), 16)
  111. name = match.group(2)
  112. if name.startswith('pypy_g_'):
  113. name = '\xb7' + name[7:]
  114. symbols[addr] = name
  115. print '%d symbols found' % (len(symbols),)
  116. return symbols
  117. re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]+)')
  118. re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
  119. def lineaddresses(line):
  120. result = []
  121. i = 0
  122. while 1:
  123. match = re_addr.search(line, i)
  124. if not match:
  125. break
  126. i = match.end()
  127. addr = long(match.group(1), 16)
  128. result.append(addr)
  129. return result
  130. # ____________________________________________________________
  131. class CodeRange(object):
  132. fallthrough = False
  133. def __init__(self, world, addr, data):
  134. self.world = world
  135. self.addr = addr
  136. self.data = data
  137. def __repr__(self):
  138. return '<CodeRange %s length %d>' % (hex(self.addr), len(self.data))
  139. def touches(self, other):
  140. return (self .addr < other.addr + len(other.data) and
  141. other.addr < self .addr + len(self.data))
  142. def update_from_old(self, other):
  143. if other.addr < self.addr:
  144. delta = self.addr - other.addr
  145. assert delta <= len(other.data)
  146. self.addr -= delta
  147. self.data = other.data[:delta] + self.data
  148. self_end = self .addr + len(self .data)
  149. other_end = other.addr + len(other.data)
  150. if other_end > self_end:
  151. extra = other_end - self_end
  152. assert extra <= len(other.data)
  153. self.data += other.data[-extra:]
  154. def cmpop(op):
  155. def _cmp(self, other):
  156. if not isinstance(other, CodeRange):
  157. return NotImplemented
  158. return op((self.addr, self.data), (other.addr, other.data))
  159. return _cmp
  160. __lt__ = cmpop(operator.lt)
  161. __le__ = cmpop(operator.le)
  162. __eq__ = cmpop(operator.eq)
  163. __ne__ = cmpop(operator.ne)
  164. __gt__ = cmpop(operator.gt)
  165. __ge__ = cmpop(operator.ge)
  166. del cmpop
  167. def disassemble(self):
  168. if not hasattr(self, 'text'):
  169. lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
  170. lines = list(lines)
  171. # instead of adding symbol names in the dumps we could
  172. # also make the 0xNNNNNNNN addresses be red and show the
  173. # symbol name when the mouse is over them
  174. logentries = self.world.logentries
  175. symbols = self.world.symbols
  176. for i, line in enumerate(lines):
  177. match = re_lineaddr.match(line)
  178. if match:
  179. addr = long(match.group(1), 16)
  180. logentry = logentries.get(addr)
  181. if logentry:
  182. lines[i] = '\n%s\n%s' % (logentry, lines[i])
  183. for addr in lineaddresses(line):
  184. sym = symbols.get(addr)
  185. if sym:
  186. lines[i] = '%s\t%s\n' % (lines[i].rstrip(), sym)
  187. self.text = ''.join(lines)
  188. return self.text
  189. def findjumps(self):
  190. text = self.disassemble()
  191. lines = text.splitlines()
  192. line = ''
  193. for i, line in enumerate(lines):
  194. if '\tj' not in line: # poor heuristic to recognize lines that
  195. continue # could be jump instructions
  196. addrs = list(lineaddresses(line))
  197. if not addrs:
  198. continue
  199. addr = addrs[-1]
  200. final = '\tjmp' in line
  201. yield i, addr, final
  202. if self.fallthrough and '\tret' not in line:
  203. yield len(lines), self.addr + len(self.data), True
  204. class World(object):
  205. def __init__(self):
  206. self.ranges = []
  207. self.labeltargets = {}
  208. self.jumps = {}
  209. self.symbols = {}
  210. self.logentries = {}
  211. self.backend_name = None
  212. self.executable_name = None
  213. def parse(self, f, textonly=True):
  214. for line in f:
  215. if line.startswith('BACKEND '):
  216. self.backend_name = line.split(' ')[1].strip()
  217. elif line.startswith('CODE_DUMP '):
  218. pieces = line.split()
  219. assert pieces[1].startswith('@')
  220. assert pieces[2].startswith('+')
  221. if len(pieces) == 3:
  222. continue # empty line
  223. baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
  224. offset = int(pieces[2][1:])
  225. addr = baseaddr + offset
  226. data = pieces[3].replace(':', '').decode('hex')
  227. coderange = CodeRange(self, addr, data)
  228. i = bisect_left(self.ranges, coderange)
  229. j = i
  230. while i>0 and coderange.touches(self.ranges[i-1]):
  231. coderange.update_from_old(self.ranges[i-1])
  232. i -= 1
  233. while j<len(self.ranges) and coderange.touches(self.ranges[j]):
  234. coderange.update_from_old(self.ranges[j])
  235. j += 1
  236. self.ranges[i:j] = [coderange]
  237. elif line.startswith('LOG '):
  238. pieces = line.split(None, 3)
  239. assert pieces[1].startswith('@')
  240. assert pieces[2].startswith('+')
  241. baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
  242. offset = int(pieces[2][1:])
  243. addr = baseaddr + offset
  244. self.logentries[addr] = pieces[3]
  245. elif line.startswith('SYS_EXECUTABLE '):
  246. filename = line[len('SYS_EXECUTABLE '):].strip()
  247. if filename != self.executable_name and filename != '??':
  248. self.symbols.update(load_symbols(filename))
  249. self.executable_name = filename
  250. def find_cross_references(self):
  251. # find cross-references between blocks
  252. fnext = 0.1
  253. for i, r in enumerate(self.ranges):
  254. for lineno, targetaddr, _ in r.findjumps():
  255. self.labeltargets[targetaddr] = True
  256. if i % 100 == 99:
  257. f = float(i) / len(self.ranges)
  258. if f >= fnext:
  259. sys.stderr.write("%d%%" % int(f*100.0))
  260. fnext += 0.1
  261. sys.stderr.write(".")
  262. sys.stderr.write("100%")
  263. # split blocks at labeltargets
  264. t = self.labeltargets
  265. #print t
  266. for r in self.ranges:
  267. #print r.addr, r.addr + len(r.data)
  268. for i in range(r.addr + 1, r.addr + len(r.data)):
  269. if i in t:
  270. #print i
  271. ofs = i - r.addr
  272. self.ranges.append(CodeRange(self, i, r.data[ofs:]))
  273. r.data = r.data[:ofs]
  274. r.fallthrough = True
  275. try:
  276. del r.text
  277. except AttributeError:
  278. pass
  279. break
  280. # hack hack hacked
  281. sys.stderr.write("\n")
  282. def show(self, showtext=True, showgraph=True):
  283. if showgraph:
  284. g1 = Graph('codedump')
  285. self.ranges.sort()
  286. for r in self.ranges:
  287. disassembled = r.disassemble()
  288. if showtext:
  289. print disassembled
  290. if showgraph:
  291. text, width = tab2columns(disassembled)
  292. text = '0x%x\n\n%s' % (r.addr, text)
  293. g1.emit_node('N_%x' % r.addr, shape="box", label=text,
  294. width=str(width*0.1125))
  295. for lineno, targetaddr, final in r.findjumps():
  296. if final:
  297. color = "black"
  298. else:
  299. color = "red"
  300. g1.emit_edge('N_%x' % r.addr, 'N_%x' % targetaddr,
  301. color=color)
  302. sys.stdout.flush()
  303. if showgraph:
  304. g1.display()
  305. def showtextonly(self):
  306. self.ranges.sort()
  307. for r in self.ranges:
  308. disassembled = r.disassemble()
  309. print disassembled
  310. del r.text
  311. def tab2columns(text):
  312. lines = text.split('\n')
  313. columnwidth = []
  314. for line in lines:
  315. columns = line.split('\t')[:-1]
  316. while len(columnwidth) < len(columns):
  317. columnwidth.append(0)
  318. for i, s in enumerate(columns):
  319. width = len(s.strip())
  320. if not s.endswith(':'):
  321. width += 2
  322. columnwidth[i] = max(columnwidth[i], width)
  323. columnwidth.append(1)
  324. result = []
  325. for line in lines:
  326. columns = line.split('\t')
  327. text = []
  328. for width, s in zip(columnwidth, columns):
  329. text.append(s.strip().ljust(width))
  330. result.append(' '.join(text))
  331. lengths = [len(line) for line in result]
  332. lengths.append(1)
  333. totalwidth = max(lengths)
  334. return '\\l'.join(result), totalwidth
  335. # ____________________________________________________________
  336. # XXX pasted from
  337. # http://codespeak.net/svn/user/arigo/hack/misc/graphlib.py
  338. # but needs to be a bit more subtle later
  339. from rpython.translator.tool.make_dot import DotGen
  340. from dotviewer.graphclient import display_page
  341. class Graph(DotGen):
  342. def highlight(self, word, text, linked_to=None):
  343. if not hasattr(self, '_links'):
  344. self._links = {}
  345. self._links_to = {}
  346. self._links[word] = text
  347. if linked_to:
  348. self._links_to[word] = linked_to
  349. def display(self):
  350. "Display a graph page locally."
  351. display_page(_Page(self))
  352. class NoGraph(Exception):
  353. pass
  354. class _Page:
  355. def __init__(self, graph_builder):
  356. if callable(graph_builder):
  357. graph = graph_builder()
  358. else:
  359. graph = graph_builder
  360. if graph is None:
  361. raise NoGraph
  362. self.graph_builder = graph_builder
  363. def content(self):
  364. return _PageContent(self.graph_builder)
  365. class _PageContent:
  366. fixedfont = True
  367. def __init__(self, graph_builder):
  368. if callable(graph_builder):
  369. graph = graph_builder()
  370. else:
  371. graph = graph_builder
  372. assert graph is not None
  373. self.graph_builder = graph_builder
  374. self.graph = graph
  375. self.links = getattr(graph, '_links', {})
  376. if not hasattr(graph, '_source'):
  377. graph._source = graph.generate(target=None)
  378. self.source = graph._source
  379. def followlink(self, link):
  380. try:
  381. return _Page(self.graph._links_to[link])
  382. except NoGraph:
  383. return _Page(self.graph_builder)
  384. # ____________________________________________________________
  385. if __name__ == '__main__':
  386. if '--text' in sys.argv:
  387. sys.argv.remove('--text')
  388. showgraph = False
  389. else:
  390. showgraph = True
  391. if len(sys.argv) != 2:
  392. print >> sys.stderr, __doc__
  393. sys.exit(2)
  394. #
  395. import cStringIO
  396. from rpython.tool import logparser
  397. log1 = logparser.parse_log_file(sys.argv[1])
  398. text1 = logparser.extract_category(log1, catprefix='jit-backend-dump')
  399. f = cStringIO.StringIO()
  400. f.writelines(text1)
  401. f.seek(0)
  402. del log1, text1
  403. #
  404. world = World()
  405. world.parse(f)
  406. if showgraph:
  407. world.find_cross_references()
  408. world.show(showtext=True)
  409. else:
  410. world.showtextonly()