PageRenderTime 60ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/jit/backend/x86/tool/viewcode.py

https://bitbucket.org/pwaller/pypy
Python | 438 lines | 405 code | 16 blank | 17 comment | 25 complexity | bbd72891523d90c50a8397b96deab988 MD5 | raw file
  1. #! /usr/bin/env python
  2. """
  3. Viewer for the output of compiled programs generating code.
  4. Use on the log files created with 'PYPYLOG=jit-backend-dump:log'.
  5. Try:
  6. ./viewcode.py --text log # text only disassembly
  7. ./viewcode.py log # also includes a pygame viewer
  8. """
  9. import autopath
  10. import new
  11. import operator
  12. import py
  13. import re
  14. import sys
  15. import subprocess
  16. from bisect import bisect_left
  17. # don't use pypy.tool.udir here to avoid removing old usessions which
  18. # might still contain interesting executables
  19. udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
  20. tmpfile = str(udir.join('dump.tmp'))
  21. # hack hack
  22. import pypy.tool
  23. mod = new.module('pypy.tool.udir')
  24. mod.udir = udir
  25. sys.modules['pypy.tool.udir'] = mod
  26. pypy.tool.udir = mod
  27. # ____________________________________________________________
  28. # Some support code from Psyco. There is more over there,
  29. # I am porting it in a lazy fashion... See py-utils/xam.py
  30. if sys.platform == "win32":
  31. pass # lots more in Psyco
  32. def machine_code_dump(data, originaddr, backend_name, label_list=None):
  33. objdump_backend_option = {
  34. 'x86': 'i386',
  35. 'x86_32': 'i386',
  36. 'x86_64': 'x86-64',
  37. 'i386': 'i386',
  38. }
  39. objdump = ('objdump -M %(backend)s -b binary -m i386 '
  40. '--disassembler-options=intel-mnemonics '
  41. '--adjust-vma=%(origin)d -D %(file)s')
  42. #
  43. f = open(tmpfile, 'wb')
  44. f.write(data)
  45. f.close()
  46. p = subprocess.Popen(objdump % {
  47. 'file': tmpfile,
  48. 'origin': originaddr,
  49. 'backend': objdump_backend_option[backend_name],
  50. }, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  51. stdout, stderr = p.communicate()
  52. assert not p.returncode, ('Encountered an error running objdump: %s' %
  53. stderr)
  54. # drop some objdump cruft
  55. lines = stdout.splitlines(True)[6:] # drop some objdump cruft
  56. return format_code_dump_with_labels(originaddr, lines, label_list)
  57. def format_code_dump_with_labels(originaddr, lines, label_list):
  58. from pypy.rlib.rarithmetic import r_uint
  59. if not label_list:
  60. label_list = []
  61. originaddr = r_uint(originaddr)
  62. itlines = iter(lines)
  63. yield itlines.next() # don't process the first line
  64. for lbl_start, lbl_name in label_list:
  65. for line in itlines:
  66. addr, _ = line.split(':', 1)
  67. addr = int(addr, 16)
  68. if addr >= originaddr+lbl_start:
  69. yield '\n'
  70. if lbl_name is None:
  71. yield '--end of the loop--\n'
  72. else:
  73. yield str(lbl_name) + '\n'
  74. yield line
  75. break
  76. yield line
  77. # yield all the remaining lines
  78. for line in itlines:
  79. yield line
  80. def load_symbols(filename):
  81. # the program that lists symbols, and the output it gives
  82. symbollister = 'nm %s'
  83. re_symbolentry = re.compile(r'([0-9a-fA-F]+)\s\w\s(.*)')
  84. #
  85. print 'loading symbols from %s...' % (filename,)
  86. symbols = {}
  87. p = subprocess.Popen(symbollister % filename, shell=True,
  88. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  89. stdout, stderr = p.communicate()
  90. assert not p.returncode, ('Encountered an error running nm: %s' %
  91. stderr)
  92. for line in stdout.splitlines(True):
  93. match = re_symbolentry.match(line)
  94. if match:
  95. addr = long(match.group(1), 16)
  96. name = match.group(2)
  97. if name.startswith('pypy_g_'):
  98. name = '\xb7' + name[7:]
  99. symbols[addr] = name
  100. print '%d symbols found' % (len(symbols),)
  101. return symbols
  102. re_addr = re.compile(r'[\s,$]0x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]+)')
  103. re_lineaddr = re.compile(r'\s*0?x?([0-9a-fA-F]+)')
  104. def lineaddresses(line):
  105. result = []
  106. i = 0
  107. while 1:
  108. match = re_addr.search(line, i)
  109. if not match:
  110. break
  111. i = match.end()
  112. addr = long(match.group(1), 16)
  113. result.append(addr)
  114. return result
  115. # ____________________________________________________________
  116. class CodeRange(object):
  117. fallthrough = False
  118. def __init__(self, world, addr, data):
  119. self.world = world
  120. self.addr = addr
  121. self.data = data
  122. def __repr__(self):
  123. return '<CodeRange %s length %d>' % (hex(self.addr), len(self.data))
  124. def touches(self, other):
  125. return (self .addr < other.addr + len(other.data) and
  126. other.addr < self .addr + len(self.data))
  127. def update_from_old(self, other):
  128. if other.addr < self.addr:
  129. delta = self.addr - other.addr
  130. assert delta <= len(other.data)
  131. self.addr -= delta
  132. self.data = other.data[:delta] + self.data
  133. self_end = self .addr + len(self .data)
  134. other_end = other.addr + len(other.data)
  135. if other_end > self_end:
  136. extra = other_end - self_end
  137. assert extra <= len(other.data)
  138. self.data += other.data[-extra:]
  139. def cmpop(op):
  140. def _cmp(self, other):
  141. if not isinstance(other, CodeRange):
  142. return NotImplemented
  143. return op((self.addr, self.data), (other.addr, other.data))
  144. return _cmp
  145. __lt__ = cmpop(operator.lt)
  146. __le__ = cmpop(operator.le)
  147. __eq__ = cmpop(operator.eq)
  148. __ne__ = cmpop(operator.ne)
  149. __gt__ = cmpop(operator.gt)
  150. __ge__ = cmpop(operator.ge)
  151. del cmpop
  152. def disassemble(self):
  153. if not hasattr(self, 'text'):
  154. lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
  155. lines = list(lines)
  156. # instead of adding symbol names in the dumps we could
  157. # also make the 0xNNNNNNNN addresses be red and show the
  158. # symbol name when the mouse is over them
  159. logentries = self.world.logentries
  160. symbols = self.world.symbols
  161. for i, line in enumerate(lines):
  162. match = re_lineaddr.match(line)
  163. if match:
  164. addr = long(match.group(1), 16)
  165. logentry = logentries.get(addr)
  166. if logentry:
  167. lines[i] = '\n%s\n%s' % (logentry, lines[i])
  168. for addr in lineaddresses(line):
  169. sym = symbols.get(addr)
  170. if sym:
  171. lines[i] = '%s\t%s\n' % (lines[i].rstrip(), sym)
  172. self.text = ''.join(lines)
  173. return self.text
  174. def findjumps(self):
  175. text = self.disassemble()
  176. lines = text.splitlines()
  177. line = ''
  178. for i, line in enumerate(lines):
  179. if '\tj' not in line: # poor heuristic to recognize lines that
  180. continue # could be jump instructions
  181. addrs = list(lineaddresses(line))
  182. if not addrs:
  183. continue
  184. addr = addrs[-1]
  185. final = '\tjmp' in line
  186. yield i, addr, final
  187. if self.fallthrough and '\tret' not in line:
  188. yield len(lines), self.addr + len(self.data), True
  189. class World(object):
  190. def __init__(self):
  191. self.ranges = []
  192. self.labeltargets = {}
  193. self.jumps = {}
  194. self.symbols = {}
  195. self.logentries = {}
  196. self.backend_name = None
  197. self.executable_name = None
  198. def parse(self, f, textonly=True):
  199. for line in f:
  200. if line.startswith('BACKEND '):
  201. self.backend_name = line.split(' ')[1].strip()
  202. elif line.startswith('CODE_DUMP '):
  203. pieces = line.split()
  204. assert pieces[1].startswith('@')
  205. assert pieces[2].startswith('+')
  206. if len(pieces) == 3:
  207. continue # empty line
  208. baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
  209. offset = int(pieces[2][1:])
  210. addr = baseaddr + offset
  211. data = pieces[3].replace(':', '').decode('hex')
  212. coderange = CodeRange(self, addr, data)
  213. i = bisect_left(self.ranges, coderange)
  214. j = i
  215. while i>0 and coderange.touches(self.ranges[i-1]):
  216. coderange.update_from_old(self.ranges[i-1])
  217. i -= 1
  218. while j<len(self.ranges) and coderange.touches(self.ranges[j]):
  219. coderange.update_from_old(self.ranges[j])
  220. j += 1
  221. self.ranges[i:j] = [coderange]
  222. elif line.startswith('LOG '):
  223. pieces = line.split(None, 3)
  224. assert pieces[1].startswith('@')
  225. assert pieces[2].startswith('+')
  226. baseaddr = long(pieces[1][1:], 16) & 0xFFFFFFFFL
  227. offset = int(pieces[2][1:])
  228. addr = baseaddr + offset
  229. self.logentries[addr] = pieces[3]
  230. elif line.startswith('SYS_EXECUTABLE '):
  231. filename = line[len('SYS_EXECUTABLE '):].strip()
  232. if filename != self.executable_name and filename != '??':
  233. self.symbols.update(load_symbols(filename))
  234. self.executable_name = filename
  235. def find_cross_references(self):
  236. # find cross-references between blocks
  237. fnext = 0.1
  238. for i, r in enumerate(self.ranges):
  239. for lineno, targetaddr, _ in r.findjumps():
  240. self.labeltargets[targetaddr] = True
  241. if i % 100 == 99:
  242. f = float(i) / len(self.ranges)
  243. if f >= fnext:
  244. sys.stderr.write("%d%%" % int(f*100.0))
  245. fnext += 0.1
  246. sys.stderr.write(".")
  247. sys.stderr.write("100%")
  248. # split blocks at labeltargets
  249. t = self.labeltargets
  250. #print t
  251. for r in self.ranges:
  252. #print r.addr, r.addr + len(r.data)
  253. for i in range(r.addr + 1, r.addr + len(r.data)):
  254. if i in t:
  255. #print i
  256. ofs = i - r.addr
  257. self.ranges.append(CodeRange(self, i, r.data[ofs:]))
  258. r.data = r.data[:ofs]
  259. r.fallthrough = True
  260. try:
  261. del r.text
  262. except AttributeError:
  263. pass
  264. break
  265. # hack hack hacked
  266. sys.stderr.write("\n")
  267. def show(self, showtext=True, showgraph=True):
  268. if showgraph:
  269. g1 = Graph('codedump')
  270. self.ranges.sort()
  271. for r in self.ranges:
  272. disassembled = r.disassemble()
  273. if showtext:
  274. print disassembled
  275. if showgraph:
  276. text, width = tab2columns(disassembled)
  277. text = '0x%x\n\n%s' % (r.addr, text)
  278. g1.emit_node('N_%x' % r.addr, shape="box", label=text,
  279. width=str(width*0.1125))
  280. for lineno, targetaddr, final in r.findjumps():
  281. if final:
  282. color = "black"
  283. else:
  284. color = "red"
  285. g1.emit_edge('N_%x' % r.addr, 'N_%x' % targetaddr,
  286. color=color)
  287. sys.stdout.flush()
  288. if showgraph:
  289. g1.display()
  290. def showtextonly(self):
  291. self.ranges.sort()
  292. for r in self.ranges:
  293. disassembled = r.disassemble()
  294. print disassembled
  295. del r.text
  296. def tab2columns(text):
  297. lines = text.split('\n')
  298. columnwidth = []
  299. for line in lines:
  300. columns = line.split('\t')[:-1]
  301. while len(columnwidth) < len(columns):
  302. columnwidth.append(0)
  303. for i, s in enumerate(columns):
  304. width = len(s.strip())
  305. if not s.endswith(':'):
  306. width += 2
  307. columnwidth[i] = max(columnwidth[i], width)
  308. columnwidth.append(1)
  309. result = []
  310. for line in lines:
  311. columns = line.split('\t')
  312. text = []
  313. for width, s in zip(columnwidth, columns):
  314. text.append(s.strip().ljust(width))
  315. result.append(' '.join(text))
  316. lengths = [len(line) for line in result]
  317. lengths.append(1)
  318. totalwidth = max(lengths)
  319. return '\\l'.join(result), totalwidth
  320. # ____________________________________________________________
  321. # XXX pasted from
  322. # http://codespeak.net/svn/user/arigo/hack/misc/graphlib.py
  323. # but needs to be a bit more subtle later
  324. from pypy.translator.tool.make_dot import DotGen
  325. from dotviewer.graphclient import display_page
  326. class Graph(DotGen):
  327. def highlight(self, word, text, linked_to=None):
  328. if not hasattr(self, '_links'):
  329. self._links = {}
  330. self._links_to = {}
  331. self._links[word] = text
  332. if linked_to:
  333. self._links_to[word] = linked_to
  334. def display(self):
  335. "Display a graph page locally."
  336. display_page(_Page(self))
  337. class NoGraph(Exception):
  338. pass
  339. class _Page:
  340. def __init__(self, graph_builder):
  341. if callable(graph_builder):
  342. graph = graph_builder()
  343. else:
  344. graph = graph_builder
  345. if graph is None:
  346. raise NoGraph
  347. self.graph_builder = graph_builder
  348. def content(self):
  349. return _PageContent(self.graph_builder)
  350. class _PageContent:
  351. fixedfont = True
  352. def __init__(self, graph_builder):
  353. if callable(graph_builder):
  354. graph = graph_builder()
  355. else:
  356. graph = graph_builder
  357. assert graph is not None
  358. self.graph_builder = graph_builder
  359. self.graph = graph
  360. self.links = getattr(graph, '_links', {})
  361. if not hasattr(graph, '_source'):
  362. graph._source = graph.generate(target=None)
  363. self.source = graph._source
  364. def followlink(self, link):
  365. try:
  366. return _Page(self.graph._links_to[link])
  367. except NoGraph:
  368. return _Page(self.graph_builder)
  369. # ____________________________________________________________
  370. if __name__ == '__main__':
  371. if '--text' in sys.argv:
  372. sys.argv.remove('--text')
  373. showgraph = False
  374. else:
  375. showgraph = True
  376. if len(sys.argv) != 2:
  377. print >> sys.stderr, __doc__
  378. sys.exit(2)
  379. #
  380. import cStringIO
  381. from pypy.tool import logparser
  382. log1 = logparser.parse_log_file(sys.argv[1])
  383. text1 = logparser.extract_category(log1, catprefix='jit-backend-dump')
  384. f = cStringIO.StringIO()
  385. f.writelines(text1)
  386. f.seek(0)
  387. del log1, text1
  388. #
  389. world = World()
  390. world.parse(f)
  391. if showgraph:
  392. world.find_cross_references()
  393. world.show(showtext=True)
  394. else:
  395. world.showtextonly()