PageRenderTime 27ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/Lib/dis.py

https://gitlab.com/unofficial-mirrors/cpython
Python | 490 lines | 419 code | 19 blank | 52 comment | 46 complexity | f8dbf46c2d580f8bffe3f883855a3b89 MD5 | raw file
  1. """Disassembler of Python byte code into mnemonics."""
  2. import sys
  3. import types
  4. import collections
  5. import io
  6. from opcode import *
  7. from opcode import __all__ as _opcodes_all
  # Public API: the names defined by this module followed by every name
  # exported by the ``opcode`` module.
  __all__ = [
      "code_info",
      "dis",
      "disassemble",
      "distb",
      "disco",
      "findlinestarts",
      "findlabels",
      "show_code",
      "get_instructions",
      "Instruction",
      "Bytecode",
  ] + _opcodes_all
  del _opcodes_all

  # Types that dis() disassembles when it finds them as values in the
  # __dict__ of a class or module.
  _have_code = (
      types.MethodType,
      types.FunctionType,
      types.CodeType,
      classmethod,
      staticmethod,
      type,
  )

  # Opcode number of FORMAT_VALUE, looked up once at import time.
  FORMAT_VALUE = opmap['FORMAT_VALUE']
  def _try_compile(source, name):
      """Compile *source*, preferring expression mode over statement mode.

      The source is first compiled in 'eval' mode; if that raises
      SyntaxError it is compiled again in 'exec' mode.  *name* is passed to
      compile() as the filename.

      This lets functions that otherwise expect code objects accept
      strings as well.
      """
      try:
          return compile(source, name, 'eval')
      except SyntaxError:
          return compile(source, name, 'exec')
  def dis(x=None, *, file=None):
      """Disassemble classes, methods, functions, generators, or code.

      With no argument, disassemble the last traceback.

      Coroutine and asynchronous generator objects are accepted as well:
      like generators, they are replaced by the code object they run.
      Strings are compiled and then disassembled; bytes and bytearray
      objects are treated as raw bytecode.  For an object with a __dict__
      (class or module), every member that holds code is disassembled in
      sorted name order.

      The output is printed to *file* (print()'s default when None).
      Raises TypeError for objects that cannot be disassembled.
      """
      if x is None:
          distb(file=file)
          return
      if hasattr(x, '__func__'):  # Method
          x = x.__func__
      if hasattr(x, '__code__'):  # Function
          x = x.__code__
      # Generator, asynchronous generator or coroutine: use its code object.
      for code_attr in ('gi_code', 'ag_code', 'cr_code'):
          if hasattr(x, code_attr):
              x = getattr(x, code_attr)
              break
      if hasattr(x, '__dict__'):  # Class or module
          items = sorted(x.__dict__.items())
          for name, x1 in items:
              if isinstance(x1, _have_code):
                  print("Disassembly of %s:" % name, file=file)
                  try:
                      dis(x1, file=file)
                  except TypeError as msg:
                      # Keep going: one bad member must not abort the rest.
                      print("Sorry:", msg, file=file)
                  print(file=file)
      elif hasattr(x, 'co_code'):  # Code object
          disassemble(x, file=file)
      elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
          _disassemble_bytes(x, file=file)
      elif isinstance(x, str):  # Source code
          _disassemble_str(x, file=file)
      else:
          raise TypeError("don't know how to disassemble %s objects" %
                          type(x).__name__)
  def distb(tb=None, *, file=None):
      """Disassemble a traceback (default: last traceback).

      The innermost frame of the traceback is disassembled, with the
      instruction at its tb_lasti offset marked as current.  Output is
      printed to *file*.

      Raises RuntimeError when *tb* is None and sys.last_traceback is not
      set.
      """
      if tb is None:
          try:
              tb = sys.last_traceback
          except AttributeError:
              # The AttributeError is an implementation detail; do not chain
              # it onto the error reported to the caller.
              raise RuntimeError("no last traceback to disassemble") from None
      # Walk to the innermost traceback entry.
      while tb.tb_next:
          tb = tb.tb_next
      disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
  # The inspect module interrogates this dictionary to build its
  # list of CO_* constants. It is also used by pretty_flags to
  # turn the co_flags field into a human readable list.
  # The key of each name is 1 << (its position in the tuple below).
  COMPILER_FLAG_NAMES = {
      1 << bit: label
      for bit, label in enumerate((
          "OPTIMIZED",           # 1
          "NEWLOCALS",           # 2
          "VARARGS",             # 4
          "VARKEYWORDS",         # 8
          "NESTED",              # 16
          "GENERATOR",           # 32
          "NOFREE",              # 64
          "COROUTINE",           # 128
          "ITERABLE_COROUTINE",  # 256
          "ASYNC_GENERATOR",     # 512
      ))
  }
  def pretty_flags(flags):
      """Return pretty representation of code flags.

      The 32 low bits are examined from least to most significant.  Each
      set bit contributes its COMPILER_FLAG_NAMES entry, or the bit's hex
      value when it has no name.  If the scan ends with bits still
      unaccounted for (including the case where *flags* is 0), the hex
      value of what remains is appended.  Names are joined with ", ".
      """
      names = []
      remaining = flags
      for bit in range(32):
          mask = 1 << bit
          if not remaining & mask:
              continue
          names.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
          remaining ^= mask
          if not remaining:
              # Every set bit has been reported; skip the else clause.
              break
      else:
          names.append(hex(remaining))
      return ", ".join(names)
  def _get_code_object(x):
      """Return the code object behind *x*.

      Handles methods, functions, generators, asynchronous generators,
      coroutines, source strings and raw code objects.  Strings are
      compiled with the filename "<disassembly>".

      Raises TypeError when no code object can be obtained from *x*.
      """
      if hasattr(x, '__func__'):  # Method
          x = x.__func__
      if hasattr(x, '__code__'):  # Function
          x = x.__code__
      # Generator, asynchronous generator or coroutine: use its code object.
      for code_attr in ('gi_code', 'ag_code', 'cr_code'):
          if hasattr(x, code_attr):
              x = getattr(x, code_attr)
              break
      if isinstance(x, str):  # Source code
          x = _try_compile(x, "<disassembly>")
      if hasattr(x, 'co_code'):  # Code object
          return x
      raise TypeError("don't know how to disassemble %s objects" %
                      type(x).__name__)
  def code_info(x):
      """Formatted details of methods, functions, or code.

      *x* is resolved to a code object first; the result is a multi-line
      string.
      """
      code = _get_code_object(x)
      return _format_code_info(code)
  def _format_code_info(co):
      """Return a multi-line description of the code object *co*.

      The fixed summary lines come first.  They are followed by one
      section per non-empty table (constants, names, variable names, free
      variables, cell variables), each a heading plus one indexed entry
      per item.
      """
      summary = (
          ("Name: %s", co.co_name),
          ("Filename: %s", co.co_filename),
          ("Argument count: %s", co.co_argcount),
          ("Kw-only arguments: %s", co.co_kwonlyargcount),
          ("Number of locals: %s", co.co_nlocals),
          ("Stack size: %s", co.co_stacksize),
          ("Flags: %s", pretty_flags(co.co_flags)),
      )
      lines = [template % value for template, value in summary]

      # (heading, per-entry format, table); constants are shown with repr.
      sections = (
          ("Constants:", "%4d: %r", co.co_consts),
          ("Names:", "%4d: %s", co.co_names),
          ("Variable names:", "%4d: %s", co.co_varnames),
          ("Free variables:", "%4d: %s", co.co_freevars),
          ("Cell variables:", "%4d: %s", co.co_cellvars),
      )
      for heading, entry_format, table in sections:
          if table:
              lines.append(heading)
              lines.extend(entry_format % entry for entry in enumerate(table))
      return "\n".join(lines)
  def show_code(co, *, file=None):
      """Print details of methods, functions, or code to *file*.

      If *file* is not provided, the output is printed on stdout.
      """
      details = code_info(co)
      print(details, file=file)
  _Instruction = collections.namedtuple("_Instruction",
       "opname opcode arg argval argrepr offset starts_line is_jump_target")

  _Instruction.opname.__doc__ = "Human readable name for operation"
  _Instruction.opcode.__doc__ = "Numeric code for operation"
  _Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
  _Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
  _Instruction.argrepr.__doc__ = "Human readable description of operation argument"
  _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
  _Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
  _Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"


  class Instruction(_Instruction):
      """Details for a bytecode operation

         Defined fields:
           opname - human readable name for operation
           opcode - numeric code for operation
           arg - numeric argument to operation (if any), otherwise None
           argval - resolved arg value (if known), otherwise same as arg
           argrepr - human readable description of operation argument
           offset - start index of operation within bytecode sequence
           starts_line - line started by this opcode (if any), otherwise None
           is_jump_target - True if other code jumps to here, otherwise False
      """

      def _disassemble(self, lineno_width=3, mark_as_current=False):
          """Format instruction details for inclusion in disassembly output

          *lineno_width* sets the width of the line number field (0 omits it)
          *mark_as_current* inserts a '-->' marker arrow as part of the line

          Returns the formatted line with trailing whitespace removed.
          """
          fields = []
          # Column: Source code line number
          if lineno_width:
              if self.starts_line is not None:
                  lineno_fmt = "%%%dd" % lineno_width
                  fields.append(lineno_fmt % self.starts_line)
              else:
                  fields.append(' ' * lineno_width)
          # Column: Current instruction indicator.  The blank filler is as
          # wide as the '-->' marker so later columns stay aligned.
          fields.append('-->' if mark_as_current else '   ')
          # Column: Jump target marker (filler as wide as '>>').
          fields.append('>>' if self.is_jump_target else '  ')
          # Column: Instruction offset from start of code sequence
          fields.append(repr(self.offset).rjust(4))
          # Column: Opcode name
          fields.append(self.opname.ljust(20))
          # Column: Opcode argument
          if self.arg is not None:
              fields.append(repr(self.arg).rjust(5))
              # Column: Opcode argument details
              if self.argrepr:
                  fields.append('(' + self.argrepr + ')')
          return ' '.join(fields).rstrip()
  def get_instructions(x, *, first_line=None):
      """Iterator for the opcodes in methods, functions or code

      Generates a series of Instruction named tuples giving the details of
      each operation in the supplied code.

      If *first_line* is not None, it indicates the line number that should
      be reported for the first source line in the disassembled code.
      Otherwise, the source line information (if any) is taken directly from
      the disassembled code object.
      """
      co = _get_code_object(x)
      # Shift applied to every reported line number (0 keeps them as-is).
      line_offset = 0 if first_line is None else first_line - co.co_firstlineno
      return _get_instructions_bytes(
          co.co_code,
          varnames=co.co_varnames,
          names=co.co_names,
          constants=co.co_consts,
          cells=co.co_cellvars + co.co_freevars,
          linestarts=dict(findlinestarts(co)),
          line_offset=line_offset,
      )
  def _get_const_info(const_index, const_list):
      """Helper to get optional details about const references

      Returns a (value, repr) pair.  When *const_list* is given, the value
      is the constant stored at *const_index*; otherwise it is the index
      itself.  The second item is always repr() of the first.
      """
      value = const_index if const_list is None else const_list[const_index]
      return value, repr(value)
  def _get_name_info(name_index, name_list):
      """Helper to get optional details about named references

      Returns a (value, repr) pair.  When *name_list* is given, both items
      are the name stored at *name_index*.  Otherwise the pair is the index
      and repr() of the index.
      """
      if name_list is None:
          return name_index, repr(name_index)
      resolved = name_list[name_index]
      return resolved, resolved
  def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                              cells=None, linestarts=None, line_offset=0):
      """Iterate over the instructions in a bytecode string.

      Generates a sequence of Instruction namedtuples giving the details of each
      opcode.  Additional information about the code's runtime environment
      (e.g. variable names, constants) can be specified using optional
      arguments.

      *linestarts* maps instruction offsets to source line numbers; when
      given, *line_offset* is added to every line number reported.
      """
      # A set makes the per-instruction jump-target check constant time.
      labels = set(findlabels(code))
      starts_line = None
      for offset, op, arg in _unpack_opargs(code):
          if linestarts is not None:
              starts_line = linestarts.get(offset, None)
              if starts_line is not None:
                  starts_line += line_offset
          is_jump_target = offset in labels
          argval = None
          argrepr = ''
          if arg is not None:
              #  Set argval to the dereferenced value of the argument when
              #  available, and argrepr to the string representation of argval.
              #    _disassemble_bytes needs the string repr of the
              #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
              argval = arg
              if op in hasconst:
                  argval, argrepr = _get_const_info(arg, constants)
              elif op in hasname:
                  argval, argrepr = _get_name_info(arg, names)
              elif op in hasjrel:
                  # Relative jumps count from the following instruction,
                  # which starts 2 bytes after this one.
                  argval = offset + 2 + arg
                  argrepr = "to " + repr(argval)
              elif op in haslocal:
                  argval, argrepr = _get_name_info(arg, varnames)
              elif op in hascompare:
                  argval = cmp_op[arg]
                  argrepr = argval
              elif op in hasfree:
                  argval, argrepr = _get_name_info(arg, cells)
              elif op == FORMAT_VALUE:
                  # Low two bits select the conversion; bit 0x4 flags the
                  # presence of a format spec.
                  argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
                  argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
                  if argval[1]:
                      if argrepr:
                          argrepr += ', '
                      argrepr += 'with format'
          yield Instruction(opname[op], op,
                            arg, argval, argrepr,
                            offset, starts_line, is_jump_target)
  def disassemble(co, lasti=-1, *, file=None):
      """Disassemble a code object.

      The instruction whose offset equals *lasti* is marked as the current
      one.  Output is printed to *file*.
      """
      _disassemble_bytes(
          co.co_code,
          lasti=lasti,
          varnames=co.co_varnames,
          names=co.co_names,
          constants=co.co_consts,
          cells=co.co_cellvars + co.co_freevars,
          linestarts=dict(findlinestarts(co)),
          file=file,
      )
  def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                         constants=None, cells=None, linestarts=None,
                         *, file=None, line_offset=0):
      """Print a formatted disassembly of the bytecode *code* to *file*.

      *varnames*, *names*, *constants* and *cells* are used to resolve
      instruction arguments when provided.  *linestarts* maps instruction
      offsets to line numbers and *line_offset* is added to each reported
      line.  The instruction at offset *lasti* is marked as current.
      """
      # Omit the line number column entirely if we have no line number info
      show_lineno = linestarts is not None
      if show_lineno:
          # Widen the column beyond the default of 3 when the largest line
          # number needs more digits, so the columns stay aligned.
          maxlineno = max(linestarts.values(), default=0) + line_offset
          lineno_width = max(3, len(str(maxlineno)))
      else:
          lineno_width = 0
      for instr in _get_instructions_bytes(code, varnames, names,
                                           constants, cells, linestarts,
                                           line_offset=line_offset):
          # Separate the instructions of different source lines with a blank
          # line (but not before the very first instruction).
          new_source_line = (show_lineno and
                             instr.starts_line is not None and
                             instr.offset > 0)
          if new_source_line:
              print(file=file)
          is_current_instr = instr.offset == lasti
          print(instr._disassemble(lineno_width, is_current_instr), file=file)
  def _disassemble_str(source, *, file=None):
      """Compile the source string, then disassemble the code object."""
      compiled = _try_compile(source, '<dis>')
      disassemble(compiled, file=file)


  # XXX For backwards compatibility
  disco = disassemble
  def _unpack_opargs(code):
      """Yield (offset, opcode, argument) for each 2-byte unit of *code*.

      The argument is None for opcodes below HAVE_ARGUMENT.  The argument
      of an EXTENDED_ARG instruction is shifted left by 8 bits and OR-ed
      into the argument of the next instruction that takes one.
      """
      pending_high_bits = 0
      for position in range(0, len(code), 2):
          opcode_byte = code[position]
          if opcode_byte < HAVE_ARGUMENT:
              yield (position, opcode_byte, None)
              continue
          argument = code[position + 1] | pending_high_bits
          if opcode_byte == EXTENDED_ARG:
              pending_high_bits = argument << 8
          else:
              pending_high_bits = 0
          yield (position, opcode_byte, argument)
  def findlabels(code):
      """Detect all offsets in a byte code which are jump targets.

      Return the list of offsets, each listed once, in the order in which
      they are first referenced.
      """
      labels = []
      # Mirror of *labels* for constant-time duplicate detection.
      seen = set()
      for offset, op, arg in _unpack_opargs(code):
          if arg is None:
              continue
          if op in hasjrel:
              # Relative jumps count from the following instruction.
              label = offset + 2 + arg
          elif op in hasjabs:
              label = arg
          else:
              continue
          if label not in seen:
              seen.add(label)
              labels.append(label)
      return labels
  def findlinestarts(code):
      """Find the offsets in a byte code which are start of lines in the source.

      Generate pairs (offset, lineno) as described in Python/compile.c.

      co_lnotab is read as consecutive (address increment, line increment)
      byte pairs, starting from offset 0 and co_firstlineno.
      """
      table = iter(code.co_lnotab)
      current_line = code.co_firstlineno
      reported_line = None
      position = 0
      # zip() over the same iterator walks the table two bytes at a time.
      for addr_delta, line_delta in zip(table, table):
          if addr_delta:
              if current_line != reported_line:
                  yield (position, current_line)
                  reported_line = current_line
              position += addr_delta
          if line_delta >= 0x80:
              # line increments are 8-bit signed integers
              line_delta -= 0x100
          current_line += line_delta
      if current_line != reported_line:
          yield (position, current_line)
  class Bytecode:
      """The bytecode operations of a piece of code

      Instantiate this with a function, method, string of code, or a code object
      (as returned by compile()).

      Iterating over this yields the bytecode operations as Instruction instances.
      """

      def __init__(self, x, *, first_line=None, current_offset=None):
          """Resolve *x* to a code object and cache its line information.

          *first_line*, when given, replaces the line number reported for
          the first source line.  *current_offset* is the instruction
          offset that dis() marks as current.
          """
          code = _get_code_object(x)
          self.codeobj = code
          if first_line is not None:
              self.first_line = first_line
              self._line_offset = first_line - code.co_firstlineno
          else:
              self.first_line = code.co_firstlineno
              self._line_offset = 0
          self._cell_names = code.co_cellvars + code.co_freevars
          self._linestarts = dict(findlinestarts(code))
          self._original_object = x
          self.current_offset = current_offset

      def __iter__(self):
          code = self.codeobj
          return _get_instructions_bytes(
              code.co_code,
              code.co_varnames,
              code.co_names,
              code.co_consts,
              self._cell_names,
              self._linestarts,
              line_offset=self._line_offset,
          )

      def __repr__(self):
          class_name = self.__class__.__name__
          return "{}({!r})".format(class_name, self._original_object)

      @classmethod
      def from_traceback(cls, tb):
          """ Construct a Bytecode from the given traceback """
          innermost = tb
          while innermost.tb_next:
              innermost = innermost.tb_next
          return cls(innermost.tb_frame.f_code,
                     current_offset=innermost.tb_lasti)

      def info(self):
          """Return formatted information about the code object."""
          return _format_code_info(self.codeobj)

      def dis(self):
          """Return a formatted view of the bytecode operations."""
          code = self.codeobj
          # -1 means that no instruction is marked as current.
          lasti = -1 if self.current_offset is None else self.current_offset
          with io.StringIO() as buffer:
              _disassemble_bytes(
                  code.co_code,
                  lasti=lasti,
                  varnames=code.co_varnames,
                  names=code.co_names,
                  constants=code.co_consts,
                  cells=self._cell_names,
                  linestarts=self._linestarts,
                  line_offset=self._line_offset,
                  file=buffer,
              )
              return buffer.getvalue()
  def _test():
      """Simple test program to disassemble a file.

      Reads the source from the file named on the command line ('-', the
      default, selects standard input), compiles it in "exec" mode and
      prints its disassembly.
      """
      import argparse

      parser = argparse.ArgumentParser()
      # Read the source as bytes so that compile() applies the source's own
      # encoding declaration rather than the locale's text decoding.
      parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?',
                          default='-')
      args = parser.parse_args()
      with args.infile as infile:
          source = infile.read()
      code = compile(source, args.infile.name, "exec")
      dis(code)


  if __name__ == "__main__":
      _test()