# bytecode_tracer/bytecode_tracer.py
# Source repository: https://bitbucket.org/alexandr_dragunkin/pythoscope
import opcode
import os
import re
import sys
from types import CodeType, MethodType

import six

from . import code_rewriting_importer
from .py_frame_object import get_value_stack_top
  9. class ValueStack(object):
  10. """CPython stack that holds values used and generated during computation.
  11. Right before a function call value stack looks like this:
  12. +--------------------- <--- frame.f_valuestack
  13. | function object
  14. +----------
  15. | ...
  16. | list of positional arguments
  17. | ...
  18. +----------
  19. | ...
  20. | flat list of keyword arguments (key-value pairs)
  21. | ...
  22. +----------
  23. | *varargs tuple
  24. +----------
  25. | **kwargs dictionary
  26. +--------------------- <--- frame.f_stacktop
  27. When a function is called with no arguments, the function object is at the
  28. top of the stack. When arguments are present, they are placed above the
  29. function object. Two bytes after the CALL_FUNCTION bytecode contain number
  30. of positional and keyword arguments passed. Bytecode number tells us whether
  31. a call included single star (*args) and/or double star (**kwargs) arguments.
  32. To get to the values at the stack we look at it from the top, from
  33. frame.f_stacktop downwards. Since f_stacktop points at the memory right
  34. after the last value, all offsets have to be negative. For example,
  35. frame.f_stacktop[-1] is an object at the top of the value stack.
  36. """
  37. def __init__(self, frame, bcode):
  38. assert bcode.name.startswith("CALL_FUNCTION")
  39. self.stack = get_value_stack_top(frame)
  40. self.positional_args_count = bcode.arg1
  41. self.keyword_args_count = bcode.arg2
  42. self.args_count = self.positional_args_count + 2*self.keyword_args_count
  43. # There are four bytecodes for function calls, that tell use whether
  44. # single star (*args) and/or double star (**kwargs) notation was
  45. # used: CALL_FUNCTION, CALL_FUNCTION_VAR, CALL_FUNCTION_KW
  46. # and CALL_FUNCTION_VAR_KW.
  47. self.singlestar = "_VAR" in bcode.name
  48. self.doublestar = "_KW" in bcode.name
  49. def bottom(self):
  50. """The first object at the value stack.
  51. It's the function being called for all CALL_FUNCTION_* bytecodes.
  52. """
  53. offset = 1 + self.args_count + self.singlestar + self.doublestar
  54. return self.stack[-offset]
  55. def positional_args(self):
  56. """List of all positional arguments passed to a C function.
  57. """
  58. args = self.positional_args_from_stack()[:]
  59. if self.singlestar:
  60. args.extend(self.positional_args_from_varargs())
  61. return args
  62. def values(self, offset, count):
  63. """Return a list of `count` values from stack starting at `offset`.
  64. """
  65. def v():
  66. for i in range(-offset, -offset + count):
  67. yield self.stack[i]
  68. return list(v())
  69. def positional_args_from_stack(self):
  70. """Objects explicitly placed on stack as positional arguments.
  71. """
  72. offset = self.args_count + self.singlestar + self.doublestar
  73. return self.values(offset, self.positional_args_count)
  74. def positional_args_from_varargs(self):
  75. """Iterable placed on stack as "*args".
  76. """
  77. return self.stack[-1 - self.doublestar]
  78. def keyword_args(self):
  79. """Dictionary of all keyword arguments passed to a C function.
  80. """
  81. kwds = self.keyword_args_from_stack().copy()
  82. if self.doublestar:
  83. kwds.update(self.keyword_args_from_double_star())
  84. return kwds
  85. def keyword_args_from_stack(self):
  86. """Key/value pairs placed explicitly on stack as keyword arguments.
  87. """
  88. offset = 2*self.keyword_args_count + self.singlestar + self.doublestar
  89. args = self.values(offset, 2*self.keyword_args_count)
  90. return flatlist_to_dict(args)
  91. def keyword_args_from_double_star(self):
  92. """Dictionary passed as "**kwds".
  93. """
  94. return self.stack[-1]
  95. def flatlist_to_dict(alist):
  96. return dict(zip(alist[::2], alist[1::2]))
  97. class Bytecode(object):
  98. def __init__(self, name, arg1=None, arg2=None):
  99. self.name = name
  100. self.arg1 = arg1
  101. self.arg2 = arg2
  102. def current_bytecode(frame):
  103. code = frame.f_code.co_code[frame.f_lasti:]
  104. op = ord(code[0])
  105. name = opcode.opname[op]
  106. arg1, arg2 = None, None
  107. if op >= opcode.HAVE_ARGUMENT:
  108. arg1 = ord(code[1])
  109. arg2 = ord(code[2])
  110. return Bytecode(name=name, arg1=arg1, arg2=arg2)
  111. def is_c_func(func):
  112. """Return True if given function object was implemented in C,
  113. via a C extension or as a builtin.
  114. >>> is_c_func(repr)
  115. True
  116. >>> import sys
  117. >>> is_c_func(sys.exit)
  118. True
  119. >>> import doctest
  120. >>> is_c_func(doctest.testmod)
  121. False
  122. """
  123. return not hasattr(func, 'func_code')
  124. def name_from_arg(frame, bcode):
  125. return frame.f_code.co_names[bcode.arg1]
  126. def frame_module(frame):
  127. g = frame.f_globals
  128. for module in sys.modules.itervalues():
  129. if hasattr(module, '__dict__') and module.__dict__ is g:
  130. return module
  131. class StandardBytecodeTracer(object):
  132. """A tracer that goes over each bytecode and reports events that couldn't
  133. be traced by other means.
  134. Usage example:
  135. def trace(frame, event, arg):
  136. bytecode_events = list(btracer.trace(frame, event))
  137. if bytecode_events:
  138. for ev, rest in bytecode_events:
  139. pass # Here handle BytecodeTracer events, like 'c_call', 'c_return', 'print' or 'print_to'.
  140. else:
  141. pass # Here handle the usual tracer events, like 'call', 'return' and 'exception'.
  142. return trace
  143. sys.settrace(trace)
  144. try:
  145. pass # Some code to trace... You may need to call rewrite_function first.
  146. finally:
  147. sys.settrace(None)
  148. """
  149. def __init__(self):
  150. # Will contain False for calls to Python functions and True for calls to
  151. # C functions.
  152. self.call_stack = []
  153. def setup(self):
  154. code_rewriting_importer.install(rewrite_lnotab)
  155. def teardown(self):
  156. code_rewriting_importer.uninstall()
  157. def trace(self, frame, event):
  158. """Tries to recognize the current event in terms of calls to and returns
  159. from C.
  160. Currently supported events:
  161. * ('c_call', (function, positional_arguments, keyword_arguments))
  162. A call to a C function with given arguments is about to happen.
  163. * ('c_return', return_value)
  164. A C function returned with given value (it will always be the function
  165. for the most recent 'c_call' event).
  166. * ('print', value)
  167. * ('print_to', (value, output))
  168. A print statement is about to be executed.
  169. * ('store_attr', (object, name, value))
  170. * ('delete_attr', (object, name))
  171. An instance variable of object is about to be changed or deleted.
  172. * ('load_global', (module, name))
  173. * ('store_global', (module, name, value))
  174. * ('delete_global', (module, name))
  175. A global variable is about to be read, written or deleted.
  176. It is a generator and it yields a sequence of events, as a single
  177. bytecode may generate more than one event. Canonical example is
  178. a sequence of CALL_FUNCTION bytecodes. Execution of the first bytecode
  179. causes a 'c_call' event. Execution of the second bytecode causes two
  180. consecutive events: 'c_return' and another 'c_call'.
  181. """
  182. if event == 'line':
  183. if self.call_stack[-1]:
  184. self.call_stack.pop()
  185. stack = get_value_stack_top(frame)
  186. # Rewrite a code object each time it is returned by some
  187. # C function. Most commonly that will be the 'compile' function.
  188. # TODO: Make sure the old code is garbage collected.
  189. if type(stack[-1]) is CodeType:
  190. stack[-1] = rewrite_lnotab(stack[-1])
  191. yield 'c_return', stack[-1]
  192. bcode = current_bytecode(frame)
  193. if bcode.name.startswith("CALL_FUNCTION"):
  194. value_stack = ValueStack(frame, bcode)
  195. function = value_stack.bottom()
  196. # Python functions are handled by the standard trace mechanism, but
  197. # we have to make sure any C calls the function makes can be traced
  198. # by us later, so we rewrite its bytecode.
  199. if not is_c_func(function):
  200. rewrite_function(function)
  201. return
  202. self.call_stack.append(True)
  203. pargs = value_stack.positional_args()
  204. kargs = value_stack.keyword_args()
  205. # Rewrite all callables that may have been passed to the C function.
  206. rewrite_all(pargs + kargs.values())
  207. yield 'c_call', (function, pargs, kargs)
  208. elif bcode.name == "PRINT_NEWLINE":
  209. yield 'print', os.linesep
  210. else:
  211. stack = get_value_stack_top(frame)
  212. if bcode.name == "PRINT_NEWLINE_TO":
  213. yield 'print_to', (os.linesep, stack[-1])
  214. elif bcode.name == "PRINT_ITEM":
  215. yield 'print', stack[-1]
  216. elif bcode.name == "PRINT_ITEM_TO":
  217. yield 'print_to', (stack[-2], stack[-1])
  218. elif bcode.name == "STORE_ATTR":
  219. yield 'store_attr', (stack[-1], name_from_arg(frame, bcode), stack[-2])
  220. elif bcode.name == "DELETE_ATTR":
  221. yield 'delete_attr', (stack[-1], name_from_arg(frame, bcode))
  222. elif bcode.name == "LOAD_GLOBAL":
  223. module = frame_module(frame)
  224. if module:
  225. try:
  226. name = name_from_arg(frame, bcode)
  227. value = frame.f_globals[name]
  228. yield 'load_global', (module.__name__, name, value)
  229. except KeyError:
  230. pass
  231. elif bcode.name == "STORE_GLOBAL":
  232. module = frame_module(frame)
  233. if module:
  234. yield 'store_global', (module.__name__,
  235. name_from_arg(frame, bcode),
  236. stack[-1])
  237. elif bcode.name == "DELETE_GLOBAL":
  238. module = frame_module(frame)
  239. if module:
  240. yield 'delete_global', (module.__name__,
  241. name_from_arg(frame, bcode))
  242. elif event == 'call':
  243. self.call_stack.append(False)
  244. # When an exception happens in Python >= 2.4 code, 'exception' and
  245. # 'return' events are reported in succession. Exceptions raised from
  246. # C functions don't generate the 'return' event, so we have to pop
  247. # from the stack right away and simulate the 'c_return' event
  248. # ourselves.
  249. elif event == 'exception' and self.call_stack[-1]:
  250. yield 'c_return', None
  251. self.call_stack.pop()
  252. # Python functions always generate a 'return' event, even when an exception
  253. # has been raised, so let's just check for that.
  254. elif event == 'return':
  255. self.call_stack.pop()
  256. class Python23BytecodeTracer(StandardBytecodeTracer):
  257. """Version of the tracer working around a subtle difference in exception
  258. handling of Python 2.3.
  259. In Python 2.4 and higher, when a function (or method) exits with
  260. an exception, interpreter reports two events to a trace function:
  261. first 'exception' and then 'return' right after that.
  262. In Python 2.3 the second event isn't reported, i.e. only 'exception'
  263. events are passed to a trace function. For the sake of consistency this
  264. version of the tracer will act just as the 'return' event would happen
  265. before each consecutive exception reported.
  266. """
  267. def __init__(self, *args):
  268. super(Python23BytecodeTracer, self).__init__(*args)
  269. self.propagating_exception = False
  270. def trace(self, frame, event):
  271. if event == 'exception':
  272. if self.propagating_exception:
  273. self.call_stack.pop()
  274. else:
  275. self.propagating_exception = True
  276. else:
  277. self.propagating_exception = False
  278. return super(Python23BytecodeTracer, self).trace(frame, event)
  279. if sys.version_info < (2, 4):
  280. BytecodeTracer = Python23BytecodeTracer
  281. else:
  282. BytecodeTracer = StandardBytecodeTracer
  283. def rewrite_lnotab(code):
  284. """Replace a code object's line number information to claim that every
  285. byte of the bytecode is a new line. Returns a new code object.
  286. Also recurses to hack the line numbers in nested code objects.
  287. Based on Ned Batchelder's hackpyc.py:
  288. http://nedbatchelder.com/blog/200804/wicked_hack_python_bytecode_tracing.html
  289. """
  290. if has_been_rewritten(code):
  291. return code
  292. n_bytes = len(code.co_code)
  293. new_lnotab = "\x01\x01" * (n_bytes-1)
  294. new_consts = []
  295. for const in code.co_consts:
  296. if type(const) is CodeType:
  297. new_consts.append(rewrite_lnotab(const))
  298. else:
  299. new_consts.append(const)
  300. if sys.version_info.major < 3:
  301. return CodeType(code.co_argcount, code.co_nlocals, code.co_stacksize,
  302. code.co_flags, code.co_code, tuple(new_consts), code.co_names,
  303. code.co_varnames, code.co_filename, code.co_name, 0, new_lnotab,
  304. code.co_freevars,
  305. code.co_cellvars)
  306. else:
  307. # In later Python3 magic_ints, there is a
  308. # kwonlyargcount parameter which we set to 0.
  309. # https://github.com/rocky/python-xdis/blob/master/xdis/unmarshal.py line 165
  310. # https://stackoverflow.com/questions/10116650/typeerror-on-codetype-creation-in-python-3
  311. kwonlyargcount = 0
  312. return CodeType(code.co_argcount, kwonlyargcount, code.co_nlocals, code.co_stacksize,
  313. code.co_flags, code.co_code, tuple(new_consts), code.co_names,
  314. code.co_varnames, code.co_filename, code.co_name, 0, bytes(new_lnotab, encoding='utf-8'),
  315. code.co_freevars, code.co_cellvars)
  316. #code = Code(co_argcount, kwonlyargcount, co_nlocals, co_stacksize, co_flags,
  317. #co_code, co_consts, co_names, co_varnames, co_filename, co_name,
  318. #co_firstlineno, bytes(co_lnotab, encoding='utf-8'),
  319. #co_freevars, co_cellvars)
  320. def rewrite_function(function):
  321. if isinstance(function, MethodType):
  322. function = function.im_func
  323. function.func_code = rewrite_lnotab(six.get_function_code(function))
  324. def rewrite_all(objects):
  325. for obj in objects:
  326. if hasattr(obj, 'func_code'):
  327. rewrite_function(obj)
  328. def has_been_rewritten(code):
  329. """Return True if the code has been rewritten by rewrite_lnotab already.
  330. >>> def fun():
  331. ... pass
  332. >>> has_been_rewritten(fun.func_code)
  333. False
  334. >>> rewrite_function(fun)
  335. >>> has_been_rewritten(fun.func_code)
  336. True
  337. """
  338. if sys.version_info.major < 3:
  339. return re.match(r"\A(\x01\x01)+\Z", code.co_lnotab) is not None
  340. else:
  341. return re.match(r"\A(\x01\x01)+\Z", code.co_lnotab.decode()) is not None