/Lib/profile.py
Python | 619 lines | 553 code | 16 blank | 50 comment | 7 complexity | a1f60efe55b84022b9c3eab5c8dfa233 MD5 | raw file
1#! /usr/bin/env python 2# 3# Class for profiling python code. rev 1.0 6/2/94 4# 5# Based on prior profile module by Sjoerd Mullender... 6# which was hacked somewhat by: Guido van Rossum 7 8"""Class for profiling Python code.""" 9 10# Copyright 1994, by InfoSeek Corporation, all rights reserved. 11# Written by James Roskind 12# 13# Permission to use, copy, modify, and distribute this Python software 14# and its associated documentation for any purpose (subject to the 15# restriction in the following sentence) without fee is hereby granted, 16# provided that the above copyright notice appears in all copies, and 17# that both that copyright notice and this permission notice appear in 18# supporting documentation, and that the name of InfoSeek not be used in 19# advertising or publicity pertaining to distribution of the software 20# without specific, written prior permission. This permission is 21# explicitly restricted to the copying and modification of the software 22# to remain in Python, compiled Python, or other languages (such as C) 23# wherein the modified or derived code is exclusively imported into a 24# Python module. 25# 26# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 27# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 28# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY 29# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER 30# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF 31# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 32# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 33 34 35 36import sys 37import os 38import time 39import marshal 40from optparse import OptionParser 41 42__all__ = ["run", "runctx", "help", "Profile"] 43 44# Sample timer for use with 45#i_count = 0 46#def integer_timer(): 47# global i_count 48# i_count = i_count + 1 49# return i_count 50#itimes = integer_timer # replace with C coded timer returning integers 51 52#************************************************************************** 53# The following are the static member functions for the profiler class 54# Note that an instance of Profile() is *not* needed to call them. 55#************************************************************************** 56 57def run(statement, filename=None, sort=-1): 58 """Run statement under profiler optionally saving results in filename 59 60 This function takes a single argument that can be passed to the 61 "exec" statement, and an optional file name. In all cases this 62 routine attempts to "exec" its first argument and gather profiling 63 statistics from the execution. If no file name is present, then this 64 function automatically prints a simple profiling report, sorted by the 65 standard name string (file/line/function-name) that is presented in 66 each line. 67 """ 68 prof = Profile() 69 try: 70 prof = prof.run(statement) 71 except SystemExit: 72 pass 73 if filename is not None: 74 prof.dump_stats(filename) 75 else: 76 return prof.print_stats(sort) 77 78def runctx(statement, globals, locals, filename=None): 79 """Run statement under profiler, supplying your own globals and locals, 80 optionally saving results in filename. 81 82 statement and filename have the same semantics as profile.run 83 """ 84 prof = Profile() 85 try: 86 prof = prof.runctx(statement, globals, locals) 87 except SystemExit: 88 pass 89 90 if filename is not None: 91 prof.dump_stats(filename) 92 else: 93 return prof.print_stats() 94 95# Backwards compatibility. 96def help(): 97 print "Documentation for the profile module can be found " 98 print "in the Python Library Reference, section 'The Python Profiler'." 99 100if os.name == "mac": 101 import MacOS 102 def _get_time_mac(timer=MacOS.GetTicks): 103 return timer() / 60.0 104 105if hasattr(os, "times"): 106 def _get_time_times(timer=os.times): 107 t = timer() 108 return t[0] + t[1] 109 110# Using getrusage(3) is better than clock(3) if available: 111# on some systems (e.g. FreeBSD), getrusage has a higher resolution 112# Furthermore, on a POSIX system, returns microseconds, which 113# wrap around after 36min. 114_has_res = 0 115try: 116 import resource 117 resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF) 118 def _get_time_resource(timer=resgetrusage): 119 t = timer() 120 return t[0] + t[1] 121 _has_res = 1 122except ImportError: 123 pass 124 125class Profile: 126 """Profiler class. 127 128 self.cur is always a tuple. Each such tuple corresponds to a stack 129 frame that is currently active (self.cur[-2]). The following are the 130 definitions of its members. We use this external "parallel stack" to 131 avoid contaminating the program that we are profiling. (old profiler 132 used to write into the frames local dictionary!!) Derived classes 133 can change the definition of some entries, as long as they leave 134 [-2:] intact (frame and previous tuple). In case an internal error is 135 detected, the -3 element is used as the function name. 136 137 [ 0] = Time that needs to be charged to the parent frame's function. 138 It is used so that a function call will not have to access the 139 timing data for the parent frame. 140 [ 1] = Total time spent in this frame's function, excluding time in 141 subfunctions (this latter is tallied in cur[2]). 142 [ 2] = Total time spent in subfunctions, excluding time executing the 143 frame's function (this latter is tallied in cur[1]). 144 [-3] = Name of the function that corresponds to this frame. 145 [-2] = Actual frame that we correspond to (used to sync exception handling). 146 [-1] = Our parent 6-tuple (corresponds to frame.f_back). 147 148 Timing data for each function is stored as a 5-tuple in the dictionary 149 self.timings[]. The index is always the name stored in self.cur[-3]. 150 The following are the definitions of the members: 151 152 [0] = The number of times this function was called, not counting direct 153 or indirect recursion, 154 [1] = Number of times this function appears on the stack, minus one 155 [2] = Total time spent internal to this function 156 [3] = Cumulative time that this function was present on the stack. In 157 non-recursive functions, this is the total execution time from start 158 to finish of each invocation of a function, including time spent in 159 all subfunctions. 160 [4] = A dictionary indicating for each function name, the number of times 161 it was called by us. 162 """ 163 164 bias = 0 # calibration constant 165 166 def __init__(self, timer=None, bias=None): 167 self.timings = {} 168 self.cur = None 169 self.cmd = "" 170 self.c_func_name = "" 171 172 if bias is None: 173 bias = self.bias 174 self.bias = bias # Materialize in local dict for lookup speed. 175 176 if not timer: 177 if _has_res: 178 self.timer = resgetrusage 179 self.dispatcher = self.trace_dispatch 180 self.get_time = _get_time_resource 181 elif os.name == 'mac': 182 self.timer = MacOS.GetTicks 183 self.dispatcher = self.trace_dispatch_mac 184 self.get_time = _get_time_mac 185 elif hasattr(time, 'clock'): 186 self.timer = self.get_time = time.clock 187 self.dispatcher = self.trace_dispatch_i 188 elif hasattr(os, 'times'): 189 self.timer = os.times 190 self.dispatcher = self.trace_dispatch 191 self.get_time = _get_time_times 192 else: 193 self.timer = self.get_time = time.time 194 self.dispatcher = self.trace_dispatch_i 195 else: 196 self.timer = timer 197 t = self.timer() # test out timer function 198 try: 199 length = len(t) 200 except TypeError: 201 self.get_time = timer 202 self.dispatcher = self.trace_dispatch_i 203 else: 204 if length == 2: 205 self.dispatcher = self.trace_dispatch 206 else: 207 self.dispatcher = self.trace_dispatch_l 208 # This get_time() implementation needs to be defined 209 # here to capture the passed-in timer in the parameter 210 # list (for performance). Note that we can't assume 211 # the timer() result contains two values in all 212 # cases. 213 def get_time_timer(timer=timer, sum=sum): 214 return sum(timer()) 215 self.get_time = get_time_timer 216 self.t = self.get_time() 217 self.simulate_call('profiler') 218 219 # Heavily optimized dispatch routine for os.times() timer 220 221 def trace_dispatch(self, frame, event, arg): 222 timer = self.timer 223 t = timer() 224 t = t[0] + t[1] - self.t - self.bias 225 226 if event == "c_call": 227 self.c_func_name = arg.__name__ 228 229 if self.dispatch[event](self, frame,t): 230 t = timer() 231 self.t = t[0] + t[1] 232 else: 233 r = timer() 234 self.t = r[0] + r[1] - t # put back unrecorded delta 235 236 # Dispatch routine for best timer program (return = scalar, fastest if 237 # an integer but float works too -- and time.clock() relies on that). 238 239 def trace_dispatch_i(self, frame, event, arg): 240 timer = self.timer 241 t = timer() - self.t - self.bias 242 243 if event == "c_call": 244 self.c_func_name = arg.__name__ 245 246 if self.dispatch[event](self, frame, t): 247 self.t = timer() 248 else: 249 self.t = timer() - t # put back unrecorded delta 250 251 # Dispatch routine for macintosh (timer returns time in ticks of 252 # 1/60th second) 253 254 def trace_dispatch_mac(self, frame, event, arg): 255 timer = self.timer 256 t = timer()/60.0 - self.t - self.bias 257 258 if event == "c_call": 259 self.c_func_name = arg.__name__ 260 261 if self.dispatch[event](self, frame, t): 262 self.t = timer()/60.0 263 else: 264 self.t = timer()/60.0 - t # put back unrecorded delta 265 266 # SLOW generic dispatch routine for timer returning lists of numbers 267 268 def trace_dispatch_l(self, frame, event, arg): 269 get_time = self.get_time 270 t = get_time() - self.t - self.bias 271 272 if event == "c_call": 273 self.c_func_name = arg.__name__ 274 275 if self.dispatch[event](self, frame, t): 276 self.t = get_time() 277 else: 278 self.t = get_time() - t # put back unrecorded delta 279 280 # In the event handlers, the first 3 elements of self.cur are unpacked 281 # into vrbls w/ 3-letter names. The last two characters are meant to be 282 # mnemonic: 283 # _pt self.cur[0] "parent time" time to be charged to parent frame 284 # _it self.cur[1] "internal time" time spent directly in the function 285 # _et self.cur[2] "external time" time spent in subfunctions 286 287 def trace_dispatch_exception(self, frame, t): 288 rpt, rit, ret, rfn, rframe, rcur = self.cur 289 if (rframe is not frame) and rcur: 290 return self.trace_dispatch_return(rframe, t) 291 self.cur = rpt, rit+t, ret, rfn, rframe, rcur 292 return 1 293 294 295 def trace_dispatch_call(self, frame, t): 296 if self.cur and frame.f_back is not self.cur[-2]: 297 rpt, rit, ret, rfn, rframe, rcur = self.cur 298 if not isinstance(rframe, Profile.fake_frame): 299 assert rframe.f_back is frame.f_back, ("Bad call", rfn, 300 rframe, rframe.f_back, 301 frame, frame.f_back) 302 self.trace_dispatch_return(rframe, 0) 303 assert (self.cur is None or \ 304 frame.f_back is self.cur[-2]), ("Bad call", 305 self.cur[-3]) 306 fcode = frame.f_code 307 fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name) 308 self.cur = (t, 0, 0, fn, frame, self.cur) 309 timings = self.timings 310 if fn in timings: 311 cc, ns, tt, ct, callers = timings[fn] 312 timings[fn] = cc, ns + 1, tt, ct, callers 313 else: 314 timings[fn] = 0, 0, 0, 0, {} 315 return 1 316 317 def trace_dispatch_c_call (self, frame, t): 318 fn = ("", 0, self.c_func_name) 319 self.cur = (t, 0, 0, fn, frame, self.cur) 320 timings = self.timings 321 if fn in timings: 322 cc, ns, tt, ct, callers = timings[fn] 323 timings[fn] = cc, ns+1, tt, ct, callers 324 else: 325 timings[fn] = 0, 0, 0, 0, {} 326 return 1 327 328 def trace_dispatch_return(self, frame, t): 329 if frame is not self.cur[-2]: 330 assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3]) 331 self.trace_dispatch_return(self.cur[-2], 0) 332 333 # Prefix "r" means part of the Returning or exiting frame. 334 # Prefix "p" means part of the Previous or Parent or older frame. 335 336 rpt, rit, ret, rfn, frame, rcur = self.cur 337 rit = rit + t 338 frame_total = rit + ret 339 340 ppt, pit, pet, pfn, pframe, pcur = rcur 341 self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur 342 343 timings = self.timings 344 cc, ns, tt, ct, callers = timings[rfn] 345 if not ns: 346 # This is the only occurrence of the function on the stack. 347 # Else this is a (directly or indirectly) recursive call, and 348 # its cumulative time will get updated when the topmost call to 349 # it returns. 350 ct = ct + frame_total 351 cc = cc + 1 352 353 if pfn in callers: 354 callers[pfn] = callers[pfn] + 1 # hack: gather more 355 # stats such as the amount of time added to ct courtesy 356 # of this specific call, and the contribution to cc 357 # courtesy of this call. 358 else: 359 callers[pfn] = 1 360 361 timings[rfn] = cc, ns - 1, tt + rit, ct, callers 362 363 return 1 364 365 366 dispatch = { 367 "call": trace_dispatch_call, 368 "exception": trace_dispatch_exception, 369 "return": trace_dispatch_return, 370 "c_call": trace_dispatch_c_call, 371 "c_exception": trace_dispatch_return, # the C function returned 372 "c_return": trace_dispatch_return, 373 } 374 375 376 # The next few functions play with self.cmd. By carefully preloading 377 # our parallel stack, we can force the profiled result to include 378 # an arbitrary string as the name of the calling function. 379 # We use self.cmd as that string, and the resulting stats look 380 # very nice :-). 381 382 def set_cmd(self, cmd): 383 if self.cur[-1]: return # already set 384 self.cmd = cmd 385 self.simulate_call(cmd) 386 387 class fake_code: 388 def __init__(self, filename, line, name): 389 self.co_filename = filename 390 self.co_line = line 391 self.co_name = name 392 self.co_firstlineno = 0 393 394 def __repr__(self): 395 return repr((self.co_filename, self.co_line, self.co_name)) 396 397 class fake_frame: 398 def __init__(self, code, prior): 399 self.f_code = code 400 self.f_back = prior 401 402 def simulate_call(self, name): 403 code = self.fake_code('profile', 0, name) 404 if self.cur: 405 pframe = self.cur[-2] 406 else: 407 pframe = None 408 frame = self.fake_frame(code, pframe) 409 self.dispatch['call'](self, frame, 0) 410 411 # collect stats from pending stack, including getting final 412 # timings for self.cmd frame. 413 414 def simulate_cmd_complete(self): 415 get_time = self.get_time 416 t = get_time() - self.t 417 while self.cur[-1]: 418 # We *can* cause assertion errors here if 419 # dispatch_trace_return checks for a frame match! 420 self.dispatch['return'](self, self.cur[-2], t) 421 t = 0 422 self.t = get_time() - t 423 424 425 def print_stats(self, sort=-1): 426 import pstats 427 pstats.Stats(self).strip_dirs().sort_stats(sort). \ 428 print_stats() 429 430 def dump_stats(self, file): 431 f = open(file, 'wb') 432 self.create_stats() 433 marshal.dump(self.stats, f) 434 f.close() 435 436 def create_stats(self): 437 self.simulate_cmd_complete() 438 self.snapshot_stats() 439 440 def snapshot_stats(self): 441 self.stats = {} 442 for func, (cc, ns, tt, ct, callers) in self.timings.iteritems(): 443 callers = callers.copy() 444 nc = 0 445 for callcnt in callers.itervalues(): 446 nc += callcnt 447 self.stats[func] = cc, nc, tt, ct, callers 448 449 450 # The following two methods can be called by clients to use 451 # a profiler to profile a statement, given as a string. 452 453 def run(self, cmd): 454 import __main__ 455 dict = __main__.__dict__ 456 return self.runctx(cmd, dict, dict) 457 458 def runctx(self, cmd, globals, locals): 459 self.set_cmd(cmd) 460 sys.setprofile(self.dispatcher) 461 try: 462 exec cmd in globals, locals 463 finally: 464 sys.setprofile(None) 465 return self 466 467 # This method is more useful to profile a single function call. 468 def runcall(self, func, *args, **kw): 469 self.set_cmd(repr(func)) 470 sys.setprofile(self.dispatcher) 471 try: 472 return func(*args, **kw) 473 finally: 474 sys.setprofile(None) 475 476 477 #****************************************************************** 478 # The following calculates the overhead for using a profiler. The 479 # problem is that it takes a fair amount of time for the profiler 480 # to stop the stopwatch (from the time it receives an event). 481 # Similarly, there is a delay from the time that the profiler 482 # re-starts the stopwatch before the user's code really gets to 483 # continue. The following code tries to measure the difference on 484 # a per-event basis. 485 # 486 # Note that this difference is only significant if there are a lot of 487 # events, and relatively little user code per event. For example, 488 # code with small functions will typically benefit from having the 489 # profiler calibrated for the current platform. This *could* be 490 # done on the fly during init() time, but it is not worth the 491 # effort. Also note that if too large a value specified, then 492 # execution time on some functions will actually appear as a 493 # negative number. It is *normal* for some functions (with very 494 # low call counts) to have such negative stats, even if the 495 # calibration figure is "correct." 496 # 497 # One alternative to profile-time calibration adjustments (i.e., 498 # adding in the magic little delta during each event) is to track 499 # more carefully the number of events (and cumulatively, the number 500 # of events during sub functions) that are seen. If this were 501 # done, then the arithmetic could be done after the fact (i.e., at 502 # display time). Currently, we track only call/return events. 503 # These values can be deduced by examining the callees and callers 504 # vectors for each functions. Hence we *can* almost correct the 505 # internal time figure at print time (note that we currently don't 506 # track exception event processing counts). Unfortunately, there 507 # is currently no similar information for cumulative sub-function 508 # time. It would not be hard to "get all this info" at profiler 509 # time. Specifically, we would have to extend the tuples to keep 510 # counts of this in each frame, and then extend the defs of timing 511 # tuples to include the significant two figures. I'm a bit fearful 512 # that this additional feature will slow the heavily optimized 513 # event/time ratio (i.e., the profiler would run slower, fur a very 514 # low "value added" feature.) 515 #************************************************************** 516 517 def calibrate(self, m, verbose=0): 518 if self.__class__ is not Profile: 519 raise TypeError("Subclasses must override .calibrate().") 520 521 saved_bias = self.bias 522 self.bias = 0 523 try: 524 return self._calibrate_inner(m, verbose) 525 finally: 526 self.bias = saved_bias 527 528 def _calibrate_inner(self, m, verbose): 529 get_time = self.get_time 530 531 # Set up a test case to be run with and without profiling. Include 532 # lots of calls, because we're trying to quantify stopwatch overhead. 533 # Do not raise any exceptions, though, because we want to know 534 # exactly how many profile events are generated (one call event, + 535 # one return event, per Python-level call). 536 537 def f1(n): 538 for i in range(n): 539 x = 1 540 541 def f(m, f1=f1): 542 for i in range(m): 543 f1(100) 544 545 f(m) # warm up the cache 546 547 # elapsed_noprofile <- time f(m) takes without profiling. 548 t0 = get_time() 549 f(m) 550 t1 = get_time() 551 elapsed_noprofile = t1 - t0 552 if verbose: 553 print "elapsed time without profiling =", elapsed_noprofile 554 555 # elapsed_profile <- time f(m) takes with profiling. The difference 556 # is profiling overhead, only some of which the profiler subtracts 557 # out on its own. 558 p = Profile() 559 t0 = get_time() 560 p.runctx('f(m)', globals(), locals()) 561 t1 = get_time() 562 elapsed_profile = t1 - t0 563 if verbose: 564 print "elapsed time with profiling =", elapsed_profile 565 566 # reported_time <- "CPU seconds" the profiler charged to f and f1. 567 total_calls = 0.0 568 reported_time = 0.0 569 for (filename, line, funcname), (cc, ns, tt, ct, callers) in \ 570 p.timings.items(): 571 if funcname in ("f", "f1"): 572 total_calls += cc 573 reported_time += tt 574 575 if verbose: 576 print "'CPU seconds' profiler reported =", reported_time 577 print "total # calls =", total_calls 578 if total_calls != m + 1: 579 raise ValueError("internal error: total calls = %d" % total_calls) 580 581 # reported_time - elapsed_noprofile = overhead the profiler wasn't 582 # able to measure. Divide by twice the number of calls (since there 583 # are two profiler events per call in this test) to get the hidden 584 # overhead per event. 585 mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls 586 if verbose: 587 print "mean stopwatch overhead per profile event =", mean 588 return mean 589 590#**************************************************************************** 591def Stats(*args): 592 print 'Report generating functions are in the "pstats" module\a' 593 594def main(): 595 usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..." 596 parser = OptionParser(usage=usage) 597 parser.allow_interspersed_args = False 598 parser.add_option('-o', '--outfile', dest="outfile", 599 help="Save stats to <outfile>", default=None) 600 parser.add_option('-s', '--sort', dest="sort", 601 help="Sort order when printing to stdout, based on pstats.Stats class", default=-1) 602 603 if not sys.argv[1:]: 604 parser.print_usage() 605 sys.exit(2) 606 607 (options, args) = parser.parse_args() 608 609 if (len(args) > 0): 610 sys.argv[:] = args 611 sys.path.insert(0, os.path.dirname(sys.argv[0])) 612 run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort) 613 else: 614 parser.print_usage() 615 return parser 616 617# When invoked as main program, invoke the profiler on a script 618if __name__ == '__main__': 619 main()