PageRenderTime 42ms CodeModel.GetById 2ms app.highlight 33ms RepoModel.GetById 1ms app.codeStats 0ms

/Lib/profile.py

http://unladen-swallow.googlecode.com/
Python | 619 lines | 553 code | 16 blank | 50 comment | 7 complexity | a1f60efe55b84022b9c3eab5c8dfa233 MD5 | raw file
  1#! /usr/bin/env python
  2#
  3# Class for profiling python code. rev 1.0  6/2/94
  4#
  5# Based on prior profile module by Sjoerd Mullender...
  6#   which was hacked somewhat by: Guido van Rossum
  7
  8"""Class for profiling Python code."""
  9
 10# Copyright 1994, by InfoSeek Corporation, all rights reserved.
 11# Written by James Roskind
 12#
 13# Permission to use, copy, modify, and distribute this Python software
 14# and its associated documentation for any purpose (subject to the
 15# restriction in the following sentence) without fee is hereby granted,
 16# provided that the above copyright notice appears in all copies, and
 17# that both that copyright notice and this permission notice appear in
 18# supporting documentation, and that the name of InfoSeek not be used in
 19# advertising or publicity pertaining to distribution of the software
 20# without specific, written prior permission.  This permission is
 21# explicitly restricted to the copying and modification of the software
 22# to remain in Python, compiled Python, or other languages (such as C)
 23# wherein the modified or derived code is exclusively imported into a
 24# Python module.
 25#
 26# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
 27# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 28# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
 29# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
 30# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
 31# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 32# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 33
 34
 35
 36import sys
 37import os
 38import time
 39import marshal
 40from optparse import OptionParser
 41
 42__all__ = ["run", "runctx", "help", "Profile"]
 43
 44# Sample timer for use with
 45#i_count = 0
 46#def integer_timer():
 47#       global i_count
 48#       i_count = i_count + 1
 49#       return i_count
 50#itimes = integer_timer # replace with C coded timer returning integers
 51
 52#**************************************************************************
 53# The following are the static member functions for the profiler class
 54# Note that an instance of Profile() is *not* needed to call them.
 55#**************************************************************************
 56
 57def run(statement, filename=None, sort=-1):
 58    """Run statement under profiler optionally saving results in filename
 59
 60    This function takes a single argument that can be passed to the
 61    "exec" statement, and an optional file name.  In all cases this
 62    routine attempts to "exec" its first argument and gather profiling
 63    statistics from the execution. If no file name is present, then this
 64    function automatically prints a simple profiling report, sorted by the
 65    standard name string (file/line/function-name) that is presented in
 66    each line.
 67    """
 68    prof = Profile()
 69    try:
 70        prof = prof.run(statement)
 71    except SystemExit:
 72        pass
 73    if filename is not None:
 74        prof.dump_stats(filename)
 75    else:
 76        return prof.print_stats(sort)
 77
 78def runctx(statement, globals, locals, filename=None):
 79    """Run statement under profiler, supplying your own globals and locals,
 80    optionally saving results in filename.
 81
 82    statement and filename have the same semantics as profile.run
 83    """
 84    prof = Profile()
 85    try:
 86        prof = prof.runctx(statement, globals, locals)
 87    except SystemExit:
 88        pass
 89
 90    if filename is not None:
 91        prof.dump_stats(filename)
 92    else:
 93        return prof.print_stats()
 94
 95# Backwards compatibility.
 96def help():
 97    print "Documentation for the profile module can be found "
 98    print "in the Python Library Reference, section 'The Python Profiler'."
 99
100if os.name == "mac":
101    import MacOS
102    def _get_time_mac(timer=MacOS.GetTicks):
103        return timer() / 60.0
104
105if hasattr(os, "times"):
106    def _get_time_times(timer=os.times):
107        t = timer()
108        return t[0] + t[1]
109
110# Using getrusage(3) is better than clock(3) if available:
111# on some systems (e.g. FreeBSD), getrusage has a higher resolution
112# Furthermore, on a POSIX system, returns microseconds, which
113# wrap around after 36min.
114_has_res = 0
115try:
116    import resource
117    resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
118    def _get_time_resource(timer=resgetrusage):
119        t = timer()
120        return t[0] + t[1]
121    _has_res = 1
122except ImportError:
123    pass
124
125class Profile:
126    """Profiler class.
127
128    self.cur is always a tuple.  Each such tuple corresponds to a stack
129    frame that is currently active (self.cur[-2]).  The following are the
130    definitions of its members.  We use this external "parallel stack" to
131    avoid contaminating the program that we are profiling. (old profiler
132    used to write into the frames local dictionary!!) Derived classes
133    can change the definition of some entries, as long as they leave
134    [-2:] intact (frame and previous tuple).  In case an internal error is
135    detected, the -3 element is used as the function name.
136
137    [ 0] = Time that needs to be charged to the parent frame's function.
138           It is used so that a function call will not have to access the
139           timing data for the parent frame.
140    [ 1] = Total time spent in this frame's function, excluding time in
141           subfunctions (this latter is tallied in cur[2]).
142    [ 2] = Total time spent in subfunctions, excluding time executing the
143           frame's function (this latter is tallied in cur[1]).
144    [-3] = Name of the function that corresponds to this frame.
145    [-2] = Actual frame that we correspond to (used to sync exception handling).
146    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
147
148    Timing data for each function is stored as a 5-tuple in the dictionary
149    self.timings[].  The index is always the name stored in self.cur[-3].
150    The following are the definitions of the members:
151
152    [0] = The number of times this function was called, not counting direct
153          or indirect recursion,
154    [1] = Number of times this function appears on the stack, minus one
155    [2] = Total time spent internal to this function
156    [3] = Cumulative time that this function was present on the stack.  In
157          non-recursive functions, this is the total execution time from start
158          to finish of each invocation of a function, including time spent in
159          all subfunctions.
160    [4] = A dictionary indicating for each function name, the number of times
161          it was called by us.
162    """
163
164    bias = 0  # calibration constant
165
166    def __init__(self, timer=None, bias=None):
167        self.timings = {}
168        self.cur = None
169        self.cmd = ""
170        self.c_func_name = ""
171
172        if bias is None:
173            bias = self.bias
174        self.bias = bias     # Materialize in local dict for lookup speed.
175
176        if not timer:
177            if _has_res:
178                self.timer = resgetrusage
179                self.dispatcher = self.trace_dispatch
180                self.get_time = _get_time_resource
181            elif os.name == 'mac':
182                self.timer = MacOS.GetTicks
183                self.dispatcher = self.trace_dispatch_mac
184                self.get_time = _get_time_mac
185            elif hasattr(time, 'clock'):
186                self.timer = self.get_time = time.clock
187                self.dispatcher = self.trace_dispatch_i
188            elif hasattr(os, 'times'):
189                self.timer = os.times
190                self.dispatcher = self.trace_dispatch
191                self.get_time = _get_time_times
192            else:
193                self.timer = self.get_time = time.time
194                self.dispatcher = self.trace_dispatch_i
195        else:
196            self.timer = timer
197            t = self.timer() # test out timer function
198            try:
199                length = len(t)
200            except TypeError:
201                self.get_time = timer
202                self.dispatcher = self.trace_dispatch_i
203            else:
204                if length == 2:
205                    self.dispatcher = self.trace_dispatch
206                else:
207                    self.dispatcher = self.trace_dispatch_l
208                # This get_time() implementation needs to be defined
209                # here to capture the passed-in timer in the parameter
210                # list (for performance).  Note that we can't assume
211                # the timer() result contains two values in all
212                # cases.
213                def get_time_timer(timer=timer, sum=sum):
214                    return sum(timer())
215                self.get_time = get_time_timer
216        self.t = self.get_time()
217        self.simulate_call('profiler')
218
219    # Heavily optimized dispatch routine for os.times() timer
220
221    def trace_dispatch(self, frame, event, arg):
222        timer = self.timer
223        t = timer()
224        t = t[0] + t[1] - self.t - self.bias
225
226        if event == "c_call":
227            self.c_func_name = arg.__name__
228
229        if self.dispatch[event](self, frame,t):
230            t = timer()
231            self.t = t[0] + t[1]
232        else:
233            r = timer()
234            self.t = r[0] + r[1] - t # put back unrecorded delta
235
236    # Dispatch routine for best timer program (return = scalar, fastest if
237    # an integer but float works too -- and time.clock() relies on that).
238
239    def trace_dispatch_i(self, frame, event, arg):
240        timer = self.timer
241        t = timer() - self.t - self.bias
242
243        if event == "c_call":
244            self.c_func_name = arg.__name__
245
246        if self.dispatch[event](self, frame, t):
247            self.t = timer()
248        else:
249            self.t = timer() - t  # put back unrecorded delta
250
251    # Dispatch routine for macintosh (timer returns time in ticks of
252    # 1/60th second)
253
254    def trace_dispatch_mac(self, frame, event, arg):
255        timer = self.timer
256        t = timer()/60.0 - self.t - self.bias
257
258        if event == "c_call":
259            self.c_func_name = arg.__name__
260
261        if self.dispatch[event](self, frame, t):
262            self.t = timer()/60.0
263        else:
264            self.t = timer()/60.0 - t  # put back unrecorded delta
265
266    # SLOW generic dispatch routine for timer returning lists of numbers
267
268    def trace_dispatch_l(self, frame, event, arg):
269        get_time = self.get_time
270        t = get_time() - self.t - self.bias
271
272        if event == "c_call":
273            self.c_func_name = arg.__name__
274
275        if self.dispatch[event](self, frame, t):
276            self.t = get_time()
277        else:
278            self.t = get_time() - t # put back unrecorded delta
279
280    # In the event handlers, the first 3 elements of self.cur are unpacked
281    # into vrbls w/ 3-letter names.  The last two characters are meant to be
282    # mnemonic:
283    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
284    #     _it  self.cur[1] "internal time" time spent directly in the function
285    #     _et  self.cur[2] "external time" time spent in subfunctions
286
287    def trace_dispatch_exception(self, frame, t):
288        rpt, rit, ret, rfn, rframe, rcur = self.cur
289        if (rframe is not frame) and rcur:
290            return self.trace_dispatch_return(rframe, t)
291        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
292        return 1
293
294
295    def trace_dispatch_call(self, frame, t):
296        if self.cur and frame.f_back is not self.cur[-2]:
297            rpt, rit, ret, rfn, rframe, rcur = self.cur
298            if not isinstance(rframe, Profile.fake_frame):
299                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
300                                                       rframe, rframe.f_back,
301                                                       frame, frame.f_back)
302                self.trace_dispatch_return(rframe, 0)
303                assert (self.cur is None or \
304                        frame.f_back is self.cur[-2]), ("Bad call",
305                                                        self.cur[-3])
306        fcode = frame.f_code
307        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
308        self.cur = (t, 0, 0, fn, frame, self.cur)
309        timings = self.timings
310        if fn in timings:
311            cc, ns, tt, ct, callers = timings[fn]
312            timings[fn] = cc, ns + 1, tt, ct, callers
313        else:
314            timings[fn] = 0, 0, 0, 0, {}
315        return 1
316
317    def trace_dispatch_c_call (self, frame, t):
318        fn = ("", 0, self.c_func_name)
319        self.cur = (t, 0, 0, fn, frame, self.cur)
320        timings = self.timings
321        if fn in timings:
322            cc, ns, tt, ct, callers = timings[fn]
323            timings[fn] = cc, ns+1, tt, ct, callers
324        else:
325            timings[fn] = 0, 0, 0, 0, {}
326        return 1
327
328    def trace_dispatch_return(self, frame, t):
329        if frame is not self.cur[-2]:
330            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
331            self.trace_dispatch_return(self.cur[-2], 0)
332
333        # Prefix "r" means part of the Returning or exiting frame.
334        # Prefix "p" means part of the Previous or Parent or older frame.
335
336        rpt, rit, ret, rfn, frame, rcur = self.cur
337        rit = rit + t
338        frame_total = rit + ret
339
340        ppt, pit, pet, pfn, pframe, pcur = rcur
341        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
342
343        timings = self.timings
344        cc, ns, tt, ct, callers = timings[rfn]
345        if not ns:
346            # This is the only occurrence of the function on the stack.
347            # Else this is a (directly or indirectly) recursive call, and
348            # its cumulative time will get updated when the topmost call to
349            # it returns.
350            ct = ct + frame_total
351            cc = cc + 1
352
353        if pfn in callers:
354            callers[pfn] = callers[pfn] + 1  # hack: gather more
355            # stats such as the amount of time added to ct courtesy
356            # of this specific call, and the contribution to cc
357            # courtesy of this call.
358        else:
359            callers[pfn] = 1
360
361        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
362
363        return 1
364
365
366    dispatch = {
367        "call": trace_dispatch_call,
368        "exception": trace_dispatch_exception,
369        "return": trace_dispatch_return,
370        "c_call": trace_dispatch_c_call,
371        "c_exception": trace_dispatch_return,  # the C function returned
372        "c_return": trace_dispatch_return,
373        }
374
375
376    # The next few functions play with self.cmd. By carefully preloading
377    # our parallel stack, we can force the profiled result to include
378    # an arbitrary string as the name of the calling function.
379    # We use self.cmd as that string, and the resulting stats look
380    # very nice :-).
381
382    def set_cmd(self, cmd):
383        if self.cur[-1]: return   # already set
384        self.cmd = cmd
385        self.simulate_call(cmd)
386
387    class fake_code:
388        def __init__(self, filename, line, name):
389            self.co_filename = filename
390            self.co_line = line
391            self.co_name = name
392            self.co_firstlineno = 0
393
394        def __repr__(self):
395            return repr((self.co_filename, self.co_line, self.co_name))
396
397    class fake_frame:
398        def __init__(self, code, prior):
399            self.f_code = code
400            self.f_back = prior
401
402    def simulate_call(self, name):
403        code = self.fake_code('profile', 0, name)
404        if self.cur:
405            pframe = self.cur[-2]
406        else:
407            pframe = None
408        frame = self.fake_frame(code, pframe)
409        self.dispatch['call'](self, frame, 0)
410
411    # collect stats from pending stack, including getting final
412    # timings for self.cmd frame.
413
414    def simulate_cmd_complete(self):
415        get_time = self.get_time
416        t = get_time() - self.t
417        while self.cur[-1]:
418            # We *can* cause assertion errors here if
419            # dispatch_trace_return checks for a frame match!
420            self.dispatch['return'](self, self.cur[-2], t)
421            t = 0
422        self.t = get_time() - t
423
424
425    def print_stats(self, sort=-1):
426        import pstats
427        pstats.Stats(self).strip_dirs().sort_stats(sort). \
428                  print_stats()
429
430    def dump_stats(self, file):
431        f = open(file, 'wb')
432        self.create_stats()
433        marshal.dump(self.stats, f)
434        f.close()
435
436    def create_stats(self):
437        self.simulate_cmd_complete()
438        self.snapshot_stats()
439
440    def snapshot_stats(self):
441        self.stats = {}
442        for func, (cc, ns, tt, ct, callers) in self.timings.iteritems():
443            callers = callers.copy()
444            nc = 0
445            for callcnt in callers.itervalues():
446                nc += callcnt
447            self.stats[func] = cc, nc, tt, ct, callers
448
449
450    # The following two methods can be called by clients to use
451    # a profiler to profile a statement, given as a string.
452
453    def run(self, cmd):
454        import __main__
455        dict = __main__.__dict__
456        return self.runctx(cmd, dict, dict)
457
458    def runctx(self, cmd, globals, locals):
459        self.set_cmd(cmd)
460        sys.setprofile(self.dispatcher)
461        try:
462            exec cmd in globals, locals
463        finally:
464            sys.setprofile(None)
465        return self
466
467    # This method is more useful to profile a single function call.
468    def runcall(self, func, *args, **kw):
469        self.set_cmd(repr(func))
470        sys.setprofile(self.dispatcher)
471        try:
472            return func(*args, **kw)
473        finally:
474            sys.setprofile(None)
475
476
477    #******************************************************************
478    # The following calculates the overhead for using a profiler.  The
479    # problem is that it takes a fair amount of time for the profiler
480    # to stop the stopwatch (from the time it receives an event).
481    # Similarly, there is a delay from the time that the profiler
482    # re-starts the stopwatch before the user's code really gets to
483    # continue.  The following code tries to measure the difference on
484    # a per-event basis.
485    #
486    # Note that this difference is only significant if there are a lot of
487    # events, and relatively little user code per event.  For example,
488    # code with small functions will typically benefit from having the
489    # profiler calibrated for the current platform.  This *could* be
490    # done on the fly during init() time, but it is not worth the
491    # effort.  Also note that if too large a value specified, then
492    # execution time on some functions will actually appear as a
493    # negative number.  It is *normal* for some functions (with very
494    # low call counts) to have such negative stats, even if the
495    # calibration figure is "correct."
496    #
497    # One alternative to profile-time calibration adjustments (i.e.,
498    # adding in the magic little delta during each event) is to track
499    # more carefully the number of events (and cumulatively, the number
500    # of events during sub functions) that are seen.  If this were
501    # done, then the arithmetic could be done after the fact (i.e., at
502    # display time).  Currently, we track only call/return events.
503    # These values can be deduced by examining the callees and callers
504    # vectors for each functions.  Hence we *can* almost correct the
505    # internal time figure at print time (note that we currently don't
506    # track exception event processing counts).  Unfortunately, there
507    # is currently no similar information for cumulative sub-function
508    # time.  It would not be hard to "get all this info" at profiler
509    # time.  Specifically, we would have to extend the tuples to keep
510    # counts of this in each frame, and then extend the defs of timing
511    # tuples to include the significant two figures. I'm a bit fearful
512    # that this additional feature will slow the heavily optimized
513    # event/time ratio (i.e., the profiler would run slower, fur a very
514    # low "value added" feature.)
515    #**************************************************************
516
517    def calibrate(self, m, verbose=0):
518        if self.__class__ is not Profile:
519            raise TypeError("Subclasses must override .calibrate().")
520
521        saved_bias = self.bias
522        self.bias = 0
523        try:
524            return self._calibrate_inner(m, verbose)
525        finally:
526            self.bias = saved_bias
527
528    def _calibrate_inner(self, m, verbose):
529        get_time = self.get_time
530
531        # Set up a test case to be run with and without profiling.  Include
532        # lots of calls, because we're trying to quantify stopwatch overhead.
533        # Do not raise any exceptions, though, because we want to know
534        # exactly how many profile events are generated (one call event, +
535        # one return event, per Python-level call).
536
537        def f1(n):
538            for i in range(n):
539                x = 1
540
541        def f(m, f1=f1):
542            for i in range(m):
543                f1(100)
544
545        f(m)    # warm up the cache
546
547        # elapsed_noprofile <- time f(m) takes without profiling.
548        t0 = get_time()
549        f(m)
550        t1 = get_time()
551        elapsed_noprofile = t1 - t0
552        if verbose:
553            print "elapsed time without profiling =", elapsed_noprofile
554
555        # elapsed_profile <- time f(m) takes with profiling.  The difference
556        # is profiling overhead, only some of which the profiler subtracts
557        # out on its own.
558        p = Profile()
559        t0 = get_time()
560        p.runctx('f(m)', globals(), locals())
561        t1 = get_time()
562        elapsed_profile = t1 - t0
563        if verbose:
564            print "elapsed time with profiling =", elapsed_profile
565
566        # reported_time <- "CPU seconds" the profiler charged to f and f1.
567        total_calls = 0.0
568        reported_time = 0.0
569        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
570                p.timings.items():
571            if funcname in ("f", "f1"):
572                total_calls += cc
573                reported_time += tt
574
575        if verbose:
576            print "'CPU seconds' profiler reported =", reported_time
577            print "total # calls =", total_calls
578        if total_calls != m + 1:
579            raise ValueError("internal error: total calls = %d" % total_calls)
580
581        # reported_time - elapsed_noprofile = overhead the profiler wasn't
582        # able to measure.  Divide by twice the number of calls (since there
583        # are two profiler events per call in this test) to get the hidden
584        # overhead per event.
585        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
586        if verbose:
587            print "mean stopwatch overhead per profile event =", mean
588        return mean
589
590#****************************************************************************
591def Stats(*args):
592    print 'Report generating functions are in the "pstats" module\a'
593
594def main():
595    usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
596    parser = OptionParser(usage=usage)
597    parser.allow_interspersed_args = False
598    parser.add_option('-o', '--outfile', dest="outfile",
599        help="Save stats to <outfile>", default=None)
600    parser.add_option('-s', '--sort', dest="sort",
601        help="Sort order when printing to stdout, based on pstats.Stats class", default=-1)
602
603    if not sys.argv[1:]:
604        parser.print_usage()
605        sys.exit(2)
606
607    (options, args) = parser.parse_args()
608
609    if (len(args) > 0):
610        sys.argv[:] = args
611        sys.path.insert(0, os.path.dirname(sys.argv[0]))
612        run('execfile(%r)' % (sys.argv[0],), options.outfile, options.sort)
613    else:
614        parser.print_usage()
615    return parser
616
617# When invoked as main program, invoke the profiler on a script
618if __name__ == '__main__':
619    main()