| #! /usr/bin/env python |
| # |
| # Class for profiling python code. rev 1.0 6/2/94 |
| # |
| # Based on prior profile module by Sjoerd Mullender... |
| # which was hacked somewhat by: Guido van Rossum |
| # |
| # See profile.doc for more information |
| |
| |
| # Copyright 1994, by InfoSeek Corporation, all rights reserved. |
| # Written by James Roskind |
| # |
| # Permission to use, copy, modify, and distribute this Python software |
| # and its associated documentation for any purpose (subject to the |
| # restriction in the following sentence) without fee is hereby granted, |
| # provided that the above copyright notice appears in all copies, and |
| # that both that copyright notice and this permission notice appear in |
| # supporting documentation, and that the name of InfoSeek not be used in |
| # advertising or publicity pertaining to distribution of the software |
| # without specific, written prior permission. This permission is |
| # explicitly restricted to the copying and modification of the software |
| # to remain in Python, compiled Python, or other languages (such as C) |
| # wherein the modified or derived code is exclusively imported into a |
| # Python module. |
| # |
| # INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS |
| # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| # FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY |
| # SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER |
| # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF |
| # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| |
| |
| |
| import sys |
| import os |
| import time |
| import string |
| import marshal |
| |
| |
| # Global variables |
| func_norm_dict = {} |
| func_norm_counter = 0 |
| if hasattr(os, 'getpid'): |
| pid_string = `os.getpid()` |
| else: |
| pid_string = '' |
| |
| |
| # Sample timer for use with |
| #i_count = 0 |
| #def integer_timer(): |
| # global i_count |
| # i_count = i_count + 1 |
| # return i_count |
| #itimes = integer_timer # replace with C coded timer returning integers |
| |
| #************************************************************************** |
| # The following are the static member functions for the profiler class |
| # Note that an instance of Profile() is *not* needed to call them. |
| #************************************************************************** |
| |
| |
| # simplified user interface |
| def run(statement, *args): |
| prof = Profile() |
| try: |
| prof = prof.run(statement) |
| except SystemExit: |
| pass |
| if args: |
| prof.dump_stats(args[0]) |
| else: |
| return prof.print_stats() |
| |
| # print help |
| def help(): |
| for dirname in sys.path: |
| fullname = os.path.join(dirname, 'profile.doc') |
| if os.path.exists(fullname): |
| sts = os.system('${PAGER-more} '+fullname) |
| if sts: print '*** Pager exit status:', sts |
| break |
| else: |
| print 'Sorry, can\'t find the help file "profile.doc"', |
| print 'along the Python search path' |
| |
| |
| #************************************************************************** |
| # class Profile documentation: |
| #************************************************************************** |
| # self.cur is always a tuple. Each such tuple corresponds to a stack |
| # frame that is currently active (self.cur[-2]). The following are the |
| # definitions of its members. We use this external "parallel stack" to |
| # avoid contaminating the program that we are profiling. (old profiler |
| # used to write into the frames local dictionary!!) Derived classes |
| # can change the definition of some entries, as long as they leave |
| # [-2:] intact. |
| # |
| # [ 0] = Time that needs to be charged to the parent frame's function. It is |
| # used so that a function call will not have to access the timing data |
| # for the parents frame. |
| # [ 1] = Total time spent in this frame's function, excluding time in |
| # subfunctions |
| # [ 2] = Cumulative time spent in this frame's function, including time in |
| # all subfunctions to this frame. |
| # [-3] = Name of the function that corresonds to this frame. |
| # [-2] = Actual frame that we correspond to (used to sync exception handling) |
| # [-1] = Our parent 6-tuple (corresonds to frame.f_back) |
| #************************************************************************** |
| # Timing data for each function is stored as a 5-tuple in the dictionary |
| # self.timings[]. The index is always the name stored in self.cur[4]. |
| # The following are the definitions of the members: |
| # |
| # [0] = The number of times this function was called, not counting direct |
| # or indirect recursion, |
| # [1] = Number of times this function appears on the stack, minus one |
| # [2] = Total time spent internal to this function |
| # [3] = Cumulative time that this function was present on the stack. In |
| # non-recursive functions, this is the total execution time from start |
| # to finish of each invocation of a function, including time spent in |
| # all subfunctions. |
| # [5] = A dictionary indicating for each function name, the number of times |
| # it was called by us. |
| #************************************************************************** |
| # We produce function names via a repr() call on the f_code object during |
| # profiling. This save a *lot* of CPU time. This results in a string that |
| # always looks like: |
| # <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76> |
| # After we "normalize it, it is a tuple of filename, line, function-name. |
| # We wait till we are done profiling to do the normalization. |
| # *IF* this repr format changes, then only the normalization routine should |
| # need to be fixed. |
| #************************************************************************** |
| class Profile: |
| |
| def __init__(self, timer=None): |
| self.timings = {} |
| self.cur = None |
| self.cmd = "" |
| |
| self.dispatch = { \ |
| 'call' : self.trace_dispatch_call, \ |
| 'return' : self.trace_dispatch_return, \ |
| 'exception': self.trace_dispatch_exception, \ |
| } |
| |
| if not timer: |
| if os.name == 'mac': |
| import MacOS |
| self.timer = MacOS.GetTicks |
| self.dispatcher = self.trace_dispatch_mac |
| self.get_time = self.get_time_mac |
| elif hasattr(time, 'clock'): |
| self.timer = time.clock |
| self.dispatcher = self.trace_dispatch_i |
| elif hasattr(os, 'times'): |
| self.timer = os.times |
| self.dispatcher = self.trace_dispatch |
| else: |
| self.timer = time.time |
| self.dispatcher = self.trace_dispatch_i |
| else: |
| self.timer = timer |
| t = self.timer() # test out timer function |
| try: |
| if len(t) == 2: |
| self.dispatcher = self.trace_dispatch |
| else: |
| self.dispatcher = self.trace_dispatch_l |
| except TypeError: |
| self.dispatcher = self.trace_dispatch_i |
| self.t = self.get_time() |
| self.simulate_call('profiler') |
| |
| |
| def get_time(self): # slow simulation of method to acquire time |
| t = self.timer() |
| if type(t) == type(()) or type(t) == type([]): |
| t = reduce(lambda x,y: x+y, t, 0) |
| return t |
| |
| def get_time_mac(self): |
| return self.timer()/60.0 |
| |
| # Heavily optimized dispatch routine for os.times() timer |
| |
| def trace_dispatch(self, frame, event, arg): |
| t = self.timer() |
| t = t[0] + t[1] - self.t # No Calibration constant |
| # t = t[0] + t[1] - self.t - .00053 # Calibration constant |
| |
| if self.dispatch[event](frame,t): |
| t = self.timer() |
| self.t = t[0] + t[1] |
| else: |
| r = self.timer() |
| self.t = r[0] + r[1] - t # put back unrecorded delta |
| return |
| |
| |
| |
| # Dispatch routine for best timer program (return = scalar integer) |
| |
| def trace_dispatch_i(self, frame, event, arg): |
| t = self.timer() - self.t # - 1 # Integer calibration constant |
| if self.dispatch[event](frame,t): |
| self.t = self.timer() |
| else: |
| self.t = self.timer() - t # put back unrecorded delta |
| return |
| |
| # Dispatch routine for macintosh (timer returns time in ticks of 1/60th second) |
| |
| def trace_dispatch_mac(self, frame, event, arg): |
| t = self.timer()/60.0 - self.t # - 1 # Integer calibration constant |
| if self.dispatch[event](frame,t): |
| self.t = self.timer()/60.0 |
| else: |
| self.t = self.timer()/60.0 - t # put back unrecorded delta |
| return |
| |
| |
| # SLOW generic dispatch rountine for timer returning lists of numbers |
| |
| def trace_dispatch_l(self, frame, event, arg): |
| t = self.get_time() - self.t |
| |
| if self.dispatch[event](frame,t): |
| self.t = self.get_time() |
| else: |
| self.t = self.get_time()-t # put back unrecorded delta |
| return |
| |
| |
| def trace_dispatch_exception(self, frame, t): |
| rt, rtt, rct, rfn, rframe, rcur = self.cur |
| if (not rframe is frame) and rcur: |
| return self.trace_dispatch_return(rframe, t) |
| return 0 |
| |
| |
| def trace_dispatch_call(self, frame, t): |
| fn = `frame.f_code` |
| |
| # The following should be about the best approach, but |
| # we would need a function that maps from id() back to |
| # the actual code object. |
| # fn = id(frame.f_code) |
| # Note we would really use our own function, which would |
| # return the code address, *and* bump the ref count. We |
| # would then fix up the normalize function to do the |
| # actualy repr(fn) call. |
| |
| # The following is an interesting alternative |
| # It doesn't do as good a job, and it doesn't run as |
| # fast 'cause repr() is written in C, and this is Python. |
| #fcode = frame.f_code |
| #code = fcode.co_code |
| #if ord(code[0]) == 127: # == SET_LINENO |
| # # see "opcode.h" in the Python source |
| # fn = (fcode.co_filename, ord(code[1]) | \ |
| # ord(code[2]) << 8, fcode.co_name) |
| #else: |
| # fn = (fcode.co_filename, 0, fcode.co_name) |
| |
| self.cur = (t, 0, 0, fn, frame, self.cur) |
| if self.timings.has_key(fn): |
| cc, ns, tt, ct, callers = self.timings[fn] |
| self.timings[fn] = cc, ns + 1, tt, ct, callers |
| else: |
| self.timings[fn] = 0, 0, 0, 0, {} |
| return 1 |
| |
| def trace_dispatch_return(self, frame, t): |
| # if not frame is self.cur[-2]: raise "Bad return", self.cur[3] |
| |
| # Prefix "r" means part of the Returning or exiting frame |
| # Prefix "p" means part of the Previous or older frame |
| |
| rt, rtt, rct, rfn, frame, rcur = self.cur |
| rtt = rtt + t |
| sft = rtt + rct |
| |
| pt, ptt, pct, pfn, pframe, pcur = rcur |
| self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur |
| |
| cc, ns, tt, ct, callers = self.timings[rfn] |
| if not ns: |
| ct = ct + sft |
| cc = cc + 1 |
| if callers.has_key(pfn): |
| callers[pfn] = callers[pfn] + 1 # hack: gather more |
| # stats such as the amount of time added to ct courtesy |
| # of this specific call, and the contribution to cc |
| # courtesy of this call. |
| else: |
| callers[pfn] = 1 |
| self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers |
| |
| return 1 |
| |
| # The next few function play with self.cmd. By carefully preloading |
| # our paralell stack, we can force the profiled result to include |
| # an arbitrary string as the name of the calling function. |
| # We use self.cmd as that string, and the resulting stats look |
| # very nice :-). |
| |
| def set_cmd(self, cmd): |
| if self.cur[-1]: return # already set |
| self.cmd = cmd |
| self.simulate_call(cmd) |
| |
| class fake_code: |
| def __init__(self, filename, line, name): |
| self.co_filename = filename |
| self.co_line = line |
| self.co_name = name |
| self.co_code = '\0' # anything but 127 |
| |
| def __repr__(self): |
| return (self.co_filename, self.co_line, self.co_name) |
| |
| class fake_frame: |
| def __init__(self, code, prior): |
| self.f_code = code |
| self.f_back = prior |
| |
| def simulate_call(self, name): |
| code = self.fake_code('profile', 0, name) |
| if self.cur: |
| pframe = self.cur[-2] |
| else: |
| pframe = None |
| frame = self.fake_frame(code, pframe) |
| a = self.dispatch['call'](frame, 0) |
| return |
| |
| # collect stats from pending stack, including getting final |
| # timings for self.cmd frame. |
| |
| def simulate_cmd_complete(self): |
| t = self.get_time() - self.t |
| while self.cur[-1]: |
| # We *can* cause assertion errors here if |
| # dispatch_trace_return checks for a frame match! |
| a = self.dispatch['return'](self.cur[-2], t) |
| t = 0 |
| self.t = self.get_time() - t |
| |
| |
| def print_stats(self): |
| import pstats |
| pstats.Stats(self).strip_dirs().sort_stats(-1). \ |
| print_stats() |
| |
| def dump_stats(self, file): |
| f = open(file, 'wb') |
| self.create_stats() |
| marshal.dump(self.stats, f) |
| f.close() |
| |
| def create_stats(self): |
| self.simulate_cmd_complete() |
| self.snapshot_stats() |
| |
| def snapshot_stats(self): |
| self.stats = {} |
| for func in self.timings.keys(): |
| cc, ns, tt, ct, callers = self.timings[func] |
| nor_func = self.func_normalize(func) |
| nor_callers = {} |
| nc = 0 |
| for func_caller in callers.keys(): |
| nor_callers[self.func_normalize(func_caller)]=\ |
| callers[func_caller] |
| nc = nc + callers[func_caller] |
| self.stats[nor_func] = cc, nc, tt, ct, nor_callers |
| |
| |
| # Override the following function if you can figure out |
| # a better name for the binary f_code entries. I just normalize |
| # them sequentially in a dictionary. It would be nice if we could |
| # *really* see the name of the underlying C code :-). Sometimes |
| # you can figure out what-is-what by looking at caller and callee |
| # lists (and knowing what your python code does). |
| |
| def func_normalize(self, func_name): |
| global func_norm_dict |
| global func_norm_counter |
| global func_sequence_num |
| |
| if func_norm_dict.has_key(func_name): |
| return func_norm_dict[func_name] |
| if type(func_name) == type(""): |
| long_name = string.split(func_name) |
| file_name = long_name[-3][1:-2] |
| func = long_name[2] |
| lineno = long_name[-1][:-1] |
| if '?' == func: # Until I find out how to may 'em... |
| file_name = 'python' |
| func_norm_counter = func_norm_counter + 1 |
| func = pid_string + ".C." + `func_norm_counter` |
| result = file_name , string.atoi(lineno) , func |
| else: |
| result = func_name |
| func_norm_dict[func_name] = result |
| return result |
| |
| |
| # The following two methods can be called by clients to use |
| # a profiler to profile a statement, given as a string. |
| |
| def run(self, cmd): |
| import __main__ |
| dict = __main__.__dict__ |
| return self.runctx(cmd, dict, dict) |
| |
| def runctx(self, cmd, globals, locals): |
| self.set_cmd(cmd) |
| sys.setprofile(self.dispatcher) |
| try: |
| exec cmd in globals, locals |
| finally: |
| sys.setprofile(None) |
| return self |
| |
| # This method is more useful to profile a single function call. |
| def runcall(self, func, *args): |
| self.set_cmd(`func`) |
| sys.setprofile(self.dispatcher) |
| try: |
| return apply(func, args) |
| finally: |
| sys.setprofile(None) |
| |
| |
| #****************************************************************** |
| # The following calculates the overhead for using a profiler. The |
| # problem is that it takes a fair amount of time for the profiler |
| # to stop the stopwatch (from the time it recieves an event). |
| # Similarly, there is a delay from the time that the profiler |
| # re-starts the stopwatch before the user's code really gets to |
| # continue. The following code tries to measure the difference on |
| # a per-event basis. The result can the be placed in the |
| # Profile.dispatch_event() routine for the given platform. Note |
| # that this difference is only significant if there are a lot of |
| # events, and relatively little user code per event. For example, |
| # code with small functions will typically benefit from having the |
| # profiler calibrated for the current platform. This *could* be |
| # done on the fly during init() time, but it is not worth the |
| # effort. Also note that if too large a value specified, then |
| # execution time on some functions will actually appear as a |
| # negative number. It is *normal* for some functions (with very |
| # low call counts) to have such negative stats, even if the |
| # calibration figure is "correct." |
| # |
| # One alternative to profile-time calibration adjustments (i.e., |
| # adding in the magic little delta during each event) is to track |
| # more carefully the number of events (and cumulatively, the number |
| # of events during sub functions) that are seen. If this were |
| # done, then the arithmetic could be done after the fact (i.e., at |
| # display time). Currintly, we track only call/return events. |
| # These values can be deduced by examining the callees and callers |
| # vectors for each functions. Hence we *can* almost correct the |
| # internal time figure at print time (note that we currently don't |
| # track exception event processing counts). Unfortunately, there |
| # is currently no similar information for cumulative sub-function |
| # time. It would not be hard to "get all this info" at profiler |
| # time. Specifically, we would have to extend the tuples to keep |
| # counts of this in each frame, and then extend the defs of timing |
| # tuples to include the significant two figures. I'm a bit fearful |
| # that this additional feature will slow the heavily optimized |
| # event/time ratio (i.e., the profiler would run slower, fur a very |
| # low "value added" feature.) |
| # |
| # Plugging in the calibration constant doesn't slow down the |
| # profiler very much, and the accuracy goes way up. |
| #************************************************************** |
| |
| def calibrate(self, m): |
| n = m |
| s = self.timer() |
| while n: |
| self.simple() |
| n = n - 1 |
| f = self.timer() |
| my_simple = f[0]+f[1]-s[0]-s[1] |
| #print "Simple =", my_simple, |
| |
| n = m |
| s = self.timer() |
| while n: |
| self.instrumented() |
| n = n - 1 |
| f = self.timer() |
| my_inst = f[0]+f[1]-s[0]-s[1] |
| # print "Instrumented =", my_inst |
| avg_cost = (my_inst - my_simple)/m |
| #print "Delta/call =", avg_cost, "(profiler fixup constant)" |
| return avg_cost |
| |
| # simulate a program with no profiler activity |
| def simple(self): |
| a = 1 |
| pass |
| |
| # simulate a program with call/return event processing |
| def instrumented(self): |
| a = 1 |
| self.profiler_simulation(a, a, a) |
| |
| # simulate an event processing activity (from user's perspective) |
| def profiler_simulation(self, x, y, z): |
| t = self.timer() |
| t = t[0] + t[1] |
| self.ut = t |
| |
| |
| |
| #**************************************************************************** |
| # OldProfile class documentation |
| #**************************************************************************** |
| # |
| # The following derived profiler simulates the old style profile, providing |
| # errant results on recursive functions. The reason for the usefulnes of this |
| # profiler is that it runs faster (i.e., less overhead). It still creates |
| # all the caller stats, and is quite useful when there is *no* recursion |
| # in the user's code. |
| # |
| # This code also shows how easy it is to create a modified profiler. |
| #**************************************************************************** |
| class OldProfile(Profile): |
| def trace_dispatch_exception(self, frame, t): |
| rt, rtt, rct, rfn, rframe, rcur = self.cur |
| if rcur and not rframe is frame: |
| return self.trace_dispatch_return(rframe, t) |
| return 0 |
| |
| def trace_dispatch_call(self, frame, t): |
| fn = `frame.f_code` |
| |
| self.cur = (t, 0, 0, fn, frame, self.cur) |
| if self.timings.has_key(fn): |
| tt, ct, callers = self.timings[fn] |
| self.timings[fn] = tt, ct, callers |
| else: |
| self.timings[fn] = 0, 0, {} |
| return 1 |
| |
| def trace_dispatch_return(self, frame, t): |
| rt, rtt, rct, rfn, frame, rcur = self.cur |
| rtt = rtt + t |
| sft = rtt + rct |
| |
| pt, ptt, pct, pfn, pframe, pcur = rcur |
| self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur |
| |
| tt, ct, callers = self.timings[rfn] |
| if callers.has_key(pfn): |
| callers[pfn] = callers[pfn] + 1 |
| else: |
| callers[pfn] = 1 |
| self.timings[rfn] = tt+rtt, ct + sft, callers |
| |
| return 1 |
| |
| |
| def snapshot_stats(self): |
| self.stats = {} |
| for func in self.timings.keys(): |
| tt, ct, callers = self.timings[func] |
| nor_func = self.func_normalize(func) |
| nor_callers = {} |
| nc = 0 |
| for func_caller in callers.keys(): |
| nor_callers[self.func_normalize(func_caller)]=\ |
| callers[func_caller] |
| nc = nc + callers[func_caller] |
| self.stats[nor_func] = nc, nc, tt, ct, nor_callers |
| |
| |
| |
| #**************************************************************************** |
| # HotProfile class documentation |
| #**************************************************************************** |
| # |
| # This profiler is the fastest derived profile example. It does not |
| # calculate caller-callee relationships, and does not calculate cumulative |
| # time under a function. It only calculates time spent in a function, so |
| # it runs very quickly (re: very low overhead) |
| #**************************************************************************** |
| class HotProfile(Profile): |
| def trace_dispatch_exception(self, frame, t): |
| rt, rtt, rfn, rframe, rcur = self.cur |
| if rcur and not rframe is frame: |
| return self.trace_dispatch_return(rframe, t) |
| return 0 |
| |
| def trace_dispatch_call(self, frame, t): |
| self.cur = (t, 0, frame, self.cur) |
| return 1 |
| |
| def trace_dispatch_return(self, frame, t): |
| rt, rtt, frame, rcur = self.cur |
| |
| rfn = `frame.f_code` |
| |
| pt, ptt, pframe, pcur = rcur |
| self.cur = pt, ptt+rt, pframe, pcur |
| |
| if self.timings.has_key(rfn): |
| nc, tt = self.timings[rfn] |
| self.timings[rfn] = nc + 1, rt + rtt + tt |
| else: |
| self.timings[rfn] = 1, rt + rtt |
| |
| return 1 |
| |
| |
| def snapshot_stats(self): |
| self.stats = {} |
| for func in self.timings.keys(): |
| nc, tt = self.timings[func] |
| nor_func = self.func_normalize(func) |
| self.stats[nor_func] = nc, nc, tt, 0, {} |
| |
| |
| |
| #**************************************************************************** |
| def Stats(*args): |
| print 'Report generating functions are in the "pstats" module\a' |
| |
| |
| # When invoked as main program, invoke the profiler on a script |
| if __name__ == '__main__': |
| import sys |
| import os |
| if not sys.argv[1:]: |
| print "usage: profile.py scriptfile [arg] ..." |
| sys.exit(2) |
| |
| filename = sys.argv[1] # Get script filename |
| |
| del sys.argv[0] # Hide "profile.py" from argument list |
| |
| # Insert script directory in front of module search path |
| sys.path.insert(0, os.path.dirname(filename)) |
| |
| run('execfile(' + `filename` + ')') |