blob: 7f4abed60cbcbf1a4f1d79eb54750516a19df0bd [file] [log] [blame]
Guido van Rossum81762581992-04-21 15:36:23 +00001#
Guido van Rossumb6775db1994-08-01 11:34:53 +00002# Class for profiling python code. rev 1.0 6/2/94
Guido van Rossum81762581992-04-21 15:36:23 +00003#
Guido van Rossumb6775db1994-08-01 11:34:53 +00004# Based on prior profile module by Sjoerd Mullender...
5# which was hacked somewhat by: Guido van Rossum
6#
7# See profile.doc for more information
8
9
10# Copyright 1994, by InfoSeek Corporation, all rights reserved.
11# Written by James Roskind
12#
13# Permission to use, copy, modify, and distribute this Python software
14# and its associated documentation for any purpose (subject to the
15# restriction in the following sentence) without fee is hereby granted,
16# provided that the above copyright notice appears in all copies, and
17# that both that copyright notice and this permission notice appear in
18# supporting documentation, and that the name of InfoSeek not be used in
19# advertising or publicity pertaining to distribution of the software
20# without specific, written prior permission. This permission is
21# explicitly restricted to the copying and modification of the software
22# to remain in Python, compiled Python, or other languages (such as C)
23# wherein the modified or derived code is exclusively imported into a
24# Python module.
25#
26# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
33
34
Guido van Rossum81762581992-04-21 15:36:23 +000035
36import sys
Guido van Rossum4e160981992-09-02 20:43:20 +000037import os
Guido van Rossumb6775db1994-08-01 11:34:53 +000038import time
Guido van Rossum4e160981992-09-02 20:43:20 +000039import string
Guido van Rossum4e160981992-09-02 20:43:20 +000040import marshal
Guido van Rossum81762581992-04-21 15:36:23 +000041
Guido van Rossum81762581992-04-21 15:36:23 +000042
Guido van Rossumb6775db1994-08-01 11:34:53 +000043# Global variables
# Memo shared by every profiler: raw function key -> normalized key.
func_norm_dict = {}
# Running count used to invent names for functions reported as '?'.
func_norm_counter = 0
# repr() of this process's id, or '' where os.getpid() does not exist.
pid_string = ''
if hasattr(os, 'getpid'):
    pid_string = repr(os.getpid())
Guido van Rossum81762581992-04-21 15:36:23 +000050
Guido van Rossum81762581992-04-21 15:36:23 +000051
Guido van Rossumb6775db1994-08-01 11:34:53 +000052# Sample timer for use with
53#i_count = 0
54#def integer_timer():
55# global i_count
56# i_count = i_count + 1
57# return i_count
58#itimes = integer_timer # replace with C coded timer returning integers
Guido van Rossum81762581992-04-21 15:36:23 +000059
Guido van Rossumb6775db1994-08-01 11:34:53 +000060#**************************************************************************
61# The following are the static member functions for the profiler class
62# Note that an instance of Profile() is *not* needed to call them.
63#**************************************************************************
Guido van Rossum81762581992-04-21 15:36:23 +000064
Guido van Rossum4e160981992-09-02 20:43:20 +000065
66# simplified user interface
def run(statement, *args):
    """Profile `statement` and report the result.

    The statement is executed through Profile.run().  A SystemExit raised
    while it runs is swallowed so the collected data can still be used.

    With one extra positional argument the statistics are written to that
    file name via dump_stats(); otherwise they are printed and the value
    of print_stats() is returned.
    """
    profiler = Profile()
    try:
        profiler = profiler.run(statement)
    except SystemExit:
        pass
    if not args:
        return profiler.print_stats()
    profiler.dump_stats(args[0])
Guido van Rossume61fa0a1993-10-22 13:56:35 +000077
78# print help
def help():
    """Show the file "profile.doc" with the user's pager.

    Each directory on sys.path is checked in order and the first existing
    "profile.doc" is handed to the shell command '${PAGER-more}'.  A
    non-zero exit status from that command is reported; if no file is
    found an apology is printed instead.
    """
    located = None
    for directory in sys.path:
        candidate = os.path.join(directory, 'profile.doc')
        if os.path.exists(candidate):
            located = candidate
            break
    if located is None:
        print('Sorry, can\'t find the help file "profile.doc"' + ' ' +
              'along the Python search path')
        return
    status = os.system('${PAGER-more} ' + located)
    if status:
        print('*** Pager exit status:' + ' ' + str(status))
Guido van Rossumb6775db1994-08-01 11:34:53 +000089
90
91#**************************************************************************
92# class Profile documentation:
93#**************************************************************************
94# self.cur is always a tuple. Each such tuple corresponds to a stack
95# frame that is currently active (self.cur[-2]). The following are the
96# definitions of its members. We use this external "parallel stack" to
97# avoid contaminating the program that we are profiling. (old profiler
98# used to write into the frames local dictionary!!) Derived classes
99# can change the definition of some entries, as long as they leave
100# [-2:] intact.
101#
102# [ 0] = Time that needs to be charged to the parent frame's function. It is
103# used so that a function call will not have to access the timing data
104# for the parents frame.
105# [ 1] = Total time spent in this frame's function, excluding time in
106# subfunctions
107# [ 2] = Cumulative time spent in this frame's function, including time in
108# all subfunctions to this frame.
# [-3] = Name of the function that corresponds to this frame.
# [-2] = Actual frame that we correspond to (used to sync exception handling)
# [-1] = Our parent 6-tuple (corresponds to frame.f_back)
112#**************************************************************************
# Timing data for each function is stored as a 5-tuple in the dictionary
# self.timings[]. The index is always the name stored in self.cur[-3].
115# The following are the definitions of the members:
116#
117# [0] = The number of times this function was called, not counting direct
118# or indirect recursion,
119# [1] = Number of times this function appears on the stack, minus one
120# [2] = Total time spent internal to this function
121# [3] = Cumulative time that this function was present on the stack. In
122# non-recursive functions, this is the total execution time from start
123# to finish of each invocation of a function, including time spent in
124# all subfunctions.
# [4] = A dictionary indicating for each function name, the number of times
# it was called by us.
127#**************************************************************************
# We produce function names via a repr() call on the f_code object during
# profiling. This saves a *lot* of CPU time. This results in a string that
# always looks like:
# <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
# After we "normalize" it, it is a tuple of filename, line, function-name.
133# We wait till we are done profiling to do the normalization.
134# *IF* this repr format changes, then only the normalization routine should
135# need to be fixed.
136#**************************************************************************
class Profile:
    """Profiler fed by the events that sys.setprofile() delivers.

    Instance state:

    timings    -- dictionary: function key -> (cc, ns, tt, ct, callers)
                    cc      call count; bumped on a return only while ns
                            is 0, so recursive re-entries are not counted
                    ns      recursion bookkeeping: raised by every call
                            after the first, lowered by every return
                    tt      time spent inside the function itself
                    ct      cumulative time, only added when ns is 0
                    callers dictionary: caller key -> number of calls
    cur        -- the "parallel stack", a 6-tuple
                  (t, tt, ct, fn, frame, parent) for the active frame;
                  parent is the enclosing 6-tuple, or None at the bottom
    t          -- timer reading taken at the end of the last event
    cmd        -- string installed by set_cmd() as the outermost caller
    fake_names -- dictionary: repr() of a fake code object -> its
                  (filename, line, name) tuple, see simulate_call()

    Function keys are repr(frame.f_code) strings while profiling; they are
    turned into (filename, line, name) tuples by func_normalize() when the
    statistics are snapshotted.
    """

    def __init__(self, timer=None):
        """Set up the tables and pick the dispatch routine for `timer`.

        Without a timer, os.times is used when the os module has it and
        time.time otherwise.  A supplied timer is called once to inspect
        its result: a 2-item result selects trace_dispatch, a result of
        any other length selects trace_dispatch_l, and a result without a
        length selects trace_dispatch_i.
        """
        self.timings = {}
        self.cur = None
        self.cmd = ""
        # Must exist before simulate_call() runs at the end of __init__.
        self.fake_names = {}

        self.dispatch = {
            'call': self.trace_dispatch_call,
            'return': self.trace_dispatch_return,
            'exception': self.trace_dispatch_exception,
            # Events for built-in functions, which newer interpreters
            # report through sys.setprofile(); they are not recorded.
            'c_call': self.trace_dispatch_ignore,
            'c_return': self.trace_dispatch_ignore,
            'c_exception': self.trace_dispatch_ignore,
        }

        if not timer:
            if hasattr(os, 'times'):
                self.timer = os.times
                self.dispatcher = self.trace_dispatch
            else:
                self.timer = time.time
                self.dispatcher = self.trace_dispatch_i
        else:
            self.timer = timer
            t = self.timer()  # test out timer function
            try:
                if len(t) == 2:
                    self.dispatcher = self.trace_dispatch
                else:
                    self.dispatcher = self.trace_dispatch_l
            except TypeError:
                self.dispatcher = self.trace_dispatch_i
        self.t = self.get_time()
        self.simulate_call('profiler')

    def get_time(self):
        """Return the current timer reading as a single number.

        This is the slow, general way to read the clock.  It has to agree
        with the dispatch routine in use, because both store their
        readings in self.t: trace_dispatch() only adds the first two
        items of the timer result, so the same is done here when that
        routine is selected.  Any other tuple or list result is summed
        completely, and a scalar result is returned unchanged.
        """
        t = self.timer()
        if isinstance(t, (tuple, list)):
            if self.dispatcher == self.trace_dispatch:
                return t[0] + t[1]
            return sum(t)
        return t

    # Heavily optimized dispatch routine for os.times() timer

    def trace_dispatch(self, frame, event, arg):
        """Handle one event using the first two items of the timer."""
        t = self.timer()
        t = t[0] + t[1] - self.t  # No Calibration constant
        # t = t[0] + t[1] - self.t - .00053 # Calibration constant

        if self.dispatch[event](frame, t):
            t = self.timer()
            self.t = t[0] + t[1]
        else:
            r = self.timer()
            self.t = r[0] + r[1] - t  # put back unrecorded delta
        return

    # Dispatch routine for best timer program (return = scalar integer)

    def trace_dispatch_i(self, frame, event, arg):
        """Handle one event using a timer that returns a scalar."""
        t = self.timer() - self.t  # - 1 # Integer calibration constant
        if self.dispatch[event](frame, t):
            self.t = self.timer()
        else:
            self.t = self.timer() - t  # put back unrecorded delta
        return

    # SLOW generic dispatch routine for timer returning lists of numbers

    def trace_dispatch_l(self, frame, event, arg):
        """Handle one event, reading the clock through get_time()."""
        t = self.get_time() - self.t

        if self.dispatch[event](frame, t):
            self.t = self.get_time()
        else:
            self.t = self.get_time() - t  # put back unrecorded delta
        return

    def trace_dispatch_ignore(self, frame, t):
        """Leave an event unrecorded.

        Returning 0 makes the dispatch routine put the elapsed time back,
        so it is charged when the next recorded event arrives.
        """
        return 0

    def trace_dispatch_exception(self, frame, t):
        """Record an exception event.

        When the event comes from a frame other than the one on top of
        the parallel stack, and that top entry has a parent, the top
        entry is unwound as if it had returned.  Otherwise nothing is
        recorded and 0 is returned.
        """
        rt, rtt, rct, rfn, rframe, rcur = self.cur
        if (rframe is not frame) and rcur:
            return self.trace_dispatch_return(rframe, t)
        return 0

    def trace_dispatch_call(self, frame, t):
        """Push `frame` on the parallel stack; `t` is owed to the caller."""
        fn = repr(frame.f_code)

        # The following should be about the best approach, but
        # we would need a function that maps from id() back to
        # the actual code object.
        #     fn = id(frame.f_code)
        # Note we would really use our own function, which would
        # return the code address, *and* bump the ref count.  We
        # would then fix up the normalize function to do the
        # actual repr(fn) call.

        self.cur = (t, 0, 0, fn, frame, self.cur)
        if fn in self.timings:
            cc, ns, tt, ct, callers = self.timings[fn]
            self.timings[fn] = cc, ns + 1, tt, ct, callers
        else:
            self.timings[fn] = 0, 0, 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        """Pop the top of the parallel stack and book its times.

        The `frame` argument is not consulted; the frame stored in
        self.cur is the one that is unwound.
        """
        # Prefix "r" means part of the Returning or exiting frame
        # Prefix "p" means part of the Previous or older frame

        rt, rtt, rct, rfn, frame, rcur = self.cur
        rtt = rtt + t
        sft = rtt + rct

        # The caller receives the time noted at call time (rt) as its own
        # and the callee's whole span (sft) as subfunction time.
        pt, ptt, pct, pfn, pframe, pcur = rcur
        self.cur = pt, ptt + rt, pct + sft, pfn, pframe, pcur

        cc, ns, tt, ct, callers = self.timings[rfn]
        if not ns:
            ct = ct + sft
            cc = cc + 1
        # hack: gather more stats such as the amount of time added to ct
        # courtesy of this specific call, and the contribution to cc
        # courtesy of this call.
        if pfn in callers:
            callers[pfn] = callers[pfn] + 1
        else:
            callers[pfn] = 1
        self.timings[rfn] = cc, ns - 1, tt + rtt, ct, callers

        return 1

    # The next few functions play with self.cmd.  By carefully preloading
    # our parallel stack, we can force the profiled result to include
    # an arbitrary string as the name of the calling function.
    # We use self.cmd as that string, and the resulting stats look
    # very nice :-).

    def set_cmd(self, cmd):
        """Install `cmd` as the outermost caller, unless one is set."""
        if self.cur[-1]:
            return  # already set
        self.cmd = cmd
        self.simulate_call(cmd)

    class fake_code:
        """Stand-in for a code object, used for simulated calls."""

        def __init__(self, filename, line, name):
            self.co_filename = filename
            self.co_line = line
            self.co_name = name
            self.co_code = '\0'  # anything but 127

        def __repr__(self):
            # repr() must yield a string, so the identifying tuple is
            # rendered as text here.
            return repr((self.co_filename, self.co_line, self.co_name))

    class fake_frame:
        """Stand-in for a frame object, used for simulated calls."""

        def __init__(self, code, prior):
            self.f_code = code
            self.f_back = prior

    def simulate_call(self, name):
        """Feed a 'call' event for a made-up function called `name`."""
        code = self.fake_code('profile', 0, name)
        # Remember which tuple this fake code stands for, so that
        # func_normalize() can hand it back later.
        self.fake_names[repr(code)] = ('profile', 0, name)
        if self.cur:
            pframe = self.cur[-2]
        else:
            pframe = None
        frame = self.fake_frame(code, pframe)
        self.dispatch['call'](frame, 0)
        return

    # collect stats from pending stack, including getting final
    # timings for self.cmd frame.

    def simulate_cmd_complete(self):
        """Unwind every pending entry except the bottom one."""
        t = self.get_time() - self.t
        while self.cur[-1]:
            # We *can* cause assertion errors here if
            # dispatch_trace_return checks for a frame match!
            self.dispatch['return'](self.cur[-2], t)
            t = 0
        self.t = self.get_time() - t

    def print_stats(self):
        """Print the statistics through the pstats module."""
        import pstats
        pstats.Stats(self).strip_dirs().sort_stats(-1).print_stats()

    def dump_stats(self, file):
        """Marshal the statistics into the file named `file`.

        The statistics are built before the file is opened, the file is
        opened in binary mode because marshal writes binary data, and it
        is closed even when dumping fails.
        """
        self.create_stats()
        f = open(file, 'wb')
        try:
            marshal.dump(self.stats, f)
        finally:
            f.close()

    def create_stats(self):
        """Finish pending frames and build self.stats."""
        self.simulate_cmd_complete()
        self.snapshot_stats()

    def snapshot_stats(self):
        """Build self.stats from self.timings with normalized keys.

        Each entry becomes (cc, nc, tt, ct, callers) where nc is the sum
        of the per-caller call counts.
        """
        self.stats = {}
        for func in self.timings.keys():
            cc, ns, tt, ct, callers = self.timings[func]
            nor_func = self.func_normalize(func)
            nor_callers = {}
            nc = 0
            for func_caller in callers.keys():
                nor_callers[self.func_normalize(func_caller)] = \
                    callers[func_caller]
                nc = nc + callers[func_caller]
            self.stats[nor_func] = cc, nc, tt, ct, nor_callers

    # Override the following function if you can figure out
    # a better name for the binary f_code entries.  I just normalize
    # them sequentially in a dictionary.  It would be nice if we could
    # *really* see the name of the underlying C code :-).  Sometimes
    # you can figure out what-is-what by looking at caller and callee
    # lists (and knowing what your python code does).

    def func_normalize(self, func_name):
        """Turn a function key into a (filename, line, name) tuple.

        Results are memoized in the module-level func_norm_dict.  Keys
        recorded by simulate_call() map back to their tuple.  Other
        string keys are taken apart as the repr() of a code object,
            <code object NAME at ADDR, file "FILE", line N>
        by splitting on whitespace, so a file name containing blanks is
        not handled.  A function called '?' gets the file name 'python'
        and a name made from pid_string and a running counter.  Keys that
        are not strings are returned unchanged.
        """
        global func_norm_dict
        global func_norm_counter

        if func_name in func_norm_dict:
            return func_norm_dict[func_name]
        if func_name in self.fake_names:
            result = self.fake_names[func_name]
        elif isinstance(func_name, str):
            long_name = func_name.split()
            file_name = long_name[-3][1:-2]
            func = long_name[2]
            lineno = long_name[-1][:-1]
            if '?' == func:  # Until I find out how to map 'em...
                file_name = 'python'
                func_norm_counter = func_norm_counter + 1
                func = pid_string + ".C." + repr(func_norm_counter)
            result = file_name, int(lineno), func
        else:
            result = func_name
        func_norm_dict[func_name] = result
        return result

    # The following two methods can be called by clients to use
    # a profiler to profile a statement, given as a string.

    def run(self, cmd):
        """Profile `cmd` in the namespace of __main__; return self."""
        import __main__
        namespace = __main__.__dict__
        return self.runctx(cmd, namespace, namespace)

    def runctx(self, cmd, globals, locals):
        """Profile `cmd` in the given namespaces; return self.

        The profile hook is removed again even when `cmd` raises.
        """
        self.set_cmd(cmd)
        sys.setprofile(self.dispatcher)
        try:
            exec(cmd, globals, locals)
        finally:
            sys.setprofile(None)
        return self

    # This method is more useful to profile a single function call.
    def runcall(self, func, *args):
        """Profile the call func(*args) and return its result.

        The profile hook is removed again even when the call raises.
        """
        self.set_cmd(repr(func))
        sys.setprofile(self.dispatcher)
        try:
            return func(*args)
        finally:
            sys.setprofile(None)

    #******************************************************************
    # The following calculates the overhead for using a profiler.  The
    # problem is that it takes a fair amount of time for the profiler
    # to stop the stopwatch (from the time it receives an event).
    # Similarly, there is a delay from the time that the profiler
    # re-starts the stopwatch before the user's code really gets to
    # continue.  The following code tries to measure the difference on
    # a per-event basis.  The result can then be placed in the
    # dispatch routine for the given platform (see the commented-out
    # calibration constants in trace_dispatch and trace_dispatch_i).
    # Note that this difference is only significant if there are a lot
    # of events, and relatively little user code per event.  For
    # example, code with small functions will typically benefit from
    # having the profiler calibrated for the current platform.  This
    # *could* be done on the fly during init() time, but it is not
    # worth the effort.  Also note that if too large a value is
    # specified, then execution time on some functions will actually
    # appear as a negative number.  It is *normal* for some functions
    # (with very low call counts) to have such negative stats, even if
    # the calibration figure is "correct."
    #
    # One alternative to profile-time calibration adjustments (i.e.,
    # adding in the magic little delta during each event) is to track
    # more carefully the number of events (and cumulatively, the number
    # of events during sub functions) that are seen.  If this were
    # done, then the arithmetic could be done after the fact (i.e., at
    # display time).  Currently, we track only call/return events.
    # These values can be deduced by examining the callees and callers
    # vectors for each function.  Hence we *can* almost correct the
    # internal time figure at print time (note that we currently don't
    # track exception event processing counts).  Unfortunately, there
    # is currently no similar information for cumulative sub-function
    # time.  It would not be hard to "get all this info" at profiler
    # time.  Specifically, we would have to extend the tuples to keep
    # counts of this in each frame, and then extend the defs of timing
    # tuples to include the significant two figures.  I'm a bit fearful
    # that this additional feature will slow the heavily optimized
    # event/time ratio (i.e., the profiler would run slower, for a very
    # low "value added" feature.)
    #
    # Plugging in the calibration constant doesn't slow down the
    # profiler very much, and the accuracy goes way up.
    #******************************************************************

    def calibrate(self, m):
        """Return the measured per-call cost of event processing.

        simple() and instrumented() are each called `m` times and the
        difference of the two timings is divided by `m`.  The timer is
        indexed with [0] and [1] here, so this only works with a timer
        that returns a sequence (such as os.times).
        """
        n = m
        s = self.timer()
        while n:
            self.simple()
            n = n - 1
        f = self.timer()
        my_simple = f[0] + f[1] - s[0] - s[1]
        #print "Simple =", my_simple,

        n = m
        s = self.timer()
        while n:
            self.instrumented()
            n = n - 1
        f = self.timer()
        my_inst = f[0] + f[1] - s[0] - s[1]
        # print "Instrumented =", my_inst
        avg_cost = (my_inst - my_simple) / m
        #print "Delta/call =", avg_cost, "(profiler fixup constant)"
        return avg_cost

    # simulate a program with no profiler activity
    def simple(self):
        a = 1
        pass

    # simulate a program with call/return event processing
    def instrumented(self):
        a = 1
        self.profiler_simulation(a, a, a)

    # simulate an event processing activity (from user's perspective)
    def profiler_simulation(self, x, y, z):
        t = self.timer()
        t = t[0] + t[1]
        self.ut = t
500
501
502
503#****************************************************************************
504# OldProfile class documentation
505#****************************************************************************
506#
# The following derived profiler simulates the old style profile, providing
# errant results on recursive functions. This profiler is useful because
# it runs faster (i.e., with less overhead). It still creates
# all the caller stats, and is quite useful when there is *no* recursion
# in the user's code.
512#
513# This code also shows how easy it is to create a modified profiler.
514#****************************************************************************
class OldProfile(Profile):
    """Profile variant that keeps no recursion bookkeeping.

    self.timings maps a function key to the 3-tuple (tt, ct, callers):
    time inside the function, cumulative time, and a dictionary from
    caller key to number of calls.  The cumulative time is added on every
    return, whether or not the function is already on the stack.
    """

    def trace_dispatch_exception(self, frame, t):
        """Unwind the top entry when the event is for another frame."""
        top_frame = self.cur[4]
        parent = self.cur[5]
        if not parent or top_frame is frame:
            return 0
        return self.trace_dispatch_return(top_frame, t)

    def trace_dispatch_call(self, frame, t):
        """Push `frame`; make sure its function has a timings entry."""
        key = repr(frame.f_code)
        self.cur = (t, 0, 0, key, frame, self.cur)
        if key not in self.timings:
            self.timings[key] = 0, 0, {}
        return 1

    def trace_dispatch_return(self, frame, t):
        """Pop the top entry and book its times and its caller."""
        owed, own, sub, key, frame, parent = self.cur
        own = own + t
        span = own + sub

        p_owed, p_own, p_sub, p_key, p_frame, p_parent = parent
        self.cur = p_owed, p_own + owed, p_sub + span, p_key, p_frame, \
            p_parent

        total, cumulative, callers = self.timings[key]
        callers[p_key] = callers.get(p_key, 0) + 1
        self.timings[key] = total + own, cumulative + span, callers

        return 1

    def snapshot_stats(self):
        """Build self.stats; the call count fills both count slots."""
        self.stats = {}
        for key, entry in self.timings.items():
            total, cumulative, callers = entry
            normalized = self.func_normalize(key)
            normalized_callers = {}
            calls = 0
            for caller_key, count in callers.items():
                normalized_callers[self.func_normalize(caller_key)] = count
                calls = calls + count
            self.stats[normalized] = (calls, calls, total, cumulative,
                                      normalized_callers)
563
564
565
566#****************************************************************************
567# HotProfile class documentation
568#****************************************************************************
569#
570# This profiler is the fastest derived profile example. It does not
571# calculate caller-callee relationships, and does not calculate cumulative
572# time under a function. It only calculates time spent in a function, so
573# it runs very quickly (re: very low overhead)
574#****************************************************************************
class HotProfile(Profile):
    """Profile variant that only keeps a call count and a time.

    The parallel stack entries are 4-tuples (t, tt, frame, parent), and
    self.timings maps a function key to the pair (nc, tt).  No caller
    information and no cumulative time is kept.
    """

    def trace_dispatch_exception(self, frame, t):
        """Unwind the top entry when the event is for another frame.

        self.cur is a 4-tuple in this class, so exactly four names are
        unpacked; unpacking five would raise ValueError on every
        exception event.
        """
        rt, rtt, rframe, rcur = self.cur
        if rcur and rframe is not frame:
            return self.trace_dispatch_return(rframe, t)
        return 0

    def trace_dispatch_call(self, frame, t):
        """Push `frame` on the parallel stack."""
        self.cur = (t, 0, frame, self.cur)
        return 1

    def trace_dispatch_return(self, frame, t):
        """Pop the top entry and add its time to its function.

        NOTE(review): the `t` argument is not added anywhere here; only
        the values stored in the stack entry are booked.  Confirm that
        this is intended.
        """
        rt, rtt, frame, rcur = self.cur

        # The key is only computed on return, not on call.
        rfn = repr(frame.f_code)

        pt, ptt, pframe, pcur = rcur
        self.cur = pt, ptt + rt, pframe, pcur

        if rfn in self.timings:
            nc, tt = self.timings[rfn]
            self.timings[rfn] = nc + 1, rt + rtt + tt
        else:
            self.timings[rfn] = 1, rt + rtt

        return 1

    def snapshot_stats(self):
        """Build self.stats with zero cumulative time and no callers."""
        self.stats = {}
        for func in self.timings.keys():
            nc, tt = self.timings[func]
            nor_func = self.func_normalize(func)
            self.stats[nor_func] = nc, nc, tt, 0, {}
609
610
611
612#****************************************************************************
def Stats(*args):
    """Tell the caller where the reporting code is.

    All arguments are ignored; a one-line notice ending in a bell
    character is printed.
    """
    notice = 'Report generating functions are in the "pstats" module\a'
    print(notice)