blob: baa95e185ac351391ec63491e79a7b9bf088fee8 [file] [log] [blame]
Guido van Rossum81762581992-04-21 15:36:23 +00001#
Guido van Rossumb6775db1994-08-01 11:34:53 +00002# Class for profiling python code. rev 1.0 6/2/94
Guido van Rossum81762581992-04-21 15:36:23 +00003#
Guido van Rossumb6775db1994-08-01 11:34:53 +00004# Based on prior profile module by Sjoerd Mullender...
5# which was hacked somewhat by: Guido van Rossum
6#
7# See profile.doc for more information
8
9
10# Copyright 1994, by InfoSeek Corporation, all rights reserved.
11# Written by James Roskind
12#
13# Permission to use, copy, modify, and distribute this Python software
14# and its associated documentation for any purpose (subject to the
15# restriction in the following sentence) without fee is hereby granted,
16# provided that the above copyright notice appears in all copies, and
17# that both that copyright notice and this permission notice appear in
18# supporting documentation, and that the name of InfoSeek not be used in
19# advertising or publicity pertaining to distribution of the software
20# without specific, written prior permission. This permission is
21# explicitly restricted to the copying and modification of the software
22# to remain in Python, compiled Python, or other languages (such as C)
23# wherein the modified or derived code is exclusively imported into a
24# Python module.
25#
26# INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
27# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
28# FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY
29# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
30# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
31# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
32# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
33
34
Guido van Rossum81762581992-04-21 15:36:23 +000035
36import sys
Guido van Rossum4e160981992-09-02 20:43:20 +000037import os
Guido van Rossumb6775db1994-08-01 11:34:53 +000038import time
Guido van Rossum4e160981992-09-02 20:43:20 +000039import string
Guido van Rossum4e160981992-09-02 20:43:20 +000040import marshal
Guido van Rossum81762581992-04-21 15:36:23 +000041
Guido van Rossum81762581992-04-21 15:36:23 +000042
Guido van Rossumb6775db1994-08-01 11:34:53 +000043# Global variables
44func_norm_dict = {}
45func_norm_counter = 0
46pid_string = `os.getpid()`
Guido van Rossum4e160981992-09-02 20:43:20 +000047
Guido van Rossum81762581992-04-21 15:36:23 +000048
Guido van Rossumb6775db1994-08-01 11:34:53 +000049# Optimized intermodule references
50ostimes = os.times
Guido van Rossum81762581992-04-21 15:36:23 +000051
Guido van Rossum81762581992-04-21 15:36:23 +000052
Guido van Rossumb6775db1994-08-01 11:34:53 +000053# Sample timer for use with
54#i_count = 0
55#def integer_timer():
56# global i_count
57# i_count = i_count + 1
58# return i_count
59#itimes = integer_timer # replace with C coded timer returning integers
Guido van Rossum81762581992-04-21 15:36:23 +000060
Guido van Rossumb6775db1994-08-01 11:34:53 +000061#**************************************************************************
62# The following are the static member functions for the profiler class
63# Note that an instance of Profile() is *not* needed to call them.
64#**************************************************************************
Guido van Rossum81762581992-04-21 15:36:23 +000065
Guido van Rossum4e160981992-09-02 20:43:20 +000066
67# simplified user interface
68def run(statement, *args):
Guido van Rossum7bc817d1993-12-17 15:25:27 +000069 prof = Profile()
Guido van Rossum4e160981992-09-02 20:43:20 +000070 try:
Guido van Rossumb6775db1994-08-01 11:34:53 +000071 prof = prof.run(statement)
Guido van Rossum4e160981992-09-02 20:43:20 +000072 except SystemExit:
73 pass
Guido van Rossumb6775db1994-08-01 11:34:53 +000074 if args:
Guido van Rossum4e160981992-09-02 20:43:20 +000075 prof.dump_stats(args[0])
Guido van Rossumb6775db1994-08-01 11:34:53 +000076 else:
77 return prof.print_stats()
Guido van Rossume61fa0a1993-10-22 13:56:35 +000078
79# print help
80def help():
81 for dirname in sys.path:
82 fullname = os.path.join(dirname, 'profile.doc')
83 if os.path.exists(fullname):
84 sts = os.system('${PAGER-more} '+fullname)
85 if sts: print '*** Pager exit status:', sts
86 break
87 else:
88 print 'Sorry, can\'t find the help file "profile.doc"',
89 print 'along the Python search path'
Guido van Rossumb6775db1994-08-01 11:34:53 +000090
91
92#**************************************************************************
93# class Profile documentation:
94#**************************************************************************
95# self.cur is always a tuple. Each such tuple corresponds to a stack
96# frame that is currently active (self.cur[-2]). The following are the
97# definitions of its members. We use this external "parallel stack" to
98# avoid contaminating the program that we are profiling. (old profiler
99# used to write into the frames local dictionary!!) Derived classes
100# can change the definition of some entries, as long as they leave
101# [-2:] intact.
102#
103# [ 0] = Time that needs to be charged to the parent frame's function. It is
104# used so that a function call will not have to access the timing data
105# for the parents frame.
106# [ 1] = Total time spent in this frame's function, excluding time in
107# subfunctions
108# [ 2] = Cumulative time spent in this frame's function, including time in
109# all subfunctions to this frame.
110# [-3] = Name of the function that corresonds to this frame.
111# [-2] = Actual frame that we correspond to (used to sync exception handling)
112# [-1] = Our parent 6-tuple (corresonds to frame.f_back)
113#**************************************************************************
114# Timing data for each function is stored as a 5-tuple in the dictionary
115# self.timings[]. The index is always the name stored in self.cur[4].
116# The following are the definitions of the members:
117#
118# [0] = The number of times this function was called, not counting direct
119# or indirect recursion,
120# [1] = Number of times this function appears on the stack, minus one
121# [2] = Total time spent internal to this function
122# [3] = Cumulative time that this function was present on the stack. In
123# non-recursive functions, this is the total execution time from start
124# to finish of each invocation of a function, including time spent in
125# all subfunctions.
126# [5] = A dictionary indicating for each function name, the number of times
127# it was called by us.
128#**************************************************************************
129# We produce function names via a repr() call on the f_code object during
130# profiling. This save a *lot* of CPU time. This results in a string that
131# always looks like:
132# <code object main at 87090, file "/a/lib/python-local/myfib.py", line 76>
133# After we "normalize it, it is a tuple of filename, line, function-name.
134# We wait till we are done profiling to do the normalization.
135# *IF* this repr format changes, then only the normalization routine should
136# need to be fixed.
137#**************************************************************************
138class Profile:
139
140 def __init__(self, *arg):
141 self.timings = {}
142 self.cur = None
143 self.cmd = ""
144
145 self.dispatch = { \
146 'call' : self.trace_dispatch_call, \
147 'return' : self.trace_dispatch_return, \
148 'exception': self.trace_dispatch_exception, \
149 }
150
151 if not arg:
152 self.timer = os.times
153 self.dispatcher = self.trace_dispatch
154 else:
155 self.timer = arg[0]
156 t = self.timer() # test out timer function
157 try:
158 if len(t) == 2:
159 self.dispatcher = self.trace_dispatch
160 else:
161 self.dispatcher = self.trace_dispatch_r
162 except:
163 self.dispatcher = self.trace_dispatch_i
164 self.t = self.get_time()
165 self.simulate_call('profiler')
166
167
168 def get_time(self): # slow simulation of method to acquire time
169 t = self.timer()
170 if type(t) == type(()) or type(t) == type([]):
171 t = reduce(lambda x,y: x+y, t, 0)
172 return t
173
174
175 # Heavily optimized dispatch routine for os.times() timer
176
177 def trace_dispatch(self, frame, event, arg):
178 t = self.timer()
179 t = t[0] + t[1] - self.t # No Calibration constant
180 # t = t[0] + t[1] - self.t - .00053 # Calibration constant
181
182 if self.dispatch[event](frame,t):
183 t = self.timer()
184 self.t = t[0] + t[1]
185 else:
186 r = self.timer()
187 self.t = r[0] + r[1] - t # put back unrecorded delta
188 return
189
190
191
192 # Dispatch routine for best timer program (return = scalar integer)
193
194 def trace_dispatch_i(self, frame, event, arg):
195 t = self.timer() - self.t # - 1 # Integer calibration constant
196 if self.dispatch[event](frame,t):
197 self.t = self.timer()
198 else:
199 self.t = self.timer() - t # put back unrecorded delta
200 return
201
202
203 # SLOW generic dispatch rountine for timer returning lists of numbers
204
205 def trace_dispatch_l(self, frame, event, arg):
206 t = self.get_time() - self.t
207
208 if self.dispatch[event](frame,t):
209 self.t = self.get_time()
210 else:
211 self.t = self.get_time()-t # put back unrecorded delta
212 return
213
214
215 def trace_dispatch_exception(self, frame, t):
216 rt, rtt, rct, rfn, rframe, rcur = self.cur
217 if (not rframe is frame) and rcur:
218 return self.trace_dispatch_return(rframe, t)
219 return 0
220
221
222 def trace_dispatch_call(self, frame, t):
223 fn = `frame.f_code`
224
225 # The following should be about the best approach, but
226 # we would need a function that maps from id() back to
227 # the actual code object.
228 # fn = id(frame.f_code)
229 # Note we would really use our own function, which would
230 # return the code address, *and* bump the ref count. We
231 # would then fix up the normalize function to do the
232 # actualy repr(fn) call.
233
234 # The following is an interesting alternative
235 # It doesn't do as good a job, and it doesn't run as
236 # fast 'cause repr() is written in C, and this is Python.
237 #fcode = frame.f_code
238 #code = fcode.co_code
239 #if ord(code[0]) == 127: # == SET_LINENO
240 # # see "opcode.h" in the Python source
241 # fn = (fcode.co_filename, ord(code[1]) | \
242 # ord(code[2]) << 8, fcode.co_name)
243 #else:
244 # fn = (fcode.co_filename, 0, fcode.co_name)
245
246 self.cur = (t, 0, 0, fn, frame, self.cur)
247 if self.timings.has_key(fn):
248 cc, ns, tt, ct, callers = self.timings[fn]
249 self.timings[fn] = cc, ns + 1, tt, ct, callers
250 else:
251 self.timings[fn] = 0, 0, 0, 0, {}
252 return 1
253
254 def trace_dispatch_return(self, frame, t):
255 # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
256
257 # Prefix "r" means part of the Returning or exiting frame
258 # Prefix "p" means part of the Previous or older frame
259
260 rt, rtt, rct, rfn, frame, rcur = self.cur
261 rtt = rtt + t
262 sft = rtt + rct
263
264 pt, ptt, pct, pfn, pframe, pcur = rcur
265 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
266
267 cc, ns, tt, ct, callers = self.timings[rfn]
268 if not ns:
269 ct = ct + sft
270 cc = cc + 1
271 if callers.has_key(pfn):
272 callers[pfn] = callers[pfn] + 1 # hack: gather more
273 # stats such as the amount of time added to ct courtesy
274 # of this specific call, and the contribution to cc
275 # courtesy of this call.
276 else:
277 callers[pfn] = 1
278 self.timings[rfn] = cc, ns - 1, tt+rtt, ct, callers
279
280 return 1
281
282 # The next few function play with self.cmd. By carefully preloading
283 # our paralell stack, we can force the profiled result to include
284 # an arbitrary string as the name of the calling function.
285 # We use self.cmd as that string, and the resulting stats look
286 # very nice :-).
287
288 def set_cmd(self, cmd):
289 if self.cur[-1]: return # already set
290 self.cmd = cmd
291 self.simulate_call(cmd)
292
293 class fake_code:
294 def __init__(self, filename, line, name):
295 self.co_filename = filename
296 self.co_line = line
297 self.co_name = name
298 self.co_code = '\0' # anything but 127
299
300 def __repr__(self):
301 return (self.co_filename, self.co_line, self.co_name)
302
303 class fake_frame:
304 def __init__(self, code, prior):
305 self.f_code = code
306 self.f_back = prior
307
308 def simulate_call(self, name):
309 code = self.fake_code('profile', 0, name)
310 if self.cur:
311 pframe = self.cur[-2]
312 else:
313 pframe = None
314 frame = self.fake_frame(code, pframe)
315 a = self.dispatch['call'](frame, 0)
316 return
317
318 # collect stats from pending stack, including getting final
319 # timings for self.cmd frame.
320
321 def simulate_cmd_complete(self):
322 t = self.get_time() - self.t
323 while self.cur[-1]:
324 # We *can* cause assertion errors here if
325 # dispatch_trace_return checks for a frame match!
326 a = self.dispatch['return'](self.cur[-2], t)
327 t = 0
328 self.t = self.get_time() - t
329
330
331 def print_stats(self):
332 import pstats
333 pstats.Stats(self).strip_dirs().sort_stats(-1). \
334 print_stats()
335
336 def dump_stats(self, file):
337 f = open(file, 'w')
338 self.create_stats()
339 marshal.dump(self.stats, f)
340 f.close()
341
342 def create_stats(self):
343 self.simulate_cmd_complete()
344 self.snapshot_stats()
345
346 def snapshot_stats(self):
347 self.stats = {}
348 for func in self.timings.keys():
349 cc, ns, tt, ct, callers = self.timings[func]
350 nor_func = self.func_normalize(func)
351 nor_callers = {}
352 nc = 0
353 for func_caller in callers.keys():
354 nor_callers[self.func_normalize(func_caller)]=\
355 callers[func_caller]
356 nc = nc + callers[func_caller]
357 self.stats[nor_func] = cc, nc, tt, ct, nor_callers
358
359
360 # Override the following function if you can figure out
361 # a better name for the binary f_code entries. I just normalize
362 # them sequentially in a dictionary. It would be nice if we could
363 # *really* see the name of the underlying C code :-). Sometimes
364 # you can figure out what-is-what by looking at caller and callee
365 # lists (and knowing what your python code does).
366
367 def func_normalize(self, func_name):
368 global func_norm_dict
369 global func_norm_counter
370 global func_sequence_num
371
372 if func_norm_dict.has_key(func_name):
373 return func_norm_dict[func_name]
374 if type(func_name) == type(""):
375 long_name = string.split(func_name)
376 file_name = long_name[6][1:-2]
377 func = long_name[2]
378 lineno = long_name[8][:-1]
379 if '?' == func: # Until I find out how to may 'em...
380 file_name = 'python'
381 func_norm_counter = func_norm_counter + 1
382 func = pid_string + ".C." + `func_norm_counter`
383 result = file_name , string.atoi(lineno) , func
384 else:
385 result = func_name
386 func_norm_dict[func_name] = result
387 return result
388
389
390 # The following two methods can be called by clients to use
391 # a profiler to profile a statement, given as a string.
392
393 def run(self, cmd):
394 import __main__
395 dict = __main__.__dict__
396 self.runctx(cmd, dict, dict)
397 return self
398
399 def runctx(self, cmd, globals, locals):
400 self.set_cmd(cmd)
401 sys.setprofile(self.trace_dispatch)
402 try:
Guido van Rossum9c3241d1995-08-10 19:46:50 +0000403 exec cmd in globals, locals
Guido van Rossumb6775db1994-08-01 11:34:53 +0000404 finally:
405 sys.setprofile(None)
406
407 # This method is more useful to profile a single function call.
408 def runcall(self, func, *args):
Guido van Rossum8afa8241995-06-22 18:52:35 +0000409 self.set_cmd(`func`)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000410 sys.setprofile(self.trace_dispatch)
411 try:
412 apply(func, args)
413 finally:
414 sys.setprofile(None)
415 return self
416
417
418 #******************************************************************
419 # The following calculates the overhead for using a profiler. The
420 # problem is that it takes a fair amount of time for the profiler
421 # to stop the stopwatch (from the time it recieves an event).
422 # Similarly, there is a delay from the time that the profiler
423 # re-starts the stopwatch before the user's code really gets to
424 # continue. The following code tries to measure the difference on
425 # a per-event basis. The result can the be placed in the
426 # Profile.dispatch_event() routine for the given platform. Note
427 # that this difference is only significant if there are a lot of
428 # events, and relatively little user code per event. For example,
429 # code with small functions will typically benefit from having the
430 # profiler calibrated for the current platform. This *could* be
431 # done on the fly during init() time, but it is not worth the
432 # effort. Also note that if too large a value specified, then
433 # execution time on some functions will actually appear as a
434 # negative number. It is *normal* for some functions (with very
435 # low call counts) to have such negative stats, even if the
436 # calibration figure is "correct."
437 #
438 # One alternative to profile-time calibration adjustments (i.e.,
439 # adding in the magic little delta during each event) is to track
440 # more carefully the number of events (and cumulatively, the number
441 # of events during sub functions) that are seen. If this were
442 # done, then the arithmetic could be done after the fact (i.e., at
443 # display time). Currintly, we track only call/return events.
444 # These values can be deduced by examining the callees and callers
445 # vectors for each functions. Hence we *can* almost correct the
446 # internal time figure at print time (note that we currently don't
447 # track exception event processing counts). Unfortunately, there
448 # is currently no similar information for cumulative sub-function
449 # time. It would not be hard to "get all this info" at profiler
450 # time. Specifically, we would have to extend the tuples to keep
451 # counts of this in each frame, and then extend the defs of timing
452 # tuples to include the significant two figures. I'm a bit fearful
453 # that this additional feature will slow the heavily optimized
454 # event/time ratio (i.e., the profiler would run slower, fur a very
455 # low "value added" feature.)
456 #
457 # Plugging in the calibration constant doesn't slow down the
458 # profiler very much, and the accuracy goes way up.
459 #**************************************************************
460
461 def calibrate(self, m):
462 n = m
463 s = self.timer()
464 while n:
465 self.simple()
466 n = n - 1
467 f = self.timer()
468 my_simple = f[0]+f[1]-s[0]-s[1]
469 #print "Simple =", my_simple,
470
471 n = m
472 s = self.timer()
473 while n:
474 self.instrumented()
475 n = n - 1
476 f = self.timer()
477 my_inst = f[0]+f[1]-s[0]-s[1]
478 # print "Instrumented =", my_inst
479 avg_cost = (my_inst - my_simple)/m
480 #print "Delta/call =", avg_cost, "(profiler fixup constant)"
481 return avg_cost
482
483 # simulate a program with no profiler activity
484 def simple(self):
485 a = 1
486 pass
487
488 # simulate a program with call/return event processing
489 def instrumented(self):
490 a = 1
491 self.profiler_simulation(a, a, a)
492
493 # simulate an event processing activity (from user's perspective)
494 def profiler_simulation(self, x, y, z):
495 t = self.timer()
496 t = t[0] + t[1]
497 self.ut = t
498
499
500
501#****************************************************************************
502# OldProfile class documentation
503#****************************************************************************
504#
505# The following derived profiler simulates the old style profile, providing
506# errant results on recursive functions. The reason for the usefulnes of this
507# profiler is that it runs faster (i.e., less overhead). It still creates
508# all the caller stats, and is quite useful when there is *no* recursion
509# in the user's code.
510#
511# This code also shows how easy it is to create a modified profiler.
512#****************************************************************************
513class OldProfile(Profile):
514 def trace_dispatch_exception(self, frame, t):
515 rt, rtt, rct, rfn, rframe, rcur = self.cur
516 if rcur and not rframe is frame:
517 return self.trace_dispatch_return(rframe, t)
518 return 0
519
520 def trace_dispatch_call(self, frame, t):
521 fn = `frame.f_code`
522
523 self.cur = (t, 0, 0, fn, frame, self.cur)
524 if self.timings.has_key(fn):
525 tt, ct, callers = self.timings[fn]
526 self.timings[fn] = tt, ct, callers
527 else:
528 self.timings[fn] = 0, 0, {}
529 return 1
530
531 def trace_dispatch_return(self, frame, t):
532 rt, rtt, rct, rfn, frame, rcur = self.cur
533 rtt = rtt + t
534 sft = rtt + rct
535
536 pt, ptt, pct, pfn, pframe, pcur = rcur
537 self.cur = pt, ptt+rt, pct+sft, pfn, pframe, pcur
538
539 tt, ct, callers = self.timings[rfn]
540 if callers.has_key(pfn):
541 callers[pfn] = callers[pfn] + 1
542 else:
543 callers[pfn] = 1
544 self.timings[rfn] = tt+rtt, ct + sft, callers
545
546 return 1
547
548
549 def snapshot_stats(self):
550 self.stats = {}
551 for func in self.timings.keys():
552 tt, ct, callers = self.timings[func]
553 nor_func = self.func_normalize(func)
554 nor_callers = {}
555 nc = 0
556 for func_caller in callers.keys():
557 nor_callers[self.func_normalize(func_caller)]=\
558 callers[func_caller]
559 nc = nc + callers[func_caller]
560 self.stats[nor_func] = nc, nc, tt, ct, nor_callers
561
562
563
564#****************************************************************************
565# HotProfile class documentation
566#****************************************************************************
567#
568# This profiler is the fastest derived profile example. It does not
569# calculate caller-callee relationships, and does not calculate cumulative
570# time under a function. It only calculates time spent in a function, so
571# it runs very quickly (re: very low overhead)
572#****************************************************************************
573class HotProfile(Profile):
574 def trace_dispatch_exception(self, frame, t):
575 rt, rtt, rfn, rframe, rcur = self.cur
576 if rcur and not rframe is frame:
577 return self.trace_dispatch_return(rframe, t)
578 return 0
579
580 def trace_dispatch_call(self, frame, t):
581 self.cur = (t, 0, frame, self.cur)
582 return 1
583
584 def trace_dispatch_return(self, frame, t):
585 rt, rtt, frame, rcur = self.cur
586
587 rfn = `frame.f_code`
588
589 pt, ptt, pframe, pcur = rcur
590 self.cur = pt, ptt+rt, pframe, pcur
591
592 if self.timings.has_key(rfn):
593 nc, tt = self.timings[rfn]
594 self.timings[rfn] = nc + 1, rt + rtt + tt
595 else:
596 self.timings[rfn] = 1, rt + rtt
597
598 return 1
599
600
601 def snapshot_stats(self):
602 self.stats = {}
603 for func in self.timings.keys():
604 nc, tt = self.timings[func]
605 nor_func = self.func_normalize(func)
606 self.stats[nor_func] = nc, nc, tt, 0, {}
607
608
609
610#****************************************************************************
611def Stats(*args):
612 print 'Report generating functions are in the "pstats" module\a'