Hopefully fix the profiler right.  Add a test suite that checks that
it deals correctly with some anomalous cases; according to this test
suite I've fixed it right.

The anomalous cases had to do with 'exception' events: these aren't
generated when they would be most helpful, and the profiler has to
work hard to recover the right information.  The problems occur when C
code (such as hasattr(), which is used as the example here) calls back
into Python code and clears an exception raised by that Python code.
Consider this example:

    def foo():
        hasattr(obj, "bar")

Where obj is an instance from a class like this:

    class C:
        def __getattr__(self, name):
            raise AttributeError

The profiler sees the following sequence of events:

    call (foo)
    call (__getattr__)
    exception (in __getattr__)
    return (from foo)

Previously, the profiler would assume the return event returned from
__getattr__. An if statement checking for this condition and raising
an exception was commented out...  This version does the right thing.
diff --git a/Lib/profile.py b/Lib/profile.py
index 9167905..73aeff9 100755
--- a/Lib/profile.py
+++ b/Lib/profile.py
@@ -109,18 +109,18 @@
     avoid contaminating the program that we are profiling. (old profiler
     used to write into the frames local dictionary!!) Derived classes
     can change the definition of some entries, as long as they leave
-    [-2:] intact.
+    [3:] intact.
 
-    [ 0] = Time that needs to be charged to the parent frame's function.
-           It is used so that a function call will not have to access the
-           timing data for the parent frame.
-    [ 1] = Total time spent in this frame's function, excluding time in
-           subfunctions
-    [ 2] = Cumulative time spent in this frame's function, including time in
-           all subfunctions to this frame.
-    [-3] = Name of the function that corresponds to this frame.
-    [-2] = Actual frame that we correspond to (used to sync exception handling)
-    [-1] = Our parent 6-tuple (corresponds to frame.f_back)
+    [0] = Time that needs to be charged to the parent frame's function.
+          It is used so that a function call will not have to access the
+          timing data for the parent frame.
+    [1] = Total time spent in this frame's function, excluding time in
+          subfunctions
+    [2] = Cumulative time spent in this frame's function, including time in
+          all subfunctions to this frame (but excluding this frame!).
+    [3] = Name of the function that corresponds to this frame.
+    [4] = Actual frame that we correspond to (used to sync exception handling)
+    [5] = Our parent 6-tuple (corresponds to frame.f_back)
 
     Timing data for each function is stored as a 5-tuple in the dictionary
     self.timings[].  The index is always the name stored in self.cur[4].
@@ -243,10 +243,21 @@
         rt, rtt, rct, rfn, rframe, rcur = self.cur
         if (rframe is not frame) and rcur:
             return self.trace_dispatch_return(rframe, t)
-        return 0
+        self.cur = rt, rtt+t, rct, rfn, rframe, rcur
+        return 1
 
 
     def trace_dispatch_call(self, frame, t):
+        if self.cur and frame.f_back is not self.cur[4]:
+            rt, rtt, rct, rfn, rframe, rcur = self.cur
+            if not isinstance(rframe, Profile.fake_frame):
+                if rframe.f_back is not frame.f_back:
+                    print rframe, rframe.f_back
+                    print frame, frame.f_back
+                    raise "Bad call", self.cur[3]
+                self.trace_dispatch_return(rframe, 0)
+                if self.cur and frame.f_back is not self.cur[4]:
+                    raise "Bad call[2]", self.cur[3]
         fcode = frame.f_code
         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
         self.cur = (t, 0, 0, fn, frame, self.cur)
@@ -259,7 +270,11 @@
         return 1
 
     def trace_dispatch_return(self, frame, t):
-        # if not frame is self.cur[-2]: raise "Bad return", self.cur[3]
+        if frame is not self.cur[4]:
+            if frame is self.cur[4].f_back:
+                self.trace_dispatch_return(self.cur[4], 0)
+            else:
+                raise "Bad return", self.cur[3]
 
         # Prefix "r" means part of the Returning or exiting frame
         # Prefix "p" means part of the Previous or older frame
@@ -302,7 +317,7 @@
     # very nice :-).
 
     def set_cmd(self, cmd):
-        if self.cur[-1]: return   # already set
+        if self.cur[5]: return   # already set
         self.cmd = cmd
         self.simulate_call(cmd)
 
@@ -324,7 +339,7 @@
     def simulate_call(self, name):
         code = self.fake_code('profile', 0, name)
         if self.cur:
-            pframe = self.cur[-2]
+            pframe = self.cur[4]
         else:
             pframe = None
         frame = self.fake_frame(code, pframe)
@@ -337,10 +352,10 @@
     def simulate_cmd_complete(self):
         get_time = self.get_time
         t = get_time() - self.t
-        while self.cur[-1]:
+        while self.cur[5]:
             # We *can* cause assertion errors here if
             # dispatch_trace_return checks for a frame match!
-            a = self.dispatch['return'](self, self.cur[-2], t)
+            a = self.dispatch['return'](self, self.cur[4], t)
             t = 0
         self.t = get_time() - t
 
diff --git a/Lib/test/output/test_profile b/Lib/test/output/test_profile
new file mode 100644
index 0000000..b36a016
--- /dev/null
+++ b/Lib/test/output/test_profile
@@ -0,0 +1,17 @@
+test_profile
+         53 function calls in 1.000 CPU seconds
+
+   Ordered by: standard name
+
+   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+        1    0.000    0.000    1.000    1.000 <string>:1(?)
+        0    0.000             0.000          profile:0(profiler)
+        1    0.000    0.000    1.000    1.000 profile:0(testfunc())
+        1    0.400    0.400    1.000    1.000 test_profile.py:21(testfunc)
+        2    0.080    0.040    0.600    0.300 test_profile.py:30(helper)
+        4    0.116    0.029    0.120    0.030 test_profile.py:48(helper1)
+        8    0.312    0.039    0.400    0.050 test_profile.py:56(helper2)
+        8    0.064    0.008    0.080    0.010 test_profile.py:66(subhelper)
+       28    0.028    0.001    0.028    0.001 test_profile.py:78(__getattr__)
+
+
diff --git a/Lib/test/test_profile.py b/Lib/test/test_profile.py
new file mode 100644
index 0000000..460d56f
--- /dev/null
+++ b/Lib/test/test_profile.py
@@ -0,0 +1,86 @@
+"""Test suite for the profile module."""
+
+import profile
+
+# In order to have reproducible time, we simulate a timer in the global
+# variable 'ticks', which represents simulated time in milliseconds.
+# (We can't use a helper function increment the timer since it would be
+# included in the profile and would appear to consume all the time.)
+ticks = 0
+    
+def test_main():
+    global ticks
+    ticks = 0
+    prof = profile.Profile(timer)
+    prof.runctx("testfunc()", globals(), globals())
+    prof.print_stats()
+
+def timer():
+    return ticks*0.001
+
+def testfunc():
+    # 1 call
+    # 1000 ticks total: 400 ticks local, 600 ticks in subfunctions
+    global ticks
+    ticks += 199
+    helper()                            # 300
+    helper()                            # 300
+    ticks += 201
+
+def helper():
+    # 2 calls
+    # 300 ticks total: 40 ticks local, 260 ticks in subfunctions
+    global ticks
+    ticks += 1
+    helper1()                           # 30
+    ticks += 3
+    helper1()                           # 30
+    ticks += 6
+    helper2()                           # 50
+    ticks += 5
+    helper2()                           # 50
+    ticks += 4
+    helper2()                           # 50
+    ticks += 7
+    helper2()                           # 50
+    ticks += 14
+
+def helper1():
+    # 4 calls
+    # 30 ticks total: 29 ticks local, 1 tick in subfunctions
+    global ticks
+    ticks += 10
+    hasattr(C(), "foo")
+    ticks += 19
+
+def helper2():
+    # 8 calls
+    # 50 ticks local: 39 ticks local, 11 ticks in subfunctions
+    global ticks
+    ticks += 11
+    hasattr(C(), "bar")                 # 1
+    ticks += 13
+    subhelper()                         # 10
+    ticks += 15
+
+def subhelper():
+    # 8 calls
+    # 10 ticks total: 8 ticks local, 2 ticks in subfunctions
+    global ticks
+    ticks += 2
+    for i in range(2):
+        try:
+            C().foo                     # 1 x 2
+        except AttributeError:
+            ticks += 3                  # 3 x 2
+
+class C:
+    def __getattr__(self, name):
+        # 28 calls
+        # 1 tick, local
+        global ticks
+        ticks += 1
+        raise AttributeError
+
+if __name__ == "__main__":
+    test_main()