Added the cProfile module.
Based on lsprof (patch #1212837) by Brett Rosen and Ted Czotter.
With further editing by Michael Hudson and myself.
History in svn repo: http://codespeak.net/svn/user/arigo/hack/misc/lsprof

* Module/_lsprof.c is the internal C module, Lib/cProfile.py a wrapper.
* pstats.py updated to display cProfile's caller/callee timings if available.
* setup.py and NEWS updated.
* documentation updates in the profiler section:
   - explain the differences between the three profilers that we have now
   - profile and cProfile can use a unified documentation, like (c)Pickle
   - mention that hotshot is "for specialized usage" now
   - removed references to the "old profiler" that no longer exists
* test updates:
   - extended test_profile to cover delicate cases like recursion
   - added tests for the caller/callee displays
   - added test_cProfile, performing the same tests for cProfile
* TO-DO:
   - cProfile gives a nicer name to built-in, particularly built-in methods,
     which could be backported to profile.
   - not tested on Windows recently!
diff --git a/Lib/test/output/test_cProfile b/Lib/test/output/test_cProfile
new file mode 100644
index 0000000..fff3568
--- /dev/null
+++ b/Lib/test/output/test_cProfile
@@ -0,0 +1,79 @@
+test_cProfile
+         126 function calls (106 primitive calls) in 1.000 CPU seconds
+
+   Ordered by: standard name
+
+   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+        1    0.000    0.000    1.000    1.000 <string>:1(<module>)
+        8    0.064    0.008    0.080    0.010 test_cProfile.py:103(subhelper)
+       28    0.028    0.001    0.028    0.001 test_cProfile.py:115(__getattr__)
+        1    0.270    0.270    1.000    1.000 test_cProfile.py:30(testfunc)
+     23/3    0.150    0.007    0.170    0.057 test_cProfile.py:40(factorial)
+       20    0.020    0.001    0.020    0.001 test_cProfile.py:53(mul)
+        2    0.040    0.020    0.600    0.300 test_cProfile.py:60(helper)
+        4    0.116    0.029    0.120    0.030 test_cProfile.py:78(helper1)
+        2    0.000    0.000    0.140    0.070 test_cProfile.py:89(helper2_indirect)
+        8    0.312    0.039    0.400    0.050 test_cProfile.py:93(helper2)
+       12    0.000    0.000    0.012    0.001 {hasattr}
+        4    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
+        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
+        8    0.000    0.000    0.000    0.000 {range}
+        4    0.000    0.000    0.000    0.000 {sys.exc_info}
+
+
+   Ordered by: standard name
+
+Function                                          called...
+                                                      ncalls  tottime  cumtime
+<string>:1(<module>)                              ->       1    0.270    1.000  test_cProfile.py:30(testfunc)
+test_cProfile.py:103(subhelper)                   ->      16    0.016    0.016  test_cProfile.py:115(__getattr__)
+                                                           8    0.000    0.000  {range}
+test_cProfile.py:115(__getattr__)                 ->
+test_cProfile.py:30(testfunc)                     ->       1    0.014    0.130  test_cProfile.py:40(factorial)
+                                                           2    0.040    0.600  test_cProfile.py:60(helper)
+test_cProfile.py:40(factorial)                    ->    20/3    0.130    0.147  test_cProfile.py:40(factorial)
+                                                          20    0.020    0.020  test_cProfile.py:53(mul)
+test_cProfile.py:53(mul)                          ->
+test_cProfile.py:60(helper)                       ->       4    0.116    0.120  test_cProfile.py:78(helper1)
+                                                           2    0.000    0.140  test_cProfile.py:89(helper2_indirect)
+                                                           6    0.234    0.300  test_cProfile.py:93(helper2)
+test_cProfile.py:78(helper1)                      ->       4    0.000    0.004  {hasattr}
+                                                           4    0.000    0.000  {method 'append' of 'list' objects}
+                                                           4    0.000    0.000  {sys.exc_info}
+test_cProfile.py:89(helper2_indirect)             ->       2    0.006    0.040  test_cProfile.py:40(factorial)
+                                                           2    0.078    0.100  test_cProfile.py:93(helper2)
+test_cProfile.py:93(helper2)                      ->       8    0.064    0.080  test_cProfile.py:103(subhelper)
+                                                           8    0.000    0.008  {hasattr}
+{hasattr}                                         ->      12    0.012    0.012  test_cProfile.py:115(__getattr__)
+{method 'append' of 'list' objects}               ->
+{method 'disable' of '_lsprof.Profiler' objects}  ->
+{range}                                           ->
+{sys.exc_info}                                    ->
+
+
+   Ordered by: standard name
+
+Function                                          was called by...
+                                                      ncalls  tottime  cumtime
+<string>:1(<module>)                              <-
+test_cProfile.py:103(subhelper)                   <-       8    0.064    0.080  test_cProfile.py:93(helper2)
+test_cProfile.py:115(__getattr__)                 <-      16    0.016    0.016  test_cProfile.py:103(subhelper)
+                                                          12    0.012    0.012  {hasattr}
+test_cProfile.py:30(testfunc)                     <-       1    0.270    1.000  <string>:1(<module>)
+test_cProfile.py:40(factorial)                    <-       1    0.014    0.130  test_cProfile.py:30(testfunc)
+                                                        20/3    0.130    0.147  test_cProfile.py:40(factorial)
+                                                           2    0.006    0.040  test_cProfile.py:89(helper2_indirect)
+test_cProfile.py:53(mul)                          <-      20    0.020    0.020  test_cProfile.py:40(factorial)
+test_cProfile.py:60(helper)                       <-       2    0.040    0.600  test_cProfile.py:30(testfunc)
+test_cProfile.py:78(helper1)                      <-       4    0.116    0.120  test_cProfile.py:60(helper)
+test_cProfile.py:89(helper2_indirect)             <-       2    0.000    0.140  test_cProfile.py:60(helper)
+test_cProfile.py:93(helper2)                      <-       6    0.234    0.300  test_cProfile.py:60(helper)
+                                                           2    0.078    0.100  test_cProfile.py:89(helper2_indirect)
+{hasattr}                                         <-       4    0.000    0.004  test_cProfile.py:78(helper1)
+                                                           8    0.000    0.008  test_cProfile.py:93(helper2)
+{method 'append' of 'list' objects}               <-       4    0.000    0.000  test_cProfile.py:78(helper1)
+{method 'disable' of '_lsprof.Profiler' objects}  <-
+{range}                                           <-       8    0.000    0.000  test_cProfile.py:103(subhelper)
+{sys.exc_info}                                    <-       4    0.000    0.000  test_cProfile.py:78(helper1)
+
+
diff --git a/Lib/test/output/test_profile b/Lib/test/output/test_profile
index e745075..96bd77f 100644
--- a/Lib/test/output/test_profile
+++ b/Lib/test/output/test_profile
@@ -1,20 +1,84 @@
 test_profile
-         74 function calls in 1.000 CPU seconds
+         127 function calls (107 primitive calls) in 1.000 CPU seconds
 
    Ordered by: standard name
 
    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+        4    0.000    0.000    0.000    0.000 :0(append)
+        4    0.000    0.000    0.000    0.000 :0(exc_info)
        12    0.000    0.000    0.012    0.001 :0(hasattr)
         8    0.000    0.000    0.000    0.000 :0(range)
         1    0.000    0.000    0.000    0.000 :0(setprofile)
         1    0.000    0.000    1.000    1.000 <string>:1(<module>)
         0    0.000             0.000          profile:0(profiler)
         1    0.000    0.000    1.000    1.000 profile:0(testfunc())
-        1    0.400    0.400    1.000    1.000 test_profile.py:23(testfunc)
-        2    0.080    0.040    0.600    0.300 test_profile.py:32(helper)
-        4    0.116    0.029    0.120    0.030 test_profile.py:50(helper1)
-        8    0.312    0.039    0.400    0.050 test_profile.py:58(helper2)
-        8    0.064    0.008    0.080    0.010 test_profile.py:68(subhelper)
-       28    0.028    0.001    0.028    0.001 test_profile.py:80(__getattr__)
+        8    0.064    0.008    0.080    0.010 test_profile.py:103(subhelper)
+       28    0.028    0.001    0.028    0.001 test_profile.py:115(__getattr__)
+        1    0.270    0.270    1.000    1.000 test_profile.py:30(testfunc)
+     23/3    0.150    0.007    0.170    0.057 test_profile.py:40(factorial)
+       20    0.020    0.001    0.020    0.001 test_profile.py:53(mul)
+        2    0.040    0.020    0.600    0.300 test_profile.py:60(helper)
+        4    0.116    0.029    0.120    0.030 test_profile.py:78(helper1)
+        2    0.000    0.000    0.140    0.070 test_profile.py:89(helper2_indirect)
+        8    0.312    0.039    0.400    0.050 test_profile.py:93(helper2)
+
+
+   Ordered by: standard name
+
+Function                              called...
+:0(append)                            ->
+:0(exc_info)                          ->
+:0(hasattr)                           -> test_profile.py:115(__getattr__)(12)    0.028
+:0(range)                             ->
+:0(setprofile)                        ->
+<string>:1(<module>)                  -> test_profile.py:30(testfunc)(1)    1.000
+profile:0(profiler)                   -> profile:0(testfunc())(1)    1.000
+profile:0(testfunc())                 -> :0(setprofile)(1)    0.000
+                                         <string>:1(<module>)(1)    1.000
+test_profile.py:103(subhelper)        -> :0(range)(8)    0.000
+                                         test_profile.py:115(__getattr__)(16)    0.028
+test_profile.py:115(__getattr__)      ->
+test_profile.py:30(testfunc)          -> test_profile.py:40(factorial)(1)    0.170
+                                         test_profile.py:60(helper)(2)    0.600
+test_profile.py:40(factorial)         -> test_profile.py:40(factorial)(20)    0.170
+                                         test_profile.py:53(mul)(20)    0.020
+test_profile.py:53(mul)               ->
+test_profile.py:60(helper)            -> test_profile.py:78(helper1)(4)    0.120
+                                         test_profile.py:89(helper2_indirect)(2)    0.140
+                                         test_profile.py:93(helper2)(6)    0.400
+test_profile.py:78(helper1)           -> :0(append)(4)    0.000
+                                         :0(exc_info)(4)    0.000
+                                         :0(hasattr)(4)    0.012
+test_profile.py:89(helper2_indirect)  -> test_profile.py:40(factorial)(2)    0.170
+                                         test_profile.py:93(helper2)(2)    0.400
+test_profile.py:93(helper2)           -> :0(hasattr)(8)    0.012
+                                         test_profile.py:103(subhelper)(8)    0.080
+
+
+   Ordered by: standard name
+
+Function                              was called by...
+:0(append)                            <- test_profile.py:78(helper1)(4)    0.120
+:0(exc_info)                          <- test_profile.py:78(helper1)(4)    0.120
+:0(hasattr)                           <- test_profile.py:78(helper1)(4)    0.120
+                                         test_profile.py:93(helper2)(8)    0.400
+:0(range)                             <- test_profile.py:103(subhelper)(8)    0.080
+:0(setprofile)                        <- profile:0(testfunc())(1)    1.000
+<string>:1(<module>)                  <- profile:0(testfunc())(1)    1.000
+profile:0(profiler)                   <-
+profile:0(testfunc())                 <- profile:0(profiler)(1)    0.000
+test_profile.py:103(subhelper)        <- test_profile.py:93(helper2)(8)    0.400
+test_profile.py:115(__getattr__)      <- :0(hasattr)(12)    0.012
+                                         test_profile.py:103(subhelper)(16)    0.080
+test_profile.py:30(testfunc)          <- <string>:1(<module>)(1)    1.000
+test_profile.py:40(factorial)         <- test_profile.py:30(testfunc)(1)    1.000
+                                         test_profile.py:40(factorial)(20)    0.170
+                                         test_profile.py:89(helper2_indirect)(2)    0.140
+test_profile.py:53(mul)               <- test_profile.py:40(factorial)(20)    0.170
+test_profile.py:60(helper)            <- test_profile.py:30(testfunc)(2)    1.000
+test_profile.py:78(helper1)           <- test_profile.py:60(helper)(4)    0.600
+test_profile.py:89(helper2_indirect)  <- test_profile.py:60(helper)(2)    0.600
+test_profile.py:93(helper2)           <- test_profile.py:60(helper)(6)    0.600
+                                         test_profile.py:89(helper2_indirect)(2)    0.140
 
 
diff --git a/Lib/test/test_cProfile.py b/Lib/test/test_cProfile.py
new file mode 100644
index 0000000..07b2a9b
--- /dev/null
+++ b/Lib/test/test_cProfile.py
@@ -0,0 +1,123 @@
+"""Test suite for the cProfile module."""
+
+import cProfile, pstats, sys
+
+# In order to have reproducible time, we simulate a timer in the global
+# variable 'ticks', which represents simulated time in milliseconds.
+# (We can't use a helper function increment the timer since it would be
+# included in the profile and would appear to consume all the time.)
+ticks = 0
+
+# IMPORTANT: this is an output test.  *ALL* NUMBERS in the expected
+# output are relevant.  If you change the formatting of pstats,
+# please don't just regenerate output/test_cProfile without checking
+# very carefully that not a single number has changed.
+
+def test_main():
+    global ticks
+    ticks = 42000
+    prof = cProfile.Profile(timer, 0.001)
+    prof.runctx("testfunc()", globals(), locals())
+    assert ticks == 43000, ticks
+    st = pstats.Stats(prof)
+    st.strip_dirs().sort_stats('stdname').print_stats()
+    st.print_callees()
+    st.print_callers()
+
+def timer():
+    return ticks
+
+def testfunc():
+    # 1 call
+    # 1000 ticks total: 270 ticks local, 730 ticks in subfunctions
+    global ticks
+    ticks += 99
+    helper()                            # 300
+    helper()                            # 300
+    ticks += 171
+    factorial(14)                       # 130
+
+def factorial(n):
+    # 23 calls total
+    # 170 ticks total, 150 ticks local
+    # 3 primitive calls, 130, 20 and 20 ticks total
+    # including 116, 17, 17 ticks local
+    global ticks
+    if n > 0:
+        ticks += n
+        return mul(n, factorial(n-1))
+    else:
+        ticks += 11
+        return 1
+
+def mul(a, b):
+    # 20 calls
+    # 1 tick, local
+    global ticks
+    ticks += 1
+    return a * b
+
+def helper():
+    # 2 calls
+    # 300 ticks total: 20 ticks local, 260 ticks in subfunctions
+    global ticks
+    ticks += 1
+    helper1()                           # 30
+    ticks += 2
+    helper1()                           # 30
+    ticks += 6
+    helper2()                           # 50
+    ticks += 3
+    helper2()                           # 50
+    ticks += 2
+    helper2()                           # 50
+    ticks += 5
+    helper2_indirect()                  # 70
+    ticks += 1
+
+def helper1():
+    # 4 calls
+    # 30 ticks total: 29 ticks local, 1 tick in subfunctions
+    global ticks
+    ticks += 10
+    hasattr(C(), "foo")                 # 1
+    ticks += 19
+    lst = []
+    lst.append(42)                      # 0
+    sys.exc_info()                      # 0
+
+def helper2_indirect():
+    helper2()                           # 50
+    factorial(3)                        # 20
+
+def helper2():
+    # 8 calls
+    # 50 ticks local: 39 ticks local, 11 ticks in subfunctions
+    global ticks
+    ticks += 11
+    hasattr(C(), "bar")                 # 1
+    ticks += 13
+    subhelper()                         # 10
+    ticks += 15
+
+def subhelper():
+    # 8 calls
+    # 10 ticks total: 8 ticks local, 2 ticks in subfunctions
+    global ticks
+    ticks += 2
+    for i in range(2):                  # 0
+        try:
+            C().foo                     # 1 x 2
+        except AttributeError:
+            ticks += 3                  # 3 x 2
+
+class C:
+    def __getattr__(self, name):
+        # 28 calls
+        # 1 tick, local
+        global ticks
+        ticks += 1
+        raise AttributeError
+
+if __name__ == "__main__":
+    test_main()
diff --git a/Lib/test/test_profile.py b/Lib/test/test_profile.py
index aa0f26c..95ad8d2 100644
--- a/Lib/test/test_profile.py
+++ b/Lib/test/test_profile.py
@@ -1,8 +1,6 @@
 """Test suite for the profile module."""
 
-import profile
-import os
-from test.test_support import TESTFN, vereq
+import profile, pstats, sys
 
 # In order to have reproducible time, we simulate a timer in the global
 # variable 'ticks', which represents simulated time in milliseconds.
@@ -10,50 +8,87 @@
 # included in the profile and would appear to consume all the time.)
 ticks = 0
 
-def test_1():
+# IMPORTANT: this is an output test.  *ALL* NUMBERS in the expected
+# output are relevant.  If you change the formatting of pstats,
+# please don't just regenerate output/test_profile without checking
+# very carefully that not a single number has changed.
+
+def test_main():
     global ticks
-    ticks = 0
+    ticks = 42000
     prof = profile.Profile(timer)
-    prof.runctx("testfunc()", globals(), globals())
-    prof.print_stats()
+    prof.runctx("testfunc()", globals(), locals())
+    assert ticks == 43000, ticks
+    st = pstats.Stats(prof)
+    st.strip_dirs().sort_stats('stdname').print_stats()
+    st.print_callees()
+    st.print_callers()
 
 def timer():
     return ticks*0.001
 
 def testfunc():
     # 1 call
-    # 1000 ticks total: 400 ticks local, 600 ticks in subfunctions
+    # 1000 ticks total: 270 ticks local, 730 ticks in subfunctions
     global ticks
-    ticks += 199
+    ticks += 99
     helper()                            # 300
     helper()                            # 300
-    ticks += 201
+    ticks += 171
+    factorial(14)                       # 130
+
+def factorial(n):
+    # 23 calls total
+    # 170 ticks total, 150 ticks local
+    # 3 primitive calls, 130, 20 and 20 ticks total
+    # including 116, 17, 17 ticks local
+    global ticks
+    if n > 0:
+        ticks += n
+        return mul(n, factorial(n-1))
+    else:
+        ticks += 11
+        return 1
+
+def mul(a, b):
+    # 20 calls
+    # 1 tick, local
+    global ticks
+    ticks += 1
+    return a * b
 
 def helper():
     # 2 calls
-    # 300 ticks total: 40 ticks local, 260 ticks in subfunctions
+    # 300 ticks total: 20 ticks local, 260 ticks in subfunctions
     global ticks
     ticks += 1
     helper1()                           # 30
-    ticks += 3
+    ticks += 2
     helper1()                           # 30
     ticks += 6
     helper2()                           # 50
+    ticks += 3
+    helper2()                           # 50
+    ticks += 2
+    helper2()                           # 50
     ticks += 5
-    helper2()                           # 50
-    ticks += 4
-    helper2()                           # 50
-    ticks += 7
-    helper2()                           # 50
-    ticks += 14
+    helper2_indirect()                  # 70
+    ticks += 1
 
 def helper1():
     # 4 calls
     # 30 ticks total: 29 ticks local, 1 tick in subfunctions
     global ticks
     ticks += 10
-    hasattr(C(), "foo")
+    hasattr(C(), "foo")                 # 1
     ticks += 19
+    lst = []
+    lst.append(42)                      # 0
+    sys.exc_info()                      # 0
+
+def helper2_indirect():
+    helper2()                           # 50
+    factorial(3)                        # 20
 
 def helper2():
     # 8 calls
@@ -70,7 +105,7 @@
     # 10 ticks total: 8 ticks local, 2 ticks in subfunctions
     global ticks
     ticks += 2
-    for i in range(2):
+    for i in range(2):                  # 0
         try:
             C().foo                     # 1 x 2
         except AttributeError:
@@ -84,36 +119,5 @@
         ticks += 1
         raise AttributeError
 
-
-def test_2():
-    d = globals().copy()
-    def testfunc():
-        global x
-        x = 1
-    d['testfunc'] = testfunc
-    profile.runctx("testfunc()", d, d, TESTFN)
-    vereq (x, 1)
-    os.unlink (TESTFN)
-
-def test_3():
-    result = []
-    def testfunc1():
-        try: len(None)
-        except: pass
-        try: len(None)
-        except: pass
-        result.append(True)
-    def testfunc2():
-        testfunc1()
-        testfunc1()
-    profile.runctx("testfunc2()", locals(), locals(), TESTFN)
-    vereq(result, [True, True])
-    os.unlink(TESTFN)
-
-def test_main():
-    test_1()
-    test_2()
-    test_3()
-
 if __name__ == "__main__":
     test_main()