Added the cProfile module.
Based on lsprof (patch #1212837) by Brett Rosen and Ted Czotter.
With further editing by Michael Hudson and myself.
History in svn repo: http://codespeak.net/svn/user/arigo/hack/misc/lsprof

* Modules/_lsprof.c is the internal C module, Lib/cProfile.py a wrapper.
* pstats.py updated to display cProfile's caller/callee timings if available.
* setup.py and NEWS updated.
* documentation updates in the profiler section:
   - explain the differences between the three profilers that we have now
   - profile and cProfile can use a unified documentation, like (c)Pickle
   - mention that hotshot is "for specialized usage" now
   - removed references to the "old profiler" that no longer exists
* test updates:
   - extended test_profile to cover delicate cases like recursion
   - added tests for the caller/callee displays
   - added test_cProfile, performing the same tests for cProfile
* TO-DO:
   - cProfile gives nicer names to built-ins, particularly built-in methods;
     this could be backported to profile.
   - not tested on Windows recently!
diff --git a/Lib/test/test_profile.py b/Lib/test/test_profile.py
index aa0f26c..95ad8d2 100644
--- a/Lib/test/test_profile.py
+++ b/Lib/test/test_profile.py
@@ -1,8 +1,6 @@
 """Test suite for the profile module."""
 
-import profile
-import os
-from test.test_support import TESTFN, vereq
+import profile, pstats, sys
 
 # In order to have reproducible time, we simulate a timer in the global
 # variable 'ticks', which represents simulated time in milliseconds.
@@ -10,50 +8,87 @@
 # included in the profile and would appear to consume all the time.)
 ticks = 0
 
-def test_1():
+# IMPORTANT: this is an output test.  *ALL* NUMBERS in the expected
+# output are relevant.  If you change the formatting of pstats,
+# please don't just regenerate output/test_profile without checking
+# very carefully that not a single number has changed.
+
+def test_main():
     global ticks
-    ticks = 0
+    ticks = 42000
     prof = profile.Profile(timer)
-    prof.runctx("testfunc()", globals(), globals())
-    prof.print_stats()
+    prof.runctx("testfunc()", globals(), locals())
+    assert ticks == 43000, ticks
+    st = pstats.Stats(prof)
+    st.strip_dirs().sort_stats('stdname').print_stats()
+    st.print_callees()
+    st.print_callers()
 
 def timer():
     return ticks*0.001
 
 def testfunc():
     # 1 call
-    # 1000 ticks total: 400 ticks local, 600 ticks in subfunctions
+    # 1000 ticks total: 270 ticks local, 730 ticks in subfunctions
     global ticks
-    ticks += 199
+    ticks += 99
     helper()                            # 300
     helper()                            # 300
-    ticks += 201
+    ticks += 171
+    factorial(14)                       # 130
+
+def factorial(n):
+    # 23 calls total
+    # 170 ticks total, 150 ticks local
+    # 3 primitive calls, 130, 20 and 20 ticks total
+    # including 116, 17, 17 ticks local
+    global ticks
+    if n > 0:
+        ticks += n
+        return mul(n, factorial(n-1))
+    else:
+        ticks += 11
+        return 1
+
+def mul(a, b):
+    # 20 calls
+    # 1 tick, local
+    global ticks
+    ticks += 1
+    return a * b
 
 def helper():
     # 2 calls
-    # 300 ticks total: 40 ticks local, 260 ticks in subfunctions
+    # 300 ticks total: 20 ticks local, 280 ticks in subfunctions
     global ticks
     ticks += 1
     helper1()                           # 30
-    ticks += 3
+    ticks += 2
     helper1()                           # 30
     ticks += 6
     helper2()                           # 50
+    ticks += 3
+    helper2()                           # 50
+    ticks += 2
+    helper2()                           # 50
     ticks += 5
-    helper2()                           # 50
-    ticks += 4
-    helper2()                           # 50
-    ticks += 7
-    helper2()                           # 50
-    ticks += 14
+    helper2_indirect()                  # 70
+    ticks += 1
 
 def helper1():
     # 4 calls
     # 30 ticks total: 29 ticks local, 1 tick in subfunctions
     global ticks
     ticks += 10
-    hasattr(C(), "foo")
+    hasattr(C(), "foo")                 # 1
     ticks += 19
+    lst = []
+    lst.append(42)                      # 0
+    sys.exc_info()                      # 0
+
+def helper2_indirect():
+    helper2()                           # 50
+    factorial(3)                        # 20
 
 def helper2():
     # 8 calls
@@ -70,7 +105,7 @@
     # 10 ticks total: 8 ticks local, 2 ticks in subfunctions
     global ticks
     ticks += 2
-    for i in range(2):
+    for i in range(2):                  # 0
         try:
             C().foo                     # 1 x 2
         except AttributeError:
@@ -84,36 +119,5 @@
         ticks += 1
         raise AttributeError
 
-
-def test_2():
-    d = globals().copy()
-    def testfunc():
-        global x
-        x = 1
-    d['testfunc'] = testfunc
-    profile.runctx("testfunc()", d, d, TESTFN)
-    vereq (x, 1)
-    os.unlink (TESTFN)
-
-def test_3():
-    result = []
-    def testfunc1():
-        try: len(None)
-        except: pass
-        try: len(None)
-        except: pass
-        result.append(True)
-    def testfunc2():
-        testfunc1()
-        testfunc1()
-    profile.runctx("testfunc2()", locals(), locals(), TESTFN)
-    vereq(result, [True, True])
-    os.unlink(TESTFN)
-
-def test_main():
-    test_1()
-    test_2()
-    test_3()
-
 if __name__ == "__main__":
     test_main()