Sped the usual case for sorting by calling PyObject_RichCompareBool
directly when no comparison function is specified.  This saves a layer
of function call on every compare then.  Measured speedups:

 i    2**i  *sort  \sort  /sort  3sort  +sort  %sort  ~sort  =sort  !sort
15   32768  12.5%   0.0%   0.0% 100.0%   0.0%  50.0% 100.0% 100.0% -50.0%
16   65536   8.7%   0.0%   0.0%   0.0%   0.0%   0.0%  12.5%   0.0%   0.0%
17  131072   8.0%  25.0%   0.0%  25.0%   0.0%  14.3%   5.9%   0.0%   0.0%
18  262144   6.3% -10.0%  12.5%  11.1%   0.0%   6.3%   5.6%  12.5%   0.0%
19  524288   5.3%   5.9%   0.0%   5.6%   0.0%   5.9%   5.4%   0.0%   2.9%
20 1048576   5.3%   2.9%   2.9%   5.1%   2.8%   1.3%   5.9%   2.9%   4.2%

The best indicators are those that take significant time (larger i), and
where sort doesn't do very few compares (so *sort and ~sort benefit most
reliably).  The large numbers are due to roundoff noise combined with
platform variability; e.g., the 14.3% speedup for %sort at i=17 reflects
a printed elapsed time of 0.18 seconds falling to 0.17, but a change in
the last digit isn't really meaningful (indeed, if it really took 0.175
seconds, one electron having a lazy nanosecond could shift it to either
value <wink>).  Similarly the 25% at 3sort i=17 was a meaningless change
from 0.05 to 0.04.  However, almost all the "meaningless changes" were
in the same direction, which is good.  The before-and-after times for
*sort are clearest:

before after
  0.18  0.16
  0.25  0.23
  0.54  0.50
  1.18  1.11
  2.57  2.44
  5.58  5.30
diff --git a/Objects/listobject.c b/Objects/listobject.c
index bfa3b79..e2b9b2b 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -760,9 +760,9 @@
  */
 
 /* Comparison function.  Takes care of calling a user-supplied
- * comparison function (any callable Python object).  Calls the
- * standard comparison function, PyObject_RichCompareBool(), if the user-
- * supplied function is NULL.
+ * comparison function (any callable Python object), which must not be
+ * NULL (use the ISLT macro if you don't know, or call PyObject_RichCompareBool
+ * with Py_LT if you know it's NULL).
  * Returns -1 on error, 1 if x < y, 0 if x >= y.
  */
 static int
@@ -772,9 +772,7 @@
 	PyObject *args;
 	int i;
 
-	if (compare == NULL)
-		return PyObject_RichCompareBool(x, y, Py_LT);
-
+	assert(compare != NULL);
 	/* Call the user's comparison function and translate the 3-way
 	 * result into true or false (or error).
 	 */
@@ -800,11 +798,20 @@
 	return i < 0;
 }
 
-/* Compare X to Y via islt().  Goto "fail" if the comparison raises an
+/* If COMPARE is NULL, calls PyObject_RichCompareBool with Py_LT, else calls
+ * islt.  This avoids a layer of function call in the usual case, and
+ * sorting does many comparisons.
+ * Returns -1 on error, 1 if x < y, 0 if x >= y.
+ */
+#define ISLT(X, Y, COMPARE) ((COMPARE) == NULL ?			\
+			     PyObject_RichCompareBool(X, Y, Py_LT) :	\
+			     islt(X, Y, COMPARE))
+
+/* Compare X to Y via "<".  Goto "fail" if the comparison raises an
    error.  Else "k" is set to true iff X<Y, and an "if (k)" block is
    started.  It makes more sense in context <wink>.  X and Y are PyObject*s.
 */
-#define IFLT(X, Y) if ((k = islt(X, Y, compare)) < 0) goto fail;  \
+#define IFLT(X, Y) if ((k = ISLT(X, Y, compare)) < 0) goto fail;  \
 		   if (k)
 
 /* binarysort is the best method for sorting small arrays: it does
@@ -1241,7 +1248,7 @@
 		 * appears to win consistently.
 		 */
  		for (;;) {
-	 		k = islt(*pb, *pa, compare);
+	 		k = ISLT(*pb, *pa, compare);
 			if (k) {
 				if (k < 0)
 					goto Fail;
@@ -1369,7 +1376,7 @@
 		 * appears to win consistently.
 		 */
  		for (;;) {
-	 		k = islt(*pb, *pa, compare);
+	 		k = ISLT(*pb, *pa, compare);
 			if (k) {
 				if (k < 0)
 					goto Fail;
@@ -1683,6 +1690,7 @@
 	return result;
 }
 #undef IFLT
+#undef ISLT
 
 int
 PyList_Sort(PyObject *v)