Optimize PyList_AsTuple(). Improve cache performance by doing the
pointer copy and the reference count increment in a single pass.  For
small lists, this also saves the overhead of the call to memcpy() --
this comes up in calls like f(*listcomp).
diff --git a/Objects/listobject.c b/Objects/listobject.c
index ca767da..3fa256e 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -2186,7 +2186,7 @@
 PyList_AsTuple(PyObject *v)
 {
 	PyObject *w;
-	PyObject **p;
+	PyObject **p, **q;
 	Py_ssize_t n;
 	if (v == NULL || !PyList_Check(v)) {
 		PyErr_BadInternalCall();
@@ -2197,12 +2197,12 @@
 	if (w == NULL)
 		return NULL;
 	p = ((PyTupleObject *)w)->ob_item;
-	memcpy((void *)p,
-	       (void *)((PyListObject *)v)->ob_item,
-	       n*sizeof(PyObject *));
+	q = ((PyListObject *)v)->ob_item;
 	while (--n >= 0) {
-		Py_INCREF(*p);
+		Py_INCREF(*q);
+		*p = *q;
 		p++;
+		q++;
 	}
 	return w;
 }