Small optimizations for list_slice() and list_extend_internal().
* Using addition instead of subtraction on array indices allows the
compiler to use a fast addressing mode. Saves about 10%.
* Using PyList_GET_ITEM or PyTuple_GET_ITEM directly is about 7% faster than
PySequence_Fast_GET_ITEM, which has to make a list check on every pass.
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 162efa0..0508d37 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -342,7 +342,7 @@
list_slice(PyListObject *a, int ilow, int ihigh)
{
PyListObject *np;
- int i;
+ int i, len;
if (ilow < 0)
ilow = 0;
else if (ilow > a->ob_size)
@@ -351,13 +351,15 @@
ihigh = ilow;
else if (ihigh > a->ob_size)
ihigh = a->ob_size;
- np = (PyListObject *) PyList_New(ihigh - ilow);
+ len = ihigh - ilow;
+ np = (PyListObject *) PyList_New(len);
if (np == NULL)
return NULL;
- for (i = ilow; i < ihigh; i++) {
- PyObject *v = a->ob_item[i];
+
+ for (i = 0; i < len; i++) {
+ PyObject *v = a->ob_item[i+ilow];
Py_INCREF(v);
- np->ob_item[i - ilow] = v;
+ np->ob_item[i] = v;
}
return (PyObject *)np;
}
@@ -676,10 +678,19 @@
}
/* populate the end of self with b's items */
- for (i = 0; i < blen; i++) {
- PyObject *o = PySequence_Fast_GET_ITEM(b, i);
- Py_INCREF(o);
- PyList_SET_ITEM(self, i+selflen, o);
+ if (PyList_Check(b)) {
+ for (i = 0; i < blen; i++) {
+ PyObject *o = PyList_GET_ITEM(b, i);
+ Py_INCREF(o);
+ PyList_SET_ITEM(self, i+selflen, o);
+ }
+ } else {
+ assert (PyTuple_Check(b));
+ for (i = 0; i < blen; i++) {
+ PyObject *o = PyTuple_GET_ITEM(b, i);
+ Py_INCREF(o);
+ PyList_SET_ITEM(self, i+selflen, o);
+ }
}
Py_DECREF(b);
return 0;