builtin_zip(): Take a good guess at how big the result list will be,
and allocate it in one gulp.
This isn't a bugfix, it's just a minor optimization that may or may not
pay off.
diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py
index 13ed5bd..c9f6961 100644
--- a/Lib/test/test_iter.py
+++ b/Lib/test/test_iter.py
@@ -467,6 +467,34 @@
except OSError:
pass
+ self.assertEqual(zip(xrange(5)), [(i,) for i in range(5)])
+
+ # Classes that lie about their lengths.
+ class NoGuessLen5:
+ def __getitem__(self, i):
+ if i >= 5:
+ raise IndexError
+ return i
+
+ class Guess3Len5(NoGuessLen5):
+ def __len__(self):
+ return 3
+
+ class Guess30Len5(NoGuessLen5):
+ def __len__(self):
+ return 30
+
+ self.assertEqual(len(Guess3Len5()), 3)
+ self.assertEqual(len(Guess30Len5()), 30)
+ self.assertEqual(zip(NoGuessLen5()), zip(range(5)))
+ self.assertEqual(zip(Guess3Len5()), zip(range(5)))
+ self.assertEqual(zip(Guess30Len5()), zip(range(5)))
+
+ expected = [(i, i) for i in range(5)]
+ for x in NoGuessLen5(), Guess3Len5(), Guess30Len5():
+ for y in NoGuessLen5(), Guess3Len5(), Guess30Len5():
+ self.assertEqual(zip(x, y), expected)
+
# Test reduces()'s use of iterators.
def test_builtin_reduce(self):
from operator import add
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index aa65ebc..fb6810e 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -381,7 +381,7 @@
int supplied_flags = 0;
PyCompilerFlags cf;
- if (!PyArg_ParseTuple(args, "sss|ii:compile", &str, &filename,
+ if (!PyArg_ParseTuple(args, "sss|ii:compile", &str, &filename,
&startstr, &supplied_flags, &dont_inherit))
return NULL;
@@ -1128,7 +1128,7 @@
v = args;
else if (!PyArg_ParseTuple(args, "O:min/max", &v))
return NULL;
-
+
it = PyObject_GetIter(v);
if (it == NULL)
return NULL;
@@ -1704,9 +1704,10 @@
builtin_zip(PyObject *self, PyObject *args)
{
PyObject *ret;
- int itemsize = PySequence_Length(args);
+ const int itemsize = PySequence_Length(args);
int i;
PyObject *itlist; /* tuple of iterators */
+ int len; /* guess at result length */
if (itemsize < 1) {
PyErr_SetString(PyExc_TypeError,
@@ -1716,8 +1717,21 @@
/* args must be a tuple */
assert(PyTuple_Check(args));
+ /* Guess at result length: the shortest of the input lengths. */
+ len = -1; /* unknown */
+ for (i = 0; i < itemsize; ++i) {
+ PyObject *item = PyTuple_GET_ITEM(args, i);
+ int thislen = PySequence_Length(item);
+ if (thislen < 0)
+ PyErr_Clear();
+ else if (len < 0 || thislen < len)
+ len = thislen;
+ }
+
/* allocate result list */
- if ((ret = PyList_New(0)) == NULL)
+ if (len < 0)
+ len = 10; /* arbitrary */
+ if ((ret = PyList_New(len)) == NULL)
return NULL;
/* obtain iterators */
@@ -1738,14 +1752,14 @@
}
/* build result into ret list */
- for (;;) {
- int status;
+ for (i = 0; ; ++i) {
+ int j;
PyObject *next = PyTuple_New(itemsize);
if (!next)
goto Fail_ret_itlist;
- for (i = 0; i < itemsize; i++) {
- PyObject *it = PyTuple_GET_ITEM(itlist, i);
+ for (j = 0; j < itemsize; j++) {
+ PyObject *it = PyTuple_GET_ITEM(itlist, j);
PyObject *item = PyIter_Next(it);
if (!item) {
if (PyErr_Occurred()) {
@@ -1754,17 +1768,30 @@
}
Py_DECREF(next);
Py_DECREF(itlist);
- return ret;
+ goto Done;
}
- PyTuple_SET_ITEM(next, i, item);
+ PyTuple_SET_ITEM(next, j, item);
}
- status = PyList_Append(ret, next);
- Py_DECREF(next);
- if (status < 0)
- goto Fail_ret_itlist;
+ if (i < len)
+ PyList_SET_ITEM(ret, i, next);
+ else {
+ int status = PyList_Append(ret, next);
+ Py_DECREF(next);
+ ++len;
+ if (status < 0)
+ goto Fail_ret_itlist;
+ }
}
+Done:
+ if (ret != NULL && i < len) {
+ /* The list is too big. */
+ if (PyList_SetSlice(ret, i, len, NULL) < 0)
+ return NULL;
+ }
+ return ret;
+
Fail_ret_itlist:
Py_DECREF(itlist);
Fail_ret:
@@ -1864,7 +1891,7 @@
SETBUILTIN("complex", &PyComplex_Type);
#endif
SETBUILTIN("dict", &PyDict_Type);
- SETBUILTIN("enumerate", &PyEnum_Type);
+ SETBUILTIN("enumerate", &PyEnum_Type);
SETBUILTIN("float", &PyFloat_Type);
SETBUILTIN("property", &PyProperty_Type);
SETBUILTIN("int", &PyInt_Type);