blob: ec1da054008b2022cb5205987e1c9467b90cd005 [file] [log] [blame]
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo68079582010-09-03 22:06:31 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Raymond Hettingerec2fe782008-06-06 21:47:51 +000011/* Older implementations of heapq used Py_LE for comparisons. Now, it uses
12 Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
13 client code (Twisted for example) relied on Py_LE, so this little function
Ezio Melotti24b07bc2011-03-15 18:55:01 +020014 restores compatibility by trying both.
Raymond Hettingerec2fe782008-06-06 21:47:51 +000015*/
16static int
17cmp_lt(PyObject *x, PyObject *y)
18{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000019 int cmp;
20 static PyObject *lt = NULL;
Raymond Hettingerf0bc3cb2008-06-11 12:06:49 +000021
Antoine Pitrouc83ea132010-05-09 14:46:46 +000022 if (lt == NULL) {
23 lt = PyString_FromString("__lt__");
24 if (lt == NULL)
25 return -1;
26 }
27 if (PyObject_HasAttr(x, lt))
28 return PyObject_RichCompareBool(x, y, Py_LT);
29 cmp = PyObject_RichCompareBool(y, x, Py_LE);
30 if (cmp != -1)
31 cmp = 1 - cmp;
32 return cmp;
Raymond Hettingerec2fe782008-06-06 21:47:51 +000033}
34
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000035static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000036_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000037{
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010038 PyObject *newitem, *parent, *olditem;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000039 int cmp;
40 Py_ssize_t parentpos;
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010041 Py_ssize_t size;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000042
Antoine Pitrouc83ea132010-05-09 14:46:46 +000043 assert(PyList_Check(heap));
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010044 size = PyList_GET_SIZE(heap);
45 if (pos >= size) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000046 PyErr_SetString(PyExc_IndexError, "index out of range");
47 return -1;
48 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000049
Antoine Pitrouc83ea132010-05-09 14:46:46 +000050 newitem = PyList_GET_ITEM(heap, pos);
51 Py_INCREF(newitem);
52 /* Follow the path to the root, moving parents down until finding
53 a place newitem fits. */
54 while (pos > startpos){
55 parentpos = (pos - 1) >> 1;
56 parent = PyList_GET_ITEM(heap, parentpos);
57 cmp = cmp_lt(newitem, parent);
58 if (cmp == -1) {
59 Py_DECREF(newitem);
60 return -1;
61 }
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010062 if (size != PyList_GET_SIZE(heap)) {
63 Py_DECREF(newitem);
64 PyErr_SetString(PyExc_RuntimeError,
65 "list changed size during iteration");
66 return -1;
67 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000068 if (cmp == 0)
69 break;
70 Py_INCREF(parent);
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010071 olditem = PyList_GET_ITEM(heap, pos);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000072 PyList_SET_ITEM(heap, pos, parent);
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010073 Py_DECREF(olditem);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000074 pos = parentpos;
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010075 if (size != PyList_GET_SIZE(heap)) {
76 PyErr_SetString(PyExc_RuntimeError,
77 "list changed size during iteration");
78 return -1;
79 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000080 }
81 Py_DECREF(PyList_GET_ITEM(heap, pos));
82 PyList_SET_ITEM(heap, pos, newitem);
83 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000084}
85
86static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000087_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000088{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000089 Py_ssize_t startpos, endpos, childpos, rightpos;
90 int cmp;
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010091 PyObject *newitem, *tmp, *olditem;
92 Py_ssize_t size;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000093
Antoine Pitrouc83ea132010-05-09 14:46:46 +000094 assert(PyList_Check(heap));
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010095 size = PyList_GET_SIZE(heap);
96 endpos = size;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 startpos = pos;
98 if (pos >= endpos) {
99 PyErr_SetString(PyExc_IndexError, "index out of range");
100 return -1;
101 }
102 newitem = PyList_GET_ITEM(heap, pos);
103 Py_INCREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000104
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000105 /* Bubble up the smaller child until hitting a leaf. */
106 childpos = 2*pos + 1; /* leftmost child position */
107 while (childpos < endpos) {
108 /* Set childpos to index of smaller child. */
109 rightpos = childpos + 1;
110 if (rightpos < endpos) {
111 cmp = cmp_lt(
112 PyList_GET_ITEM(heap, childpos),
113 PyList_GET_ITEM(heap, rightpos));
114 if (cmp == -1) {
115 Py_DECREF(newitem);
116 return -1;
117 }
118 if (cmp == 0)
119 childpos = rightpos;
120 }
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +0100121 if (size != PyList_GET_SIZE(heap)) {
122 Py_DECREF(newitem);
123 PyErr_SetString(PyExc_RuntimeError,
124 "list changed size during iteration");
125 return -1;
126 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000127 /* Move the smaller child up. */
128 tmp = PyList_GET_ITEM(heap, childpos);
129 Py_INCREF(tmp);
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +0100130 olditem = PyList_GET_ITEM(heap, pos);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyList_SET_ITEM(heap, pos, tmp);
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +0100132 Py_DECREF(olditem);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 pos = childpos;
134 childpos = 2*pos + 1;
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +0100135 if (size != PyList_GET_SIZE(heap)) {
136 PyErr_SetString(PyExc_RuntimeError,
137 "list changed size during iteration");
138 return -1;
139 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000140 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000141
Terry Jan Reedyce9cc492013-03-11 17:41:44 -0400142 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000143 it up to its final resting place (by sifting its parents down). */
144 Py_DECREF(PyList_GET_ITEM(heap, pos));
145 PyList_SET_ITEM(heap, pos, newitem);
146 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000147}
148
149static PyObject *
150heappush(PyObject *self, PyObject *args)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
155 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000156
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000157 if (!PyList_Check(heap)) {
158 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
159 return NULL;
160 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000161
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000162 if (PyList_Append(heap, item) == -1)
163 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000164
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000165 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
166 return NULL;
167 Py_INCREF(Py_None);
168 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000169}
170
171PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800172"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000173
174static PyObject *
175heappop(PyObject *self, PyObject *heap)
176{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 PyObject *lastelt, *returnitem;
178 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000179
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 if (!PyList_Check(heap)) {
181 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
182 return NULL;
183 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000184
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* # raises appropriate IndexError if heap is empty */
186 n = PyList_GET_SIZE(heap);
187 if (n == 0) {
188 PyErr_SetString(PyExc_IndexError, "index out of range");
189 return NULL;
190 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000191
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000192 lastelt = PyList_GET_ITEM(heap, n-1) ;
193 Py_INCREF(lastelt);
194 PyList_SetSlice(heap, n-1, n, NULL);
195 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000196
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 if (!n)
198 return lastelt;
199 returnitem = PyList_GET_ITEM(heap, 0);
200 PyList_SET_ITEM(heap, 0, lastelt);
201 if (_siftup((PyListObject *)heap, 0) == -1) {
202 Py_DECREF(returnitem);
203 return NULL;
204 }
205 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000206}
207
208PyDoc_STRVAR(heappop_doc,
209"Pop the smallest item off the heap, maintaining the heap invariant.");
210
211static PyObject *
212heapreplace(PyObject *self, PyObject *args)
213{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000215
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000216 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
217 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000218
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000219 if (!PyList_Check(heap)) {
220 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
221 return NULL;
222 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000223
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 if (PyList_GET_SIZE(heap) < 1) {
225 PyErr_SetString(PyExc_IndexError, "index out of range");
226 return NULL;
227 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000228
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000229 returnitem = PyList_GET_ITEM(heap, 0);
230 Py_INCREF(item);
231 PyList_SET_ITEM(heap, 0, item);
232 if (_siftup((PyListObject *)heap, 0) == -1) {
233 Py_DECREF(returnitem);
234 return NULL;
235 }
236 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000237}
238
239PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800240"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000241\n\
242This is more efficient than heappop() followed by heappush(), and can be\n\
243more appropriate when using a fixed-size heap. Note that the value\n\
244returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000245this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000246 if item > heap[0]:\n\
247 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000248
249static PyObject *
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000250heappushpop(PyObject *self, PyObject *args)
251{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000252 PyObject *heap, *item, *returnitem;
253 int cmp;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000254
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000255 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
256 return NULL;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000257
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000258 if (!PyList_Check(heap)) {
259 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
260 return NULL;
261 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000262
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000263 if (PyList_GET_SIZE(heap) < 1) {
264 Py_INCREF(item);
265 return item;
266 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000267
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000268 cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
269 if (cmp == -1)
270 return NULL;
271 if (cmp == 0) {
272 Py_INCREF(item);
273 return item;
274 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000275
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000276 returnitem = PyList_GET_ITEM(heap, 0);
277 Py_INCREF(item);
278 PyList_SET_ITEM(heap, 0, item);
279 if (_siftup((PyListObject *)heap, 0) == -1) {
280 Py_DECREF(returnitem);
281 return NULL;
282 }
283 return returnitem;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000284}
285
286PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800287"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000288from the heap. The combined action runs more efficiently than\n\
289heappush() followed by a separate call to heappop().");
290
291static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000292heapify(PyObject *self, PyObject *heap)
293{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000294 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000295
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000296 if (!PyList_Check(heap)) {
297 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
298 return NULL;
299 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000300
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000301 n = PyList_GET_SIZE(heap);
302 /* Transform bottom-up. The largest index there's any point to
303 looking at is the largest with a child index in-range, so must
304 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
305 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
306 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
307 and that's again n//2-1.
308 */
309 for (i=n/2-1 ; i>=0 ; i--)
310 if(_siftup((PyListObject *)heap, i) == -1)
311 return NULL;
312 Py_INCREF(Py_None);
313 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000314}
315
316PyDoc_STRVAR(heapify_doc,
317"Transform list into a heap, in-place, in O(len(heap)) time.");
318
Raymond Hettingerc9297662004-06-12 22:48:46 +0000319static PyObject *
320nlargest(PyObject *self, PyObject *args)
321{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000322 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
323 Py_ssize_t i, n;
324 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000325
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000326 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
327 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000328
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 it = PyObject_GetIter(iterable);
330 if (it == NULL)
331 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000332
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000333 heap = PyList_New(0);
334 if (heap == NULL)
335 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000336
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000337 for (i=0 ; i<n ; i++ ){
338 elem = PyIter_Next(it);
339 if (elem == NULL) {
340 if (PyErr_Occurred())
341 goto fail;
342 else
343 goto sortit;
344 }
345 if (PyList_Append(heap, elem) == -1) {
346 Py_DECREF(elem);
347 goto fail;
348 }
349 Py_DECREF(elem);
350 }
351 if (PyList_GET_SIZE(heap) == 0)
352 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000353
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000354 for (i=n/2-1 ; i>=0 ; i--)
355 if(_siftup((PyListObject *)heap, i) == -1)
356 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000357
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000358 sol = PyList_GET_ITEM(heap, 0);
359 while (1) {
360 elem = PyIter_Next(it);
361 if (elem == NULL) {
362 if (PyErr_Occurred())
363 goto fail;
364 else
365 goto sortit;
366 }
367 cmp = cmp_lt(sol, elem);
368 if (cmp == -1) {
369 Py_DECREF(elem);
370 goto fail;
371 }
372 if (cmp == 0) {
373 Py_DECREF(elem);
374 continue;
375 }
376 oldelem = PyList_GET_ITEM(heap, 0);
377 PyList_SET_ITEM(heap, 0, elem);
378 Py_DECREF(oldelem);
379 if (_siftup((PyListObject *)heap, 0) == -1)
380 goto fail;
381 sol = PyList_GET_ITEM(heap, 0);
382 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000383sortit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000384 if (PyList_Sort(heap) == -1)
385 goto fail;
386 if (PyList_Reverse(heap) == -1)
387 goto fail;
388 Py_DECREF(it);
389 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000390
391fail:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000392 Py_DECREF(it);
393 Py_XDECREF(heap);
394 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000395}
396
397PyDoc_STRVAR(nlargest_doc,
398"Find the n largest elements in a dataset.\n\
399\n\
400Equivalent to: sorted(iterable, reverse=True)[:n]\n");
401
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000402static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000403_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000404{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000405 PyObject *newitem, *parent;
406 int cmp;
407 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000408
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000409 assert(PyList_Check(heap));
410 if (pos >= PyList_GET_SIZE(heap)) {
411 PyErr_SetString(PyExc_IndexError, "index out of range");
412 return -1;
413 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000414
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000415 newitem = PyList_GET_ITEM(heap, pos);
416 Py_INCREF(newitem);
417 /* Follow the path to the root, moving parents down until finding
418 a place newitem fits. */
419 while (pos > startpos){
420 parentpos = (pos - 1) >> 1;
421 parent = PyList_GET_ITEM(heap, parentpos);
422 cmp = cmp_lt(parent, newitem);
423 if (cmp == -1) {
424 Py_DECREF(newitem);
425 return -1;
426 }
427 if (cmp == 0)
428 break;
429 Py_INCREF(parent);
430 Py_DECREF(PyList_GET_ITEM(heap, pos));
431 PyList_SET_ITEM(heap, pos, parent);
432 pos = parentpos;
433 }
434 Py_DECREF(PyList_GET_ITEM(heap, pos));
435 PyList_SET_ITEM(heap, pos, newitem);
436 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000437}
438
439static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000440_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000441{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000442 Py_ssize_t startpos, endpos, childpos, rightpos;
443 int cmp;
444 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000445
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000446 assert(PyList_Check(heap));
447 endpos = PyList_GET_SIZE(heap);
448 startpos = pos;
449 if (pos >= endpos) {
450 PyErr_SetString(PyExc_IndexError, "index out of range");
451 return -1;
452 }
453 newitem = PyList_GET_ITEM(heap, pos);
454 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000455
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000456 /* Bubble up the smaller child until hitting a leaf. */
457 childpos = 2*pos + 1; /* leftmost child position */
458 while (childpos < endpos) {
459 /* Set childpos to index of smaller child. */
460 rightpos = childpos + 1;
461 if (rightpos < endpos) {
462 cmp = cmp_lt(
463 PyList_GET_ITEM(heap, rightpos),
464 PyList_GET_ITEM(heap, childpos));
465 if (cmp == -1) {
466 Py_DECREF(newitem);
467 return -1;
468 }
469 if (cmp == 0)
470 childpos = rightpos;
471 }
472 /* Move the smaller child up. */
473 tmp = PyList_GET_ITEM(heap, childpos);
474 Py_INCREF(tmp);
475 Py_DECREF(PyList_GET_ITEM(heap, pos));
476 PyList_SET_ITEM(heap, pos, tmp);
477 pos = childpos;
478 childpos = 2*pos + 1;
479 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000480
Terry Jan Reedyce9cc492013-03-11 17:41:44 -0400481 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000482 it up to its final resting place (by sifting its parents down). */
483 Py_DECREF(PyList_GET_ITEM(heap, pos));
484 PyList_SET_ITEM(heap, pos, newitem);
485 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000486}
487
488static PyObject *
489nsmallest(PyObject *self, PyObject *args)
490{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000491 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
492 Py_ssize_t i, n;
493 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000494
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000495 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
496 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000497
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000498 it = PyObject_GetIter(iterable);
499 if (it == NULL)
500 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000501
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000502 heap = PyList_New(0);
503 if (heap == NULL)
504 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000505
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000506 for (i=0 ; i<n ; i++ ){
507 elem = PyIter_Next(it);
508 if (elem == NULL) {
509 if (PyErr_Occurred())
510 goto fail;
511 else
512 goto sortit;
513 }
514 if (PyList_Append(heap, elem) == -1) {
515 Py_DECREF(elem);
516 goto fail;
517 }
518 Py_DECREF(elem);
519 }
520 n = PyList_GET_SIZE(heap);
521 if (n == 0)
522 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000523
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000524 for (i=n/2-1 ; i>=0 ; i--)
525 if(_siftupmax((PyListObject *)heap, i) == -1)
526 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000527
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000528 los = PyList_GET_ITEM(heap, 0);
529 while (1) {
530 elem = PyIter_Next(it);
531 if (elem == NULL) {
532 if (PyErr_Occurred())
533 goto fail;
534 else
535 goto sortit;
536 }
537 cmp = cmp_lt(elem, los);
538 if (cmp == -1) {
539 Py_DECREF(elem);
540 goto fail;
541 }
542 if (cmp == 0) {
543 Py_DECREF(elem);
544 continue;
545 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000546
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000547 oldelem = PyList_GET_ITEM(heap, 0);
548 PyList_SET_ITEM(heap, 0, elem);
549 Py_DECREF(oldelem);
550 if (_siftupmax((PyListObject *)heap, 0) == -1)
551 goto fail;
552 los = PyList_GET_ITEM(heap, 0);
553 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000554
555sortit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000556 if (PyList_Sort(heap) == -1)
557 goto fail;
558 Py_DECREF(it);
559 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000560
561fail:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000562 Py_DECREF(it);
563 Py_XDECREF(heap);
564 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000565}
566
567PyDoc_STRVAR(nsmallest_doc,
568"Find the n smallest elements in a dataset.\n\
569\n\
570Equivalent to: sorted(iterable)[:n]\n");
571
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000572static PyMethodDef heapq_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000573 {"heappush", (PyCFunction)heappush,
574 METH_VARARGS, heappush_doc},
575 {"heappushpop", (PyCFunction)heappushpop,
576 METH_VARARGS, heappushpop_doc},
577 {"heappop", (PyCFunction)heappop,
578 METH_O, heappop_doc},
579 {"heapreplace", (PyCFunction)heapreplace,
580 METH_VARARGS, heapreplace_doc},
581 {"heapify", (PyCFunction)heapify,
582 METH_O, heapify_doc},
583 {"nlargest", (PyCFunction)nlargest,
584 METH_VARARGS, nlargest_doc},
585 {"nsmallest", (PyCFunction)nsmallest,
586 METH_VARARGS, nsmallest_doc},
587 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000588};
589
590PyDoc_STRVAR(module_doc,
591"Heap queue algorithm (a.k.a. priority queue).\n\
592\n\
593Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
594all k, counting elements from 0. For the sake of comparison,\n\
595non-existing elements are considered to be infinite. The interesting\n\
596property of a heap is that a[0] is always its smallest element.\n\
597\n\
598Usage:\n\
599\n\
600heap = [] # creates an empty heap\n\
601heappush(heap, item) # pushes a new item on the heap\n\
602item = heappop(heap) # pops the smallest item from the heap\n\
603item = heap[0] # smallest item on the heap without popping it\n\
604heapify(x) # transforms list into a heap, in-place, in linear time\n\
605item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
606 # new item; the heap size is unchanged\n\
607\n\
608Our API differs from textbook heap algorithms as follows:\n\
609\n\
610- We use 0-based indexing. This makes the relationship between the\n\
611 index for a node and the indexes for its children slightly less\n\
612 obvious, but is more suitable since Python uses 0-based indexing.\n\
613\n\
614- Our heappop() method returns the smallest item, not the largest.\n\
615\n\
616These two make it possible to view the heap as a regular Python list\n\
617without surprises: heap[0] is the smallest item, and heap.sort()\n\
618maintains the heap invariant!\n");
619
620
621PyDoc_STRVAR(__about__,
622"Heap queues\n\
623\n\
624[explanation by François Pinard]\n\
625\n\
626Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
627all k, counting elements from 0. For the sake of comparison,\n\
628non-existing elements are considered to be infinite. The interesting\n\
629property of a heap is that a[0] is always its smallest element.\n"
630"\n\
631The strange invariant above is meant to be an efficient memory\n\
632representation for a tournament. The numbers below are `k', not a[k]:\n\
633\n\
634 0\n\
635\n\
636 1 2\n\
637\n\
638 3 4 5 6\n\
639\n\
640 7 8 9 10 11 12 13 14\n\
641\n\
642 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
643\n\
644\n\
645In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
646an usual binary tournament we see in sports, each cell is the winner\n\
647over the two cells it tops, and we can trace the winner down the tree\n\
648to see all opponents s/he had. However, in many computer applications\n\
649of such tournaments, we do not need to trace the history of a winner.\n\
650To be more memory efficient, when a winner is promoted, we try to\n\
651replace it by something else at a lower level, and the rule becomes\n\
652that a cell and the two cells it tops contain three different items,\n\
653but the top cell \"wins\" over the two topped cells.\n"
654"\n\
655If this heap invariant is protected at all time, index 0 is clearly\n\
656the overall winner. The simplest algorithmic way to remove it and\n\
657find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
658diagram above) into the 0 position, and then percolate this new 0 down\n\
659the tree, exchanging values, until the invariant is re-established.\n\
660This is clearly logarithmic on the total number of items in the tree.\n\
661By iterating over all items, you get an O(n ln n) sort.\n"
662"\n\
663A nice feature of this sort is that you can efficiently insert new\n\
664items while the sort is going on, provided that the inserted items are\n\
665not \"better\" than the last 0'th element you extracted. This is\n\
666especially useful in simulation contexts, where the tree holds all\n\
667incoming events, and the \"win\" condition means the smallest scheduled\n\
668time. When an event schedule other events for execution, they are\n\
669scheduled into the future, so they can easily go into the heap. So, a\n\
670heap is a good structure for implementing schedulers (this is what I\n\
671used for my MIDI sequencer :-).\n"
672"\n\
673Various structures for implementing schedulers have been extensively\n\
674studied, and heaps are good for this, as they are reasonably speedy,\n\
675the speed is almost constant, and the worst case is not much different\n\
676than the average case. However, there are other representations which\n\
677are more efficient overall, yet the worst cases might be terrible.\n"
678"\n\
679Heaps are also very useful in big disk sorts. You most probably all\n\
680know that a big sort implies producing \"runs\" (which are pre-sorted\n\
681sequences, which size is usually related to the amount of CPU memory),\n\
682followed by a merging passes for these runs, which merging is often\n\
683very cleverly organised[1]. It is very important that the initial\n\
684sort produces the longest runs possible. Tournaments are a good way\n\
685to that. If, using all the memory available to hold a tournament, you\n\
686replace and percolate items that happen to fit the current run, you'll\n\
687produce runs which are twice the size of the memory for random input,\n\
688and much better for input fuzzily ordered.\n"
689"\n\
690Moreover, if you output the 0'th item on disk and get an input which\n\
691may not fit in the current tournament (because the value \"wins\" over\n\
692the last output value), it cannot fit in the heap, so the size of the\n\
693heap decreases. The freed memory could be cleverly reused immediately\n\
694for progressively building a second heap, which grows at exactly the\n\
695same rate the first heap is melting. When the first heap completely\n\
696vanishes, you switch heaps and start a new run. Clever and quite\n\
697effective!\n\
698\n\
699In a word, heaps are useful memory structures to know. I use them in\n\
700a few applications, and I think it is good to keep a `heap' module\n\
701around. :-)\n"
702"\n\
703--------------------\n\
704[1] The disk balancing algorithms which are current, nowadays, are\n\
705more annoying than clever, and this is a consequence of the seeking\n\
706capabilities of the disks. On devices which cannot seek, like big\n\
707tape drives, the story was quite different, and one had to be very\n\
708clever to ensure (far in advance) that each tape movement will be the\n\
709most effective possible (that is, will best participate at\n\
710\"progressing\" the merge). Some tapes were even able to read\n\
711backwards, and this was also used to avoid the rewinding time.\n\
712Believe me, real good tape sorts were quite spectacular to watch!\n\
713From all times, sorting has always been a Great Art! :-)\n");
714
715PyMODINIT_FUNC
716init_heapq(void)
717{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000718 PyObject *m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000719
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000720 m = Py_InitModule3("_heapq", heapq_methods, module_doc);
721 if (m == NULL)
722 return;
723 PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000724}
725