blob: be301a62fc6222a833a8ecd078be1a7040084f23 [file] [log] [blame]
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo68079582010-09-03 22:06:31 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Raymond Hettingerec2fe782008-06-06 21:47:51 +000011/* Older implementations of heapq used Py_LE for comparisons. Now, it uses
12 Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
13 client code (Twisted for example) relied on Py_LE, so this little function
Ezio Melotti24b07bc2011-03-15 18:55:01 +020014 restores compatibility by trying both.
Raymond Hettingerec2fe782008-06-06 21:47:51 +000015*/
16static int
17cmp_lt(PyObject *x, PyObject *y)
18{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000019 int cmp;
20 static PyObject *lt = NULL;
Raymond Hettingerf0bc3cb2008-06-11 12:06:49 +000021
Antoine Pitrouc83ea132010-05-09 14:46:46 +000022 if (lt == NULL) {
23 lt = PyString_FromString("__lt__");
24 if (lt == NULL)
25 return -1;
26 }
27 if (PyObject_HasAttr(x, lt))
28 return PyObject_RichCompareBool(x, y, Py_LT);
29 cmp = PyObject_RichCompareBool(y, x, Py_LE);
30 if (cmp != -1)
31 cmp = 1 - cmp;
32 return cmp;
Raymond Hettingerec2fe782008-06-06 21:47:51 +000033}
34
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000035static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000036_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000037{
Raymond Hettinger89543dd2015-05-02 10:26:57 -070038 PyObject *newitem, *parent;
39 Py_ssize_t parentpos, size;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000040 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000041
Antoine Pitrouc83ea132010-05-09 14:46:46 +000042 assert(PyList_Check(heap));
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010043 size = PyList_GET_SIZE(heap);
44 if (pos >= size) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000045 PyErr_SetString(PyExc_IndexError, "index out of range");
46 return -1;
47 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000048
Antoine Pitrouc83ea132010-05-09 14:46:46 +000049 /* Follow the path to the root, moving parents down until finding
50 a place newitem fits. */
Raymond Hettinger89543dd2015-05-02 10:26:57 -070051 newitem = PyList_GET_ITEM(heap, pos);
52 while (pos > startpos) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000053 parentpos = (pos - 1) >> 1;
54 parent = PyList_GET_ITEM(heap, parentpos);
55 cmp = cmp_lt(newitem, parent);
Raymond Hettinger89543dd2015-05-02 10:26:57 -070056 if (cmp == -1)
Antoine Pitrouc83ea132010-05-09 14:46:46 +000057 return -1;
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010058 if (size != PyList_GET_SIZE(heap)) {
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010059 PyErr_SetString(PyExc_RuntimeError,
60 "list changed size during iteration");
61 return -1;
62 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000063 if (cmp == 0)
64 break;
Raymond Hettinger89543dd2015-05-02 10:26:57 -070065 parent = PyList_GET_ITEM(heap, parentpos);
66 newitem = PyList_GET_ITEM(heap, pos);
67 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000068 PyList_SET_ITEM(heap, pos, parent);
69 pos = parentpos;
70 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000071 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000072}
73
74static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000075_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000076{
Raymond Hettinger93434892014-05-03 15:27:14 -070077 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger89543dd2015-05-02 10:26:57 -070078 PyObject *tmp1, *tmp2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000079 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000080
Antoine Pitrouc83ea132010-05-09 14:46:46 +000081 assert(PyList_Check(heap));
Raymond Hettinger89543dd2015-05-02 10:26:57 -070082 endpos = PyList_GET_SIZE(heap);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000083 startpos = pos;
84 if (pos >= endpos) {
85 PyErr_SetString(PyExc_IndexError, "index out of range");
86 return -1;
87 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000088
Antoine Pitrouc83ea132010-05-09 14:46:46 +000089 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettinger93434892014-05-03 15:27:14 -070090 limit = endpos / 2; /* smallest pos that has no child */
91 while (pos < limit) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000092 /* Set childpos to index of smaller child. */
Raymond Hettinger93434892014-05-03 15:27:14 -070093 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000094 rightpos = childpos + 1;
95 if (rightpos < endpos) {
96 cmp = cmp_lt(
97 PyList_GET_ITEM(heap, childpos),
98 PyList_GET_ITEM(heap, rightpos));
Raymond Hettinger89543dd2015-05-02 10:26:57 -070099 if (cmp == -1)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000100 return -1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000101 if (cmp == 0)
102 childpos = rightpos;
Raymond Hettinger89543dd2015-05-02 10:26:57 -0700103 if (endpos != PyList_GET_SIZE(heap)) {
104 PyErr_SetString(PyExc_RuntimeError,
105 "list changed size during iteration");
106 return -1;
107 }
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +0100108 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000109 /* Move the smaller child up. */
Raymond Hettinger89543dd2015-05-02 10:26:57 -0700110 tmp1 = PyList_GET_ITEM(heap, childpos);
111 tmp2 = PyList_GET_ITEM(heap, pos);
112 PyList_SET_ITEM(heap, childpos, tmp2);
113 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000114 pos = childpos;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 }
Raymond Hettinger89543dd2015-05-02 10:26:57 -0700116 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000118}
119
120static PyObject *
121heappush(PyObject *self, PyObject *args)
122{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000123 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000124
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000125 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
126 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000127
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000128 if (!PyList_Check(heap)) {
129 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
130 return NULL;
131 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 if (PyList_Append(heap, item) == -1)
134 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000135
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
137 return NULL;
138 Py_INCREF(Py_None);
139 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000140}
141
142PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800143"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000144
145static PyObject *
146heappop(PyObject *self, PyObject *heap)
147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000148 PyObject *lastelt, *returnitem;
149 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000150
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000151 if (!PyList_Check(heap)) {
152 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
153 return NULL;
154 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000155
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000156 /* # raises appropriate IndexError if heap is empty */
157 n = PyList_GET_SIZE(heap);
158 if (n == 0) {
159 PyErr_SetString(PyExc_IndexError, "index out of range");
160 return NULL;
161 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000162
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000163 lastelt = PyList_GET_ITEM(heap, n-1) ;
164 Py_INCREF(lastelt);
165 PyList_SetSlice(heap, n-1, n, NULL);
166 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000167
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000168 if (!n)
169 return lastelt;
170 returnitem = PyList_GET_ITEM(heap, 0);
171 PyList_SET_ITEM(heap, 0, lastelt);
172 if (_siftup((PyListObject *)heap, 0) == -1) {
173 Py_DECREF(returnitem);
174 return NULL;
175 }
176 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000177}
178
179PyDoc_STRVAR(heappop_doc,
180"Pop the smallest item off the heap, maintaining the heap invariant.");
181
182static PyObject *
183heapreplace(PyObject *self, PyObject *args)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000186
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000187 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
188 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000189
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000190 if (!PyList_Check(heap)) {
191 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
192 return NULL;
193 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 if (PyList_GET_SIZE(heap) < 1) {
196 PyErr_SetString(PyExc_IndexError, "index out of range");
197 return NULL;
198 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000199
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000200 returnitem = PyList_GET_ITEM(heap, 0);
201 Py_INCREF(item);
202 PyList_SET_ITEM(heap, 0, item);
203 if (_siftup((PyListObject *)heap, 0) == -1) {
204 Py_DECREF(returnitem);
205 return NULL;
206 }
207 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000208}
209
210PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800211"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000212\n\
213This is more efficient than heappop() followed by heappush(), and can be\n\
214more appropriate when using a fixed-size heap. Note that the value\n\
215returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000216this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 if item > heap[0]:\n\
218 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000219
220static PyObject *
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000221heappushpop(PyObject *self, PyObject *args)
222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 PyObject *heap, *item, *returnitem;
224 int cmp;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000225
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000226 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
227 return NULL;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000228
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000229 if (!PyList_Check(heap)) {
230 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
231 return NULL;
232 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000233
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000234 if (PyList_GET_SIZE(heap) < 1) {
235 Py_INCREF(item);
236 return item;
237 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000238
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
240 if (cmp == -1)
241 return NULL;
242 if (cmp == 0) {
243 Py_INCREF(item);
244 return item;
245 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000246
Raymond Hettinger3aee0922015-05-11 20:00:25 -0700247 if (PyList_GET_SIZE(heap) == 0) {
248 PyErr_SetString(PyExc_IndexError, "index out of range");
249 return NULL;
250 }
251
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000252 returnitem = PyList_GET_ITEM(heap, 0);
253 Py_INCREF(item);
254 PyList_SET_ITEM(heap, 0, item);
255 if (_siftup((PyListObject *)heap, 0) == -1) {
256 Py_DECREF(returnitem);
257 return NULL;
258 }
259 return returnitem;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000260}
261
262PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800263"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000264from the heap. The combined action runs more efficiently than\n\
265heappush() followed by a separate call to heappop().");
266
267static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000268heapify(PyObject *self, PyObject *heap)
269{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000270 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000271
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000272 if (!PyList_Check(heap)) {
273 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
274 return NULL;
275 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000276
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000277 n = PyList_GET_SIZE(heap);
278 /* Transform bottom-up. The largest index there's any point to
279 looking at is the largest with a child index in-range, so must
280 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
281 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
282 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
283 and that's again n//2-1.
284 */
285 for (i=n/2-1 ; i>=0 ; i--)
286 if(_siftup((PyListObject *)heap, i) == -1)
287 return NULL;
288 Py_INCREF(Py_None);
289 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000290}
291
292PyDoc_STRVAR(heapify_doc,
293"Transform list into a heap, in-place, in O(len(heap)) time.");
294
Raymond Hettingerc9297662004-06-12 22:48:46 +0000295static PyObject *
296nlargest(PyObject *self, PyObject *args)
297{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000298 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
299 Py_ssize_t i, n;
300 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000301
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000302 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
303 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000304
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000305 it = PyObject_GetIter(iterable);
306 if (it == NULL)
307 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000308
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000309 heap = PyList_New(0);
310 if (heap == NULL)
311 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000312
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000313 for (i=0 ; i<n ; i++ ){
314 elem = PyIter_Next(it);
315 if (elem == NULL) {
316 if (PyErr_Occurred())
317 goto fail;
318 else
319 goto sortit;
320 }
321 if (PyList_Append(heap, elem) == -1) {
322 Py_DECREF(elem);
323 goto fail;
324 }
325 Py_DECREF(elem);
326 }
327 if (PyList_GET_SIZE(heap) == 0)
328 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000329
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000330 for (i=n/2-1 ; i>=0 ; i--)
331 if(_siftup((PyListObject *)heap, i) == -1)
332 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000333
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000334 sol = PyList_GET_ITEM(heap, 0);
335 while (1) {
336 elem = PyIter_Next(it);
337 if (elem == NULL) {
338 if (PyErr_Occurred())
339 goto fail;
340 else
341 goto sortit;
342 }
343 cmp = cmp_lt(sol, elem);
344 if (cmp == -1) {
345 Py_DECREF(elem);
346 goto fail;
347 }
348 if (cmp == 0) {
349 Py_DECREF(elem);
350 continue;
351 }
352 oldelem = PyList_GET_ITEM(heap, 0);
353 PyList_SET_ITEM(heap, 0, elem);
354 Py_DECREF(oldelem);
355 if (_siftup((PyListObject *)heap, 0) == -1)
356 goto fail;
357 sol = PyList_GET_ITEM(heap, 0);
358 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000359sortit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000360 if (PyList_Sort(heap) == -1)
361 goto fail;
362 if (PyList_Reverse(heap) == -1)
363 goto fail;
364 Py_DECREF(it);
365 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000366
367fail:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000368 Py_DECREF(it);
369 Py_XDECREF(heap);
370 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000371}
372
373PyDoc_STRVAR(nlargest_doc,
374"Find the n largest elements in a dataset.\n\
375\n\
376Equivalent to: sorted(iterable, reverse=True)[:n]\n");
377
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000378static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000379_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000380{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000381 PyObject *newitem, *parent;
382 int cmp;
383 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000384
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000385 assert(PyList_Check(heap));
386 if (pos >= PyList_GET_SIZE(heap)) {
387 PyErr_SetString(PyExc_IndexError, "index out of range");
388 return -1;
389 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000390
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000391 newitem = PyList_GET_ITEM(heap, pos);
392 Py_INCREF(newitem);
393 /* Follow the path to the root, moving parents down until finding
394 a place newitem fits. */
395 while (pos > startpos){
396 parentpos = (pos - 1) >> 1;
397 parent = PyList_GET_ITEM(heap, parentpos);
398 cmp = cmp_lt(parent, newitem);
399 if (cmp == -1) {
400 Py_DECREF(newitem);
401 return -1;
402 }
403 if (cmp == 0)
404 break;
405 Py_INCREF(parent);
406 Py_DECREF(PyList_GET_ITEM(heap, pos));
407 PyList_SET_ITEM(heap, pos, parent);
408 pos = parentpos;
409 }
410 Py_DECREF(PyList_GET_ITEM(heap, pos));
411 PyList_SET_ITEM(heap, pos, newitem);
412 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000413}
414
415static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000416_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000417{
Raymond Hettinger93434892014-05-03 15:27:14 -0700418 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000419 int cmp;
420 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000421
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000422 assert(PyList_Check(heap));
423 endpos = PyList_GET_SIZE(heap);
424 startpos = pos;
425 if (pos >= endpos) {
426 PyErr_SetString(PyExc_IndexError, "index out of range");
427 return -1;
428 }
429 newitem = PyList_GET_ITEM(heap, pos);
430 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000431
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettinger93434892014-05-03 15:27:14 -0700433 limit = endpos / 2; /* smallest pos that has no child */
434 while (pos < limit) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000435 /* Set childpos to index of smaller child. */
Raymond Hettinger93434892014-05-03 15:27:14 -0700436 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000437 rightpos = childpos + 1;
438 if (rightpos < endpos) {
439 cmp = cmp_lt(
440 PyList_GET_ITEM(heap, rightpos),
441 PyList_GET_ITEM(heap, childpos));
442 if (cmp == -1) {
443 Py_DECREF(newitem);
444 return -1;
445 }
446 if (cmp == 0)
447 childpos = rightpos;
448 }
449 /* Move the smaller child up. */
450 tmp = PyList_GET_ITEM(heap, childpos);
451 Py_INCREF(tmp);
452 Py_DECREF(PyList_GET_ITEM(heap, pos));
453 PyList_SET_ITEM(heap, pos, tmp);
454 pos = childpos;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000455 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000456
Terry Jan Reedyce9cc492013-03-11 17:41:44 -0400457 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000458 it up to its final resting place (by sifting its parents down). */
459 Py_DECREF(PyList_GET_ITEM(heap, pos));
460 PyList_SET_ITEM(heap, pos, newitem);
461 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000462}
463
464static PyObject *
465nsmallest(PyObject *self, PyObject *args)
466{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000467 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
468 Py_ssize_t i, n;
469 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000470
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000471 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
472 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000473
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000474 it = PyObject_GetIter(iterable);
475 if (it == NULL)
476 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000477
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000478 heap = PyList_New(0);
479 if (heap == NULL)
480 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000481
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000482 for (i=0 ; i<n ; i++ ){
483 elem = PyIter_Next(it);
484 if (elem == NULL) {
485 if (PyErr_Occurred())
486 goto fail;
487 else
488 goto sortit;
489 }
490 if (PyList_Append(heap, elem) == -1) {
491 Py_DECREF(elem);
492 goto fail;
493 }
494 Py_DECREF(elem);
495 }
496 n = PyList_GET_SIZE(heap);
497 if (n == 0)
498 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000499
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000500 for (i=n/2-1 ; i>=0 ; i--)
501 if(_siftupmax((PyListObject *)heap, i) == -1)
502 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000503
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000504 los = PyList_GET_ITEM(heap, 0);
505 while (1) {
506 elem = PyIter_Next(it);
507 if (elem == NULL) {
508 if (PyErr_Occurred())
509 goto fail;
510 else
511 goto sortit;
512 }
513 cmp = cmp_lt(elem, los);
514 if (cmp == -1) {
515 Py_DECREF(elem);
516 goto fail;
517 }
518 if (cmp == 0) {
519 Py_DECREF(elem);
520 continue;
521 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000522
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000523 oldelem = PyList_GET_ITEM(heap, 0);
524 PyList_SET_ITEM(heap, 0, elem);
525 Py_DECREF(oldelem);
526 if (_siftupmax((PyListObject *)heap, 0) == -1)
527 goto fail;
528 los = PyList_GET_ITEM(heap, 0);
529 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000530
531sortit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000532 if (PyList_Sort(heap) == -1)
533 goto fail;
534 Py_DECREF(it);
535 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000536
537fail:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000538 Py_DECREF(it);
539 Py_XDECREF(heap);
540 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000541}
542
543PyDoc_STRVAR(nsmallest_doc,
544"Find the n smallest elements in a dataset.\n\
545\n\
546Equivalent to: sorted(iterable)[:n]\n");
547
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000548static PyMethodDef heapq_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000549 {"heappush", (PyCFunction)heappush,
550 METH_VARARGS, heappush_doc},
551 {"heappushpop", (PyCFunction)heappushpop,
552 METH_VARARGS, heappushpop_doc},
553 {"heappop", (PyCFunction)heappop,
554 METH_O, heappop_doc},
555 {"heapreplace", (PyCFunction)heapreplace,
556 METH_VARARGS, heapreplace_doc},
557 {"heapify", (PyCFunction)heapify,
558 METH_O, heapify_doc},
559 {"nlargest", (PyCFunction)nlargest,
560 METH_VARARGS, nlargest_doc},
561 {"nsmallest", (PyCFunction)nsmallest,
562 METH_VARARGS, nsmallest_doc},
563 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000564};
565
566PyDoc_STRVAR(module_doc,
567"Heap queue algorithm (a.k.a. priority queue).\n\
568\n\
569Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
570all k, counting elements from 0. For the sake of comparison,\n\
571non-existing elements are considered to be infinite. The interesting\n\
572property of a heap is that a[0] is always its smallest element.\n\
573\n\
574Usage:\n\
575\n\
576heap = [] # creates an empty heap\n\
577heappush(heap, item) # pushes a new item on the heap\n\
578item = heappop(heap) # pops the smallest item from the heap\n\
579item = heap[0] # smallest item on the heap without popping it\n\
580heapify(x) # transforms list into a heap, in-place, in linear time\n\
581item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
582 # new item; the heap size is unchanged\n\
583\n\
584Our API differs from textbook heap algorithms as follows:\n\
585\n\
586- We use 0-based indexing. This makes the relationship between the\n\
587 index for a node and the indexes for its children slightly less\n\
588 obvious, but is more suitable since Python uses 0-based indexing.\n\
589\n\
590- Our heappop() method returns the smallest item, not the largest.\n\
591\n\
592These two make it possible to view the heap as a regular Python list\n\
593without surprises: heap[0] is the smallest item, and heap.sort()\n\
594maintains the heap invariant!\n");
595
596
597PyDoc_STRVAR(__about__,
598"Heap queues\n\
599\n\
600[explanation by François Pinard]\n\
601\n\
602Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
603all k, counting elements from 0. For the sake of comparison,\n\
604non-existing elements are considered to be infinite. The interesting\n\
605property of a heap is that a[0] is always its smallest element.\n"
606"\n\
607The strange invariant above is meant to be an efficient memory\n\
608representation for a tournament. The numbers below are `k', not a[k]:\n\
609\n\
610 0\n\
611\n\
612 1 2\n\
613\n\
614 3 4 5 6\n\
615\n\
616 7 8 9 10 11 12 13 14\n\
617\n\
618 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
619\n\
620\n\
621In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
622an usual binary tournament we see in sports, each cell is the winner\n\
623over the two cells it tops, and we can trace the winner down the tree\n\
624to see all opponents s/he had. However, in many computer applications\n\
625of such tournaments, we do not need to trace the history of a winner.\n\
626To be more memory efficient, when a winner is promoted, we try to\n\
627replace it by something else at a lower level, and the rule becomes\n\
628that a cell and the two cells it tops contain three different items,\n\
629but the top cell \"wins\" over the two topped cells.\n"
630"\n\
631If this heap invariant is protected at all time, index 0 is clearly\n\
632the overall winner. The simplest algorithmic way to remove it and\n\
633find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
634diagram above) into the 0 position, and then percolate this new 0 down\n\
635the tree, exchanging values, until the invariant is re-established.\n\
636This is clearly logarithmic on the total number of items in the tree.\n\
637By iterating over all items, you get an O(n ln n) sort.\n"
638"\n\
639A nice feature of this sort is that you can efficiently insert new\n\
640items while the sort is going on, provided that the inserted items are\n\
641not \"better\" than the last 0'th element you extracted. This is\n\
642especially useful in simulation contexts, where the tree holds all\n\
643incoming events, and the \"win\" condition means the smallest scheduled\n\
644time. When an event schedule other events for execution, they are\n\
645scheduled into the future, so they can easily go into the heap. So, a\n\
646heap is a good structure for implementing schedulers (this is what I\n\
647used for my MIDI sequencer :-).\n"
648"\n\
649Various structures for implementing schedulers have been extensively\n\
650studied, and heaps are good for this, as they are reasonably speedy,\n\
651the speed is almost constant, and the worst case is not much different\n\
652than the average case. However, there are other representations which\n\
653are more efficient overall, yet the worst cases might be terrible.\n"
654"\n\
655Heaps are also very useful in big disk sorts. You most probably all\n\
656know that a big sort implies producing \"runs\" (which are pre-sorted\n\
657sequences, which size is usually related to the amount of CPU memory),\n\
658followed by a merging passes for these runs, which merging is often\n\
659very cleverly organised[1]. It is very important that the initial\n\
660sort produces the longest runs possible. Tournaments are a good way\n\
661to that. If, using all the memory available to hold a tournament, you\n\
662replace and percolate items that happen to fit the current run, you'll\n\
663produce runs which are twice the size of the memory for random input,\n\
664and much better for input fuzzily ordered.\n"
665"\n\
666Moreover, if you output the 0'th item on disk and get an input which\n\
667may not fit in the current tournament (because the value \"wins\" over\n\
668the last output value), it cannot fit in the heap, so the size of the\n\
669heap decreases. The freed memory could be cleverly reused immediately\n\
670for progressively building a second heap, which grows at exactly the\n\
671same rate the first heap is melting. When the first heap completely\n\
672vanishes, you switch heaps and start a new run. Clever and quite\n\
673effective!\n\
674\n\
675In a word, heaps are useful memory structures to know. I use them in\n\
676a few applications, and I think it is good to keep a `heap' module\n\
677around. :-)\n"
678"\n\
679--------------------\n\
680[1] The disk balancing algorithms which are current, nowadays, are\n\
681more annoying than clever, and this is a consequence of the seeking\n\
682capabilities of the disks. On devices which cannot seek, like big\n\
683tape drives, the story was quite different, and one had to be very\n\
684clever to ensure (far in advance) that each tape movement will be the\n\
685most effective possible (that is, will best participate at\n\
686\"progressing\" the merge). Some tapes were even able to read\n\
687backwards, and this was also used to avoid the rewinding time.\n\
688Believe me, real good tape sorts were quite spectacular to watch!\n\
689From all times, sorting has always been a Great Art! :-)\n");
690
691PyMODINIT_FUNC
692init_heapq(void)
693{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000694 PyObject *m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000695
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000696 m = Py_InitModule3("_heapq", heapq_methods, module_doc);
697 if (m == NULL)
698 return;
699 PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000700}
701