blob: d0ddb09fb1593372a9d3ce5ca4a06a9f69e40141 [file] [log] [blame]
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo68079582010-09-03 22:06:31 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Raymond Hettingerec2fe782008-06-06 21:47:51 +000011/* Older implementations of heapq used Py_LE for comparisons. Now, it uses
12 Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
13 client code (Twisted for example) relied on Py_LE, so this little function
Ezio Melotti24b07bc2011-03-15 18:55:01 +020014 restores compatibility by trying both.
Raymond Hettingerec2fe782008-06-06 21:47:51 +000015*/
16static int
17cmp_lt(PyObject *x, PyObject *y)
18{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000019 int cmp;
20 static PyObject *lt = NULL;
Raymond Hettingerf0bc3cb2008-06-11 12:06:49 +000021
Antoine Pitrouc83ea132010-05-09 14:46:46 +000022 if (lt == NULL) {
23 lt = PyString_FromString("__lt__");
24 if (lt == NULL)
25 return -1;
26 }
27 if (PyObject_HasAttr(x, lt))
28 return PyObject_RichCompareBool(x, y, Py_LT);
29 cmp = PyObject_RichCompareBool(y, x, Py_LE);
30 if (cmp != -1)
31 cmp = 1 - cmp;
32 return cmp;
Raymond Hettingerec2fe782008-06-06 21:47:51 +000033}
34
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000035static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000036_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000037{
Raymond Hettinger89543dd2015-05-02 10:26:57 -070038 PyObject *newitem, *parent;
39 Py_ssize_t parentpos, size;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000040 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000041
Antoine Pitrouc83ea132010-05-09 14:46:46 +000042 assert(PyList_Check(heap));
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010043 size = PyList_GET_SIZE(heap);
44 if (pos >= size) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000045 PyErr_SetString(PyExc_IndexError, "index out of range");
46 return -1;
47 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000048
Antoine Pitrouc83ea132010-05-09 14:46:46 +000049 /* Follow the path to the root, moving parents down until finding
50 a place newitem fits. */
Raymond Hettinger89543dd2015-05-02 10:26:57 -070051 newitem = PyList_GET_ITEM(heap, pos);
52 while (pos > startpos) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000053 parentpos = (pos - 1) >> 1;
54 parent = PyList_GET_ITEM(heap, parentpos);
55 cmp = cmp_lt(newitem, parent);
Raymond Hettinger89543dd2015-05-02 10:26:57 -070056 if (cmp == -1)
Antoine Pitrouc83ea132010-05-09 14:46:46 +000057 return -1;
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010058 if (size != PyList_GET_SIZE(heap)) {
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +010059 PyErr_SetString(PyExc_RuntimeError,
60 "list changed size during iteration");
61 return -1;
62 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000063 if (cmp == 0)
64 break;
Raymond Hettinger89543dd2015-05-02 10:26:57 -070065 parent = PyList_GET_ITEM(heap, parentpos);
66 newitem = PyList_GET_ITEM(heap, pos);
67 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000068 PyList_SET_ITEM(heap, pos, parent);
69 pos = parentpos;
70 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +000071 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000072}
73
74static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000075_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000076{
Raymond Hettinger93434892014-05-03 15:27:14 -070077 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger89543dd2015-05-02 10:26:57 -070078 PyObject *tmp1, *tmp2;
Antoine Pitrouc83ea132010-05-09 14:46:46 +000079 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000080
Antoine Pitrouc83ea132010-05-09 14:46:46 +000081 assert(PyList_Check(heap));
Raymond Hettinger89543dd2015-05-02 10:26:57 -070082 endpos = PyList_GET_SIZE(heap);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000083 startpos = pos;
84 if (pos >= endpos) {
85 PyErr_SetString(PyExc_IndexError, "index out of range");
86 return -1;
87 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000088
Antoine Pitrouc83ea132010-05-09 14:46:46 +000089 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettinger93434892014-05-03 15:27:14 -070090 limit = endpos / 2; /* smallest pos that has no child */
91 while (pos < limit) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000092 /* Set childpos to index of smaller child. */
Raymond Hettinger93434892014-05-03 15:27:14 -070093 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000094 rightpos = childpos + 1;
95 if (rightpos < endpos) {
96 cmp = cmp_lt(
97 PyList_GET_ITEM(heap, childpos),
98 PyList_GET_ITEM(heap, rightpos));
Raymond Hettinger89543dd2015-05-02 10:26:57 -070099 if (cmp == -1)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000100 return -1;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000101 if (cmp == 0)
102 childpos = rightpos;
Raymond Hettinger89543dd2015-05-02 10:26:57 -0700103 if (endpos != PyList_GET_SIZE(heap)) {
104 PyErr_SetString(PyExc_RuntimeError,
105 "list changed size during iteration");
106 return -1;
107 }
Antoine Pitrou49e4dfe2013-03-04 20:30:01 +0100108 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000109 /* Move the smaller child up. */
Raymond Hettinger89543dd2015-05-02 10:26:57 -0700110 tmp1 = PyList_GET_ITEM(heap, childpos);
111 tmp2 = PyList_GET_ITEM(heap, pos);
112 PyList_SET_ITEM(heap, childpos, tmp2);
113 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000114 pos = childpos;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 }
Raymond Hettinger89543dd2015-05-02 10:26:57 -0700116 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000118}
119
120static PyObject *
121heappush(PyObject *self, PyObject *args)
122{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000123 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000124
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000125 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
126 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000127
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000128 if (!PyList_Check(heap)) {
129 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
130 return NULL;
131 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 if (PyList_Append(heap, item) == -1)
134 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000135
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
137 return NULL;
138 Py_INCREF(Py_None);
139 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000140}
141
142PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800143"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000144
145static PyObject *
146heappop(PyObject *self, PyObject *heap)
147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000148 PyObject *lastelt, *returnitem;
149 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000150
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000151 if (!PyList_Check(heap)) {
152 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
153 return NULL;
154 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000155
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000156 /* # raises appropriate IndexError if heap is empty */
157 n = PyList_GET_SIZE(heap);
158 if (n == 0) {
159 PyErr_SetString(PyExc_IndexError, "index out of range");
160 return NULL;
161 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000162
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000163 lastelt = PyList_GET_ITEM(heap, n-1) ;
164 Py_INCREF(lastelt);
165 PyList_SetSlice(heap, n-1, n, NULL);
166 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000167
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000168 if (!n)
169 return lastelt;
170 returnitem = PyList_GET_ITEM(heap, 0);
171 PyList_SET_ITEM(heap, 0, lastelt);
172 if (_siftup((PyListObject *)heap, 0) == -1) {
173 Py_DECREF(returnitem);
174 return NULL;
175 }
176 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000177}
178
179PyDoc_STRVAR(heappop_doc,
180"Pop the smallest item off the heap, maintaining the heap invariant.");
181
182static PyObject *
183heapreplace(PyObject *self, PyObject *args)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000186
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000187 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
188 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000189
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000190 if (!PyList_Check(heap)) {
191 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
192 return NULL;
193 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 if (PyList_GET_SIZE(heap) < 1) {
196 PyErr_SetString(PyExc_IndexError, "index out of range");
197 return NULL;
198 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000199
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000200 returnitem = PyList_GET_ITEM(heap, 0);
201 Py_INCREF(item);
202 PyList_SET_ITEM(heap, 0, item);
203 if (_siftup((PyListObject *)heap, 0) == -1) {
204 Py_DECREF(returnitem);
205 return NULL;
206 }
207 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000208}
209
210PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800211"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000212\n\
213This is more efficient than heappop() followed by heappush(), and can be\n\
214more appropriate when using a fixed-size heap. Note that the value\n\
215returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000216this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 if item > heap[0]:\n\
218 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000219
220static PyObject *
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000221heappushpop(PyObject *self, PyObject *args)
222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 PyObject *heap, *item, *returnitem;
224 int cmp;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000225
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000226 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
227 return NULL;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000228
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000229 if (!PyList_Check(heap)) {
230 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
231 return NULL;
232 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000233
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000234 if (PyList_GET_SIZE(heap) < 1) {
235 Py_INCREF(item);
236 return item;
237 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000238
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
240 if (cmp == -1)
241 return NULL;
242 if (cmp == 0) {
243 Py_INCREF(item);
244 return item;
245 }
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000246
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000247 returnitem = PyList_GET_ITEM(heap, 0);
248 Py_INCREF(item);
249 PyList_SET_ITEM(heap, 0, item);
250 if (_siftup((PyListObject *)heap, 0) == -1) {
251 Py_DECREF(returnitem);
252 return NULL;
253 }
254 return returnitem;
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000255}
256
257PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbca22092013-01-18 17:35:25 -0800258"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000259from the heap. The combined action runs more efficiently than\n\
260heappush() followed by a separate call to heappop().");
261
262static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000263heapify(PyObject *self, PyObject *heap)
264{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000265 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000266
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000267 if (!PyList_Check(heap)) {
268 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
269 return NULL;
270 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000271
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000272 n = PyList_GET_SIZE(heap);
273 /* Transform bottom-up. The largest index there's any point to
274 looking at is the largest with a child index in-range, so must
275 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
276 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
277 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
278 and that's again n//2-1.
279 */
280 for (i=n/2-1 ; i>=0 ; i--)
281 if(_siftup((PyListObject *)heap, i) == -1)
282 return NULL;
283 Py_INCREF(Py_None);
284 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000285}
286
287PyDoc_STRVAR(heapify_doc,
288"Transform list into a heap, in-place, in O(len(heap)) time.");
289
Raymond Hettingerc9297662004-06-12 22:48:46 +0000290static PyObject *
291nlargest(PyObject *self, PyObject *args)
292{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000293 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
294 Py_ssize_t i, n;
295 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000296
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000297 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
298 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000299
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000300 it = PyObject_GetIter(iterable);
301 if (it == NULL)
302 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000303
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 heap = PyList_New(0);
305 if (heap == NULL)
306 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000307
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000308 for (i=0 ; i<n ; i++ ){
309 elem = PyIter_Next(it);
310 if (elem == NULL) {
311 if (PyErr_Occurred())
312 goto fail;
313 else
314 goto sortit;
315 }
316 if (PyList_Append(heap, elem) == -1) {
317 Py_DECREF(elem);
318 goto fail;
319 }
320 Py_DECREF(elem);
321 }
322 if (PyList_GET_SIZE(heap) == 0)
323 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000324
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 for (i=n/2-1 ; i>=0 ; i--)
326 if(_siftup((PyListObject *)heap, i) == -1)
327 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000328
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 sol = PyList_GET_ITEM(heap, 0);
330 while (1) {
331 elem = PyIter_Next(it);
332 if (elem == NULL) {
333 if (PyErr_Occurred())
334 goto fail;
335 else
336 goto sortit;
337 }
338 cmp = cmp_lt(sol, elem);
339 if (cmp == -1) {
340 Py_DECREF(elem);
341 goto fail;
342 }
343 if (cmp == 0) {
344 Py_DECREF(elem);
345 continue;
346 }
347 oldelem = PyList_GET_ITEM(heap, 0);
348 PyList_SET_ITEM(heap, 0, elem);
349 Py_DECREF(oldelem);
350 if (_siftup((PyListObject *)heap, 0) == -1)
351 goto fail;
352 sol = PyList_GET_ITEM(heap, 0);
353 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000354sortit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000355 if (PyList_Sort(heap) == -1)
356 goto fail;
357 if (PyList_Reverse(heap) == -1)
358 goto fail;
359 Py_DECREF(it);
360 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000361
362fail:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000363 Py_DECREF(it);
364 Py_XDECREF(heap);
365 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000366}
367
368PyDoc_STRVAR(nlargest_doc,
369"Find the n largest elements in a dataset.\n\
370\n\
371Equivalent to: sorted(iterable, reverse=True)[:n]\n");
372
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000373static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000374_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000375{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000376 PyObject *newitem, *parent;
377 int cmp;
378 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000379
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000380 assert(PyList_Check(heap));
381 if (pos >= PyList_GET_SIZE(heap)) {
382 PyErr_SetString(PyExc_IndexError, "index out of range");
383 return -1;
384 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000385
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000386 newitem = PyList_GET_ITEM(heap, pos);
387 Py_INCREF(newitem);
388 /* Follow the path to the root, moving parents down until finding
389 a place newitem fits. */
390 while (pos > startpos){
391 parentpos = (pos - 1) >> 1;
392 parent = PyList_GET_ITEM(heap, parentpos);
393 cmp = cmp_lt(parent, newitem);
394 if (cmp == -1) {
395 Py_DECREF(newitem);
396 return -1;
397 }
398 if (cmp == 0)
399 break;
400 Py_INCREF(parent);
401 Py_DECREF(PyList_GET_ITEM(heap, pos));
402 PyList_SET_ITEM(heap, pos, parent);
403 pos = parentpos;
404 }
405 Py_DECREF(PyList_GET_ITEM(heap, pos));
406 PyList_SET_ITEM(heap, pos, newitem);
407 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000408}
409
410static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000411_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000412{
Raymond Hettinger93434892014-05-03 15:27:14 -0700413 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 int cmp;
415 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000416
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 assert(PyList_Check(heap));
418 endpos = PyList_GET_SIZE(heap);
419 startpos = pos;
420 if (pos >= endpos) {
421 PyErr_SetString(PyExc_IndexError, "index out of range");
422 return -1;
423 }
424 newitem = PyList_GET_ITEM(heap, pos);
425 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000426
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000427 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettinger93434892014-05-03 15:27:14 -0700428 limit = endpos / 2; /* smallest pos that has no child */
429 while (pos < limit) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000430 /* Set childpos to index of smaller child. */
Raymond Hettinger93434892014-05-03 15:27:14 -0700431 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 rightpos = childpos + 1;
433 if (rightpos < endpos) {
434 cmp = cmp_lt(
435 PyList_GET_ITEM(heap, rightpos),
436 PyList_GET_ITEM(heap, childpos));
437 if (cmp == -1) {
438 Py_DECREF(newitem);
439 return -1;
440 }
441 if (cmp == 0)
442 childpos = rightpos;
443 }
444 /* Move the smaller child up. */
445 tmp = PyList_GET_ITEM(heap, childpos);
446 Py_INCREF(tmp);
447 Py_DECREF(PyList_GET_ITEM(heap, pos));
448 PyList_SET_ITEM(heap, pos, tmp);
449 pos = childpos;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000450 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000451
Terry Jan Reedyce9cc492013-03-11 17:41:44 -0400452 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000453 it up to its final resting place (by sifting its parents down). */
454 Py_DECREF(PyList_GET_ITEM(heap, pos));
455 PyList_SET_ITEM(heap, pos, newitem);
456 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000457}
458
459static PyObject *
460nsmallest(PyObject *self, PyObject *args)
461{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000462 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
463 Py_ssize_t i, n;
464 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000465
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000466 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
467 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000468
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000469 it = PyObject_GetIter(iterable);
470 if (it == NULL)
471 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000472
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000473 heap = PyList_New(0);
474 if (heap == NULL)
475 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000476
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000477 for (i=0 ; i<n ; i++ ){
478 elem = PyIter_Next(it);
479 if (elem == NULL) {
480 if (PyErr_Occurred())
481 goto fail;
482 else
483 goto sortit;
484 }
485 if (PyList_Append(heap, elem) == -1) {
486 Py_DECREF(elem);
487 goto fail;
488 }
489 Py_DECREF(elem);
490 }
491 n = PyList_GET_SIZE(heap);
492 if (n == 0)
493 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000494
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000495 for (i=n/2-1 ; i>=0 ; i--)
496 if(_siftupmax((PyListObject *)heap, i) == -1)
497 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000498
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000499 los = PyList_GET_ITEM(heap, 0);
500 while (1) {
501 elem = PyIter_Next(it);
502 if (elem == NULL) {
503 if (PyErr_Occurred())
504 goto fail;
505 else
506 goto sortit;
507 }
508 cmp = cmp_lt(elem, los);
509 if (cmp == -1) {
510 Py_DECREF(elem);
511 goto fail;
512 }
513 if (cmp == 0) {
514 Py_DECREF(elem);
515 continue;
516 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000517
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000518 oldelem = PyList_GET_ITEM(heap, 0);
519 PyList_SET_ITEM(heap, 0, elem);
520 Py_DECREF(oldelem);
521 if (_siftupmax((PyListObject *)heap, 0) == -1)
522 goto fail;
523 los = PyList_GET_ITEM(heap, 0);
524 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000525
526sortit:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000527 if (PyList_Sort(heap) == -1)
528 goto fail;
529 Py_DECREF(it);
530 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000531
532fail:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000533 Py_DECREF(it);
534 Py_XDECREF(heap);
535 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000536}
537
538PyDoc_STRVAR(nsmallest_doc,
539"Find the n smallest elements in a dataset.\n\
540\n\
541Equivalent to: sorted(iterable)[:n]\n");
542
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000543static PyMethodDef heapq_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000544 {"heappush", (PyCFunction)heappush,
545 METH_VARARGS, heappush_doc},
546 {"heappushpop", (PyCFunction)heappushpop,
547 METH_VARARGS, heappushpop_doc},
548 {"heappop", (PyCFunction)heappop,
549 METH_O, heappop_doc},
550 {"heapreplace", (PyCFunction)heapreplace,
551 METH_VARARGS, heapreplace_doc},
552 {"heapify", (PyCFunction)heapify,
553 METH_O, heapify_doc},
554 {"nlargest", (PyCFunction)nlargest,
555 METH_VARARGS, nlargest_doc},
556 {"nsmallest", (PyCFunction)nsmallest,
557 METH_VARARGS, nsmallest_doc},
558 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000559};
560
561PyDoc_STRVAR(module_doc,
562"Heap queue algorithm (a.k.a. priority queue).\n\
563\n\
564Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
565all k, counting elements from 0. For the sake of comparison,\n\
566non-existing elements are considered to be infinite. The interesting\n\
567property of a heap is that a[0] is always its smallest element.\n\
568\n\
569Usage:\n\
570\n\
571heap = [] # creates an empty heap\n\
572heappush(heap, item) # pushes a new item on the heap\n\
573item = heappop(heap) # pops the smallest item from the heap\n\
574item = heap[0] # smallest item on the heap without popping it\n\
575heapify(x) # transforms list into a heap, in-place, in linear time\n\
576item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
577 # new item; the heap size is unchanged\n\
578\n\
579Our API differs from textbook heap algorithms as follows:\n\
580\n\
581- We use 0-based indexing. This makes the relationship between the\n\
582 index for a node and the indexes for its children slightly less\n\
583 obvious, but is more suitable since Python uses 0-based indexing.\n\
584\n\
585- Our heappop() method returns the smallest item, not the largest.\n\
586\n\
587These two make it possible to view the heap as a regular Python list\n\
588without surprises: heap[0] is the smallest item, and heap.sort()\n\
589maintains the heap invariant!\n");
590
591
592PyDoc_STRVAR(__about__,
593"Heap queues\n\
594\n\
595[explanation by François Pinard]\n\
596\n\
597Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
598all k, counting elements from 0. For the sake of comparison,\n\
599non-existing elements are considered to be infinite. The interesting\n\
600property of a heap is that a[0] is always its smallest element.\n"
601"\n\
602The strange invariant above is meant to be an efficient memory\n\
603representation for a tournament. The numbers below are `k', not a[k]:\n\
604\n\
605 0\n\
606\n\
607 1 2\n\
608\n\
609 3 4 5 6\n\
610\n\
611 7 8 9 10 11 12 13 14\n\
612\n\
613 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
614\n\
615\n\
616In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
617an usual binary tournament we see in sports, each cell is the winner\n\
618over the two cells it tops, and we can trace the winner down the tree\n\
619to see all opponents s/he had. However, in many computer applications\n\
620of such tournaments, we do not need to trace the history of a winner.\n\
621To be more memory efficient, when a winner is promoted, we try to\n\
622replace it by something else at a lower level, and the rule becomes\n\
623that a cell and the two cells it tops contain three different items,\n\
624but the top cell \"wins\" over the two topped cells.\n"
625"\n\
626If this heap invariant is protected at all time, index 0 is clearly\n\
627the overall winner. The simplest algorithmic way to remove it and\n\
628find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
629diagram above) into the 0 position, and then percolate this new 0 down\n\
630the tree, exchanging values, until the invariant is re-established.\n\
631This is clearly logarithmic on the total number of items in the tree.\n\
632By iterating over all items, you get an O(n ln n) sort.\n"
633"\n\
634A nice feature of this sort is that you can efficiently insert new\n\
635items while the sort is going on, provided that the inserted items are\n\
636not \"better\" than the last 0'th element you extracted. This is\n\
637especially useful in simulation contexts, where the tree holds all\n\
638incoming events, and the \"win\" condition means the smallest scheduled\n\
639time. When an event schedule other events for execution, they are\n\
640scheduled into the future, so they can easily go into the heap. So, a\n\
641heap is a good structure for implementing schedulers (this is what I\n\
642used for my MIDI sequencer :-).\n"
643"\n\
644Various structures for implementing schedulers have been extensively\n\
645studied, and heaps are good for this, as they are reasonably speedy,\n\
646the speed is almost constant, and the worst case is not much different\n\
647than the average case. However, there are other representations which\n\
648are more efficient overall, yet the worst cases might be terrible.\n"
649"\n\
650Heaps are also very useful in big disk sorts. You most probably all\n\
651know that a big sort implies producing \"runs\" (which are pre-sorted\n\
652sequences, which size is usually related to the amount of CPU memory),\n\
653followed by a merging passes for these runs, which merging is often\n\
654very cleverly organised[1]. It is very important that the initial\n\
655sort produces the longest runs possible. Tournaments are a good way\n\
656to that. If, using all the memory available to hold a tournament, you\n\
657replace and percolate items that happen to fit the current run, you'll\n\
658produce runs which are twice the size of the memory for random input,\n\
659and much better for input fuzzily ordered.\n"
660"\n\
661Moreover, if you output the 0'th item on disk and get an input which\n\
662may not fit in the current tournament (because the value \"wins\" over\n\
663the last output value), it cannot fit in the heap, so the size of the\n\
664heap decreases. The freed memory could be cleverly reused immediately\n\
665for progressively building a second heap, which grows at exactly the\n\
666same rate the first heap is melting. When the first heap completely\n\
667vanishes, you switch heaps and start a new run. Clever and quite\n\
668effective!\n\
669\n\
670In a word, heaps are useful memory structures to know. I use them in\n\
671a few applications, and I think it is good to keep a `heap' module\n\
672around. :-)\n"
673"\n\
674--------------------\n\
675[1] The disk balancing algorithms which are current, nowadays, are\n\
676more annoying than clever, and this is a consequence of the seeking\n\
677capabilities of the disks. On devices which cannot seek, like big\n\
678tape drives, the story was quite different, and one had to be very\n\
679clever to ensure (far in advance) that each tape movement will be the\n\
680most effective possible (that is, will best participate at\n\
681\"progressing\" the merge). Some tapes were even able to read\n\
682backwards, and this was also used to avoid the rewinding time.\n\
683Believe me, real good tape sorts were quite spectacular to watch!\n\
684From all times, sorting has always been a Great Art! :-)\n");
685
686PyMODINIT_FUNC
687init_heapq(void)
688{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000689 PyObject *m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000690
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000691 m = Py_InitModule3("_heapq", heapq_methods, module_doc);
692 if (m == NULL)
693 return;
694 PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000695}
696