blob: 366fb3d51cb1b64a0bfde05727fbdba7c8aa003c [file] [log] [blame]
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by François Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
12cmp_lt(PyObject *x, PyObject *y)
13{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000014 return PyObject_RichCompareBool(x, y, Py_LT);
Georg Brandlf78e02b2008-06-10 17:40:04 +000015}
16
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000017static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000018_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000020 PyObject *newitem, *parent;
21 int cmp;
22 Py_ssize_t parentpos;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 assert(PyList_Check(heap));
25 if (pos >= PyList_GET_SIZE(heap)) {
26 PyErr_SetString(PyExc_IndexError, "index out of range");
27 return -1;
28 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000030 newitem = PyList_GET_ITEM(heap, pos);
31 Py_INCREF(newitem);
32 /* Follow the path to the root, moving parents down until finding
33 a place newitem fits. */
34 while (pos > startpos){
35 parentpos = (pos - 1) >> 1;
36 parent = PyList_GET_ITEM(heap, parentpos);
37 cmp = cmp_lt(newitem, parent);
38 if (cmp == -1) {
39 Py_DECREF(newitem);
40 return -1;
41 }
42 if (cmp == 0)
43 break;
44 Py_INCREF(parent);
45 Py_DECREF(PyList_GET_ITEM(heap, pos));
46 PyList_SET_ITEM(heap, pos, parent);
47 pos = parentpos;
48 }
49 Py_DECREF(PyList_GET_ITEM(heap, pos));
50 PyList_SET_ITEM(heap, pos, newitem);
51 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000052}
53
54static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000055_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000056{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 Py_ssize_t startpos, endpos, childpos, rightpos;
58 int cmp;
59 PyObject *newitem, *tmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000061 assert(PyList_Check(heap));
62 endpos = PyList_GET_SIZE(heap);
63 startpos = pos;
64 if (pos >= endpos) {
65 PyErr_SetString(PyExc_IndexError, "index out of range");
66 return -1;
67 }
68 newitem = PyList_GET_ITEM(heap, pos);
69 Py_INCREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000071 /* Bubble up the smaller child until hitting a leaf. */
72 childpos = 2*pos + 1; /* leftmost child position */
73 while (childpos < endpos) {
74 /* Set childpos to index of smaller child. */
75 rightpos = childpos + 1;
76 if (rightpos < endpos) {
77 cmp = cmp_lt(
78 PyList_GET_ITEM(heap, childpos),
79 PyList_GET_ITEM(heap, rightpos));
80 if (cmp == -1) {
81 Py_DECREF(newitem);
82 return -1;
83 }
84 if (cmp == 0)
85 childpos = rightpos;
86 }
87 /* Move the smaller child up. */
88 tmp = PyList_GET_ITEM(heap, childpos);
89 Py_INCREF(tmp);
90 Py_DECREF(PyList_GET_ITEM(heap, pos));
91 PyList_SET_ITEM(heap, pos, tmp);
92 pos = childpos;
93 childpos = 2*pos + 1;
94 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 /* The leaf at pos is empty now. Put newitem there, and and bubble
97 it up to its final resting place (by sifting its parents down). */
98 Py_DECREF(PyList_GET_ITEM(heap, pos));
99 PyList_SET_ITEM(heap, pos, newitem);
100 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000101}
102
103static PyObject *
104heappush(PyObject *self, PyObject *args)
105{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
109 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 if (!PyList_Check(heap)) {
112 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
113 return NULL;
114 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 if (PyList_Append(heap, item) == -1)
117 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
120 return NULL;
121 Py_INCREF(Py_None);
122 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000123}
124
125PyDoc_STRVAR(heappush_doc,
126"Push item onto heap, maintaining the heap invariant.");
127
128static PyObject *
129heappop(PyObject *self, PyObject *heap)
130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 PyObject *lastelt, *returnitem;
132 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 if (!PyList_Check(heap)) {
135 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
136 return NULL;
137 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 /* # raises appropriate IndexError if heap is empty */
140 n = PyList_GET_SIZE(heap);
141 if (n == 0) {
142 PyErr_SetString(PyExc_IndexError, "index out of range");
143 return NULL;
144 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 lastelt = PyList_GET_ITEM(heap, n-1) ;
147 Py_INCREF(lastelt);
148 PyList_SetSlice(heap, n-1, n, NULL);
149 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 if (!n)
152 return lastelt;
153 returnitem = PyList_GET_ITEM(heap, 0);
154 PyList_SET_ITEM(heap, 0, lastelt);
155 if (_siftup((PyListObject *)heap, 0) == -1) {
156 Py_DECREF(returnitem);
157 return NULL;
158 }
159 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000160}
161
162PyDoc_STRVAR(heappop_doc,
163"Pop the smallest item off the heap, maintaining the heap invariant.");
164
165static PyObject *
166heapreplace(PyObject *self, PyObject *args)
167{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
171 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 if (!PyList_Check(heap)) {
174 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
175 return NULL;
176 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 if (PyList_GET_SIZE(heap) < 1) {
179 PyErr_SetString(PyExc_IndexError, "index out of range");
180 return NULL;
181 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 returnitem = PyList_GET_ITEM(heap, 0);
184 Py_INCREF(item);
185 PyList_SET_ITEM(heap, 0, item);
186 if (_siftup((PyListObject *)heap, 0) == -1) {
187 Py_DECREF(returnitem);
188 return NULL;
189 }
190 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000191}
192
193PyDoc_STRVAR(heapreplace_doc,
194"Pop and return the current smallest value, and add the new item.\n\
195\n\
196This is more efficient than heappop() followed by heappush(), and can be\n\
197more appropriate when using a fixed-size heap. Note that the value\n\
198returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000199this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 if item > heap[0]:\n\
201 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000202
203static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000204heappushpop(PyObject *self, PyObject *args)
205{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 PyObject *heap, *item, *returnitem;
207 int cmp;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
210 return NULL;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 if (!PyList_Check(heap)) {
213 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
214 return NULL;
215 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (PyList_GET_SIZE(heap) < 1) {
218 Py_INCREF(item);
219 return item;
220 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
223 if (cmp == -1)
224 return NULL;
225 if (cmp == 0) {
226 Py_INCREF(item);
227 return item;
228 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 returnitem = PyList_GET_ITEM(heap, 0);
231 Py_INCREF(item);
232 PyList_SET_ITEM(heap, 0, item);
233 if (_siftup((PyListObject *)heap, 0) == -1) {
234 Py_DECREF(returnitem);
235 return NULL;
236 }
237 return returnitem;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000238}
239
240PyDoc_STRVAR(heappushpop_doc,
241"Push item on the heap, then pop and return the smallest item\n\
242from the heap. The combined action runs more efficiently than\n\
243heappush() followed by a separate call to heappop().");
244
245static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000246heapify(PyObject *self, PyObject *heap)
247{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (!PyList_Check(heap)) {
251 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
252 return NULL;
253 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 n = PyList_GET_SIZE(heap);
256 /* Transform bottom-up. The largest index there's any point to
257 looking at is the largest with a child index in-range, so must
258 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
259 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
260 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
261 and that's again n//2-1.
262 */
263 for (i=n/2-1 ; i>=0 ; i--)
264 if(_siftup((PyListObject *)heap, i) == -1)
265 return NULL;
266 Py_INCREF(Py_None);
267 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000268}
269
270PyDoc_STRVAR(heapify_doc,
271"Transform list into a heap, in-place, in O(len(heap)) time.");
272
Raymond Hettingerc9297662004-06-12 22:48:46 +0000273static PyObject *
274nlargest(PyObject *self, PyObject *args)
275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
277 Py_ssize_t i, n;
278 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
281 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 it = PyObject_GetIter(iterable);
284 if (it == NULL)
285 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 heap = PyList_New(0);
288 if (heap == NULL)
289 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 for (i=0 ; i<n ; i++ ){
292 elem = PyIter_Next(it);
293 if (elem == NULL) {
294 if (PyErr_Occurred())
295 goto fail;
296 else
297 goto sortit;
298 }
299 if (PyList_Append(heap, elem) == -1) {
300 Py_DECREF(elem);
301 goto fail;
302 }
303 Py_DECREF(elem);
304 }
305 if (PyList_GET_SIZE(heap) == 0)
306 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 for (i=n/2-1 ; i>=0 ; i--)
309 if(_siftup((PyListObject *)heap, i) == -1)
310 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 sol = PyList_GET_ITEM(heap, 0);
313 while (1) {
314 elem = PyIter_Next(it);
315 if (elem == NULL) {
316 if (PyErr_Occurred())
317 goto fail;
318 else
319 goto sortit;
320 }
321 cmp = cmp_lt(sol, elem);
322 if (cmp == -1) {
323 Py_DECREF(elem);
324 goto fail;
325 }
326 if (cmp == 0) {
327 Py_DECREF(elem);
328 continue;
329 }
330 oldelem = PyList_GET_ITEM(heap, 0);
331 PyList_SET_ITEM(heap, 0, elem);
332 Py_DECREF(oldelem);
333 if (_siftup((PyListObject *)heap, 0) == -1)
334 goto fail;
335 sol = PyList_GET_ITEM(heap, 0);
336 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000337sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 if (PyList_Sort(heap) == -1)
339 goto fail;
340 if (PyList_Reverse(heap) == -1)
341 goto fail;
342 Py_DECREF(it);
343 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000344
345fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 Py_DECREF(it);
347 Py_XDECREF(heap);
348 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000349}
350
351PyDoc_STRVAR(nlargest_doc,
352"Find the n largest elements in a dataset.\n\
353\n\
354Equivalent to: sorted(iterable, reverse=True)[:n]\n");
355
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000356static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000357_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 PyObject *newitem, *parent;
360 int cmp;
361 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 assert(PyList_Check(heap));
364 if (pos >= PyList_GET_SIZE(heap)) {
365 PyErr_SetString(PyExc_IndexError, "index out of range");
366 return -1;
367 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 newitem = PyList_GET_ITEM(heap, pos);
370 Py_INCREF(newitem);
371 /* Follow the path to the root, moving parents down until finding
372 a place newitem fits. */
373 while (pos > startpos){
374 parentpos = (pos - 1) >> 1;
375 parent = PyList_GET_ITEM(heap, parentpos);
376 cmp = cmp_lt(parent, newitem);
377 if (cmp == -1) {
378 Py_DECREF(newitem);
379 return -1;
380 }
381 if (cmp == 0)
382 break;
383 Py_INCREF(parent);
384 Py_DECREF(PyList_GET_ITEM(heap, pos));
385 PyList_SET_ITEM(heap, pos, parent);
386 pos = parentpos;
387 }
388 Py_DECREF(PyList_GET_ITEM(heap, pos));
389 PyList_SET_ITEM(heap, pos, newitem);
390 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000391}
392
393static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000394_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 Py_ssize_t startpos, endpos, childpos, rightpos;
397 int cmp;
398 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 assert(PyList_Check(heap));
401 endpos = PyList_GET_SIZE(heap);
402 startpos = pos;
403 if (pos >= endpos) {
404 PyErr_SetString(PyExc_IndexError, "index out of range");
405 return -1;
406 }
407 newitem = PyList_GET_ITEM(heap, pos);
408 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* Bubble up the smaller child until hitting a leaf. */
411 childpos = 2*pos + 1; /* leftmost child position */
412 while (childpos < endpos) {
413 /* Set childpos to index of smaller child. */
414 rightpos = childpos + 1;
415 if (rightpos < endpos) {
416 cmp = cmp_lt(
417 PyList_GET_ITEM(heap, rightpos),
418 PyList_GET_ITEM(heap, childpos));
419 if (cmp == -1) {
420 Py_DECREF(newitem);
421 return -1;
422 }
423 if (cmp == 0)
424 childpos = rightpos;
425 }
426 /* Move the smaller child up. */
427 tmp = PyList_GET_ITEM(heap, childpos);
428 Py_INCREF(tmp);
429 Py_DECREF(PyList_GET_ITEM(heap, pos));
430 PyList_SET_ITEM(heap, pos, tmp);
431 pos = childpos;
432 childpos = 2*pos + 1;
433 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 /* The leaf at pos is empty now. Put newitem there, and and bubble
436 it up to its final resting place (by sifting its parents down). */
437 Py_DECREF(PyList_GET_ITEM(heap, pos));
438 PyList_SET_ITEM(heap, pos, newitem);
439 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000440}
441
442static PyObject *
443nsmallest(PyObject *self, PyObject *args)
444{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
446 Py_ssize_t i, n;
447 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
450 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 it = PyObject_GetIter(iterable);
453 if (it == NULL)
454 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 heap = PyList_New(0);
457 if (heap == NULL)
458 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 for (i=0 ; i<n ; i++ ){
461 elem = PyIter_Next(it);
462 if (elem == NULL) {
463 if (PyErr_Occurred())
464 goto fail;
465 else
466 goto sortit;
467 }
468 if (PyList_Append(heap, elem) == -1) {
469 Py_DECREF(elem);
470 goto fail;
471 }
472 Py_DECREF(elem);
473 }
474 n = PyList_GET_SIZE(heap);
475 if (n == 0)
476 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 for (i=n/2-1 ; i>=0 ; i--)
479 if(_siftupmax((PyListObject *)heap, i) == -1)
480 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000481
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 los = PyList_GET_ITEM(heap, 0);
483 while (1) {
484 elem = PyIter_Next(it);
485 if (elem == NULL) {
486 if (PyErr_Occurred())
487 goto fail;
488 else
489 goto sortit;
490 }
491 cmp = cmp_lt(elem, los);
492 if (cmp == -1) {
493 Py_DECREF(elem);
494 goto fail;
495 }
496 if (cmp == 0) {
497 Py_DECREF(elem);
498 continue;
499 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 oldelem = PyList_GET_ITEM(heap, 0);
502 PyList_SET_ITEM(heap, 0, elem);
503 Py_DECREF(oldelem);
504 if (_siftupmax((PyListObject *)heap, 0) == -1)
505 goto fail;
506 los = PyList_GET_ITEM(heap, 0);
507 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000508
509sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 if (PyList_Sort(heap) == -1)
511 goto fail;
512 Py_DECREF(it);
513 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000514
515fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000516 Py_DECREF(it);
517 Py_XDECREF(heap);
518 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000519}
520
521PyDoc_STRVAR(nsmallest_doc,
522"Find the n smallest elements in a dataset.\n\
523\n\
524Equivalent to: sorted(iterable)[:n]\n");
525
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000526static PyMethodDef heapq_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 {"heappush", (PyCFunction)heappush,
528 METH_VARARGS, heappush_doc},
529 {"heappushpop", (PyCFunction)heappushpop,
530 METH_VARARGS, heappushpop_doc},
531 {"heappop", (PyCFunction)heappop,
532 METH_O, heappop_doc},
533 {"heapreplace", (PyCFunction)heapreplace,
534 METH_VARARGS, heapreplace_doc},
535 {"heapify", (PyCFunction)heapify,
536 METH_O, heapify_doc},
537 {"nlargest", (PyCFunction)nlargest,
538 METH_VARARGS, nlargest_doc},
539 {"nsmallest", (PyCFunction)nsmallest,
540 METH_VARARGS, nsmallest_doc},
541 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000542};
543
544PyDoc_STRVAR(module_doc,
545"Heap queue algorithm (a.k.a. priority queue).\n\
546\n\
547Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
548all k, counting elements from 0. For the sake of comparison,\n\
549non-existing elements are considered to be infinite. The interesting\n\
550property of a heap is that a[0] is always its smallest element.\n\
551\n\
552Usage:\n\
553\n\
554heap = [] # creates an empty heap\n\
555heappush(heap, item) # pushes a new item on the heap\n\
556item = heappop(heap) # pops the smallest item from the heap\n\
557item = heap[0] # smallest item on the heap without popping it\n\
558heapify(x) # transforms list into a heap, in-place, in linear time\n\
559item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
560 # new item; the heap size is unchanged\n\
561\n\
562Our API differs from textbook heap algorithms as follows:\n\
563\n\
564- We use 0-based indexing. This makes the relationship between the\n\
565 index for a node and the indexes for its children slightly less\n\
566 obvious, but is more suitable since Python uses 0-based indexing.\n\
567\n\
568- Our heappop() method returns the smallest item, not the largest.\n\
569\n\
570These two make it possible to view the heap as a regular Python list\n\
571without surprises: heap[0] is the smallest item, and heap.sort()\n\
572maintains the heap invariant!\n");
573
574
575PyDoc_STRVAR(__about__,
576"Heap queues\n\
577\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000578[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000579\n\
580Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
581all k, counting elements from 0. For the sake of comparison,\n\
582non-existing elements are considered to be infinite. The interesting\n\
583property of a heap is that a[0] is always its smallest element.\n"
584"\n\
585The strange invariant above is meant to be an efficient memory\n\
586representation for a tournament. The numbers below are `k', not a[k]:\n\
587\n\
588 0\n\
589\n\
590 1 2\n\
591\n\
592 3 4 5 6\n\
593\n\
594 7 8 9 10 11 12 13 14\n\
595\n\
596 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
597\n\
598\n\
599In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
600an usual binary tournament we see in sports, each cell is the winner\n\
601over the two cells it tops, and we can trace the winner down the tree\n\
602to see all opponents s/he had. However, in many computer applications\n\
603of such tournaments, we do not need to trace the history of a winner.\n\
604To be more memory efficient, when a winner is promoted, we try to\n\
605replace it by something else at a lower level, and the rule becomes\n\
606that a cell and the two cells it tops contain three different items,\n\
607but the top cell \"wins\" over the two topped cells.\n"
608"\n\
609If this heap invariant is protected at all time, index 0 is clearly\n\
610the overall winner. The simplest algorithmic way to remove it and\n\
611find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
612diagram above) into the 0 position, and then percolate this new 0 down\n\
613the tree, exchanging values, until the invariant is re-established.\n\
614This is clearly logarithmic on the total number of items in the tree.\n\
615By iterating over all items, you get an O(n ln n) sort.\n"
616"\n\
617A nice feature of this sort is that you can efficiently insert new\n\
618items while the sort is going on, provided that the inserted items are\n\
619not \"better\" than the last 0'th element you extracted. This is\n\
620especially useful in simulation contexts, where the tree holds all\n\
621incoming events, and the \"win\" condition means the smallest scheduled\n\
622time. When an event schedule other events for execution, they are\n\
623scheduled into the future, so they can easily go into the heap. So, a\n\
624heap is a good structure for implementing schedulers (this is what I\n\
625used for my MIDI sequencer :-).\n"
626"\n\
627Various structures for implementing schedulers have been extensively\n\
628studied, and heaps are good for this, as they are reasonably speedy,\n\
629the speed is almost constant, and the worst case is not much different\n\
630than the average case. However, there are other representations which\n\
631are more efficient overall, yet the worst cases might be terrible.\n"
632"\n\
633Heaps are also very useful in big disk sorts. You most probably all\n\
634know that a big sort implies producing \"runs\" (which are pre-sorted\n\
635sequences, which size is usually related to the amount of CPU memory),\n\
636followed by a merging passes for these runs, which merging is often\n\
637very cleverly organised[1]. It is very important that the initial\n\
638sort produces the longest runs possible. Tournaments are a good way\n\
639to that. If, using all the memory available to hold a tournament, you\n\
640replace and percolate items that happen to fit the current run, you'll\n\
641produce runs which are twice the size of the memory for random input,\n\
642and much better for input fuzzily ordered.\n"
643"\n\
644Moreover, if you output the 0'th item on disk and get an input which\n\
645may not fit in the current tournament (because the value \"wins\" over\n\
646the last output value), it cannot fit in the heap, so the size of the\n\
647heap decreases. The freed memory could be cleverly reused immediately\n\
648for progressively building a second heap, which grows at exactly the\n\
649same rate the first heap is melting. When the first heap completely\n\
650vanishes, you switch heaps and start a new run. Clever and quite\n\
651effective!\n\
652\n\
653In a word, heaps are useful memory structures to know. I use them in\n\
654a few applications, and I think it is good to keep a `heap' module\n\
655around. :-)\n"
656"\n\
657--------------------\n\
658[1] The disk balancing algorithms which are current, nowadays, are\n\
659more annoying than clever, and this is a consequence of the seeking\n\
660capabilities of the disks. On devices which cannot seek, like big\n\
661tape drives, the story was quite different, and one had to be very\n\
662clever to ensure (far in advance) that each tape movement will be the\n\
663most effective possible (that is, will best participate at\n\
664\"progressing\" the merge). Some tapes were even able to read\n\
665backwards, and this was also used to avoid the rewinding time.\n\
666Believe me, real good tape sorts were quite spectacular to watch!\n\
667From all times, sorting has always been a Great Art! :-)\n");
668
Martin v. Löwis1a214512008-06-11 05:26:20 +0000669
670static struct PyModuleDef _heapqmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000671 PyModuleDef_HEAD_INIT,
672 "_heapq",
673 module_doc,
674 -1,
675 heapq_methods,
676 NULL,
677 NULL,
678 NULL,
679 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +0000680};
681
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000682PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000683PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000684{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000687 m = PyModule_Create(&_heapqmodule);
688 if (m == NULL)
689 return NULL;
690 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
691 PyModule_AddObject(m, "__about__", about);
692 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000693}
694