blob: 192e843690abb8fa318e92acff3f298bfd5fd71b [file] [log] [blame]
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00001/* Drop in replacement for heapq.py
2
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by François Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
11static int
12_siftdown(PyListObject *heap, int startpos, int pos)
13{
14 PyObject *newitem, *parent;
15 int cmp, parentpos;
16
17 assert(PyList_Check(heap));
18 if (pos >= PyList_GET_SIZE(heap)) {
19 PyErr_SetString(PyExc_IndexError, "index out of range");
20 return -1;
21 }
22
23 newitem = PyList_GET_ITEM(heap, pos);
24 Py_INCREF(newitem);
25 /* Follow the path to the root, moving parents down until finding
26 a place newitem fits. */
27 while (pos > startpos){
28 parentpos = (pos - 1) >> 1;
29 parent = PyList_GET_ITEM(heap, parentpos);
30 cmp = PyObject_RichCompareBool(parent, newitem, Py_LE);
31 if (cmp == -1)
32 return -1;
33 if (cmp == 1)
34 break;
35 Py_INCREF(parent);
36 Py_DECREF(PyList_GET_ITEM(heap, pos));
37 PyList_SET_ITEM(heap, pos, parent);
38 pos = parentpos;
39 }
40 Py_DECREF(PyList_GET_ITEM(heap, pos));
41 PyList_SET_ITEM(heap, pos, newitem);
42 return 0;
43}
44
45static int
46_siftup(PyListObject *heap, int pos)
47{
48 int startpos, endpos, childpos, rightpos;
49 int cmp;
50 PyObject *newitem, *tmp;
51
52 assert(PyList_Check(heap));
53 endpos = PyList_GET_SIZE(heap);
54 startpos = pos;
55 if (pos >= endpos) {
56 PyErr_SetString(PyExc_IndexError, "index out of range");
57 return -1;
58 }
59 newitem = PyList_GET_ITEM(heap, pos);
60 Py_INCREF(newitem);
61
62 /* Bubble up the smaller child until hitting a leaf. */
63 childpos = 2*pos + 1; /* leftmost child position */
64 while (childpos < endpos) {
65 /* Set childpos to index of smaller child. */
66 rightpos = childpos + 1;
67 if (rightpos < endpos) {
68 cmp = PyObject_RichCompareBool(
69 PyList_GET_ITEM(heap, rightpos),
70 PyList_GET_ITEM(heap, childpos),
71 Py_LE);
72 if (cmp == -1)
73 return -1;
74 if (cmp == 1)
75 childpos = rightpos;
76 }
77 /* Move the smaller child up. */
78 tmp = PyList_GET_ITEM(heap, childpos);
79 Py_INCREF(tmp);
80 Py_DECREF(PyList_GET_ITEM(heap, pos));
81 PyList_SET_ITEM(heap, pos, tmp);
82 pos = childpos;
83 childpos = 2*pos + 1;
84 }
85
86 /* The leaf at pos is empty now. Put newitem there, and and bubble
87 it up to its final resting place (by sifting its parents down). */
88 Py_DECREF(PyList_GET_ITEM(heap, pos));
89 PyList_SET_ITEM(heap, pos, newitem);
90 return _siftdown(heap, startpos, pos);
91}
92
93static PyObject *
94heappush(PyObject *self, PyObject *args)
95{
96 PyObject *heap, *item;
97
98 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
99 return NULL;
100
101 if (!PyList_Check(heap)) {
102 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
103 return NULL;
104 }
105
106 if (PyList_Append(heap, item) == -1)
107 return NULL;
108
109 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
110 return NULL;
111 Py_INCREF(Py_None);
112 return Py_None;
113}
114
115PyDoc_STRVAR(heappush_doc,
116"Push item onto heap, maintaining the heap invariant.");
117
118static PyObject *
119heappop(PyObject *self, PyObject *heap)
120{
121 PyObject *lastelt, *returnitem;
122 int n;
123
124 if (!PyList_Check(heap)) {
125 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
126 return NULL;
127 }
128
129 /* # raises appropriate IndexError if heap is empty */
130 n = PyList_GET_SIZE(heap);
131 if (n == 0) {
132 PyErr_SetString(PyExc_IndexError, "index out of range");
133 return NULL;
134 }
135
136 lastelt = PyList_GET_ITEM(heap, n-1) ;
137 Py_INCREF(lastelt);
138 PyList_SetSlice(heap, n-1, n, NULL);
139 n--;
140
141 if (!n)
142 return lastelt;
143 returnitem = PyList_GET_ITEM(heap, 0);
144 PyList_SET_ITEM(heap, 0, lastelt);
145 if (_siftup((PyListObject *)heap, 0) == -1) {
146 Py_DECREF(returnitem);
147 return NULL;
148 }
149 return returnitem;
150}
151
152PyDoc_STRVAR(heappop_doc,
153"Pop the smallest item off the heap, maintaining the heap invariant.");
154
155static PyObject *
156heapreplace(PyObject *self, PyObject *args)
157{
158 PyObject *heap, *item, *returnitem;
159
160 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
161 return NULL;
162
163 if (!PyList_Check(heap)) {
164 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
165 return NULL;
166 }
167
168 if (PyList_GET_SIZE(heap) < 1) {
169 PyErr_SetString(PyExc_IndexError, "index out of range");
170 return NULL;
171 }
172
173 returnitem = PyList_GET_ITEM(heap, 0);
174 Py_INCREF(item);
175 PyList_SET_ITEM(heap, 0, item);
176 if (_siftup((PyListObject *)heap, 0) == -1) {
177 Py_DECREF(returnitem);
178 return NULL;
179 }
180 return returnitem;
181}
182
183PyDoc_STRVAR(heapreplace_doc,
184"Pop and return the current smallest value, and add the new item.\n\
185\n\
186This is more efficient than heappop() followed by heappush(), and can be\n\
187more appropriate when using a fixed-size heap. Note that the value\n\
188returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000189this routine unless written as part of a conditional replacement:\n\n\
190 if item > heap[0]:\n\
191 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000192
193static PyObject *
194heapify(PyObject *self, PyObject *heap)
195{
196 int i, n;
197
198 if (!PyList_Check(heap)) {
199 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
200 return NULL;
201 }
202
203 n = PyList_GET_SIZE(heap);
204 /* Transform bottom-up. The largest index there's any point to
205 looking at is the largest with a child index in-range, so must
206 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
207 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
208 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
209 and that's again n//2-1.
210 */
211 for (i=n/2-1 ; i>=0 ; i--)
212 if(_siftup((PyListObject *)heap, i) == -1)
213 return NULL;
214 Py_INCREF(Py_None);
215 return Py_None;
216}
217
218PyDoc_STRVAR(heapify_doc,
219"Transform list into a heap, in-place, in O(len(heap)) time.");
220
Raymond Hettingerc9297662004-06-12 22:48:46 +0000221static PyObject *
222nlargest(PyObject *self, PyObject *args)
223{
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000224 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000225 int i, n;
226
Raymond Hettingeraefde432004-06-15 23:53:35 +0000227 if (!PyArg_ParseTuple(args, "iO:nlargest", &n, &iterable))
Raymond Hettingerc9297662004-06-12 22:48:46 +0000228 return NULL;
229
230 it = PyObject_GetIter(iterable);
231 if (it == NULL)
232 return NULL;
233
234 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000235 if (heap == NULL)
Raymond Hettingerc9297662004-06-12 22:48:46 +0000236 goto fail;
237
238 for (i=0 ; i<n ; i++ ){
239 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000240 if (elem == NULL) {
241 if (PyErr_Occurred())
242 goto fail;
243 else
244 goto sortit;
245 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000246 if (PyList_Append(heap, elem) == -1) {
247 Py_DECREF(elem);
248 goto fail;
249 }
250 Py_DECREF(elem);
251 }
252 if (PyList_GET_SIZE(heap) == 0)
253 goto sortit;
254
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000255 for (i=n/2-1 ; i>=0 ; i--)
256 if(_siftup((PyListObject *)heap, i) == -1)
257 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000258
259 sol = PyList_GET_ITEM(heap, 0);
260 while (1) {
261 elem = PyIter_Next(it);
262 if (elem == NULL) {
263 if (PyErr_Occurred())
264 goto fail;
265 else
266 goto sortit;
267 }
268 if (PyObject_RichCompareBool(elem, sol, Py_LE)) {
269 Py_DECREF(elem);
270 continue;
271 }
272 oldelem = PyList_GET_ITEM(heap, 0);
273 PyList_SET_ITEM(heap, 0, elem);
274 Py_DECREF(oldelem);
275 if (_siftup((PyListObject *)heap, 0) == -1)
276 goto fail;
277 sol = PyList_GET_ITEM(heap, 0);
278 }
279sortit:
Raymond Hettingerc9297662004-06-12 22:48:46 +0000280 if (PyList_Sort(heap) == -1)
281 goto fail;
282 if (PyList_Reverse(heap) == -1)
283 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000284 Py_DECREF(it);
Raymond Hettingerc9297662004-06-12 22:48:46 +0000285 return heap;
286
287fail:
288 Py_DECREF(it);
289 Py_XDECREF(heap);
290 return NULL;
291}
292
293PyDoc_STRVAR(nlargest_doc,
294"Find the n largest elements in a dataset.\n\
295\n\
296Equivalent to: sorted(iterable, reverse=True)[:n]\n");
297
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000298static int
299_siftdownmax(PyListObject *heap, int startpos, int pos)
300{
301 PyObject *newitem, *parent;
302 int cmp, parentpos;
303
304 assert(PyList_Check(heap));
305 if (pos >= PyList_GET_SIZE(heap)) {
306 PyErr_SetString(PyExc_IndexError, "index out of range");
307 return -1;
308 }
309
310 newitem = PyList_GET_ITEM(heap, pos);
311 Py_INCREF(newitem);
312 /* Follow the path to the root, moving parents down until finding
313 a place newitem fits. */
314 while (pos > startpos){
315 parentpos = (pos - 1) >> 1;
316 parent = PyList_GET_ITEM(heap, parentpos);
317 cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
318 if (cmp == -1)
319 return -1;
320 if (cmp == 1)
321 break;
322 Py_INCREF(parent);
323 Py_DECREF(PyList_GET_ITEM(heap, pos));
324 PyList_SET_ITEM(heap, pos, parent);
325 pos = parentpos;
326 }
327 Py_DECREF(PyList_GET_ITEM(heap, pos));
328 PyList_SET_ITEM(heap, pos, newitem);
329 return 0;
330}
331
332static int
333_siftupmax(PyListObject *heap, int pos)
334{
335 int startpos, endpos, childpos, rightpos;
336 int cmp;
337 PyObject *newitem, *tmp;
338
339 assert(PyList_Check(heap));
340 endpos = PyList_GET_SIZE(heap);
341 startpos = pos;
342 if (pos >= endpos) {
343 PyErr_SetString(PyExc_IndexError, "index out of range");
344 return -1;
345 }
346 newitem = PyList_GET_ITEM(heap, pos);
347 Py_INCREF(newitem);
348
349 /* Bubble up the smaller child until hitting a leaf. */
350 childpos = 2*pos + 1; /* leftmost child position */
351 while (childpos < endpos) {
352 /* Set childpos to index of smaller child. */
353 rightpos = childpos + 1;
354 if (rightpos < endpos) {
355 cmp = PyObject_RichCompareBool(
356 PyList_GET_ITEM(heap, childpos),
357 PyList_GET_ITEM(heap, rightpos),
358 Py_LE);
359 if (cmp == -1)
360 return -1;
361 if (cmp == 1)
362 childpos = rightpos;
363 }
364 /* Move the smaller child up. */
365 tmp = PyList_GET_ITEM(heap, childpos);
366 Py_INCREF(tmp);
367 Py_DECREF(PyList_GET_ITEM(heap, pos));
368 PyList_SET_ITEM(heap, pos, tmp);
369 pos = childpos;
370 childpos = 2*pos + 1;
371 }
372
373 /* The leaf at pos is empty now. Put newitem there, and and bubble
374 it up to its final resting place (by sifting its parents down). */
375 Py_DECREF(PyList_GET_ITEM(heap, pos));
376 PyList_SET_ITEM(heap, pos, newitem);
377 return _siftdownmax(heap, startpos, pos);
378}
379
380static PyObject *
381nsmallest(PyObject *self, PyObject *args)
382{
383 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
384 int i, n;
385
Raymond Hettingeraefde432004-06-15 23:53:35 +0000386 if (!PyArg_ParseTuple(args, "iO:nsmallest", &n, &iterable))
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000387 return NULL;
388
389 it = PyObject_GetIter(iterable);
390 if (it == NULL)
391 return NULL;
392
393 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000394 if (heap == NULL)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000395 goto fail;
396
397 for (i=0 ; i<n ; i++ ){
398 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000399 if (elem == NULL) {
400 if (PyErr_Occurred())
401 goto fail;
402 else
403 goto sortit;
404 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000405 if (PyList_Append(heap, elem) == -1) {
406 Py_DECREF(elem);
407 goto fail;
408 }
409 Py_DECREF(elem);
410 }
411 n = PyList_GET_SIZE(heap);
412 if (n == 0)
413 goto sortit;
414
415 for (i=n/2-1 ; i>=0 ; i--)
416 if(_siftupmax((PyListObject *)heap, i) == -1)
417 goto fail;
418
419 los = PyList_GET_ITEM(heap, 0);
420 while (1) {
421 elem = PyIter_Next(it);
422 if (elem == NULL) {
423 if (PyErr_Occurred())
424 goto fail;
425 else
426 goto sortit;
427 }
428 if (PyObject_RichCompareBool(los, elem, Py_LE)) {
429 Py_DECREF(elem);
430 continue;
431 }
432
433 oldelem = PyList_GET_ITEM(heap, 0);
434 PyList_SET_ITEM(heap, 0, elem);
435 Py_DECREF(oldelem);
436 if (_siftupmax((PyListObject *)heap, 0) == -1)
437 goto fail;
438 los = PyList_GET_ITEM(heap, 0);
439 }
440
441sortit:
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000442 if (PyList_Sort(heap) == -1)
443 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000444 Py_DECREF(it);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000445 return heap;
446
447fail:
448 Py_DECREF(it);
449 Py_XDECREF(heap);
450 return NULL;
451}
452
453PyDoc_STRVAR(nsmallest_doc,
454"Find the n smallest elements in a dataset.\n\
455\n\
456Equivalent to: sorted(iterable)[:n]\n");
457
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000458static PyMethodDef heapq_methods[] = {
459 {"heappush", (PyCFunction)heappush,
460 METH_VARARGS, heappush_doc},
461 {"heappop", (PyCFunction)heappop,
462 METH_O, heappop_doc},
463 {"heapreplace", (PyCFunction)heapreplace,
464 METH_VARARGS, heapreplace_doc},
465 {"heapify", (PyCFunction)heapify,
466 METH_O, heapify_doc},
Raymond Hettingerc9297662004-06-12 22:48:46 +0000467 {"nlargest", (PyCFunction)nlargest,
468 METH_VARARGS, nlargest_doc},
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000469 {"nsmallest", (PyCFunction)nsmallest,
470 METH_VARARGS, nsmallest_doc},
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000471 {NULL, NULL} /* sentinel */
472};
473
474PyDoc_STRVAR(module_doc,
475"Heap queue algorithm (a.k.a. priority queue).\n\
476\n\
477Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
478all k, counting elements from 0. For the sake of comparison,\n\
479non-existing elements are considered to be infinite. The interesting\n\
480property of a heap is that a[0] is always its smallest element.\n\
481\n\
482Usage:\n\
483\n\
484heap = [] # creates an empty heap\n\
485heappush(heap, item) # pushes a new item on the heap\n\
486item = heappop(heap) # pops the smallest item from the heap\n\
487item = heap[0] # smallest item on the heap without popping it\n\
488heapify(x) # transforms list into a heap, in-place, in linear time\n\
489item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
490 # new item; the heap size is unchanged\n\
491\n\
492Our API differs from textbook heap algorithms as follows:\n\
493\n\
494- We use 0-based indexing. This makes the relationship between the\n\
495 index for a node and the indexes for its children slightly less\n\
496 obvious, but is more suitable since Python uses 0-based indexing.\n\
497\n\
498- Our heappop() method returns the smallest item, not the largest.\n\
499\n\
500These two make it possible to view the heap as a regular Python list\n\
501without surprises: heap[0] is the smallest item, and heap.sort()\n\
502maintains the heap invariant!\n");
503
504
505PyDoc_STRVAR(__about__,
506"Heap queues\n\
507\n\
508[explanation by François Pinard]\n\
509\n\
510Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
511all k, counting elements from 0. For the sake of comparison,\n\
512non-existing elements are considered to be infinite. The interesting\n\
513property of a heap is that a[0] is always its smallest element.\n"
514"\n\
515The strange invariant above is meant to be an efficient memory\n\
516representation for a tournament. The numbers below are `k', not a[k]:\n\
517\n\
518 0\n\
519\n\
520 1 2\n\
521\n\
522 3 4 5 6\n\
523\n\
524 7 8 9 10 11 12 13 14\n\
525\n\
526 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
527\n\
528\n\
529In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
530an usual binary tournament we see in sports, each cell is the winner\n\
531over the two cells it tops, and we can trace the winner down the tree\n\
532to see all opponents s/he had. However, in many computer applications\n\
533of such tournaments, we do not need to trace the history of a winner.\n\
534To be more memory efficient, when a winner is promoted, we try to\n\
535replace it by something else at a lower level, and the rule becomes\n\
536that a cell and the two cells it tops contain three different items,\n\
537but the top cell \"wins\" over the two topped cells.\n"
538"\n\
539If this heap invariant is protected at all time, index 0 is clearly\n\
540the overall winner. The simplest algorithmic way to remove it and\n\
541find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
542diagram above) into the 0 position, and then percolate this new 0 down\n\
543the tree, exchanging values, until the invariant is re-established.\n\
544This is clearly logarithmic on the total number of items in the tree.\n\
545By iterating over all items, you get an O(n ln n) sort.\n"
546"\n\
547A nice feature of this sort is that you can efficiently insert new\n\
548items while the sort is going on, provided that the inserted items are\n\
549not \"better\" than the last 0'th element you extracted. This is\n\
550especially useful in simulation contexts, where the tree holds all\n\
551incoming events, and the \"win\" condition means the smallest scheduled\n\
552time. When an event schedule other events for execution, they are\n\
553scheduled into the future, so they can easily go into the heap. So, a\n\
554heap is a good structure for implementing schedulers (this is what I\n\
555used for my MIDI sequencer :-).\n"
556"\n\
557Various structures for implementing schedulers have been extensively\n\
558studied, and heaps are good for this, as they are reasonably speedy,\n\
559the speed is almost constant, and the worst case is not much different\n\
560than the average case. However, there are other representations which\n\
561are more efficient overall, yet the worst cases might be terrible.\n"
562"\n\
563Heaps are also very useful in big disk sorts. You most probably all\n\
564know that a big sort implies producing \"runs\" (which are pre-sorted\n\
565sequences, which size is usually related to the amount of CPU memory),\n\
566followed by a merging passes for these runs, which merging is often\n\
567very cleverly organised[1]. It is very important that the initial\n\
568sort produces the longest runs possible. Tournaments are a good way\n\
569to that. If, using all the memory available to hold a tournament, you\n\
570replace and percolate items that happen to fit the current run, you'll\n\
571produce runs which are twice the size of the memory for random input,\n\
572and much better for input fuzzily ordered.\n"
573"\n\
574Moreover, if you output the 0'th item on disk and get an input which\n\
575may not fit in the current tournament (because the value \"wins\" over\n\
576the last output value), it cannot fit in the heap, so the size of the\n\
577heap decreases. The freed memory could be cleverly reused immediately\n\
578for progressively building a second heap, which grows at exactly the\n\
579same rate the first heap is melting. When the first heap completely\n\
580vanishes, you switch heaps and start a new run. Clever and quite\n\
581effective!\n\
582\n\
583In a word, heaps are useful memory structures to know. I use them in\n\
584a few applications, and I think it is good to keep a `heap' module\n\
585around. :-)\n"
586"\n\
587--------------------\n\
588[1] The disk balancing algorithms which are current, nowadays, are\n\
589more annoying than clever, and this is a consequence of the seeking\n\
590capabilities of the disks. On devices which cannot seek, like big\n\
591tape drives, the story was quite different, and one had to be very\n\
592clever to ensure (far in advance) that each tape movement will be the\n\
593most effective possible (that is, will best participate at\n\
594\"progressing\" the merge). Some tapes were even able to read\n\
595backwards, and this was also used to avoid the rewinding time.\n\
596Believe me, real good tape sorts were quite spectacular to watch!\n\
597From all times, sorting has always been a Great Art! :-)\n");
598
599PyMODINIT_FUNC
600init_heapq(void)
601{
602 PyObject *m;
603
604 m = Py_InitModule3("_heapq", heapq_methods, module_doc);
605 PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
606}
607