blob: 13e6a3d6040074f4ffe42d3fc8ec0d98b8f4fa76 [file] [log] [blame]
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00001/* Drop in replacement for heapq.py
2
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by François Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
11static int
12_siftdown(PyListObject *heap, int startpos, int pos)
13{
14 PyObject *newitem, *parent;
15 int cmp, parentpos;
16
17 assert(PyList_Check(heap));
18 if (pos >= PyList_GET_SIZE(heap)) {
19 PyErr_SetString(PyExc_IndexError, "index out of range");
20 return -1;
21 }
22
23 newitem = PyList_GET_ITEM(heap, pos);
24 Py_INCREF(newitem);
25 /* Follow the path to the root, moving parents down until finding
26 a place newitem fits. */
27 while (pos > startpos){
28 parentpos = (pos - 1) >> 1;
29 parent = PyList_GET_ITEM(heap, parentpos);
30 cmp = PyObject_RichCompareBool(parent, newitem, Py_LE);
31 if (cmp == -1)
32 return -1;
33 if (cmp == 1)
34 break;
35 Py_INCREF(parent);
36 Py_DECREF(PyList_GET_ITEM(heap, pos));
37 PyList_SET_ITEM(heap, pos, parent);
38 pos = parentpos;
39 }
40 Py_DECREF(PyList_GET_ITEM(heap, pos));
41 PyList_SET_ITEM(heap, pos, newitem);
42 return 0;
43}
44
45static int
46_siftup(PyListObject *heap, int pos)
47{
48 int startpos, endpos, childpos, rightpos;
49 int cmp;
50 PyObject *newitem, *tmp;
51
52 assert(PyList_Check(heap));
53 endpos = PyList_GET_SIZE(heap);
54 startpos = pos;
55 if (pos >= endpos) {
56 PyErr_SetString(PyExc_IndexError, "index out of range");
57 return -1;
58 }
59 newitem = PyList_GET_ITEM(heap, pos);
60 Py_INCREF(newitem);
61
62 /* Bubble up the smaller child until hitting a leaf. */
63 childpos = 2*pos + 1; /* leftmost child position */
64 while (childpos < endpos) {
65 /* Set childpos to index of smaller child. */
66 rightpos = childpos + 1;
67 if (rightpos < endpos) {
68 cmp = PyObject_RichCompareBool(
69 PyList_GET_ITEM(heap, rightpos),
70 PyList_GET_ITEM(heap, childpos),
71 Py_LE);
72 if (cmp == -1)
73 return -1;
74 if (cmp == 1)
75 childpos = rightpos;
76 }
77 /* Move the smaller child up. */
78 tmp = PyList_GET_ITEM(heap, childpos);
79 Py_INCREF(tmp);
80 Py_DECREF(PyList_GET_ITEM(heap, pos));
81 PyList_SET_ITEM(heap, pos, tmp);
82 pos = childpos;
83 childpos = 2*pos + 1;
84 }
85
86 /* The leaf at pos is empty now. Put newitem there, and and bubble
87 it up to its final resting place (by sifting its parents down). */
88 Py_DECREF(PyList_GET_ITEM(heap, pos));
89 PyList_SET_ITEM(heap, pos, newitem);
90 return _siftdown(heap, startpos, pos);
91}
92
93static PyObject *
94heappush(PyObject *self, PyObject *args)
95{
96 PyObject *heap, *item;
97
98 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
99 return NULL;
100
101 if (!PyList_Check(heap)) {
102 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
103 return NULL;
104 }
105
106 if (PyList_Append(heap, item) == -1)
107 return NULL;
108
109 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
110 return NULL;
111 Py_INCREF(Py_None);
112 return Py_None;
113}
114
115PyDoc_STRVAR(heappush_doc,
116"Push item onto heap, maintaining the heap invariant.");
117
118static PyObject *
119heappop(PyObject *self, PyObject *heap)
120{
121 PyObject *lastelt, *returnitem;
122 int n;
123
124 if (!PyList_Check(heap)) {
125 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
126 return NULL;
127 }
128
129 /* # raises appropriate IndexError if heap is empty */
130 n = PyList_GET_SIZE(heap);
131 if (n == 0) {
132 PyErr_SetString(PyExc_IndexError, "index out of range");
133 return NULL;
134 }
135
136 lastelt = PyList_GET_ITEM(heap, n-1) ;
137 Py_INCREF(lastelt);
138 PyList_SetSlice(heap, n-1, n, NULL);
139 n--;
140
141 if (!n)
142 return lastelt;
143 returnitem = PyList_GET_ITEM(heap, 0);
144 PyList_SET_ITEM(heap, 0, lastelt);
145 if (_siftup((PyListObject *)heap, 0) == -1) {
146 Py_DECREF(returnitem);
147 return NULL;
148 }
149 return returnitem;
150}
151
152PyDoc_STRVAR(heappop_doc,
153"Pop the smallest item off the heap, maintaining the heap invariant.");
154
155static PyObject *
156heapreplace(PyObject *self, PyObject *args)
157{
158 PyObject *heap, *item, *returnitem;
159
160 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
161 return NULL;
162
163 if (!PyList_Check(heap)) {
164 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
165 return NULL;
166 }
167
168 if (PyList_GET_SIZE(heap) < 1) {
169 PyErr_SetString(PyExc_IndexError, "index out of range");
170 return NULL;
171 }
172
173 returnitem = PyList_GET_ITEM(heap, 0);
174 Py_INCREF(item);
175 PyList_SET_ITEM(heap, 0, item);
176 if (_siftup((PyListObject *)heap, 0) == -1) {
177 Py_DECREF(returnitem);
178 return NULL;
179 }
180 return returnitem;
181}
182
183PyDoc_STRVAR(heapreplace_doc,
184"Pop and return the current smallest value, and add the new item.\n\
185\n\
186This is more efficient than heappop() followed by heappush(), and can be\n\
187more appropriate when using a fixed-size heap. Note that the value\n\
188returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger28224f82004-06-20 09:07:53 +0000189this routine unless written as part of a larger expression:\n\n\
190 result = item <= heap[0] and item or heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000191
192static PyObject *
193heapify(PyObject *self, PyObject *heap)
194{
195 int i, n;
196
197 if (!PyList_Check(heap)) {
198 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
199 return NULL;
200 }
201
202 n = PyList_GET_SIZE(heap);
203 /* Transform bottom-up. The largest index there's any point to
204 looking at is the largest with a child index in-range, so must
205 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
206 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
207 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
208 and that's again n//2-1.
209 */
210 for (i=n/2-1 ; i>=0 ; i--)
211 if(_siftup((PyListObject *)heap, i) == -1)
212 return NULL;
213 Py_INCREF(Py_None);
214 return Py_None;
215}
216
217PyDoc_STRVAR(heapify_doc,
218"Transform list into a heap, in-place, in O(len(heap)) time.");
219
Raymond Hettingerc9297662004-06-12 22:48:46 +0000220static PyObject *
221nlargest(PyObject *self, PyObject *args)
222{
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000223 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000224 int i, n;
225
Raymond Hettingeraefde432004-06-15 23:53:35 +0000226 if (!PyArg_ParseTuple(args, "iO:nlargest", &n, &iterable))
Raymond Hettingerc9297662004-06-12 22:48:46 +0000227 return NULL;
228
229 it = PyObject_GetIter(iterable);
230 if (it == NULL)
231 return NULL;
232
233 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000234 if (heap == NULL)
Raymond Hettingerc9297662004-06-12 22:48:46 +0000235 goto fail;
236
237 for (i=0 ; i<n ; i++ ){
238 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000239 if (elem == NULL) {
240 if (PyErr_Occurred())
241 goto fail;
242 else
243 goto sortit;
244 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000245 if (PyList_Append(heap, elem) == -1) {
246 Py_DECREF(elem);
247 goto fail;
248 }
249 Py_DECREF(elem);
250 }
251 if (PyList_GET_SIZE(heap) == 0)
252 goto sortit;
253
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000254 for (i=n/2-1 ; i>=0 ; i--)
255 if(_siftup((PyListObject *)heap, i) == -1)
256 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000257
258 sol = PyList_GET_ITEM(heap, 0);
259 while (1) {
260 elem = PyIter_Next(it);
261 if (elem == NULL) {
262 if (PyErr_Occurred())
263 goto fail;
264 else
265 goto sortit;
266 }
267 if (PyObject_RichCompareBool(elem, sol, Py_LE)) {
268 Py_DECREF(elem);
269 continue;
270 }
271 oldelem = PyList_GET_ITEM(heap, 0);
272 PyList_SET_ITEM(heap, 0, elem);
273 Py_DECREF(oldelem);
274 if (_siftup((PyListObject *)heap, 0) == -1)
275 goto fail;
276 sol = PyList_GET_ITEM(heap, 0);
277 }
278sortit:
Raymond Hettingerc9297662004-06-12 22:48:46 +0000279 if (PyList_Sort(heap) == -1)
280 goto fail;
281 if (PyList_Reverse(heap) == -1)
282 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000283 Py_DECREF(it);
Raymond Hettingerc9297662004-06-12 22:48:46 +0000284 return heap;
285
286fail:
287 Py_DECREF(it);
288 Py_XDECREF(heap);
289 return NULL;
290}
291
292PyDoc_STRVAR(nlargest_doc,
293"Find the n largest elements in a dataset.\n\
294\n\
295Equivalent to: sorted(iterable, reverse=True)[:n]\n");
296
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000297static int
298_siftdownmax(PyListObject *heap, int startpos, int pos)
299{
300 PyObject *newitem, *parent;
301 int cmp, parentpos;
302
303 assert(PyList_Check(heap));
304 if (pos >= PyList_GET_SIZE(heap)) {
305 PyErr_SetString(PyExc_IndexError, "index out of range");
306 return -1;
307 }
308
309 newitem = PyList_GET_ITEM(heap, pos);
310 Py_INCREF(newitem);
311 /* Follow the path to the root, moving parents down until finding
312 a place newitem fits. */
313 while (pos > startpos){
314 parentpos = (pos - 1) >> 1;
315 parent = PyList_GET_ITEM(heap, parentpos);
316 cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
317 if (cmp == -1)
318 return -1;
319 if (cmp == 1)
320 break;
321 Py_INCREF(parent);
322 Py_DECREF(PyList_GET_ITEM(heap, pos));
323 PyList_SET_ITEM(heap, pos, parent);
324 pos = parentpos;
325 }
326 Py_DECREF(PyList_GET_ITEM(heap, pos));
327 PyList_SET_ITEM(heap, pos, newitem);
328 return 0;
329}
330
331static int
332_siftupmax(PyListObject *heap, int pos)
333{
334 int startpos, endpos, childpos, rightpos;
335 int cmp;
336 PyObject *newitem, *tmp;
337
338 assert(PyList_Check(heap));
339 endpos = PyList_GET_SIZE(heap);
340 startpos = pos;
341 if (pos >= endpos) {
342 PyErr_SetString(PyExc_IndexError, "index out of range");
343 return -1;
344 }
345 newitem = PyList_GET_ITEM(heap, pos);
346 Py_INCREF(newitem);
347
348 /* Bubble up the smaller child until hitting a leaf. */
349 childpos = 2*pos + 1; /* leftmost child position */
350 while (childpos < endpos) {
351 /* Set childpos to index of smaller child. */
352 rightpos = childpos + 1;
353 if (rightpos < endpos) {
354 cmp = PyObject_RichCompareBool(
355 PyList_GET_ITEM(heap, childpos),
356 PyList_GET_ITEM(heap, rightpos),
357 Py_LE);
358 if (cmp == -1)
359 return -1;
360 if (cmp == 1)
361 childpos = rightpos;
362 }
363 /* Move the smaller child up. */
364 tmp = PyList_GET_ITEM(heap, childpos);
365 Py_INCREF(tmp);
366 Py_DECREF(PyList_GET_ITEM(heap, pos));
367 PyList_SET_ITEM(heap, pos, tmp);
368 pos = childpos;
369 childpos = 2*pos + 1;
370 }
371
372 /* The leaf at pos is empty now. Put newitem there, and and bubble
373 it up to its final resting place (by sifting its parents down). */
374 Py_DECREF(PyList_GET_ITEM(heap, pos));
375 PyList_SET_ITEM(heap, pos, newitem);
376 return _siftdownmax(heap, startpos, pos);
377}
378
379static PyObject *
380nsmallest(PyObject *self, PyObject *args)
381{
382 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
383 int i, n;
384
Raymond Hettingeraefde432004-06-15 23:53:35 +0000385 if (!PyArg_ParseTuple(args, "iO:nsmallest", &n, &iterable))
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000386 return NULL;
387
388 it = PyObject_GetIter(iterable);
389 if (it == NULL)
390 return NULL;
391
392 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000393 if (heap == NULL)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000394 goto fail;
395
396 for (i=0 ; i<n ; i++ ){
397 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000398 if (elem == NULL) {
399 if (PyErr_Occurred())
400 goto fail;
401 else
402 goto sortit;
403 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000404 if (PyList_Append(heap, elem) == -1) {
405 Py_DECREF(elem);
406 goto fail;
407 }
408 Py_DECREF(elem);
409 }
410 n = PyList_GET_SIZE(heap);
411 if (n == 0)
412 goto sortit;
413
414 for (i=n/2-1 ; i>=0 ; i--)
415 if(_siftupmax((PyListObject *)heap, i) == -1)
416 goto fail;
417
418 los = PyList_GET_ITEM(heap, 0);
419 while (1) {
420 elem = PyIter_Next(it);
421 if (elem == NULL) {
422 if (PyErr_Occurred())
423 goto fail;
424 else
425 goto sortit;
426 }
427 if (PyObject_RichCompareBool(los, elem, Py_LE)) {
428 Py_DECREF(elem);
429 continue;
430 }
431
432 oldelem = PyList_GET_ITEM(heap, 0);
433 PyList_SET_ITEM(heap, 0, elem);
434 Py_DECREF(oldelem);
435 if (_siftupmax((PyListObject *)heap, 0) == -1)
436 goto fail;
437 los = PyList_GET_ITEM(heap, 0);
438 }
439
440sortit:
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000441 if (PyList_Sort(heap) == -1)
442 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000443 Py_DECREF(it);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000444 return heap;
445
446fail:
447 Py_DECREF(it);
448 Py_XDECREF(heap);
449 return NULL;
450}
451
452PyDoc_STRVAR(nsmallest_doc,
453"Find the n smallest elements in a dataset.\n\
454\n\
455Equivalent to: sorted(iterable)[:n]\n");
456
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000457static PyMethodDef heapq_methods[] = {
458 {"heappush", (PyCFunction)heappush,
459 METH_VARARGS, heappush_doc},
460 {"heappop", (PyCFunction)heappop,
461 METH_O, heappop_doc},
462 {"heapreplace", (PyCFunction)heapreplace,
463 METH_VARARGS, heapreplace_doc},
464 {"heapify", (PyCFunction)heapify,
465 METH_O, heapify_doc},
Raymond Hettingerc9297662004-06-12 22:48:46 +0000466 {"nlargest", (PyCFunction)nlargest,
467 METH_VARARGS, nlargest_doc},
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000468 {"nsmallest", (PyCFunction)nsmallest,
469 METH_VARARGS, nsmallest_doc},
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000470 {NULL, NULL} /* sentinel */
471};
472
473PyDoc_STRVAR(module_doc,
474"Heap queue algorithm (a.k.a. priority queue).\n\
475\n\
476Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
477all k, counting elements from 0. For the sake of comparison,\n\
478non-existing elements are considered to be infinite. The interesting\n\
479property of a heap is that a[0] is always its smallest element.\n\
480\n\
481Usage:\n\
482\n\
483heap = [] # creates an empty heap\n\
484heappush(heap, item) # pushes a new item on the heap\n\
485item = heappop(heap) # pops the smallest item from the heap\n\
486item = heap[0] # smallest item on the heap without popping it\n\
487heapify(x) # transforms list into a heap, in-place, in linear time\n\
488item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
489 # new item; the heap size is unchanged\n\
490\n\
491Our API differs from textbook heap algorithms as follows:\n\
492\n\
493- We use 0-based indexing. This makes the relationship between the\n\
494 index for a node and the indexes for its children slightly less\n\
495 obvious, but is more suitable since Python uses 0-based indexing.\n\
496\n\
497- Our heappop() method returns the smallest item, not the largest.\n\
498\n\
499These two make it possible to view the heap as a regular Python list\n\
500without surprises: heap[0] is the smallest item, and heap.sort()\n\
501maintains the heap invariant!\n");
502
503
504PyDoc_STRVAR(__about__,
505"Heap queues\n\
506\n\
507[explanation by François Pinard]\n\
508\n\
509Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
510all k, counting elements from 0. For the sake of comparison,\n\
511non-existing elements are considered to be infinite. The interesting\n\
512property of a heap is that a[0] is always its smallest element.\n"
513"\n\
514The strange invariant above is meant to be an efficient memory\n\
515representation for a tournament. The numbers below are `k', not a[k]:\n\
516\n\
517 0\n\
518\n\
519 1 2\n\
520\n\
521 3 4 5 6\n\
522\n\
523 7 8 9 10 11 12 13 14\n\
524\n\
525 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
526\n\
527\n\
528In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
529an usual binary tournament we see in sports, each cell is the winner\n\
530over the two cells it tops, and we can trace the winner down the tree\n\
531to see all opponents s/he had. However, in many computer applications\n\
532of such tournaments, we do not need to trace the history of a winner.\n\
533To be more memory efficient, when a winner is promoted, we try to\n\
534replace it by something else at a lower level, and the rule becomes\n\
535that a cell and the two cells it tops contain three different items,\n\
536but the top cell \"wins\" over the two topped cells.\n"
537"\n\
538If this heap invariant is protected at all time, index 0 is clearly\n\
539the overall winner. The simplest algorithmic way to remove it and\n\
540find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
541diagram above) into the 0 position, and then percolate this new 0 down\n\
542the tree, exchanging values, until the invariant is re-established.\n\
543This is clearly logarithmic on the total number of items in the tree.\n\
544By iterating over all items, you get an O(n ln n) sort.\n"
545"\n\
546A nice feature of this sort is that you can efficiently insert new\n\
547items while the sort is going on, provided that the inserted items are\n\
548not \"better\" than the last 0'th element you extracted. This is\n\
549especially useful in simulation contexts, where the tree holds all\n\
550incoming events, and the \"win\" condition means the smallest scheduled\n\
551time. When an event schedule other events for execution, they are\n\
552scheduled into the future, so they can easily go into the heap. So, a\n\
553heap is a good structure for implementing schedulers (this is what I\n\
554used for my MIDI sequencer :-).\n"
555"\n\
556Various structures for implementing schedulers have been extensively\n\
557studied, and heaps are good for this, as they are reasonably speedy,\n\
558the speed is almost constant, and the worst case is not much different\n\
559than the average case. However, there are other representations which\n\
560are more efficient overall, yet the worst cases might be terrible.\n"
561"\n\
562Heaps are also very useful in big disk sorts. You most probably all\n\
563know that a big sort implies producing \"runs\" (which are pre-sorted\n\
564sequences, which size is usually related to the amount of CPU memory),\n\
565followed by a merging passes for these runs, which merging is often\n\
566very cleverly organised[1]. It is very important that the initial\n\
567sort produces the longest runs possible. Tournaments are a good way\n\
568to that. If, using all the memory available to hold a tournament, you\n\
569replace and percolate items that happen to fit the current run, you'll\n\
570produce runs which are twice the size of the memory for random input,\n\
571and much better for input fuzzily ordered.\n"
572"\n\
573Moreover, if you output the 0'th item on disk and get an input which\n\
574may not fit in the current tournament (because the value \"wins\" over\n\
575the last output value), it cannot fit in the heap, so the size of the\n\
576heap decreases. The freed memory could be cleverly reused immediately\n\
577for progressively building a second heap, which grows at exactly the\n\
578same rate the first heap is melting. When the first heap completely\n\
579vanishes, you switch heaps and start a new run. Clever and quite\n\
580effective!\n\
581\n\
582In a word, heaps are useful memory structures to know. I use them in\n\
583a few applications, and I think it is good to keep a `heap' module\n\
584around. :-)\n"
585"\n\
586--------------------\n\
587[1] The disk balancing algorithms which are current, nowadays, are\n\
588more annoying than clever, and this is a consequence of the seeking\n\
589capabilities of the disks. On devices which cannot seek, like big\n\
590tape drives, the story was quite different, and one had to be very\n\
591clever to ensure (far in advance) that each tape movement will be the\n\
592most effective possible (that is, will best participate at\n\
593\"progressing\" the merge). Some tapes were even able to read\n\
594backwards, and this was also used to avoid the rewinding time.\n\
595Believe me, real good tape sorts were quite spectacular to watch!\n\
596From all times, sorting has always been a Great Art! :-)\n");
597
598PyMODINIT_FUNC
599init_heapq(void)
600{
601 PyObject *m;
602
603 m = Py_InitModule3("_heapq", heapq_methods, module_doc);
604 PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
605}
606