blob: 7921823696f177e7a853d423321cb58af2d75ae8 [file] [log] [blame]
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00001/* Drop in replacement for heapq.py
2
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by François Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
12cmp_lt(PyObject *x, PyObject *y)
13{
Amaury Forgeot d'Arc2ba198d2008-06-17 21:25:35 +000014 return PyObject_RichCompareBool(x, y, Py_LT);
Georg Brandlf78e02b2008-06-10 17:40:04 +000015}
16
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000017static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000018_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000019{
20 PyObject *newitem, *parent;
Martin v. Löwisad0a4622006-02-16 14:30:23 +000021 int cmp;
22 Py_ssize_t parentpos;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000023
24 assert(PyList_Check(heap));
25 if (pos >= PyList_GET_SIZE(heap)) {
26 PyErr_SetString(PyExc_IndexError, "index out of range");
27 return -1;
28 }
29
30 newitem = PyList_GET_ITEM(heap, pos);
31 Py_INCREF(newitem);
32 /* Follow the path to the root, moving parents down until finding
33 a place newitem fits. */
34 while (pos > startpos){
35 parentpos = (pos - 1) >> 1;
36 parent = PyList_GET_ITEM(heap, parentpos);
Georg Brandlf78e02b2008-06-10 17:40:04 +000037 cmp = cmp_lt(newitem, parent);
Raymond Hettinger855d9a92004-09-28 00:03:54 +000038 if (cmp == -1) {
39 Py_DECREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000040 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +000041 }
Georg Brandlf78e02b2008-06-10 17:40:04 +000042 if (cmp == 0)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000043 break;
44 Py_INCREF(parent);
45 Py_DECREF(PyList_GET_ITEM(heap, pos));
46 PyList_SET_ITEM(heap, pos, parent);
47 pos = parentpos;
48 }
49 Py_DECREF(PyList_GET_ITEM(heap, pos));
50 PyList_SET_ITEM(heap, pos, newitem);
51 return 0;
52}
53
54static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000055_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000056{
Martin v. Löwisad0a4622006-02-16 14:30:23 +000057 Py_ssize_t startpos, endpos, childpos, rightpos;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000058 int cmp;
59 PyObject *newitem, *tmp;
60
61 assert(PyList_Check(heap));
62 endpos = PyList_GET_SIZE(heap);
63 startpos = pos;
64 if (pos >= endpos) {
65 PyErr_SetString(PyExc_IndexError, "index out of range");
66 return -1;
67 }
68 newitem = PyList_GET_ITEM(heap, pos);
69 Py_INCREF(newitem);
70
71 /* Bubble up the smaller child until hitting a leaf. */
72 childpos = 2*pos + 1; /* leftmost child position */
73 while (childpos < endpos) {
74 /* Set childpos to index of smaller child. */
75 rightpos = childpos + 1;
76 if (rightpos < endpos) {
Georg Brandlf78e02b2008-06-10 17:40:04 +000077 cmp = cmp_lt(
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000078 PyList_GET_ITEM(heap, childpos),
Georg Brandlf78e02b2008-06-10 17:40:04 +000079 PyList_GET_ITEM(heap, rightpos));
Raymond Hettinger855d9a92004-09-28 00:03:54 +000080 if (cmp == -1) {
81 Py_DECREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000082 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +000083 }
Georg Brandlf78e02b2008-06-10 17:40:04 +000084 if (cmp == 0)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000085 childpos = rightpos;
86 }
87 /* Move the smaller child up. */
88 tmp = PyList_GET_ITEM(heap, childpos);
89 Py_INCREF(tmp);
90 Py_DECREF(PyList_GET_ITEM(heap, pos));
91 PyList_SET_ITEM(heap, pos, tmp);
92 pos = childpos;
93 childpos = 2*pos + 1;
94 }
95
96 /* The leaf at pos is empty now. Put newitem there, and and bubble
97 it up to its final resting place (by sifting its parents down). */
98 Py_DECREF(PyList_GET_ITEM(heap, pos));
99 PyList_SET_ITEM(heap, pos, newitem);
100 return _siftdown(heap, startpos, pos);
101}
102
103static PyObject *
104heappush(PyObject *self, PyObject *args)
105{
106 PyObject *heap, *item;
107
108 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
109 return NULL;
110
111 if (!PyList_Check(heap)) {
112 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
113 return NULL;
114 }
115
116 if (PyList_Append(heap, item) == -1)
117 return NULL;
118
119 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
120 return NULL;
121 Py_INCREF(Py_None);
122 return Py_None;
123}
124
125PyDoc_STRVAR(heappush_doc,
126"Push item onto heap, maintaining the heap invariant.");
127
128static PyObject *
129heappop(PyObject *self, PyObject *heap)
130{
131 PyObject *lastelt, *returnitem;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000132 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000133
134 if (!PyList_Check(heap)) {
135 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
136 return NULL;
137 }
138
139 /* # raises appropriate IndexError if heap is empty */
140 n = PyList_GET_SIZE(heap);
141 if (n == 0) {
142 PyErr_SetString(PyExc_IndexError, "index out of range");
143 return NULL;
144 }
145
146 lastelt = PyList_GET_ITEM(heap, n-1) ;
147 Py_INCREF(lastelt);
148 PyList_SetSlice(heap, n-1, n, NULL);
149 n--;
150
151 if (!n)
152 return lastelt;
153 returnitem = PyList_GET_ITEM(heap, 0);
154 PyList_SET_ITEM(heap, 0, lastelt);
155 if (_siftup((PyListObject *)heap, 0) == -1) {
156 Py_DECREF(returnitem);
157 return NULL;
158 }
159 return returnitem;
160}
161
162PyDoc_STRVAR(heappop_doc,
163"Pop the smallest item off the heap, maintaining the heap invariant.");
164
165static PyObject *
166heapreplace(PyObject *self, PyObject *args)
167{
168 PyObject *heap, *item, *returnitem;
169
170 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
171 return NULL;
172
173 if (!PyList_Check(heap)) {
174 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
175 return NULL;
176 }
177
178 if (PyList_GET_SIZE(heap) < 1) {
179 PyErr_SetString(PyExc_IndexError, "index out of range");
180 return NULL;
181 }
182
183 returnitem = PyList_GET_ITEM(heap, 0);
184 Py_INCREF(item);
185 PyList_SET_ITEM(heap, 0, item);
186 if (_siftup((PyListObject *)heap, 0) == -1) {
187 Py_DECREF(returnitem);
188 return NULL;
189 }
190 return returnitem;
191}
192
193PyDoc_STRVAR(heapreplace_doc,
194"Pop and return the current smallest value, and add the new item.\n\
195\n\
196This is more efficient than heappop() followed by heappush(), and can be\n\
197more appropriate when using a fixed-size heap. Note that the value\n\
198returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000199this routine unless written as part of a conditional replacement:\n\n\
200 if item > heap[0]:\n\
201 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000202
203static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000204heappushpop(PyObject *self, PyObject *args)
205{
206 PyObject *heap, *item, *returnitem;
207 int cmp;
208
209 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
210 return NULL;
211
212 if (!PyList_Check(heap)) {
213 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
214 return NULL;
215 }
216
217 if (PyList_GET_SIZE(heap) < 1) {
218 Py_INCREF(item);
219 return item;
220 }
221
Georg Brandlf78e02b2008-06-10 17:40:04 +0000222 cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000223 if (cmp == -1)
224 return NULL;
Georg Brandlf78e02b2008-06-10 17:40:04 +0000225 if (cmp == 0) {
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000226 Py_INCREF(item);
227 return item;
228 }
229
230 returnitem = PyList_GET_ITEM(heap, 0);
231 Py_INCREF(item);
232 PyList_SET_ITEM(heap, 0, item);
233 if (_siftup((PyListObject *)heap, 0) == -1) {
234 Py_DECREF(returnitem);
235 return NULL;
236 }
237 return returnitem;
238}
239
240PyDoc_STRVAR(heappushpop_doc,
241"Push item on the heap, then pop and return the smallest item\n\
242from the heap. The combined action runs more efficiently than\n\
243heappush() followed by a separate call to heappop().");
244
245static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000246heapify(PyObject *self, PyObject *heap)
247{
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000248 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000249
250 if (!PyList_Check(heap)) {
251 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
252 return NULL;
253 }
254
255 n = PyList_GET_SIZE(heap);
256 /* Transform bottom-up. The largest index there's any point to
257 looking at is the largest with a child index in-range, so must
258 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
259 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
260 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
261 and that's again n//2-1.
262 */
263 for (i=n/2-1 ; i>=0 ; i--)
264 if(_siftup((PyListObject *)heap, i) == -1)
265 return NULL;
266 Py_INCREF(Py_None);
267 return Py_None;
268}
269
270PyDoc_STRVAR(heapify_doc,
271"Transform list into a heap, in-place, in O(len(heap)) time.");
272
Raymond Hettingerc9297662004-06-12 22:48:46 +0000273static PyObject *
274nlargest(PyObject *self, PyObject *args)
275{
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000276 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
Thomas Wouters13870b12006-02-16 19:21:53 +0000277 Py_ssize_t i, n;
Georg Brandlf78e02b2008-06-10 17:40:04 +0000278 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000279
Thomas Wouters13870b12006-02-16 19:21:53 +0000280 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
Raymond Hettingerc9297662004-06-12 22:48:46 +0000281 return NULL;
282
283 it = PyObject_GetIter(iterable);
284 if (it == NULL)
285 return NULL;
286
287 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000288 if (heap == NULL)
Raymond Hettingerc9297662004-06-12 22:48:46 +0000289 goto fail;
290
291 for (i=0 ; i<n ; i++ ){
292 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000293 if (elem == NULL) {
294 if (PyErr_Occurred())
295 goto fail;
296 else
297 goto sortit;
298 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000299 if (PyList_Append(heap, elem) == -1) {
300 Py_DECREF(elem);
301 goto fail;
302 }
303 Py_DECREF(elem);
304 }
305 if (PyList_GET_SIZE(heap) == 0)
306 goto sortit;
307
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000308 for (i=n/2-1 ; i>=0 ; i--)
309 if(_siftup((PyListObject *)heap, i) == -1)
310 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000311
312 sol = PyList_GET_ITEM(heap, 0);
313 while (1) {
314 elem = PyIter_Next(it);
315 if (elem == NULL) {
316 if (PyErr_Occurred())
317 goto fail;
318 else
319 goto sortit;
320 }
Georg Brandlf78e02b2008-06-10 17:40:04 +0000321 cmp = cmp_lt(sol, elem);
322 if (cmp == -1) {
323 Py_DECREF(elem);
324 goto fail;
325 }
326 if (cmp == 0) {
Raymond Hettingerc9297662004-06-12 22:48:46 +0000327 Py_DECREF(elem);
328 continue;
329 }
330 oldelem = PyList_GET_ITEM(heap, 0);
331 PyList_SET_ITEM(heap, 0, elem);
332 Py_DECREF(oldelem);
333 if (_siftup((PyListObject *)heap, 0) == -1)
334 goto fail;
335 sol = PyList_GET_ITEM(heap, 0);
336 }
337sortit:
Raymond Hettingerc9297662004-06-12 22:48:46 +0000338 if (PyList_Sort(heap) == -1)
339 goto fail;
340 if (PyList_Reverse(heap) == -1)
341 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000342 Py_DECREF(it);
Raymond Hettingerc9297662004-06-12 22:48:46 +0000343 return heap;
344
345fail:
346 Py_DECREF(it);
347 Py_XDECREF(heap);
348 return NULL;
349}
350
351PyDoc_STRVAR(nlargest_doc,
352"Find the n largest elements in a dataset.\n\
353\n\
354Equivalent to: sorted(iterable, reverse=True)[:n]\n");
355
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000356static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000357_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000358{
359 PyObject *newitem, *parent;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000360 int cmp;
361 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000362
363 assert(PyList_Check(heap));
364 if (pos >= PyList_GET_SIZE(heap)) {
365 PyErr_SetString(PyExc_IndexError, "index out of range");
366 return -1;
367 }
368
369 newitem = PyList_GET_ITEM(heap, pos);
370 Py_INCREF(newitem);
371 /* Follow the path to the root, moving parents down until finding
372 a place newitem fits. */
373 while (pos > startpos){
374 parentpos = (pos - 1) >> 1;
375 parent = PyList_GET_ITEM(heap, parentpos);
Georg Brandlf78e02b2008-06-10 17:40:04 +0000376 cmp = cmp_lt(parent, newitem);
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000377 if (cmp == -1) {
378 Py_DECREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000379 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000380 }
Georg Brandlf78e02b2008-06-10 17:40:04 +0000381 if (cmp == 0)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000382 break;
383 Py_INCREF(parent);
384 Py_DECREF(PyList_GET_ITEM(heap, pos));
385 PyList_SET_ITEM(heap, pos, parent);
386 pos = parentpos;
387 }
388 Py_DECREF(PyList_GET_ITEM(heap, pos));
389 PyList_SET_ITEM(heap, pos, newitem);
390 return 0;
391}
392
393static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000394_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000395{
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000396 Py_ssize_t startpos, endpos, childpos, rightpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000397 int cmp;
398 PyObject *newitem, *tmp;
399
400 assert(PyList_Check(heap));
401 endpos = PyList_GET_SIZE(heap);
402 startpos = pos;
403 if (pos >= endpos) {
404 PyErr_SetString(PyExc_IndexError, "index out of range");
405 return -1;
406 }
407 newitem = PyList_GET_ITEM(heap, pos);
408 Py_INCREF(newitem);
409
410 /* Bubble up the smaller child until hitting a leaf. */
411 childpos = 2*pos + 1; /* leftmost child position */
412 while (childpos < endpos) {
413 /* Set childpos to index of smaller child. */
414 rightpos = childpos + 1;
415 if (rightpos < endpos) {
Georg Brandlf78e02b2008-06-10 17:40:04 +0000416 cmp = cmp_lt(
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000417 PyList_GET_ITEM(heap, rightpos),
Georg Brandlf78e02b2008-06-10 17:40:04 +0000418 PyList_GET_ITEM(heap, childpos));
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000419 if (cmp == -1) {
420 Py_DECREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000421 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000422 }
Georg Brandlf78e02b2008-06-10 17:40:04 +0000423 if (cmp == 0)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000424 childpos = rightpos;
425 }
426 /* Move the smaller child up. */
427 tmp = PyList_GET_ITEM(heap, childpos);
428 Py_INCREF(tmp);
429 Py_DECREF(PyList_GET_ITEM(heap, pos));
430 PyList_SET_ITEM(heap, pos, tmp);
431 pos = childpos;
432 childpos = 2*pos + 1;
433 }
434
435 /* The leaf at pos is empty now. Put newitem there, and and bubble
436 it up to its final resting place (by sifting its parents down). */
437 Py_DECREF(PyList_GET_ITEM(heap, pos));
438 PyList_SET_ITEM(heap, pos, newitem);
439 return _siftdownmax(heap, startpos, pos);
440}
441
442static PyObject *
443nsmallest(PyObject *self, PyObject *args)
444{
445 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000446 Py_ssize_t i, n;
Georg Brandlf78e02b2008-06-10 17:40:04 +0000447 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000448
Thomas Woutersed6254a2006-02-16 17:32:54 +0000449 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000450 return NULL;
451
452 it = PyObject_GetIter(iterable);
453 if (it == NULL)
454 return NULL;
455
456 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000457 if (heap == NULL)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000458 goto fail;
459
460 for (i=0 ; i<n ; i++ ){
461 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000462 if (elem == NULL) {
463 if (PyErr_Occurred())
464 goto fail;
465 else
466 goto sortit;
467 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000468 if (PyList_Append(heap, elem) == -1) {
469 Py_DECREF(elem);
470 goto fail;
471 }
472 Py_DECREF(elem);
473 }
474 n = PyList_GET_SIZE(heap);
475 if (n == 0)
476 goto sortit;
477
478 for (i=n/2-1 ; i>=0 ; i--)
479 if(_siftupmax((PyListObject *)heap, i) == -1)
480 goto fail;
481
482 los = PyList_GET_ITEM(heap, 0);
483 while (1) {
484 elem = PyIter_Next(it);
485 if (elem == NULL) {
486 if (PyErr_Occurred())
487 goto fail;
488 else
489 goto sortit;
490 }
Georg Brandlf78e02b2008-06-10 17:40:04 +0000491 cmp = cmp_lt(elem, los);
492 if (cmp == -1) {
493 Py_DECREF(elem);
494 goto fail;
495 }
496 if (cmp == 0) {
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000497 Py_DECREF(elem);
498 continue;
499 }
500
501 oldelem = PyList_GET_ITEM(heap, 0);
502 PyList_SET_ITEM(heap, 0, elem);
503 Py_DECREF(oldelem);
504 if (_siftupmax((PyListObject *)heap, 0) == -1)
505 goto fail;
506 los = PyList_GET_ITEM(heap, 0);
507 }
508
509sortit:
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000510 if (PyList_Sort(heap) == -1)
511 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000512 Py_DECREF(it);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000513 return heap;
514
515fail:
516 Py_DECREF(it);
517 Py_XDECREF(heap);
518 return NULL;
519}
520
521PyDoc_STRVAR(nsmallest_doc,
522"Find the n smallest elements in a dataset.\n\
523\n\
524Equivalent to: sorted(iterable)[:n]\n");
525
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000526static PyMethodDef heapq_methods[] = {
527 {"heappush", (PyCFunction)heappush,
528 METH_VARARGS, heappush_doc},
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000529 {"heappushpop", (PyCFunction)heappushpop,
530 METH_VARARGS, heappushpop_doc},
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000531 {"heappop", (PyCFunction)heappop,
532 METH_O, heappop_doc},
533 {"heapreplace", (PyCFunction)heapreplace,
534 METH_VARARGS, heapreplace_doc},
535 {"heapify", (PyCFunction)heapify,
536 METH_O, heapify_doc},
Raymond Hettingerc9297662004-06-12 22:48:46 +0000537 {"nlargest", (PyCFunction)nlargest,
538 METH_VARARGS, nlargest_doc},
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000539 {"nsmallest", (PyCFunction)nsmallest,
540 METH_VARARGS, nsmallest_doc},
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000541 {NULL, NULL} /* sentinel */
542};
543
544PyDoc_STRVAR(module_doc,
545"Heap queue algorithm (a.k.a. priority queue).\n\
546\n\
547Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
548all k, counting elements from 0. For the sake of comparison,\n\
549non-existing elements are considered to be infinite. The interesting\n\
550property of a heap is that a[0] is always its smallest element.\n\
551\n\
552Usage:\n\
553\n\
554heap = [] # creates an empty heap\n\
555heappush(heap, item) # pushes a new item on the heap\n\
556item = heappop(heap) # pops the smallest item from the heap\n\
557item = heap[0] # smallest item on the heap without popping it\n\
558heapify(x) # transforms list into a heap, in-place, in linear time\n\
559item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
560 # new item; the heap size is unchanged\n\
561\n\
562Our API differs from textbook heap algorithms as follows:\n\
563\n\
564- We use 0-based indexing. This makes the relationship between the\n\
565 index for a node and the indexes for its children slightly less\n\
566 obvious, but is more suitable since Python uses 0-based indexing.\n\
567\n\
568- Our heappop() method returns the smallest item, not the largest.\n\
569\n\
570These two make it possible to view the heap as a regular Python list\n\
571without surprises: heap[0] is the smallest item, and heap.sort()\n\
572maintains the heap invariant!\n");
573
574
575PyDoc_STRVAR(__about__,
576"Heap queues\n\
577\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000578[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000579\n\
580Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
581all k, counting elements from 0. For the sake of comparison,\n\
582non-existing elements are considered to be infinite. The interesting\n\
583property of a heap is that a[0] is always its smallest element.\n"
584"\n\
585The strange invariant above is meant to be an efficient memory\n\
586representation for a tournament. The numbers below are `k', not a[k]:\n\
587\n\
588 0\n\
589\n\
590 1 2\n\
591\n\
592 3 4 5 6\n\
593\n\
594 7 8 9 10 11 12 13 14\n\
595\n\
596 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
597\n\
598\n\
599In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
600an usual binary tournament we see in sports, each cell is the winner\n\
601over the two cells it tops, and we can trace the winner down the tree\n\
602to see all opponents s/he had. However, in many computer applications\n\
603of such tournaments, we do not need to trace the history of a winner.\n\
604To be more memory efficient, when a winner is promoted, we try to\n\
605replace it by something else at a lower level, and the rule becomes\n\
606that a cell and the two cells it tops contain three different items,\n\
607but the top cell \"wins\" over the two topped cells.\n"
608"\n\
609If this heap invariant is protected at all time, index 0 is clearly\n\
610the overall winner. The simplest algorithmic way to remove it and\n\
611find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
612diagram above) into the 0 position, and then percolate this new 0 down\n\
613the tree, exchanging values, until the invariant is re-established.\n\
614This is clearly logarithmic on the total number of items in the tree.\n\
615By iterating over all items, you get an O(n ln n) sort.\n"
616"\n\
617A nice feature of this sort is that you can efficiently insert new\n\
618items while the sort is going on, provided that the inserted items are\n\
619not \"better\" than the last 0'th element you extracted. This is\n\
620especially useful in simulation contexts, where the tree holds all\n\
621incoming events, and the \"win\" condition means the smallest scheduled\n\
622time. When an event schedule other events for execution, they are\n\
623scheduled into the future, so they can easily go into the heap. So, a\n\
624heap is a good structure for implementing schedulers (this is what I\n\
625used for my MIDI sequencer :-).\n"
626"\n\
627Various structures for implementing schedulers have been extensively\n\
628studied, and heaps are good for this, as they are reasonably speedy,\n\
629the speed is almost constant, and the worst case is not much different\n\
630than the average case. However, there are other representations which\n\
631are more efficient overall, yet the worst cases might be terrible.\n"
632"\n\
633Heaps are also very useful in big disk sorts. You most probably all\n\
634know that a big sort implies producing \"runs\" (which are pre-sorted\n\
635sequences, which size is usually related to the amount of CPU memory),\n\
636followed by a merging passes for these runs, which merging is often\n\
637very cleverly organised[1]. It is very important that the initial\n\
638sort produces the longest runs possible. Tournaments are a good way\n\
639to that. If, using all the memory available to hold a tournament, you\n\
640replace and percolate items that happen to fit the current run, you'll\n\
641produce runs which are twice the size of the memory for random input,\n\
642and much better for input fuzzily ordered.\n"
643"\n\
644Moreover, if you output the 0'th item on disk and get an input which\n\
645may not fit in the current tournament (because the value \"wins\" over\n\
646the last output value), it cannot fit in the heap, so the size of the\n\
647heap decreases. The freed memory could be cleverly reused immediately\n\
648for progressively building a second heap, which grows at exactly the\n\
649same rate the first heap is melting. When the first heap completely\n\
650vanishes, you switch heaps and start a new run. Clever and quite\n\
651effective!\n\
652\n\
653In a word, heaps are useful memory structures to know. I use them in\n\
654a few applications, and I think it is good to keep a `heap' module\n\
655around. :-)\n"
656"\n\
657--------------------\n\
658[1] The disk balancing algorithms which are current, nowadays, are\n\
659more annoying than clever, and this is a consequence of the seeking\n\
660capabilities of the disks. On devices which cannot seek, like big\n\
661tape drives, the story was quite different, and one had to be very\n\
662clever to ensure (far in advance) that each tape movement will be the\n\
663most effective possible (that is, will best participate at\n\
664\"progressing\" the merge). Some tapes were even able to read\n\
665backwards, and this was also used to avoid the rewinding time.\n\
666Believe me, real good tape sorts were quite spectacular to watch!\n\
667From all times, sorting has always been a Great Art! :-)\n");
668
Martin v. Löwis1a214512008-06-11 05:26:20 +0000669
670static struct PyModuleDef _heapqmodule = {
671 PyModuleDef_HEAD_INIT,
672 "_heapq",
673 module_doc,
674 -1,
675 heapq_methods,
676 NULL,
677 NULL,
678 NULL,
679 NULL
680};
681
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000682PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000683PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000684{
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000685 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000686
Martin v. Löwis1a214512008-06-11 05:26:20 +0000687 m = PyModule_Create(&_heapqmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +0000688 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +0000689 return NULL;
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000690 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
691 PyModule_AddObject(m, "__about__", about);
Martin v. Löwis1a214512008-06-11 05:26:20 +0000692 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000693}
694