blob: ad2fd70f53a9640c0356ba343c20b27a81d0dd0a [file] [log] [blame]
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00001/* Drop in replacement for heapq.py
2
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by François Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
11static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000012_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000013{
14 PyObject *newitem, *parent;
Martin v. Löwisad0a4622006-02-16 14:30:23 +000015 int cmp;
16 Py_ssize_t parentpos;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000017
18 assert(PyList_Check(heap));
19 if (pos >= PyList_GET_SIZE(heap)) {
20 PyErr_SetString(PyExc_IndexError, "index out of range");
21 return -1;
22 }
23
24 newitem = PyList_GET_ITEM(heap, pos);
25 Py_INCREF(newitem);
26 /* Follow the path to the root, moving parents down until finding
27 a place newitem fits. */
28 while (pos > startpos){
29 parentpos = (pos - 1) >> 1;
30 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettinger6d7702e2008-05-31 03:24:31 +000031 cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
Raymond Hettinger855d9a92004-09-28 00:03:54 +000032 if (cmp == -1) {
33 Py_DECREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000034 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +000035 }
Raymond Hettinger6d7702e2008-05-31 03:24:31 +000036 if (cmp == 0)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000037 break;
38 Py_INCREF(parent);
39 Py_DECREF(PyList_GET_ITEM(heap, pos));
40 PyList_SET_ITEM(heap, pos, parent);
41 pos = parentpos;
42 }
43 Py_DECREF(PyList_GET_ITEM(heap, pos));
44 PyList_SET_ITEM(heap, pos, newitem);
45 return 0;
46}
47
48static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000049_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000050{
Martin v. Löwisad0a4622006-02-16 14:30:23 +000051 Py_ssize_t startpos, endpos, childpos, rightpos;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000052 int cmp;
53 PyObject *newitem, *tmp;
54
55 assert(PyList_Check(heap));
56 endpos = PyList_GET_SIZE(heap);
57 startpos = pos;
58 if (pos >= endpos) {
59 PyErr_SetString(PyExc_IndexError, "index out of range");
60 return -1;
61 }
62 newitem = PyList_GET_ITEM(heap, pos);
63 Py_INCREF(newitem);
64
65 /* Bubble up the smaller child until hitting a leaf. */
66 childpos = 2*pos + 1; /* leftmost child position */
67 while (childpos < endpos) {
68 /* Set childpos to index of smaller child. */
69 rightpos = childpos + 1;
70 if (rightpos < endpos) {
71 cmp = PyObject_RichCompareBool(
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000072 PyList_GET_ITEM(heap, childpos),
Raymond Hettinger6d7702e2008-05-31 03:24:31 +000073 PyList_GET_ITEM(heap, rightpos),
74 Py_LT);
Raymond Hettinger855d9a92004-09-28 00:03:54 +000075 if (cmp == -1) {
76 Py_DECREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000077 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +000078 }
Raymond Hettinger6d7702e2008-05-31 03:24:31 +000079 if (cmp == 0)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000080 childpos = rightpos;
81 }
82 /* Move the smaller child up. */
83 tmp = PyList_GET_ITEM(heap, childpos);
84 Py_INCREF(tmp);
85 Py_DECREF(PyList_GET_ITEM(heap, pos));
86 PyList_SET_ITEM(heap, pos, tmp);
87 pos = childpos;
88 childpos = 2*pos + 1;
89 }
90
91 /* The leaf at pos is empty now. Put newitem there, and and bubble
92 it up to its final resting place (by sifting its parents down). */
93 Py_DECREF(PyList_GET_ITEM(heap, pos));
94 PyList_SET_ITEM(heap, pos, newitem);
95 return _siftdown(heap, startpos, pos);
96}
97
98static PyObject *
99heappush(PyObject *self, PyObject *args)
100{
101 PyObject *heap, *item;
102
103 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
104 return NULL;
105
106 if (!PyList_Check(heap)) {
107 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
108 return NULL;
109 }
110
111 if (PyList_Append(heap, item) == -1)
112 return NULL;
113
114 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
115 return NULL;
116 Py_INCREF(Py_None);
117 return Py_None;
118}
119
120PyDoc_STRVAR(heappush_doc,
121"Push item onto heap, maintaining the heap invariant.");
122
123static PyObject *
124heappop(PyObject *self, PyObject *heap)
125{
126 PyObject *lastelt, *returnitem;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000127 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000128
129 if (!PyList_Check(heap)) {
130 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
131 return NULL;
132 }
133
134 /* # raises appropriate IndexError if heap is empty */
135 n = PyList_GET_SIZE(heap);
136 if (n == 0) {
137 PyErr_SetString(PyExc_IndexError, "index out of range");
138 return NULL;
139 }
140
141 lastelt = PyList_GET_ITEM(heap, n-1) ;
142 Py_INCREF(lastelt);
143 PyList_SetSlice(heap, n-1, n, NULL);
144 n--;
145
146 if (!n)
147 return lastelt;
148 returnitem = PyList_GET_ITEM(heap, 0);
149 PyList_SET_ITEM(heap, 0, lastelt);
150 if (_siftup((PyListObject *)heap, 0) == -1) {
151 Py_DECREF(returnitem);
152 return NULL;
153 }
154 return returnitem;
155}
156
157PyDoc_STRVAR(heappop_doc,
158"Pop the smallest item off the heap, maintaining the heap invariant.");
159
160static PyObject *
161heapreplace(PyObject *self, PyObject *args)
162{
163 PyObject *heap, *item, *returnitem;
164
165 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
166 return NULL;
167
168 if (!PyList_Check(heap)) {
169 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
170 return NULL;
171 }
172
173 if (PyList_GET_SIZE(heap) < 1) {
174 PyErr_SetString(PyExc_IndexError, "index out of range");
175 return NULL;
176 }
177
178 returnitem = PyList_GET_ITEM(heap, 0);
179 Py_INCREF(item);
180 PyList_SET_ITEM(heap, 0, item);
181 if (_siftup((PyListObject *)heap, 0) == -1) {
182 Py_DECREF(returnitem);
183 return NULL;
184 }
185 return returnitem;
186}
187
188PyDoc_STRVAR(heapreplace_doc,
189"Pop and return the current smallest value, and add the new item.\n\
190\n\
191This is more efficient than heappop() followed by heappush(), and can be\n\
192more appropriate when using a fixed-size heap. Note that the value\n\
193returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000194this routine unless written as part of a conditional replacement:\n\n\
195 if item > heap[0]:\n\
196 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000197
198static PyObject *
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000199heappushpop(PyObject *self, PyObject *args)
200{
201 PyObject *heap, *item, *returnitem;
202 int cmp;
203
204 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
205 return NULL;
206
207 if (!PyList_Check(heap)) {
208 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
209 return NULL;
210 }
211
212 if (PyList_GET_SIZE(heap) < 1) {
213 Py_INCREF(item);
214 return item;
215 }
216
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000217 cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000218 if (cmp == -1)
219 return NULL;
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000220 if (cmp == 0) {
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000221 Py_INCREF(item);
222 return item;
223 }
224
225 returnitem = PyList_GET_ITEM(heap, 0);
226 Py_INCREF(item);
227 PyList_SET_ITEM(heap, 0, item);
228 if (_siftup((PyListObject *)heap, 0) == -1) {
229 Py_DECREF(returnitem);
230 return NULL;
231 }
232 return returnitem;
233}
234
235PyDoc_STRVAR(heappushpop_doc,
236"Push item on the heap, then pop and return the smallest item\n\
237from the heap. The combined action runs more efficiently than\n\
238heappush() followed by a separate call to heappop().");
239
240static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000241heapify(PyObject *self, PyObject *heap)
242{
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000243 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000244
245 if (!PyList_Check(heap)) {
246 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
247 return NULL;
248 }
249
250 n = PyList_GET_SIZE(heap);
251 /* Transform bottom-up. The largest index there's any point to
252 looking at is the largest with a child index in-range, so must
253 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
254 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
255 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
256 and that's again n//2-1.
257 */
258 for (i=n/2-1 ; i>=0 ; i--)
259 if(_siftup((PyListObject *)heap, i) == -1)
260 return NULL;
261 Py_INCREF(Py_None);
262 return Py_None;
263}
264
265PyDoc_STRVAR(heapify_doc,
266"Transform list into a heap, in-place, in O(len(heap)) time.");
267
Raymond Hettingerc9297662004-06-12 22:48:46 +0000268static PyObject *
269nlargest(PyObject *self, PyObject *args)
270{
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000271 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
Thomas Wouters13870b12006-02-16 19:21:53 +0000272 Py_ssize_t i, n;
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000273 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000274
Thomas Wouters13870b12006-02-16 19:21:53 +0000275 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
Raymond Hettingerc9297662004-06-12 22:48:46 +0000276 return NULL;
277
278 it = PyObject_GetIter(iterable);
279 if (it == NULL)
280 return NULL;
281
282 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000283 if (heap == NULL)
Raymond Hettingerc9297662004-06-12 22:48:46 +0000284 goto fail;
285
286 for (i=0 ; i<n ; i++ ){
287 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000288 if (elem == NULL) {
289 if (PyErr_Occurred())
290 goto fail;
291 else
292 goto sortit;
293 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000294 if (PyList_Append(heap, elem) == -1) {
295 Py_DECREF(elem);
296 goto fail;
297 }
298 Py_DECREF(elem);
299 }
300 if (PyList_GET_SIZE(heap) == 0)
301 goto sortit;
302
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000303 for (i=n/2-1 ; i>=0 ; i--)
304 if(_siftup((PyListObject *)heap, i) == -1)
305 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000306
307 sol = PyList_GET_ITEM(heap, 0);
308 while (1) {
309 elem = PyIter_Next(it);
310 if (elem == NULL) {
311 if (PyErr_Occurred())
312 goto fail;
313 else
314 goto sortit;
315 }
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000316 cmp = PyObject_RichCompareBool(sol, elem, Py_LT);
317 if (cmp == -1) {
318 Py_DECREF(elem);
319 goto fail;
320 }
321 if (cmp == 0) {
Raymond Hettingerc9297662004-06-12 22:48:46 +0000322 Py_DECREF(elem);
323 continue;
324 }
325 oldelem = PyList_GET_ITEM(heap, 0);
326 PyList_SET_ITEM(heap, 0, elem);
327 Py_DECREF(oldelem);
328 if (_siftup((PyListObject *)heap, 0) == -1)
329 goto fail;
330 sol = PyList_GET_ITEM(heap, 0);
331 }
332sortit:
Raymond Hettingerc9297662004-06-12 22:48:46 +0000333 if (PyList_Sort(heap) == -1)
334 goto fail;
335 if (PyList_Reverse(heap) == -1)
336 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000337 Py_DECREF(it);
Raymond Hettingerc9297662004-06-12 22:48:46 +0000338 return heap;
339
340fail:
341 Py_DECREF(it);
342 Py_XDECREF(heap);
343 return NULL;
344}
345
346PyDoc_STRVAR(nlargest_doc,
347"Find the n largest elements in a dataset.\n\
348\n\
349Equivalent to: sorted(iterable, reverse=True)[:n]\n");
350
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000351static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000352_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000353{
354 PyObject *newitem, *parent;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000355 int cmp;
356 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000357
358 assert(PyList_Check(heap));
359 if (pos >= PyList_GET_SIZE(heap)) {
360 PyErr_SetString(PyExc_IndexError, "index out of range");
361 return -1;
362 }
363
364 newitem = PyList_GET_ITEM(heap, pos);
365 Py_INCREF(newitem);
366 /* Follow the path to the root, moving parents down until finding
367 a place newitem fits. */
368 while (pos > startpos){
369 parentpos = (pos - 1) >> 1;
370 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000371 cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000372 if (cmp == -1) {
373 Py_DECREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000374 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000375 }
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000376 if (cmp == 0)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000377 break;
378 Py_INCREF(parent);
379 Py_DECREF(PyList_GET_ITEM(heap, pos));
380 PyList_SET_ITEM(heap, pos, parent);
381 pos = parentpos;
382 }
383 Py_DECREF(PyList_GET_ITEM(heap, pos));
384 PyList_SET_ITEM(heap, pos, newitem);
385 return 0;
386}
387
388static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000389_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000390{
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000391 Py_ssize_t startpos, endpos, childpos, rightpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000392 int cmp;
393 PyObject *newitem, *tmp;
394
395 assert(PyList_Check(heap));
396 endpos = PyList_GET_SIZE(heap);
397 startpos = pos;
398 if (pos >= endpos) {
399 PyErr_SetString(PyExc_IndexError, "index out of range");
400 return -1;
401 }
402 newitem = PyList_GET_ITEM(heap, pos);
403 Py_INCREF(newitem);
404
405 /* Bubble up the smaller child until hitting a leaf. */
406 childpos = 2*pos + 1; /* leftmost child position */
407 while (childpos < endpos) {
408 /* Set childpos to index of smaller child. */
409 rightpos = childpos + 1;
410 if (rightpos < endpos) {
411 cmp = PyObject_RichCompareBool(
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000412 PyList_GET_ITEM(heap, rightpos),
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000413 PyList_GET_ITEM(heap, childpos),
414 Py_LT);
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000415 if (cmp == -1) {
416 Py_DECREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000417 return -1;
Raymond Hettinger855d9a92004-09-28 00:03:54 +0000418 }
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000419 if (cmp == 0)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000420 childpos = rightpos;
421 }
422 /* Move the smaller child up. */
423 tmp = PyList_GET_ITEM(heap, childpos);
424 Py_INCREF(tmp);
425 Py_DECREF(PyList_GET_ITEM(heap, pos));
426 PyList_SET_ITEM(heap, pos, tmp);
427 pos = childpos;
428 childpos = 2*pos + 1;
429 }
430
431 /* The leaf at pos is empty now. Put newitem there, and and bubble
432 it up to its final resting place (by sifting its parents down). */
433 Py_DECREF(PyList_GET_ITEM(heap, pos));
434 PyList_SET_ITEM(heap, pos, newitem);
435 return _siftdownmax(heap, startpos, pos);
436}
437
438static PyObject *
439nsmallest(PyObject *self, PyObject *args)
440{
441 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000442 Py_ssize_t i, n;
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000443 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000444
Thomas Woutersed6254a2006-02-16 17:32:54 +0000445 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000446 return NULL;
447
448 it = PyObject_GetIter(iterable);
449 if (it == NULL)
450 return NULL;
451
452 heap = PyList_New(0);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000453 if (heap == NULL)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000454 goto fail;
455
456 for (i=0 ; i<n ; i++ ){
457 elem = PyIter_Next(it);
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000458 if (elem == NULL) {
459 if (PyErr_Occurred())
460 goto fail;
461 else
462 goto sortit;
463 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000464 if (PyList_Append(heap, elem) == -1) {
465 Py_DECREF(elem);
466 goto fail;
467 }
468 Py_DECREF(elem);
469 }
470 n = PyList_GET_SIZE(heap);
471 if (n == 0)
472 goto sortit;
473
474 for (i=n/2-1 ; i>=0 ; i--)
475 if(_siftupmax((PyListObject *)heap, i) == -1)
476 goto fail;
477
478 los = PyList_GET_ITEM(heap, 0);
479 while (1) {
480 elem = PyIter_Next(it);
481 if (elem == NULL) {
482 if (PyErr_Occurred())
483 goto fail;
484 else
485 goto sortit;
486 }
Raymond Hettinger6d7702e2008-05-31 03:24:31 +0000487 cmp = PyObject_RichCompareBool(elem, los, Py_LT);
488 if (cmp == -1) {
489 Py_DECREF(elem);
490 goto fail;
491 }
492 if (cmp == 0) {
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000493 Py_DECREF(elem);
494 continue;
495 }
496
497 oldelem = PyList_GET_ITEM(heap, 0);
498 PyList_SET_ITEM(heap, 0, elem);
499 Py_DECREF(oldelem);
500 if (_siftupmax((PyListObject *)heap, 0) == -1)
501 goto fail;
502 los = PyList_GET_ITEM(heap, 0);
503 }
504
505sortit:
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000506 if (PyList_Sort(heap) == -1)
507 goto fail;
Raymond Hettingerde72edd2004-06-13 15:36:56 +0000508 Py_DECREF(it);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000509 return heap;
510
511fail:
512 Py_DECREF(it);
513 Py_XDECREF(heap);
514 return NULL;
515}
516
517PyDoc_STRVAR(nsmallest_doc,
518"Find the n smallest elements in a dataset.\n\
519\n\
520Equivalent to: sorted(iterable)[:n]\n");
521
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000522static PyMethodDef heapq_methods[] = {
523 {"heappush", (PyCFunction)heappush,
524 METH_VARARGS, heappush_doc},
Raymond Hettinger53bdf092008-03-13 19:03:51 +0000525 {"heappushpop", (PyCFunction)heappushpop,
526 METH_VARARGS, heappushpop_doc},
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000527 {"heappop", (PyCFunction)heappop,
528 METH_O, heappop_doc},
529 {"heapreplace", (PyCFunction)heapreplace,
530 METH_VARARGS, heapreplace_doc},
531 {"heapify", (PyCFunction)heapify,
532 METH_O, heapify_doc},
Raymond Hettingerc9297662004-06-12 22:48:46 +0000533 {"nlargest", (PyCFunction)nlargest,
534 METH_VARARGS, nlargest_doc},
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000535 {"nsmallest", (PyCFunction)nsmallest,
536 METH_VARARGS, nsmallest_doc},
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000537 {NULL, NULL} /* sentinel */
538};
539
540PyDoc_STRVAR(module_doc,
541"Heap queue algorithm (a.k.a. priority queue).\n\
542\n\
543Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
544all k, counting elements from 0. For the sake of comparison,\n\
545non-existing elements are considered to be infinite. The interesting\n\
546property of a heap is that a[0] is always its smallest element.\n\
547\n\
548Usage:\n\
549\n\
550heap = [] # creates an empty heap\n\
551heappush(heap, item) # pushes a new item on the heap\n\
552item = heappop(heap) # pops the smallest item from the heap\n\
553item = heap[0] # smallest item on the heap without popping it\n\
554heapify(x) # transforms list into a heap, in-place, in linear time\n\
555item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
556 # new item; the heap size is unchanged\n\
557\n\
558Our API differs from textbook heap algorithms as follows:\n\
559\n\
560- We use 0-based indexing. This makes the relationship between the\n\
561 index for a node and the indexes for its children slightly less\n\
562 obvious, but is more suitable since Python uses 0-based indexing.\n\
563\n\
564- Our heappop() method returns the smallest item, not the largest.\n\
565\n\
566These two make it possible to view the heap as a regular Python list\n\
567without surprises: heap[0] is the smallest item, and heap.sort()\n\
568maintains the heap invariant!\n");
569
570
571PyDoc_STRVAR(__about__,
572"Heap queues\n\
573\n\
574[explanation by François Pinard]\n\
575\n\
576Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
577all k, counting elements from 0. For the sake of comparison,\n\
578non-existing elements are considered to be infinite. The interesting\n\
579property of a heap is that a[0] is always its smallest element.\n"
580"\n\
581The strange invariant above is meant to be an efficient memory\n\
582representation for a tournament. The numbers below are `k', not a[k]:\n\
583\n\
584 0\n\
585\n\
586 1 2\n\
587\n\
588 3 4 5 6\n\
589\n\
590 7 8 9 10 11 12 13 14\n\
591\n\
592 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
593\n\
594\n\
595In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
596an usual binary tournament we see in sports, each cell is the winner\n\
597over the two cells it tops, and we can trace the winner down the tree\n\
598to see all opponents s/he had. However, in many computer applications\n\
599of such tournaments, we do not need to trace the history of a winner.\n\
600To be more memory efficient, when a winner is promoted, we try to\n\
601replace it by something else at a lower level, and the rule becomes\n\
602that a cell and the two cells it tops contain three different items,\n\
603but the top cell \"wins\" over the two topped cells.\n"
604"\n\
605If this heap invariant is protected at all time, index 0 is clearly\n\
606the overall winner. The simplest algorithmic way to remove it and\n\
607find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
608diagram above) into the 0 position, and then percolate this new 0 down\n\
609the tree, exchanging values, until the invariant is re-established.\n\
610This is clearly logarithmic on the total number of items in the tree.\n\
611By iterating over all items, you get an O(n ln n) sort.\n"
612"\n\
613A nice feature of this sort is that you can efficiently insert new\n\
614items while the sort is going on, provided that the inserted items are\n\
615not \"better\" than the last 0'th element you extracted. This is\n\
616especially useful in simulation contexts, where the tree holds all\n\
617incoming events, and the \"win\" condition means the smallest scheduled\n\
618time. When an event schedule other events for execution, they are\n\
619scheduled into the future, so they can easily go into the heap. So, a\n\
620heap is a good structure for implementing schedulers (this is what I\n\
621used for my MIDI sequencer :-).\n"
622"\n\
623Various structures for implementing schedulers have been extensively\n\
624studied, and heaps are good for this, as they are reasonably speedy,\n\
625the speed is almost constant, and the worst case is not much different\n\
626than the average case. However, there are other representations which\n\
627are more efficient overall, yet the worst cases might be terrible.\n"
628"\n\
629Heaps are also very useful in big disk sorts. You most probably all\n\
630know that a big sort implies producing \"runs\" (which are pre-sorted\n\
631sequences, which size is usually related to the amount of CPU memory),\n\
632followed by a merging passes for these runs, which merging is often\n\
633very cleverly organised[1]. It is very important that the initial\n\
634sort produces the longest runs possible. Tournaments are a good way\n\
635to that. If, using all the memory available to hold a tournament, you\n\
636replace and percolate items that happen to fit the current run, you'll\n\
637produce runs which are twice the size of the memory for random input,\n\
638and much better for input fuzzily ordered.\n"
639"\n\
640Moreover, if you output the 0'th item on disk and get an input which\n\
641may not fit in the current tournament (because the value \"wins\" over\n\
642the last output value), it cannot fit in the heap, so the size of the\n\
643heap decreases. The freed memory could be cleverly reused immediately\n\
644for progressively building a second heap, which grows at exactly the\n\
645same rate the first heap is melting. When the first heap completely\n\
646vanishes, you switch heaps and start a new run. Clever and quite\n\
647effective!\n\
648\n\
649In a word, heaps are useful memory structures to know. I use them in\n\
650a few applications, and I think it is good to keep a `heap' module\n\
651around. :-)\n"
652"\n\
653--------------------\n\
654[1] The disk balancing algorithms which are current, nowadays, are\n\
655more annoying than clever, and this is a consequence of the seeking\n\
656capabilities of the disks. On devices which cannot seek, like big\n\
657tape drives, the story was quite different, and one had to be very\n\
658clever to ensure (far in advance) that each tape movement will be the\n\
659most effective possible (that is, will best participate at\n\
660\"progressing\" the merge). Some tapes were even able to read\n\
661backwards, and this was also used to avoid the rewinding time.\n\
662Believe me, real good tape sorts were quite spectacular to watch!\n\
663From all times, sorting has always been a Great Art! :-)\n");
664
665PyMODINIT_FUNC
666init_heapq(void)
667{
668 PyObject *m;
669
670 m = Py_InitModule3("_heapq", heapq_methods, module_doc);
Neal Norwitz1ac754f2006-01-19 06:09:39 +0000671 if (m == NULL)
672 return;
Christian Heimes593daf52008-05-26 12:51:38 +0000673 PyModule_AddObject(m, "__about__", PyBytes_FromString(__about__));
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000674}
675