blob: 1066f94b002a8b56297b336bddcf312039e9ff25 [file] [log] [blame]
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo1670b432010-09-03 22:03:10 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000012_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000013{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000014 PyObject *newitem, *parent;
15 int cmp;
16 Py_ssize_t parentpos;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 assert(PyList_Check(heap));
19 if (pos >= PyList_GET_SIZE(heap)) {
20 PyErr_SetString(PyExc_IndexError, "index out of range");
21 return -1;
22 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000024 newitem = PyList_GET_ITEM(heap, pos);
25 Py_INCREF(newitem);
26 /* Follow the path to the root, moving parents down until finding
27 a place newitem fits. */
28 while (pos > startpos){
29 parentpos = (pos - 1) >> 1;
30 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000031 cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000032 if (cmp == -1) {
33 Py_DECREF(newitem);
34 return -1;
35 }
36 if (cmp == 0)
37 break;
38 Py_INCREF(parent);
39 Py_DECREF(PyList_GET_ITEM(heap, pos));
40 PyList_SET_ITEM(heap, pos, parent);
41 pos = parentpos;
42 }
43 Py_DECREF(PyList_GET_ITEM(heap, pos));
44 PyList_SET_ITEM(heap, pos, newitem);
45 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000046}
47
48static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000049_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 Py_ssize_t startpos, endpos, childpos, rightpos;
52 int cmp;
53 PyObject *newitem, *tmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000055 assert(PyList_Check(heap));
56 endpos = PyList_GET_SIZE(heap);
57 startpos = pos;
58 if (pos >= endpos) {
59 PyErr_SetString(PyExc_IndexError, "index out of range");
60 return -1;
61 }
62 newitem = PyList_GET_ITEM(heap, pos);
63 Py_INCREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 /* Bubble up the smaller child until hitting a leaf. */
66 childpos = 2*pos + 1; /* leftmost child position */
67 while (childpos < endpos) {
68 /* Set childpos to index of smaller child. */
69 rightpos = childpos + 1;
70 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000071 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 PyList_GET_ITEM(heap, childpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000073 PyList_GET_ITEM(heap, rightpos),
74 Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 if (cmp == -1) {
76 Py_DECREF(newitem);
77 return -1;
78 }
79 if (cmp == 0)
80 childpos = rightpos;
81 }
82 /* Move the smaller child up. */
83 tmp = PyList_GET_ITEM(heap, childpos);
84 Py_INCREF(tmp);
85 Py_DECREF(PyList_GET_ITEM(heap, pos));
86 PyList_SET_ITEM(heap, pos, tmp);
87 pos = childpos;
88 childpos = 2*pos + 1;
89 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 /* The leaf at pos is empty now. Put newitem there, and and bubble
92 it up to its final resting place (by sifting its parents down). */
93 Py_DECREF(PyList_GET_ITEM(heap, pos));
94 PyList_SET_ITEM(heap, pos, newitem);
95 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000096}
97
98static PyObject *
99heappush(PyObject *self, PyObject *args)
100{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
104 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 if (!PyList_Check(heap)) {
107 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
108 return NULL;
109 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 if (PyList_Append(heap, item) == -1)
112 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
115 return NULL;
116 Py_INCREF(Py_None);
117 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000118}
119
120PyDoc_STRVAR(heappush_doc,
121"Push item onto heap, maintaining the heap invariant.");
122
123static PyObject *
124heappop(PyObject *self, PyObject *heap)
125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 PyObject *lastelt, *returnitem;
127 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 if (!PyList_Check(heap)) {
130 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
131 return NULL;
132 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 /* # raises appropriate IndexError if heap is empty */
135 n = PyList_GET_SIZE(heap);
136 if (n == 0) {
137 PyErr_SetString(PyExc_IndexError, "index out of range");
138 return NULL;
139 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 lastelt = PyList_GET_ITEM(heap, n-1) ;
142 Py_INCREF(lastelt);
143 PyList_SetSlice(heap, n-1, n, NULL);
144 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 if (!n)
147 return lastelt;
148 returnitem = PyList_GET_ITEM(heap, 0);
149 PyList_SET_ITEM(heap, 0, lastelt);
150 if (_siftup((PyListObject *)heap, 0) == -1) {
151 Py_DECREF(returnitem);
152 return NULL;
153 }
154 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000155}
156
157PyDoc_STRVAR(heappop_doc,
158"Pop the smallest item off the heap, maintaining the heap invariant.");
159
160static PyObject *
161heapreplace(PyObject *self, PyObject *args)
162{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
166 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 if (!PyList_Check(heap)) {
169 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
170 return NULL;
171 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 if (PyList_GET_SIZE(heap) < 1) {
174 PyErr_SetString(PyExc_IndexError, "index out of range");
175 return NULL;
176 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 returnitem = PyList_GET_ITEM(heap, 0);
179 Py_INCREF(item);
180 PyList_SET_ITEM(heap, 0, item);
181 if (_siftup((PyListObject *)heap, 0) == -1) {
182 Py_DECREF(returnitem);
183 return NULL;
184 }
185 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000186}
187
188PyDoc_STRVAR(heapreplace_doc,
189"Pop and return the current smallest value, and add the new item.\n\
190\n\
191This is more efficient than heappop() followed by heappush(), and can be\n\
192more appropriate when using a fixed-size heap. Note that the value\n\
193returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000194this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if item > heap[0]:\n\
196 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000197
198static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000199heappushpop(PyObject *self, PyObject *args)
200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 PyObject *heap, *item, *returnitem;
202 int cmp;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
205 return NULL;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 if (!PyList_Check(heap)) {
208 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
209 return NULL;
210 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 if (PyList_GET_SIZE(heap) < 1) {
213 Py_INCREF(item);
214 return item;
215 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000216
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000217 cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000218 if (cmp == -1)
219 return NULL;
220 if (cmp == 0) {
221 Py_INCREF(item);
222 return item;
223 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000224
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000225 returnitem = PyList_GET_ITEM(heap, 0);
226 Py_INCREF(item);
227 PyList_SET_ITEM(heap, 0, item);
228 if (_siftup((PyListObject *)heap, 0) == -1) {
229 Py_DECREF(returnitem);
230 return NULL;
231 }
232 return returnitem;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000233}
234
235PyDoc_STRVAR(heappushpop_doc,
236"Push item on the heap, then pop and return the smallest item\n\
237from the heap. The combined action runs more efficiently than\n\
238heappush() followed by a separate call to heappop().");
239
240static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000241heapify(PyObject *self, PyObject *heap)
242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 if (!PyList_Check(heap)) {
246 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
247 return NULL;
248 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 n = PyList_GET_SIZE(heap);
251 /* Transform bottom-up. The largest index there's any point to
252 looking at is the largest with a child index in-range, so must
253 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
254 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
255 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
256 and that's again n//2-1.
257 */
258 for (i=n/2-1 ; i>=0 ; i--)
259 if(_siftup((PyListObject *)heap, i) == -1)
260 return NULL;
261 Py_INCREF(Py_None);
262 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000263}
264
265PyDoc_STRVAR(heapify_doc,
266"Transform list into a heap, in-place, in O(len(heap)) time.");
267
Raymond Hettingerc9297662004-06-12 22:48:46 +0000268static PyObject *
269nlargest(PyObject *self, PyObject *args)
270{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
272 Py_ssize_t i, n;
273 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
276 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 it = PyObject_GetIter(iterable);
279 if (it == NULL)
280 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 heap = PyList_New(0);
283 if (heap == NULL)
284 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 for (i=0 ; i<n ; i++ ){
287 elem = PyIter_Next(it);
288 if (elem == NULL) {
289 if (PyErr_Occurred())
290 goto fail;
291 else
292 goto sortit;
293 }
294 if (PyList_Append(heap, elem) == -1) {
295 Py_DECREF(elem);
296 goto fail;
297 }
298 Py_DECREF(elem);
299 }
300 if (PyList_GET_SIZE(heap) == 0)
301 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 for (i=n/2-1 ; i>=0 ; i--)
304 if(_siftup((PyListObject *)heap, i) == -1)
305 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000306
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 sol = PyList_GET_ITEM(heap, 0);
308 while (1) {
309 elem = PyIter_Next(it);
310 if (elem == NULL) {
311 if (PyErr_Occurred())
312 goto fail;
313 else
314 goto sortit;
315 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000316 cmp = PyObject_RichCompareBool(sol, elem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 if (cmp == -1) {
318 Py_DECREF(elem);
319 goto fail;
320 }
321 if (cmp == 0) {
322 Py_DECREF(elem);
323 continue;
324 }
325 oldelem = PyList_GET_ITEM(heap, 0);
326 PyList_SET_ITEM(heap, 0, elem);
327 Py_DECREF(oldelem);
328 if (_siftup((PyListObject *)heap, 0) == -1)
329 goto fail;
330 sol = PyList_GET_ITEM(heap, 0);
331 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000332sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 if (PyList_Sort(heap) == -1)
334 goto fail;
335 if (PyList_Reverse(heap) == -1)
336 goto fail;
337 Py_DECREF(it);
338 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000339
340fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 Py_DECREF(it);
342 Py_XDECREF(heap);
343 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000344}
345
346PyDoc_STRVAR(nlargest_doc,
347"Find the n largest elements in a dataset.\n\
348\n\
349Equivalent to: sorted(iterable, reverse=True)[:n]\n");
350
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000351static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000352_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000354 PyObject *newitem, *parent;
355 int cmp;
356 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 assert(PyList_Check(heap));
359 if (pos >= PyList_GET_SIZE(heap)) {
360 PyErr_SetString(PyExc_IndexError, "index out of range");
361 return -1;
362 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 newitem = PyList_GET_ITEM(heap, pos);
365 Py_INCREF(newitem);
366 /* Follow the path to the root, moving parents down until finding
367 a place newitem fits. */
368 while (pos > startpos){
369 parentpos = (pos - 1) >> 1;
370 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000371 cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 if (cmp == -1) {
373 Py_DECREF(newitem);
374 return -1;
375 }
376 if (cmp == 0)
377 break;
378 Py_INCREF(parent);
379 Py_DECREF(PyList_GET_ITEM(heap, pos));
380 PyList_SET_ITEM(heap, pos, parent);
381 pos = parentpos;
382 }
383 Py_DECREF(PyList_GET_ITEM(heap, pos));
384 PyList_SET_ITEM(heap, pos, newitem);
385 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000386}
387
388static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000389_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 Py_ssize_t startpos, endpos, childpos, rightpos;
392 int cmp;
393 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 assert(PyList_Check(heap));
396 endpos = PyList_GET_SIZE(heap);
397 startpos = pos;
398 if (pos >= endpos) {
399 PyErr_SetString(PyExc_IndexError, "index out of range");
400 return -1;
401 }
402 newitem = PyList_GET_ITEM(heap, pos);
403 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 /* Bubble up the smaller child until hitting a leaf. */
406 childpos = 2*pos + 1; /* leftmost child position */
407 while (childpos < endpos) {
408 /* Set childpos to index of smaller child. */
409 rightpos = childpos + 1;
410 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000411 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 PyList_GET_ITEM(heap, rightpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000413 PyList_GET_ITEM(heap, childpos),
414 Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 if (cmp == -1) {
416 Py_DECREF(newitem);
417 return -1;
418 }
419 if (cmp == 0)
420 childpos = rightpos;
421 }
422 /* Move the smaller child up. */
423 tmp = PyList_GET_ITEM(heap, childpos);
424 Py_INCREF(tmp);
425 Py_DECREF(PyList_GET_ITEM(heap, pos));
426 PyList_SET_ITEM(heap, pos, tmp);
427 pos = childpos;
428 childpos = 2*pos + 1;
429 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431 /* The leaf at pos is empty now. Put newitem there, and and bubble
432 it up to its final resting place (by sifting its parents down). */
433 Py_DECREF(PyList_GET_ITEM(heap, pos));
434 PyList_SET_ITEM(heap, pos, newitem);
435 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000436}
437
438static PyObject *
439nsmallest(PyObject *self, PyObject *args)
440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
442 Py_ssize_t i, n;
443 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
446 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 it = PyObject_GetIter(iterable);
449 if (it == NULL)
450 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 heap = PyList_New(0);
453 if (heap == NULL)
454 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 for (i=0 ; i<n ; i++ ){
457 elem = PyIter_Next(it);
458 if (elem == NULL) {
459 if (PyErr_Occurred())
460 goto fail;
461 else
462 goto sortit;
463 }
464 if (PyList_Append(heap, elem) == -1) {
465 Py_DECREF(elem);
466 goto fail;
467 }
468 Py_DECREF(elem);
469 }
470 n = PyList_GET_SIZE(heap);
471 if (n == 0)
472 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 for (i=n/2-1 ; i>=0 ; i--)
475 if(_siftupmax((PyListObject *)heap, i) == -1)
476 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 los = PyList_GET_ITEM(heap, 0);
479 while (1) {
480 elem = PyIter_Next(it);
481 if (elem == NULL) {
482 if (PyErr_Occurred())
483 goto fail;
484 else
485 goto sortit;
486 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000487 cmp = PyObject_RichCompareBool(elem, los, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 if (cmp == -1) {
489 Py_DECREF(elem);
490 goto fail;
491 }
492 if (cmp == 0) {
493 Py_DECREF(elem);
494 continue;
495 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 oldelem = PyList_GET_ITEM(heap, 0);
498 PyList_SET_ITEM(heap, 0, elem);
499 Py_DECREF(oldelem);
500 if (_siftupmax((PyListObject *)heap, 0) == -1)
501 goto fail;
502 los = PyList_GET_ITEM(heap, 0);
503 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000504
505sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 if (PyList_Sort(heap) == -1)
507 goto fail;
508 Py_DECREF(it);
509 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000510
511fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 Py_DECREF(it);
513 Py_XDECREF(heap);
514 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000515}
516
517PyDoc_STRVAR(nsmallest_doc,
518"Find the n smallest elements in a dataset.\n\
519\n\
520Equivalent to: sorted(iterable)[:n]\n");
521
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000522static PyMethodDef heapq_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000523 {"heappush", (PyCFunction)heappush,
524 METH_VARARGS, heappush_doc},
525 {"heappushpop", (PyCFunction)heappushpop,
526 METH_VARARGS, heappushpop_doc},
527 {"heappop", (PyCFunction)heappop,
528 METH_O, heappop_doc},
529 {"heapreplace", (PyCFunction)heapreplace,
530 METH_VARARGS, heapreplace_doc},
531 {"heapify", (PyCFunction)heapify,
532 METH_O, heapify_doc},
533 {"nlargest", (PyCFunction)nlargest,
534 METH_VARARGS, nlargest_doc},
535 {"nsmallest", (PyCFunction)nsmallest,
536 METH_VARARGS, nsmallest_doc},
537 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000538};
539
540PyDoc_STRVAR(module_doc,
541"Heap queue algorithm (a.k.a. priority queue).\n\
542\n\
543Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
544all k, counting elements from 0. For the sake of comparison,\n\
545non-existing elements are considered to be infinite. The interesting\n\
546property of a heap is that a[0] is always its smallest element.\n\
547\n\
548Usage:\n\
549\n\
550heap = [] # creates an empty heap\n\
551heappush(heap, item) # pushes a new item on the heap\n\
552item = heappop(heap) # pops the smallest item from the heap\n\
553item = heap[0] # smallest item on the heap without popping it\n\
554heapify(x) # transforms list into a heap, in-place, in linear time\n\
555item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
556 # new item; the heap size is unchanged\n\
557\n\
558Our API differs from textbook heap algorithms as follows:\n\
559\n\
560- We use 0-based indexing. This makes the relationship between the\n\
561 index for a node and the indexes for its children slightly less\n\
562 obvious, but is more suitable since Python uses 0-based indexing.\n\
563\n\
564- Our heappop() method returns the smallest item, not the largest.\n\
565\n\
566These two make it possible to view the heap as a regular Python list\n\
567without surprises: heap[0] is the smallest item, and heap.sort()\n\
568maintains the heap invariant!\n");
569
570
571PyDoc_STRVAR(__about__,
572"Heap queues\n\
573\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000574[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000575\n\
576Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
577all k, counting elements from 0. For the sake of comparison,\n\
578non-existing elements are considered to be infinite. The interesting\n\
579property of a heap is that a[0] is always its smallest element.\n"
580"\n\
581The strange invariant above is meant to be an efficient memory\n\
582representation for a tournament. The numbers below are `k', not a[k]:\n\
583\n\
584 0\n\
585\n\
586 1 2\n\
587\n\
588 3 4 5 6\n\
589\n\
590 7 8 9 10 11 12 13 14\n\
591\n\
592 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
593\n\
594\n\
595In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
596an usual binary tournament we see in sports, each cell is the winner\n\
597over the two cells it tops, and we can trace the winner down the tree\n\
598to see all opponents s/he had. However, in many computer applications\n\
599of such tournaments, we do not need to trace the history of a winner.\n\
600To be more memory efficient, when a winner is promoted, we try to\n\
601replace it by something else at a lower level, and the rule becomes\n\
602that a cell and the two cells it tops contain three different items,\n\
603but the top cell \"wins\" over the two topped cells.\n"
604"\n\
605If this heap invariant is protected at all time, index 0 is clearly\n\
606the overall winner. The simplest algorithmic way to remove it and\n\
607find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
608diagram above) into the 0 position, and then percolate this new 0 down\n\
609the tree, exchanging values, until the invariant is re-established.\n\
610This is clearly logarithmic on the total number of items in the tree.\n\
611By iterating over all items, you get an O(n ln n) sort.\n"
612"\n\
613A nice feature of this sort is that you can efficiently insert new\n\
614items while the sort is going on, provided that the inserted items are\n\
615not \"better\" than the last 0'th element you extracted. This is\n\
616especially useful in simulation contexts, where the tree holds all\n\
617incoming events, and the \"win\" condition means the smallest scheduled\n\
618time. When an event schedule other events for execution, they are\n\
619scheduled into the future, so they can easily go into the heap. So, a\n\
620heap is a good structure for implementing schedulers (this is what I\n\
621used for my MIDI sequencer :-).\n"
622"\n\
623Various structures for implementing schedulers have been extensively\n\
624studied, and heaps are good for this, as they are reasonably speedy,\n\
625the speed is almost constant, and the worst case is not much different\n\
626than the average case. However, there are other representations which\n\
627are more efficient overall, yet the worst cases might be terrible.\n"
628"\n\
629Heaps are also very useful in big disk sorts. You most probably all\n\
630know that a big sort implies producing \"runs\" (which are pre-sorted\n\
631sequences, which size is usually related to the amount of CPU memory),\n\
632followed by a merging passes for these runs, which merging is often\n\
633very cleverly organised[1]. It is very important that the initial\n\
634sort produces the longest runs possible. Tournaments are a good way\n\
635to that. If, using all the memory available to hold a tournament, you\n\
636replace and percolate items that happen to fit the current run, you'll\n\
637produce runs which are twice the size of the memory for random input,\n\
638and much better for input fuzzily ordered.\n"
639"\n\
640Moreover, if you output the 0'th item on disk and get an input which\n\
641may not fit in the current tournament (because the value \"wins\" over\n\
642the last output value), it cannot fit in the heap, so the size of the\n\
643heap decreases. The freed memory could be cleverly reused immediately\n\
644for progressively building a second heap, which grows at exactly the\n\
645same rate the first heap is melting. When the first heap completely\n\
646vanishes, you switch heaps and start a new run. Clever and quite\n\
647effective!\n\
648\n\
649In a word, heaps are useful memory structures to know. I use them in\n\
650a few applications, and I think it is good to keep a `heap' module\n\
651around. :-)\n"
652"\n\
653--------------------\n\
654[1] The disk balancing algorithms which are current, nowadays, are\n\
655more annoying than clever, and this is a consequence of the seeking\n\
656capabilities of the disks. On devices which cannot seek, like big\n\
657tape drives, the story was quite different, and one had to be very\n\
658clever to ensure (far in advance) that each tape movement will be the\n\
659most effective possible (that is, will best participate at\n\
660\"progressing\" the merge). Some tapes were even able to read\n\
661backwards, and this was also used to avoid the rewinding time.\n\
662Believe me, real good tape sorts were quite spectacular to watch!\n\
663From all times, sorting has always been a Great Art! :-)\n");
664
Martin v. Löwis1a214512008-06-11 05:26:20 +0000665
666static struct PyModuleDef _heapqmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 PyModuleDef_HEAD_INIT,
668 "_heapq",
669 module_doc,
670 -1,
671 heapq_methods,
672 NULL,
673 NULL,
674 NULL,
675 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +0000676};
677
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000678PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000679PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000680{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 m = PyModule_Create(&_heapqmodule);
684 if (m == NULL)
685 return NULL;
686 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
687 PyModule_AddObject(m, "__about__", about);
688 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000689}
690