blob: eee56a002743af5638f14f4d7cdbbd068a423781 [file] [log] [blame]
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo1670b432010-09-03 22:03:10 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000012_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000013{
Antoine Pitrou44d52142013-03-04 20:30:01 +010014 PyObject *newitem, *parent, *olditem;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000015 int cmp;
16 Py_ssize_t parentpos;
Antoine Pitrou44d52142013-03-04 20:30:01 +010017 Py_ssize_t size;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000019 assert(PyList_Check(heap));
Antoine Pitrou44d52142013-03-04 20:30:01 +010020 size = PyList_GET_SIZE(heap);
21 if (pos >= size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000022 PyErr_SetString(PyExc_IndexError, "index out of range");
23 return -1;
24 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000026 newitem = PyList_GET_ITEM(heap, pos);
27 Py_INCREF(newitem);
28 /* Follow the path to the root, moving parents down until finding
29 a place newitem fits. */
30 while (pos > startpos){
31 parentpos = (pos - 1) >> 1;
32 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000033 cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 if (cmp == -1) {
35 Py_DECREF(newitem);
36 return -1;
37 }
Antoine Pitrou44d52142013-03-04 20:30:01 +010038 if (size != PyList_GET_SIZE(heap)) {
39 Py_DECREF(newitem);
40 PyErr_SetString(PyExc_RuntimeError,
41 "list changed size during iteration");
42 return -1;
43 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000044 if (cmp == 0)
45 break;
46 Py_INCREF(parent);
Antoine Pitrou44d52142013-03-04 20:30:01 +010047 olditem = PyList_GET_ITEM(heap, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 PyList_SET_ITEM(heap, pos, parent);
Antoine Pitrou44d52142013-03-04 20:30:01 +010049 Py_DECREF(olditem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 pos = parentpos;
Antoine Pitrou44d52142013-03-04 20:30:01 +010051 if (size != PyList_GET_SIZE(heap)) {
52 PyErr_SetString(PyExc_RuntimeError,
53 "list changed size during iteration");
54 return -1;
55 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000056 }
57 Py_DECREF(PyList_GET_ITEM(heap, pos));
58 PyList_SET_ITEM(heap, pos, newitem);
59 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000060}
61
62static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000063_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000064{
Raymond Hettingerc9926082014-05-03 15:22:07 -070065 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 int cmp;
Antoine Pitrou44d52142013-03-04 20:30:01 +010067 PyObject *newitem, *tmp, *olditem;
68 Py_ssize_t size;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 assert(PyList_Check(heap));
Antoine Pitrou44d52142013-03-04 20:30:01 +010071 size = PyList_GET_SIZE(heap);
72 endpos = size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 startpos = pos;
74 if (pos >= endpos) {
75 PyErr_SetString(PyExc_IndexError, "index out of range");
76 return -1;
77 }
78 newitem = PyList_GET_ITEM(heap, pos);
79 Py_INCREF(newitem);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070082 limit = endpos / 2; /* smallest pos that has no child */
83 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070085 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 rightpos = childpos + 1;
87 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000088 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 PyList_GET_ITEM(heap, childpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000090 PyList_GET_ITEM(heap, rightpos),
91 Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 if (cmp == -1) {
93 Py_DECREF(newitem);
94 return -1;
95 }
96 if (cmp == 0)
97 childpos = rightpos;
98 }
Antoine Pitrou44d52142013-03-04 20:30:01 +010099 if (size != PyList_GET_SIZE(heap)) {
100 Py_DECREF(newitem);
101 PyErr_SetString(PyExc_RuntimeError,
102 "list changed size during iteration");
103 return -1;
104 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 /* Move the smaller child up. */
106 tmp = PyList_GET_ITEM(heap, childpos);
107 Py_INCREF(tmp);
Antoine Pitrou44d52142013-03-04 20:30:01 +0100108 olditem = PyList_GET_ITEM(heap, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 PyList_SET_ITEM(heap, pos, tmp);
Antoine Pitrou44d52142013-03-04 20:30:01 +0100110 Py_DECREF(olditem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 pos = childpos;
Antoine Pitrou44d52142013-03-04 20:30:01 +0100112 if (size != PyList_GET_SIZE(heap)) {
113 PyErr_SetString(PyExc_RuntimeError,
114 "list changed size during iteration");
115 return -1;
116 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000118
Terry Jan Reedy0158af32013-03-11 17:42:46 -0400119 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 it up to its final resting place (by sifting its parents down). */
121 Py_DECREF(PyList_GET_ITEM(heap, pos));
122 PyList_SET_ITEM(heap, pos, newitem);
123 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000124}
125
126static PyObject *
127heappush(PyObject *self, PyObject *args)
128{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
132 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 if (!PyList_Check(heap)) {
135 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
136 return NULL;
137 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 if (PyList_Append(heap, item) == -1)
140 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
143 return NULL;
144 Py_INCREF(Py_None);
145 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000146}
147
148PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800149"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000150
151static PyObject *
152heappop(PyObject *self, PyObject *heap)
153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 PyObject *lastelt, *returnitem;
155 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 if (!PyList_Check(heap)) {
158 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
159 return NULL;
160 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 /* # raises appropriate IndexError if heap is empty */
163 n = PyList_GET_SIZE(heap);
164 if (n == 0) {
165 PyErr_SetString(PyExc_IndexError, "index out of range");
166 return NULL;
167 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 lastelt = PyList_GET_ITEM(heap, n-1) ;
170 Py_INCREF(lastelt);
Victor Stinner764a46d2013-07-17 21:50:21 +0200171 if (PyList_SetSlice(heap, n-1, n, NULL) < 0) {
172 Py_DECREF(lastelt);
173 return NULL;
174 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 if (!n)
178 return lastelt;
179 returnitem = PyList_GET_ITEM(heap, 0);
180 PyList_SET_ITEM(heap, 0, lastelt);
181 if (_siftup((PyListObject *)heap, 0) == -1) {
182 Py_DECREF(returnitem);
183 return NULL;
184 }
185 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000186}
187
188PyDoc_STRVAR(heappop_doc,
189"Pop the smallest item off the heap, maintaining the heap invariant.");
190
191static PyObject *
192heapreplace(PyObject *self, PyObject *args)
193{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
197 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (!PyList_Check(heap)) {
200 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
201 return NULL;
202 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (PyList_GET_SIZE(heap) < 1) {
205 PyErr_SetString(PyExc_IndexError, "index out of range");
206 return NULL;
207 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 returnitem = PyList_GET_ITEM(heap, 0);
210 Py_INCREF(item);
211 PyList_SET_ITEM(heap, 0, item);
212 if (_siftup((PyListObject *)heap, 0) == -1) {
213 Py_DECREF(returnitem);
214 return NULL;
215 }
216 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000217}
218
219PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800220"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000221\n\
222This is more efficient than heappop() followed by heappush(), and can be\n\
223more appropriate when using a fixed-size heap. Note that the value\n\
224returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000225this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 if item > heap[0]:\n\
227 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000228
229static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000230heappushpop(PyObject *self, PyObject *args)
231{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 PyObject *heap, *item, *returnitem;
233 int cmp;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
236 return NULL;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000237
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 if (!PyList_Check(heap)) {
239 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
240 return NULL;
241 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000242
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 if (PyList_GET_SIZE(heap) < 1) {
244 Py_INCREF(item);
245 return item;
246 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000247
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000248 cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 if (cmp == -1)
250 return NULL;
251 if (cmp == 0) {
252 Py_INCREF(item);
253 return item;
254 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 returnitem = PyList_GET_ITEM(heap, 0);
257 Py_INCREF(item);
258 PyList_SET_ITEM(heap, 0, item);
259 if (_siftup((PyListObject *)heap, 0) == -1) {
260 Py_DECREF(returnitem);
261 return NULL;
262 }
263 return returnitem;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000264}
265
266PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800267"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000268from the heap. The combined action runs more efficiently than\n\
269heappush() followed by a separate call to heappop().");
270
271static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000272heapify(PyObject *self, PyObject *heap)
273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000276 if (!PyList_Check(heap)) {
277 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
278 return NULL;
279 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 n = PyList_GET_SIZE(heap);
282 /* Transform bottom-up. The largest index there's any point to
283 looking at is the largest with a child index in-range, so must
284 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
285 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
286 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
287 and that's again n//2-1.
288 */
289 for (i=n/2-1 ; i>=0 ; i--)
290 if(_siftup((PyListObject *)heap, i) == -1)
291 return NULL;
292 Py_INCREF(Py_None);
293 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000294}
295
296PyDoc_STRVAR(heapify_doc,
297"Transform list into a heap, in-place, in O(len(heap)) time.");
298
Raymond Hettingerc9297662004-06-12 22:48:46 +0000299static PyObject *
300nlargest(PyObject *self, PyObject *args)
301{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
303 Py_ssize_t i, n;
304 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
307 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 it = PyObject_GetIter(iterable);
310 if (it == NULL)
311 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 heap = PyList_New(0);
314 if (heap == NULL)
315 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 for (i=0 ; i<n ; i++ ){
318 elem = PyIter_Next(it);
319 if (elem == NULL) {
320 if (PyErr_Occurred())
321 goto fail;
322 else
323 goto sortit;
324 }
325 if (PyList_Append(heap, elem) == -1) {
326 Py_DECREF(elem);
327 goto fail;
328 }
329 Py_DECREF(elem);
330 }
331 if (PyList_GET_SIZE(heap) == 0)
332 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 for (i=n/2-1 ; i>=0 ; i--)
335 if(_siftup((PyListObject *)heap, i) == -1)
336 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000337
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 sol = PyList_GET_ITEM(heap, 0);
339 while (1) {
340 elem = PyIter_Next(it);
341 if (elem == NULL) {
342 if (PyErr_Occurred())
343 goto fail;
344 else
345 goto sortit;
346 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000347 cmp = PyObject_RichCompareBool(sol, elem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 if (cmp == -1) {
349 Py_DECREF(elem);
350 goto fail;
351 }
352 if (cmp == 0) {
353 Py_DECREF(elem);
354 continue;
355 }
356 oldelem = PyList_GET_ITEM(heap, 0);
357 PyList_SET_ITEM(heap, 0, elem);
358 Py_DECREF(oldelem);
359 if (_siftup((PyListObject *)heap, 0) == -1)
360 goto fail;
361 sol = PyList_GET_ITEM(heap, 0);
362 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000363sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 if (PyList_Sort(heap) == -1)
365 goto fail;
366 if (PyList_Reverse(heap) == -1)
367 goto fail;
368 Py_DECREF(it);
369 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000370
371fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 Py_DECREF(it);
373 Py_XDECREF(heap);
374 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000375}
376
377PyDoc_STRVAR(nlargest_doc,
378"Find the n largest elements in a dataset.\n\
379\n\
380Equivalent to: sorted(iterable, reverse=True)[:n]\n");
381
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000382static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000383_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 PyObject *newitem, *parent;
386 int cmp;
387 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 assert(PyList_Check(heap));
390 if (pos >= PyList_GET_SIZE(heap)) {
391 PyErr_SetString(PyExc_IndexError, "index out of range");
392 return -1;
393 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 newitem = PyList_GET_ITEM(heap, pos);
396 Py_INCREF(newitem);
397 /* Follow the path to the root, moving parents down until finding
398 a place newitem fits. */
399 while (pos > startpos){
400 parentpos = (pos - 1) >> 1;
401 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000402 cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 if (cmp == -1) {
404 Py_DECREF(newitem);
405 return -1;
406 }
407 if (cmp == 0)
408 break;
409 Py_INCREF(parent);
410 Py_DECREF(PyList_GET_ITEM(heap, pos));
411 PyList_SET_ITEM(heap, pos, parent);
412 pos = parentpos;
413 }
414 Py_DECREF(PyList_GET_ITEM(heap, pos));
415 PyList_SET_ITEM(heap, pos, newitem);
416 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000417}
418
419static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000420_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000421{
Raymond Hettingerc9926082014-05-03 15:22:07 -0700422 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 int cmp;
424 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 assert(PyList_Check(heap));
427 endpos = PyList_GET_SIZE(heap);
428 startpos = pos;
429 if (pos >= endpos) {
430 PyErr_SetString(PyExc_IndexError, "index out of range");
431 return -1;
432 }
433 newitem = PyList_GET_ITEM(heap, pos);
434 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700437 limit = endpos / 2; /* smallest pos that has no child */
438 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700440 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 rightpos = childpos + 1;
442 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000443 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 PyList_GET_ITEM(heap, rightpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000445 PyList_GET_ITEM(heap, childpos),
446 Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 if (cmp == -1) {
448 Py_DECREF(newitem);
449 return -1;
450 }
451 if (cmp == 0)
452 childpos = rightpos;
453 }
454 /* Move the smaller child up. */
455 tmp = PyList_GET_ITEM(heap, childpos);
456 Py_INCREF(tmp);
457 Py_DECREF(PyList_GET_ITEM(heap, pos));
458 PyList_SET_ITEM(heap, pos, tmp);
459 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000461
Terry Jan Reedy0158af32013-03-11 17:42:46 -0400462 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 it up to its final resting place (by sifting its parents down). */
464 Py_DECREF(PyList_GET_ITEM(heap, pos));
465 PyList_SET_ITEM(heap, pos, newitem);
466 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000467}
468
469static PyObject *
470nsmallest(PyObject *self, PyObject *args)
471{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
473 Py_ssize_t i, n;
474 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
477 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 it = PyObject_GetIter(iterable);
480 if (it == NULL)
481 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 heap = PyList_New(0);
484 if (heap == NULL)
485 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 for (i=0 ; i<n ; i++ ){
488 elem = PyIter_Next(it);
489 if (elem == NULL) {
490 if (PyErr_Occurred())
491 goto fail;
492 else
493 goto sortit;
494 }
495 if (PyList_Append(heap, elem) == -1) {
496 Py_DECREF(elem);
497 goto fail;
498 }
499 Py_DECREF(elem);
500 }
501 n = PyList_GET_SIZE(heap);
502 if (n == 0)
503 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 for (i=n/2-1 ; i>=0 ; i--)
506 if(_siftupmax((PyListObject *)heap, i) == -1)
507 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 los = PyList_GET_ITEM(heap, 0);
510 while (1) {
511 elem = PyIter_Next(it);
512 if (elem == NULL) {
513 if (PyErr_Occurred())
514 goto fail;
515 else
516 goto sortit;
517 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000518 cmp = PyObject_RichCompareBool(elem, los, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (cmp == -1) {
520 Py_DECREF(elem);
521 goto fail;
522 }
523 if (cmp == 0) {
524 Py_DECREF(elem);
525 continue;
526 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000527
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 oldelem = PyList_GET_ITEM(heap, 0);
529 PyList_SET_ITEM(heap, 0, elem);
530 Py_DECREF(oldelem);
531 if (_siftupmax((PyListObject *)heap, 0) == -1)
532 goto fail;
533 los = PyList_GET_ITEM(heap, 0);
534 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000535
536sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 if (PyList_Sort(heap) == -1)
538 goto fail;
539 Py_DECREF(it);
540 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000541
542fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 Py_DECREF(it);
544 Py_XDECREF(heap);
545 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000546}
547
548PyDoc_STRVAR(nsmallest_doc,
549"Find the n smallest elements in a dataset.\n\
550\n\
551Equivalent to: sorted(iterable)[:n]\n");
552
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000553static PyMethodDef heapq_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 {"heappush", (PyCFunction)heappush,
555 METH_VARARGS, heappush_doc},
556 {"heappushpop", (PyCFunction)heappushpop,
557 METH_VARARGS, heappushpop_doc},
558 {"heappop", (PyCFunction)heappop,
559 METH_O, heappop_doc},
560 {"heapreplace", (PyCFunction)heapreplace,
561 METH_VARARGS, heapreplace_doc},
562 {"heapify", (PyCFunction)heapify,
563 METH_O, heapify_doc},
564 {"nlargest", (PyCFunction)nlargest,
565 METH_VARARGS, nlargest_doc},
566 {"nsmallest", (PyCFunction)nsmallest,
567 METH_VARARGS, nsmallest_doc},
568 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000569};
570
571PyDoc_STRVAR(module_doc,
572"Heap queue algorithm (a.k.a. priority queue).\n\
573\n\
574Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
575all k, counting elements from 0. For the sake of comparison,\n\
576non-existing elements are considered to be infinite. The interesting\n\
577property of a heap is that a[0] is always its smallest element.\n\
578\n\
579Usage:\n\
580\n\
581heap = [] # creates an empty heap\n\
582heappush(heap, item) # pushes a new item on the heap\n\
583item = heappop(heap) # pops the smallest item from the heap\n\
584item = heap[0] # smallest item on the heap without popping it\n\
585heapify(x) # transforms list into a heap, in-place, in linear time\n\
586item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
587 # new item; the heap size is unchanged\n\
588\n\
589Our API differs from textbook heap algorithms as follows:\n\
590\n\
591- We use 0-based indexing. This makes the relationship between the\n\
592 index for a node and the indexes for its children slightly less\n\
593 obvious, but is more suitable since Python uses 0-based indexing.\n\
594\n\
595- Our heappop() method returns the smallest item, not the largest.\n\
596\n\
597These two make it possible to view the heap as a regular Python list\n\
598without surprises: heap[0] is the smallest item, and heap.sort()\n\
599maintains the heap invariant!\n");
600
601
602PyDoc_STRVAR(__about__,
603"Heap queues\n\
604\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000605[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000606\n\
607Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
608all k, counting elements from 0. For the sake of comparison,\n\
609non-existing elements are considered to be infinite. The interesting\n\
610property of a heap is that a[0] is always its smallest element.\n"
611"\n\
612The strange invariant above is meant to be an efficient memory\n\
613representation for a tournament. The numbers below are `k', not a[k]:\n\
614\n\
615 0\n\
616\n\
617 1 2\n\
618\n\
619 3 4 5 6\n\
620\n\
621 7 8 9 10 11 12 13 14\n\
622\n\
623 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
624\n\
625\n\
626In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
627an usual binary tournament we see in sports, each cell is the winner\n\
628over the two cells it tops, and we can trace the winner down the tree\n\
629to see all opponents s/he had. However, in many computer applications\n\
630of such tournaments, we do not need to trace the history of a winner.\n\
631To be more memory efficient, when a winner is promoted, we try to\n\
632replace it by something else at a lower level, and the rule becomes\n\
633that a cell and the two cells it tops contain three different items,\n\
634but the top cell \"wins\" over the two topped cells.\n"
635"\n\
636If this heap invariant is protected at all time, index 0 is clearly\n\
637the overall winner. The simplest algorithmic way to remove it and\n\
638find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
639diagram above) into the 0 position, and then percolate this new 0 down\n\
640the tree, exchanging values, until the invariant is re-established.\n\
641This is clearly logarithmic on the total number of items in the tree.\n\
642By iterating over all items, you get an O(n ln n) sort.\n"
643"\n\
644A nice feature of this sort is that you can efficiently insert new\n\
645items while the sort is going on, provided that the inserted items are\n\
646not \"better\" than the last 0'th element you extracted. This is\n\
647especially useful in simulation contexts, where the tree holds all\n\
648incoming events, and the \"win\" condition means the smallest scheduled\n\
649time. When an event schedule other events for execution, they are\n\
650scheduled into the future, so they can easily go into the heap. So, a\n\
651heap is a good structure for implementing schedulers (this is what I\n\
652used for my MIDI sequencer :-).\n"
653"\n\
654Various structures for implementing schedulers have been extensively\n\
655studied, and heaps are good for this, as they are reasonably speedy,\n\
656the speed is almost constant, and the worst case is not much different\n\
657than the average case. However, there are other representations which\n\
658are more efficient overall, yet the worst cases might be terrible.\n"
659"\n\
660Heaps are also very useful in big disk sorts. You most probably all\n\
661know that a big sort implies producing \"runs\" (which are pre-sorted\n\
662sequences, which size is usually related to the amount of CPU memory),\n\
663followed by a merging passes for these runs, which merging is often\n\
664very cleverly organised[1]. It is very important that the initial\n\
665sort produces the longest runs possible. Tournaments are a good way\n\
666to that. If, using all the memory available to hold a tournament, you\n\
667replace and percolate items that happen to fit the current run, you'll\n\
668produce runs which are twice the size of the memory for random input,\n\
669and much better for input fuzzily ordered.\n"
670"\n\
671Moreover, if you output the 0'th item on disk and get an input which\n\
672may not fit in the current tournament (because the value \"wins\" over\n\
673the last output value), it cannot fit in the heap, so the size of the\n\
674heap decreases. The freed memory could be cleverly reused immediately\n\
675for progressively building a second heap, which grows at exactly the\n\
676same rate the first heap is melting. When the first heap completely\n\
677vanishes, you switch heaps and start a new run. Clever and quite\n\
678effective!\n\
679\n\
680In a word, heaps are useful memory structures to know. I use them in\n\
681a few applications, and I think it is good to keep a `heap' module\n\
682around. :-)\n"
683"\n\
684--------------------\n\
685[1] The disk balancing algorithms which are current, nowadays, are\n\
686more annoying than clever, and this is a consequence of the seeking\n\
687capabilities of the disks. On devices which cannot seek, like big\n\
688tape drives, the story was quite different, and one had to be very\n\
689clever to ensure (far in advance) that each tape movement will be the\n\
690most effective possible (that is, will best participate at\n\
691\"progressing\" the merge). Some tapes were even able to read\n\
692backwards, and this was also used to avoid the rewinding time.\n\
693Believe me, real good tape sorts were quite spectacular to watch!\n\
694From all times, sorting has always been a Great Art! :-)\n");
695
Martin v. Löwis1a214512008-06-11 05:26:20 +0000696
697static struct PyModuleDef _heapqmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000698 PyModuleDef_HEAD_INIT,
699 "_heapq",
700 module_doc,
701 -1,
702 heapq_methods,
703 NULL,
704 NULL,
705 NULL,
706 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +0000707};
708
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000709PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000710PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000711{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 m = PyModule_Create(&_heapqmodule);
715 if (m == NULL)
716 return NULL;
717 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
718 PyModule_AddObject(m, "__about__", about);
719 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000720}
721