blob: 7f3ce79163f1c061621c30cee72d56beb87e5281 [file] [log] [blame]
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo1670b432010-09-03 22:03:10 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000012_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000013{
Raymond Hettinger871620d2014-05-03 18:36:48 -070014 PyObject *newitem, *parent;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000015 int cmp;
16 Py_ssize_t parentpos;
Antoine Pitrou44d52142013-03-04 20:30:01 +010017 Py_ssize_t size;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000019 assert(PyList_Check(heap));
Antoine Pitrou44d52142013-03-04 20:30:01 +010020 size = PyList_GET_SIZE(heap);
21 if (pos >= size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000022 PyErr_SetString(PyExc_IndexError, "index out of range");
23 return -1;
24 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000026 /* Follow the path to the root, moving parents down until finding
27 a place newitem fits. */
Raymond Hettinger871620d2014-05-03 18:36:48 -070028 newitem = PyList_GET_ITEM(heap, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000029 while (pos > startpos){
30 parentpos = (pos - 1) >> 1;
31 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000032 cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -070033 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 return -1;
Antoine Pitrou44d52142013-03-04 20:30:01 +010035 if (size != PyList_GET_SIZE(heap)) {
Antoine Pitrou44d52142013-03-04 20:30:01 +010036 PyErr_SetString(PyExc_RuntimeError,
37 "list changed size during iteration");
38 return -1;
39 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000040 if (cmp == 0)
41 break;
Raymond Hettinger871620d2014-05-03 18:36:48 -070042 parent = PyList_GET_ITEM(heap, parentpos);
43 newitem = PyList_GET_ITEM(heap, pos);
44 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000045 PyList_SET_ITEM(heap, pos, parent);
46 pos = parentpos;
47 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000048 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000049}
50
51static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000052_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000053{
Raymond Hettingerc9926082014-05-03 15:22:07 -070054 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger871620d2014-05-03 18:36:48 -070055 PyObject *tmp1, *tmp2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000056 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000058 assert(PyList_Check(heap));
Raymond Hettinger871620d2014-05-03 18:36:48 -070059 endpos = PyList_GET_SIZE(heap);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000060 startpos = pos;
61 if (pos >= endpos) {
62 PyErr_SetString(PyExc_IndexError, "index out of range");
63 return -1;
64 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000066 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070067 limit = endpos / 2; /* smallest pos that has no child */
68 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070070 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000071 rightpos = childpos + 1;
72 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000073 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 PyList_GET_ITEM(heap, childpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000075 PyList_GET_ITEM(heap, rightpos),
76 Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -070077 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 if (cmp == 0)
80 childpos = rightpos;
Raymond Hettinger871620d2014-05-03 18:36:48 -070081 if (endpos != PyList_GET_SIZE(heap)) {
82 PyErr_SetString(PyExc_RuntimeError,
83 "list changed size during iteration");
84 return -1;
85 }
Antoine Pitrou44d52142013-03-04 20:30:01 +010086 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 /* Move the smaller child up. */
Raymond Hettinger871620d2014-05-03 18:36:48 -070088 tmp1 = PyList_GET_ITEM(heap, childpos);
89 tmp2 = PyList_GET_ITEM(heap, pos);
90 PyList_SET_ITEM(heap, childpos, tmp2);
91 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 }
Raymond Hettinger871620d2014-05-03 18:36:48 -070094 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000096}
97
98static PyObject *
99heappush(PyObject *self, PyObject *args)
100{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
104 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 if (!PyList_Check(heap)) {
107 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
108 return NULL;
109 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 if (PyList_Append(heap, item) == -1)
112 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
115 return NULL;
116 Py_INCREF(Py_None);
117 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000118}
119
120PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800121"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000122
123static PyObject *
124heappop(PyObject *self, PyObject *heap)
125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 PyObject *lastelt, *returnitem;
127 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 if (!PyList_Check(heap)) {
130 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
131 return NULL;
132 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 /* # raises appropriate IndexError if heap is empty */
135 n = PyList_GET_SIZE(heap);
136 if (n == 0) {
137 PyErr_SetString(PyExc_IndexError, "index out of range");
138 return NULL;
139 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 lastelt = PyList_GET_ITEM(heap, n-1) ;
142 Py_INCREF(lastelt);
Victor Stinner764a46d2013-07-17 21:50:21 +0200143 if (PyList_SetSlice(heap, n-1, n, NULL) < 0) {
144 Py_DECREF(lastelt);
145 return NULL;
146 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 if (!n)
150 return lastelt;
151 returnitem = PyList_GET_ITEM(heap, 0);
152 PyList_SET_ITEM(heap, 0, lastelt);
153 if (_siftup((PyListObject *)heap, 0) == -1) {
154 Py_DECREF(returnitem);
155 return NULL;
156 }
157 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000158}
159
160PyDoc_STRVAR(heappop_doc,
161"Pop the smallest item off the heap, maintaining the heap invariant.");
162
163static PyObject *
164heapreplace(PyObject *self, PyObject *args)
165{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000168 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
169 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 if (!PyList_Check(heap)) {
172 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
173 return NULL;
174 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 if (PyList_GET_SIZE(heap) < 1) {
177 PyErr_SetString(PyExc_IndexError, "index out of range");
178 return NULL;
179 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 returnitem = PyList_GET_ITEM(heap, 0);
182 Py_INCREF(item);
183 PyList_SET_ITEM(heap, 0, item);
184 if (_siftup((PyListObject *)heap, 0) == -1) {
185 Py_DECREF(returnitem);
186 return NULL;
187 }
188 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000189}
190
191PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800192"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000193\n\
194This is more efficient than heappop() followed by heappush(), and can be\n\
195more appropriate when using a fixed-size heap. Note that the value\n\
196returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000197this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 if item > heap[0]:\n\
199 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000200
201static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000202heappushpop(PyObject *self, PyObject *args)
203{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 PyObject *heap, *item, *returnitem;
205 int cmp;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
208 return NULL;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000210 if (!PyList_Check(heap)) {
211 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
212 return NULL;
213 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 if (PyList_GET_SIZE(heap) < 1) {
216 Py_INCREF(item);
217 return item;
218 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000219
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000220 cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 if (cmp == -1)
222 return NULL;
223 if (cmp == 0) {
224 Py_INCREF(item);
225 return item;
226 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 returnitem = PyList_GET_ITEM(heap, 0);
229 Py_INCREF(item);
230 PyList_SET_ITEM(heap, 0, item);
231 if (_siftup((PyListObject *)heap, 0) == -1) {
232 Py_DECREF(returnitem);
233 return NULL;
234 }
235 return returnitem;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000236}
237
238PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800239"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000240from the heap. The combined action runs more efficiently than\n\
241heappush() followed by a separate call to heappop().");
242
243static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000244heapify(PyObject *self, PyObject *heap)
245{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (!PyList_Check(heap)) {
249 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
250 return NULL;
251 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 n = PyList_GET_SIZE(heap);
254 /* Transform bottom-up. The largest index there's any point to
255 looking at is the largest with a child index in-range, so must
256 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
257 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
258 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
259 and that's again n//2-1.
260 */
261 for (i=n/2-1 ; i>=0 ; i--)
262 if(_siftup((PyListObject *)heap, i) == -1)
263 return NULL;
264 Py_INCREF(Py_None);
265 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000266}
267
268PyDoc_STRVAR(heapify_doc,
269"Transform list into a heap, in-place, in O(len(heap)) time.");
270
Raymond Hettingerc9297662004-06-12 22:48:46 +0000271static PyObject *
272nlargest(PyObject *self, PyObject *args)
273{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
275 Py_ssize_t i, n;
276 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
279 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 it = PyObject_GetIter(iterable);
282 if (it == NULL)
283 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 heap = PyList_New(0);
286 if (heap == NULL)
287 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 for (i=0 ; i<n ; i++ ){
290 elem = PyIter_Next(it);
291 if (elem == NULL) {
292 if (PyErr_Occurred())
293 goto fail;
294 else
295 goto sortit;
296 }
297 if (PyList_Append(heap, elem) == -1) {
298 Py_DECREF(elem);
299 goto fail;
300 }
301 Py_DECREF(elem);
302 }
303 if (PyList_GET_SIZE(heap) == 0)
304 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 for (i=n/2-1 ; i>=0 ; i--)
307 if(_siftup((PyListObject *)heap, i) == -1)
308 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 sol = PyList_GET_ITEM(heap, 0);
311 while (1) {
312 elem = PyIter_Next(it);
313 if (elem == NULL) {
314 if (PyErr_Occurred())
315 goto fail;
316 else
317 goto sortit;
318 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000319 cmp = PyObject_RichCompareBool(sol, elem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 if (cmp == -1) {
321 Py_DECREF(elem);
322 goto fail;
323 }
324 if (cmp == 0) {
325 Py_DECREF(elem);
326 continue;
327 }
328 oldelem = PyList_GET_ITEM(heap, 0);
329 PyList_SET_ITEM(heap, 0, elem);
330 Py_DECREF(oldelem);
331 if (_siftup((PyListObject *)heap, 0) == -1)
332 goto fail;
333 sol = PyList_GET_ITEM(heap, 0);
334 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000335sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 if (PyList_Sort(heap) == -1)
337 goto fail;
338 if (PyList_Reverse(heap) == -1)
339 goto fail;
340 Py_DECREF(it);
341 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000342
343fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 Py_DECREF(it);
345 Py_XDECREF(heap);
346 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000347}
348
349PyDoc_STRVAR(nlargest_doc,
350"Find the n largest elements in a dataset.\n\
351\n\
352Equivalent to: sorted(iterable, reverse=True)[:n]\n");
353
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000354static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000355_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 PyObject *newitem, *parent;
358 int cmp;
359 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 assert(PyList_Check(heap));
362 if (pos >= PyList_GET_SIZE(heap)) {
363 PyErr_SetString(PyExc_IndexError, "index out of range");
364 return -1;
365 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 /* Follow the path to the root, moving parents down until finding
368 a place newitem fits. */
Raymond Hettinger871620d2014-05-03 18:36:48 -0700369 newitem = PyList_GET_ITEM(heap, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 while (pos > startpos){
371 parentpos = (pos - 1) >> 1;
372 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000373 cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -0700374 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 if (cmp == 0)
377 break;
Raymond Hettinger871620d2014-05-03 18:36:48 -0700378 parent = PyList_GET_ITEM(heap, parentpos);
379 newitem = PyList_GET_ITEM(heap, pos);
380 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 PyList_SET_ITEM(heap, pos, parent);
382 pos = parentpos;
383 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000385}
386
387static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000388_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000389{
Raymond Hettingerc9926082014-05-03 15:22:07 -0700390 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger871620d2014-05-03 18:36:48 -0700391 PyObject *tmp1, *tmp2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 assert(PyList_Check(heap));
395 endpos = PyList_GET_SIZE(heap);
396 startpos = pos;
397 if (pos >= endpos) {
398 PyErr_SetString(PyExc_IndexError, "index out of range");
399 return -1;
400 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700403 limit = endpos / 2; /* smallest pos that has no child */
404 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700406 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000407 rightpos = childpos + 1;
408 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000409 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 PyList_GET_ITEM(heap, rightpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000411 PyList_GET_ITEM(heap, childpos),
412 Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -0700413 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 if (cmp == 0)
416 childpos = rightpos;
Raymond Hettinger871620d2014-05-03 18:36:48 -0700417 if (endpos != PyList_GET_SIZE(heap)) {
418 PyErr_SetString(PyExc_RuntimeError,
419 "list changed size during iteration");
420 return -1;
421 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 }
423 /* Move the smaller child up. */
Raymond Hettinger871620d2014-05-03 18:36:48 -0700424 tmp1 = PyList_GET_ITEM(heap, childpos);
425 tmp2 = PyList_GET_ITEM(heap, pos);
426 PyList_SET_ITEM(heap, childpos, tmp2);
427 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000428 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 }
Raymond Hettinger871620d2014-05-03 18:36:48 -0700430 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000432}
433
434static PyObject *
435nsmallest(PyObject *self, PyObject *args)
436{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000437 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
438 Py_ssize_t i, n;
439 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
442 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 it = PyObject_GetIter(iterable);
445 if (it == NULL)
446 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 heap = PyList_New(0);
449 if (heap == NULL)
450 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 for (i=0 ; i<n ; i++ ){
453 elem = PyIter_Next(it);
454 if (elem == NULL) {
455 if (PyErr_Occurred())
456 goto fail;
457 else
458 goto sortit;
459 }
460 if (PyList_Append(heap, elem) == -1) {
461 Py_DECREF(elem);
462 goto fail;
463 }
464 Py_DECREF(elem);
465 }
466 n = PyList_GET_SIZE(heap);
467 if (n == 0)
468 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 for (i=n/2-1 ; i>=0 ; i--)
471 if(_siftupmax((PyListObject *)heap, i) == -1)
472 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 los = PyList_GET_ITEM(heap, 0);
475 while (1) {
476 elem = PyIter_Next(it);
477 if (elem == NULL) {
478 if (PyErr_Occurred())
479 goto fail;
480 else
481 goto sortit;
482 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000483 cmp = PyObject_RichCompareBool(elem, los, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 if (cmp == -1) {
485 Py_DECREF(elem);
486 goto fail;
487 }
488 if (cmp == 0) {
489 Py_DECREF(elem);
490 continue;
491 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 oldelem = PyList_GET_ITEM(heap, 0);
494 PyList_SET_ITEM(heap, 0, elem);
495 Py_DECREF(oldelem);
496 if (_siftupmax((PyListObject *)heap, 0) == -1)
497 goto fail;
498 los = PyList_GET_ITEM(heap, 0);
499 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000500
501sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 if (PyList_Sort(heap) == -1)
503 goto fail;
504 Py_DECREF(it);
505 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000506
507fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 Py_DECREF(it);
509 Py_XDECREF(heap);
510 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000511}
512
513PyDoc_STRVAR(nsmallest_doc,
514"Find the n smallest elements in a dataset.\n\
515\n\
516Equivalent to: sorted(iterable)[:n]\n");
517
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000518static PyMethodDef heapq_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 {"heappush", (PyCFunction)heappush,
520 METH_VARARGS, heappush_doc},
521 {"heappushpop", (PyCFunction)heappushpop,
522 METH_VARARGS, heappushpop_doc},
523 {"heappop", (PyCFunction)heappop,
524 METH_O, heappop_doc},
525 {"heapreplace", (PyCFunction)heapreplace,
526 METH_VARARGS, heapreplace_doc},
527 {"heapify", (PyCFunction)heapify,
528 METH_O, heapify_doc},
529 {"nlargest", (PyCFunction)nlargest,
530 METH_VARARGS, nlargest_doc},
531 {"nsmallest", (PyCFunction)nsmallest,
532 METH_VARARGS, nsmallest_doc},
533 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000534};
535
536PyDoc_STRVAR(module_doc,
537"Heap queue algorithm (a.k.a. priority queue).\n\
538\n\
539Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
540all k, counting elements from 0. For the sake of comparison,\n\
541non-existing elements are considered to be infinite. The interesting\n\
542property of a heap is that a[0] is always its smallest element.\n\
543\n\
544Usage:\n\
545\n\
546heap = [] # creates an empty heap\n\
547heappush(heap, item) # pushes a new item on the heap\n\
548item = heappop(heap) # pops the smallest item from the heap\n\
549item = heap[0] # smallest item on the heap without popping it\n\
550heapify(x) # transforms list into a heap, in-place, in linear time\n\
551item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
552 # new item; the heap size is unchanged\n\
553\n\
554Our API differs from textbook heap algorithms as follows:\n\
555\n\
556- We use 0-based indexing. This makes the relationship between the\n\
557 index for a node and the indexes for its children slightly less\n\
558 obvious, but is more suitable since Python uses 0-based indexing.\n\
559\n\
560- Our heappop() method returns the smallest item, not the largest.\n\
561\n\
562These two make it possible to view the heap as a regular Python list\n\
563without surprises: heap[0] is the smallest item, and heap.sort()\n\
564maintains the heap invariant!\n");
565
566
567PyDoc_STRVAR(__about__,
568"Heap queues\n\
569\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000570[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000571\n\
572Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
573all k, counting elements from 0. For the sake of comparison,\n\
574non-existing elements are considered to be infinite. The interesting\n\
575property of a heap is that a[0] is always its smallest element.\n"
576"\n\
577The strange invariant above is meant to be an efficient memory\n\
578representation for a tournament. The numbers below are `k', not a[k]:\n\
579\n\
580 0\n\
581\n\
582 1 2\n\
583\n\
584 3 4 5 6\n\
585\n\
586 7 8 9 10 11 12 13 14\n\
587\n\
588 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
589\n\
590\n\
591In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
592an usual binary tournament we see in sports, each cell is the winner\n\
593over the two cells it tops, and we can trace the winner down the tree\n\
594to see all opponents s/he had. However, in many computer applications\n\
595of such tournaments, we do not need to trace the history of a winner.\n\
596To be more memory efficient, when a winner is promoted, we try to\n\
597replace it by something else at a lower level, and the rule becomes\n\
598that a cell and the two cells it tops contain three different items,\n\
599but the top cell \"wins\" over the two topped cells.\n"
600"\n\
601If this heap invariant is protected at all time, index 0 is clearly\n\
602the overall winner. The simplest algorithmic way to remove it and\n\
603find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
604diagram above) into the 0 position, and then percolate this new 0 down\n\
605the tree, exchanging values, until the invariant is re-established.\n\
606This is clearly logarithmic on the total number of items in the tree.\n\
607By iterating over all items, you get an O(n ln n) sort.\n"
608"\n\
609A nice feature of this sort is that you can efficiently insert new\n\
610items while the sort is going on, provided that the inserted items are\n\
611not \"better\" than the last 0'th element you extracted. This is\n\
612especially useful in simulation contexts, where the tree holds all\n\
613incoming events, and the \"win\" condition means the smallest scheduled\n\
614time. When an event schedule other events for execution, they are\n\
615scheduled into the future, so they can easily go into the heap. So, a\n\
616heap is a good structure for implementing schedulers (this is what I\n\
617used for my MIDI sequencer :-).\n"
618"\n\
619Various structures for implementing schedulers have been extensively\n\
620studied, and heaps are good for this, as they are reasonably speedy,\n\
621the speed is almost constant, and the worst case is not much different\n\
622than the average case. However, there are other representations which\n\
623are more efficient overall, yet the worst cases might be terrible.\n"
624"\n\
625Heaps are also very useful in big disk sorts. You most probably all\n\
626know that a big sort implies producing \"runs\" (which are pre-sorted\n\
627sequences, which size is usually related to the amount of CPU memory),\n\
628followed by a merging passes for these runs, which merging is often\n\
629very cleverly organised[1]. It is very important that the initial\n\
630sort produces the longest runs possible. Tournaments are a good way\n\
631to that. If, using all the memory available to hold a tournament, you\n\
632replace and percolate items that happen to fit the current run, you'll\n\
633produce runs which are twice the size of the memory for random input,\n\
634and much better for input fuzzily ordered.\n"
635"\n\
636Moreover, if you output the 0'th item on disk and get an input which\n\
637may not fit in the current tournament (because the value \"wins\" over\n\
638the last output value), it cannot fit in the heap, so the size of the\n\
639heap decreases. The freed memory could be cleverly reused immediately\n\
640for progressively building a second heap, which grows at exactly the\n\
641same rate the first heap is melting. When the first heap completely\n\
642vanishes, you switch heaps and start a new run. Clever and quite\n\
643effective!\n\
644\n\
645In a word, heaps are useful memory structures to know. I use them in\n\
646a few applications, and I think it is good to keep a `heap' module\n\
647around. :-)\n"
648"\n\
649--------------------\n\
650[1] The disk balancing algorithms which are current, nowadays, are\n\
651more annoying than clever, and this is a consequence of the seeking\n\
652capabilities of the disks. On devices which cannot seek, like big\n\
653tape drives, the story was quite different, and one had to be very\n\
654clever to ensure (far in advance) that each tape movement will be the\n\
655most effective possible (that is, will best participate at\n\
656\"progressing\" the merge). Some tapes were even able to read\n\
657backwards, and this was also used to avoid the rewinding time.\n\
658Believe me, real good tape sorts were quite spectacular to watch!\n\
659From all times, sorting has always been a Great Art! :-)\n");
660
Martin v. Löwis1a214512008-06-11 05:26:20 +0000661
662static struct PyModuleDef _heapqmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 PyModuleDef_HEAD_INIT,
664 "_heapq",
665 module_doc,
666 -1,
667 heapq_methods,
668 NULL,
669 NULL,
670 NULL,
671 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +0000672};
673
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000674PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000675PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000676{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000679 m = PyModule_Create(&_heapqmodule);
680 if (m == NULL)
681 return NULL;
682 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
683 PyModule_AddObject(m, "__about__", about);
684 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000685}
686