blob: 04845f165aa46d893cdbfdb97eb6b66e24574931 [file] [log] [blame]
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo1670b432010-09-03 22:03:10 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000012_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000013{
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070014 PyObject *newitem, *parent;
15 Py_ssize_t parentpos, size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000016 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 assert(PyList_Check(heap));
Antoine Pitrou44d52142013-03-04 20:30:01 +010019 size = PyList_GET_SIZE(heap);
20 if (pos >= size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000021 PyErr_SetString(PyExc_IndexError, "index out of range");
22 return -1;
23 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000025 /* Follow the path to the root, moving parents down until finding
26 a place newitem fits. */
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070027 newitem = PyList_GET_ITEM(heap, pos);
28 while (pos > startpos) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000029 parentpos = (pos - 1) >> 1;
30 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000031 cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070032 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Antoine Pitrou44d52142013-03-04 20:30:01 +010034 if (size != PyList_GET_SIZE(heap)) {
Antoine Pitrou44d52142013-03-04 20:30:01 +010035 PyErr_SetString(PyExc_RuntimeError,
36 "list changed size during iteration");
37 return -1;
38 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 if (cmp == 0)
40 break;
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070041 parent = PyList_GET_ITEM(heap, parentpos);
42 newitem = PyList_GET_ITEM(heap, pos);
43 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000044 PyList_SET_ITEM(heap, pos, parent);
45 pos = parentpos;
46 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000048}
49
50static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000051_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000052{
Raymond Hettingerc9926082014-05-03 15:22:07 -070053 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070054 PyObject *tmp1, *tmp2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000055 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 assert(PyList_Check(heap));
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070058 endpos = PyList_GET_SIZE(heap);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000059 startpos = pos;
60 if (pos >= endpos) {
61 PyErr_SetString(PyExc_IndexError, "index out of range");
62 return -1;
63 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070066 limit = endpos / 2; /* smallest pos that has no child */
67 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070069 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 rightpos = childpos + 1;
71 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000072 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 PyList_GET_ITEM(heap, childpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000074 PyList_GET_ITEM(heap, rightpos),
75 Py_LT);
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070076 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (cmp == 0)
79 childpos = rightpos;
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070080 if (endpos != PyList_GET_SIZE(heap)) {
81 PyErr_SetString(PyExc_RuntimeError,
82 "list changed size during iteration");
83 return -1;
84 }
Antoine Pitrou44d52142013-03-04 20:30:01 +010085 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 /* Move the smaller child up. */
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070087 tmp1 = PyList_GET_ITEM(heap, childpos);
88 tmp2 = PyList_GET_ITEM(heap, pos);
89 PyList_SET_ITEM(heap, childpos, tmp2);
90 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 }
Raymond Hettinger1dd8e712015-05-02 10:00:22 -070093 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000095}
96
97static PyObject *
98heappush(PyObject *self, PyObject *args)
99{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
103 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 if (!PyList_Check(heap)) {
106 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
107 return NULL;
108 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 if (PyList_Append(heap, item) == -1)
111 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
114 return NULL;
115 Py_INCREF(Py_None);
116 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000117}
118
119PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800120"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000121
122static PyObject *
123heappop(PyObject *self, PyObject *heap)
124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 PyObject *lastelt, *returnitem;
126 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 if (!PyList_Check(heap)) {
129 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
130 return NULL;
131 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 /* # raises appropriate IndexError if heap is empty */
134 n = PyList_GET_SIZE(heap);
135 if (n == 0) {
136 PyErr_SetString(PyExc_IndexError, "index out of range");
137 return NULL;
138 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 lastelt = PyList_GET_ITEM(heap, n-1) ;
141 Py_INCREF(lastelt);
Victor Stinner764a46d2013-07-17 21:50:21 +0200142 if (PyList_SetSlice(heap, n-1, n, NULL) < 0) {
143 Py_DECREF(lastelt);
144 return NULL;
145 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 if (!n)
149 return lastelt;
150 returnitem = PyList_GET_ITEM(heap, 0);
151 PyList_SET_ITEM(heap, 0, lastelt);
152 if (_siftup((PyListObject *)heap, 0) == -1) {
153 Py_DECREF(returnitem);
154 return NULL;
155 }
156 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000157}
158
159PyDoc_STRVAR(heappop_doc,
160"Pop the smallest item off the heap, maintaining the heap invariant.");
161
162static PyObject *
163heapreplace(PyObject *self, PyObject *args)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
168 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 if (!PyList_Check(heap)) {
171 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
172 return NULL;
173 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 if (PyList_GET_SIZE(heap) < 1) {
176 PyErr_SetString(PyExc_IndexError, "index out of range");
177 return NULL;
178 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 returnitem = PyList_GET_ITEM(heap, 0);
181 Py_INCREF(item);
182 PyList_SET_ITEM(heap, 0, item);
183 if (_siftup((PyListObject *)heap, 0) == -1) {
184 Py_DECREF(returnitem);
185 return NULL;
186 }
187 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000188}
189
190PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800191"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000192\n\
193This is more efficient than heappop() followed by heappush(), and can be\n\
194more appropriate when using a fixed-size heap. Note that the value\n\
195returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000196this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 if item > heap[0]:\n\
198 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000199
200static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000201heappushpop(PyObject *self, PyObject *args)
202{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 PyObject *heap, *item, *returnitem;
204 int cmp;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
207 return NULL;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (!PyList_Check(heap)) {
210 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
211 return NULL;
212 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 if (PyList_GET_SIZE(heap) < 1) {
215 Py_INCREF(item);
216 return item;
217 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000218
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000219 cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 if (cmp == -1)
221 return NULL;
222 if (cmp == 0) {
223 Py_INCREF(item);
224 return item;
225 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000226
Raymond Hettingerb9db9e12015-05-11 19:58:56 -0700227 if (PyList_GET_SIZE(heap) == 0) {
228 PyErr_SetString(PyExc_IndexError, "index out of range");
229 return NULL;
230 }
231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 returnitem = PyList_GET_ITEM(heap, 0);
233 Py_INCREF(item);
234 PyList_SET_ITEM(heap, 0, item);
235 if (_siftup((PyListObject *)heap, 0) == -1) {
236 Py_DECREF(returnitem);
237 return NULL;
238 }
239 return returnitem;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000240}
241
242PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800243"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000244from the heap. The combined action runs more efficiently than\n\
245heappush() followed by a separate call to heappop().");
246
247static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000248heapify(PyObject *self, PyObject *heap)
249{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 if (!PyList_Check(heap)) {
253 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
254 return NULL;
255 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 n = PyList_GET_SIZE(heap);
258 /* Transform bottom-up. The largest index there's any point to
259 looking at is the largest with a child index in-range, so must
260 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
261 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
262 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
263 and that's again n//2-1.
264 */
265 for (i=n/2-1 ; i>=0 ; i--)
266 if(_siftup((PyListObject *)heap, i) == -1)
267 return NULL;
268 Py_INCREF(Py_None);
269 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000270}
271
272PyDoc_STRVAR(heapify_doc,
273"Transform list into a heap, in-place, in O(len(heap)) time.");
274
Raymond Hettingerc9297662004-06-12 22:48:46 +0000275static PyObject *
276nlargest(PyObject *self, PyObject *args)
277{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
279 Py_ssize_t i, n;
280 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
283 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 it = PyObject_GetIter(iterable);
286 if (it == NULL)
287 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 heap = PyList_New(0);
290 if (heap == NULL)
291 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 for (i=0 ; i<n ; i++ ){
294 elem = PyIter_Next(it);
295 if (elem == NULL) {
296 if (PyErr_Occurred())
297 goto fail;
298 else
299 goto sortit;
300 }
301 if (PyList_Append(heap, elem) == -1) {
302 Py_DECREF(elem);
303 goto fail;
304 }
305 Py_DECREF(elem);
306 }
307 if (PyList_GET_SIZE(heap) == 0)
308 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 for (i=n/2-1 ; i>=0 ; i--)
311 if(_siftup((PyListObject *)heap, i) == -1)
312 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 sol = PyList_GET_ITEM(heap, 0);
315 while (1) {
316 elem = PyIter_Next(it);
317 if (elem == NULL) {
318 if (PyErr_Occurred())
319 goto fail;
320 else
321 goto sortit;
322 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000323 cmp = PyObject_RichCompareBool(sol, elem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000324 if (cmp == -1) {
325 Py_DECREF(elem);
326 goto fail;
327 }
328 if (cmp == 0) {
329 Py_DECREF(elem);
330 continue;
331 }
332 oldelem = PyList_GET_ITEM(heap, 0);
333 PyList_SET_ITEM(heap, 0, elem);
334 Py_DECREF(oldelem);
335 if (_siftup((PyListObject *)heap, 0) == -1)
336 goto fail;
337 sol = PyList_GET_ITEM(heap, 0);
338 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000339sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 if (PyList_Sort(heap) == -1)
341 goto fail;
342 if (PyList_Reverse(heap) == -1)
343 goto fail;
344 Py_DECREF(it);
345 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000346
347fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 Py_DECREF(it);
349 Py_XDECREF(heap);
350 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000351}
352
353PyDoc_STRVAR(nlargest_doc,
354"Find the n largest elements in a dataset.\n\
355\n\
356Equivalent to: sorted(iterable, reverse=True)[:n]\n");
357
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000358static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000359_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 PyObject *newitem, *parent;
362 int cmp;
363 Py_ssize_t parentpos;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 assert(PyList_Check(heap));
366 if (pos >= PyList_GET_SIZE(heap)) {
367 PyErr_SetString(PyExc_IndexError, "index out of range");
368 return -1;
369 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 newitem = PyList_GET_ITEM(heap, pos);
372 Py_INCREF(newitem);
373 /* Follow the path to the root, moving parents down until finding
374 a place newitem fits. */
375 while (pos > startpos){
376 parentpos = (pos - 1) >> 1;
377 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000378 cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 if (cmp == -1) {
380 Py_DECREF(newitem);
381 return -1;
382 }
383 if (cmp == 0)
384 break;
385 Py_INCREF(parent);
386 Py_DECREF(PyList_GET_ITEM(heap, pos));
387 PyList_SET_ITEM(heap, pos, parent);
388 pos = parentpos;
389 }
390 Py_DECREF(PyList_GET_ITEM(heap, pos));
391 PyList_SET_ITEM(heap, pos, newitem);
392 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000393}
394
395static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000396_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000397{
Raymond Hettingerc9926082014-05-03 15:22:07 -0700398 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399 int cmp;
400 PyObject *newitem, *tmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 assert(PyList_Check(heap));
403 endpos = PyList_GET_SIZE(heap);
404 startpos = pos;
405 if (pos >= endpos) {
406 PyErr_SetString(PyExc_IndexError, "index out of range");
407 return -1;
408 }
409 newitem = PyList_GET_ITEM(heap, pos);
410 Py_INCREF(newitem);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700413 limit = endpos / 2; /* smallest pos that has no child */
414 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700416 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 rightpos = childpos + 1;
418 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000419 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 PyList_GET_ITEM(heap, rightpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000421 PyList_GET_ITEM(heap, childpos),
422 Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 if (cmp == -1) {
424 Py_DECREF(newitem);
425 return -1;
426 }
427 if (cmp == 0)
428 childpos = rightpos;
429 }
430 /* Move the smaller child up. */
431 tmp = PyList_GET_ITEM(heap, childpos);
432 Py_INCREF(tmp);
433 Py_DECREF(PyList_GET_ITEM(heap, pos));
434 PyList_SET_ITEM(heap, pos, tmp);
435 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000437
Terry Jan Reedy0158af32013-03-11 17:42:46 -0400438 /* The leaf at pos is empty now. Put newitem there, and bubble
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 it up to its final resting place (by sifting its parents down). */
440 Py_DECREF(PyList_GET_ITEM(heap, pos));
441 PyList_SET_ITEM(heap, pos, newitem);
442 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000443}
444
445static PyObject *
446nsmallest(PyObject *self, PyObject *args)
447{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
449 Py_ssize_t i, n;
450 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
453 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 it = PyObject_GetIter(iterable);
456 if (it == NULL)
457 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 heap = PyList_New(0);
460 if (heap == NULL)
461 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000462
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 for (i=0 ; i<n ; i++ ){
464 elem = PyIter_Next(it);
465 if (elem == NULL) {
466 if (PyErr_Occurred())
467 goto fail;
468 else
469 goto sortit;
470 }
471 if (PyList_Append(heap, elem) == -1) {
472 Py_DECREF(elem);
473 goto fail;
474 }
475 Py_DECREF(elem);
476 }
477 n = PyList_GET_SIZE(heap);
478 if (n == 0)
479 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 for (i=n/2-1 ; i>=0 ; i--)
482 if(_siftupmax((PyListObject *)heap, i) == -1)
483 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 los = PyList_GET_ITEM(heap, 0);
486 while (1) {
487 elem = PyIter_Next(it);
488 if (elem == NULL) {
489 if (PyErr_Occurred())
490 goto fail;
491 else
492 goto sortit;
493 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000494 cmp = PyObject_RichCompareBool(elem, los, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000495 if (cmp == -1) {
496 Py_DECREF(elem);
497 goto fail;
498 }
499 if (cmp == 0) {
500 Py_DECREF(elem);
501 continue;
502 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 oldelem = PyList_GET_ITEM(heap, 0);
505 PyList_SET_ITEM(heap, 0, elem);
506 Py_DECREF(oldelem);
507 if (_siftupmax((PyListObject *)heap, 0) == -1)
508 goto fail;
509 los = PyList_GET_ITEM(heap, 0);
510 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000511
512sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 if (PyList_Sort(heap) == -1)
514 goto fail;
515 Py_DECREF(it);
516 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000517
518fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 Py_DECREF(it);
520 Py_XDECREF(heap);
521 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000522}
523
524PyDoc_STRVAR(nsmallest_doc,
525"Find the n smallest elements in a dataset.\n\
526\n\
527Equivalent to: sorted(iterable)[:n]\n");
528
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000529static PyMethodDef heapq_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 {"heappush", (PyCFunction)heappush,
531 METH_VARARGS, heappush_doc},
532 {"heappushpop", (PyCFunction)heappushpop,
533 METH_VARARGS, heappushpop_doc},
534 {"heappop", (PyCFunction)heappop,
535 METH_O, heappop_doc},
536 {"heapreplace", (PyCFunction)heapreplace,
537 METH_VARARGS, heapreplace_doc},
538 {"heapify", (PyCFunction)heapify,
539 METH_O, heapify_doc},
540 {"nlargest", (PyCFunction)nlargest,
541 METH_VARARGS, nlargest_doc},
542 {"nsmallest", (PyCFunction)nsmallest,
543 METH_VARARGS, nsmallest_doc},
544 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000545};
546
547PyDoc_STRVAR(module_doc,
548"Heap queue algorithm (a.k.a. priority queue).\n\
549\n\
550Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
551all k, counting elements from 0. For the sake of comparison,\n\
552non-existing elements are considered to be infinite. The interesting\n\
553property of a heap is that a[0] is always its smallest element.\n\
554\n\
555Usage:\n\
556\n\
557heap = [] # creates an empty heap\n\
558heappush(heap, item) # pushes a new item on the heap\n\
559item = heappop(heap) # pops the smallest item from the heap\n\
560item = heap[0] # smallest item on the heap without popping it\n\
561heapify(x) # transforms list into a heap, in-place, in linear time\n\
562item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
563 # new item; the heap size is unchanged\n\
564\n\
565Our API differs from textbook heap algorithms as follows:\n\
566\n\
567- We use 0-based indexing. This makes the relationship between the\n\
568 index for a node and the indexes for its children slightly less\n\
569 obvious, but is more suitable since Python uses 0-based indexing.\n\
570\n\
571- Our heappop() method returns the smallest item, not the largest.\n\
572\n\
573These two make it possible to view the heap as a regular Python list\n\
574without surprises: heap[0] is the smallest item, and heap.sort()\n\
575maintains the heap invariant!\n");
576
577
578PyDoc_STRVAR(__about__,
579"Heap queues\n\
580\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000581[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000582\n\
583Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
584all k, counting elements from 0. For the sake of comparison,\n\
585non-existing elements are considered to be infinite. The interesting\n\
586property of a heap is that a[0] is always its smallest element.\n"
587"\n\
588The strange invariant above is meant to be an efficient memory\n\
589representation for a tournament. The numbers below are `k', not a[k]:\n\
590\n\
591 0\n\
592\n\
593 1 2\n\
594\n\
595 3 4 5 6\n\
596\n\
597 7 8 9 10 11 12 13 14\n\
598\n\
599 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
600\n\
601\n\
602In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
603an usual binary tournament we see in sports, each cell is the winner\n\
604over the two cells it tops, and we can trace the winner down the tree\n\
605to see all opponents s/he had. However, in many computer applications\n\
606of such tournaments, we do not need to trace the history of a winner.\n\
607To be more memory efficient, when a winner is promoted, we try to\n\
608replace it by something else at a lower level, and the rule becomes\n\
609that a cell and the two cells it tops contain three different items,\n\
610but the top cell \"wins\" over the two topped cells.\n"
611"\n\
612If this heap invariant is protected at all time, index 0 is clearly\n\
613the overall winner. The simplest algorithmic way to remove it and\n\
614find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
615diagram above) into the 0 position, and then percolate this new 0 down\n\
616the tree, exchanging values, until the invariant is re-established.\n\
617This is clearly logarithmic on the total number of items in the tree.\n\
618By iterating over all items, you get an O(n ln n) sort.\n"
619"\n\
620A nice feature of this sort is that you can efficiently insert new\n\
621items while the sort is going on, provided that the inserted items are\n\
622not \"better\" than the last 0'th element you extracted. This is\n\
623especially useful in simulation contexts, where the tree holds all\n\
624incoming events, and the \"win\" condition means the smallest scheduled\n\
625time. When an event schedule other events for execution, they are\n\
626scheduled into the future, so they can easily go into the heap. So, a\n\
627heap is a good structure for implementing schedulers (this is what I\n\
628used for my MIDI sequencer :-).\n"
629"\n\
630Various structures for implementing schedulers have been extensively\n\
631studied, and heaps are good for this, as they are reasonably speedy,\n\
632the speed is almost constant, and the worst case is not much different\n\
633than the average case. However, there are other representations which\n\
634are more efficient overall, yet the worst cases might be terrible.\n"
635"\n\
636Heaps are also very useful in big disk sorts. You most probably all\n\
637know that a big sort implies producing \"runs\" (which are pre-sorted\n\
638sequences, which size is usually related to the amount of CPU memory),\n\
639followed by a merging passes for these runs, which merging is often\n\
640very cleverly organised[1]. It is very important that the initial\n\
641sort produces the longest runs possible. Tournaments are a good way\n\
642to that. If, using all the memory available to hold a tournament, you\n\
643replace and percolate items that happen to fit the current run, you'll\n\
644produce runs which are twice the size of the memory for random input,\n\
645and much better for input fuzzily ordered.\n"
646"\n\
647Moreover, if you output the 0'th item on disk and get an input which\n\
648may not fit in the current tournament (because the value \"wins\" over\n\
649the last output value), it cannot fit in the heap, so the size of the\n\
650heap decreases. The freed memory could be cleverly reused immediately\n\
651for progressively building a second heap, which grows at exactly the\n\
652same rate the first heap is melting. When the first heap completely\n\
653vanishes, you switch heaps and start a new run. Clever and quite\n\
654effective!\n\
655\n\
656In a word, heaps are useful memory structures to know. I use them in\n\
657a few applications, and I think it is good to keep a `heap' module\n\
658around. :-)\n"
659"\n\
660--------------------\n\
661[1] The disk balancing algorithms which are current, nowadays, are\n\
662more annoying than clever, and this is a consequence of the seeking\n\
663capabilities of the disks. On devices which cannot seek, like big\n\
664tape drives, the story was quite different, and one had to be very\n\
665clever to ensure (far in advance) that each tape movement will be the\n\
666most effective possible (that is, will best participate at\n\
667\"progressing\" the merge). Some tapes were even able to read\n\
668backwards, and this was also used to avoid the rewinding time.\n\
669Believe me, real good tape sorts were quite spectacular to watch!\n\
670From all times, sorting has always been a Great Art! :-)\n");
671
Martin v. Löwis1a214512008-06-11 05:26:20 +0000672
673static struct PyModuleDef _heapqmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 PyModuleDef_HEAD_INIT,
675 "_heapq",
676 module_doc,
677 -1,
678 heapq_methods,
679 NULL,
680 NULL,
681 NULL,
682 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +0000683};
684
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000685PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000686PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000687{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 m = PyModule_Create(&_heapqmodule);
691 if (m == NULL)
692 return NULL;
693 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
694 PyModule_AddObject(m, "__about__", about);
695 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000696}
697