blob: b3e4753f2654725441cdb943637a578b28c55071 [file] [log] [blame]
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001/* Drop in replacement for heapq.py
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00002
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
Éric Araujo1670b432010-09-03 22:03:10 +00005annotated by François Pinard, and converted to C by Raymond Hettinger.
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +00006
7*/
8
9#include "Python.h"
10
Georg Brandlf78e02b2008-06-10 17:40:04 +000011static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000012_siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000013{
Raymond Hettinger871620d2014-05-03 18:36:48 -070014 PyObject *newitem, *parent;
Raymond Hettinger90e93382014-05-03 18:45:54 -070015 Py_ssize_t parentpos, size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000016 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000018 assert(PyList_Check(heap));
Antoine Pitrou44d52142013-03-04 20:30:01 +010019 size = PyList_GET_SIZE(heap);
20 if (pos >= size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000021 PyErr_SetString(PyExc_IndexError, "index out of range");
22 return -1;
23 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000025 /* Follow the path to the root, moving parents down until finding
26 a place newitem fits. */
Raymond Hettinger871620d2014-05-03 18:36:48 -070027 newitem = PyList_GET_ITEM(heap, pos);
Raymond Hettinger90e93382014-05-03 18:45:54 -070028 while (pos > startpos) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000029 parentpos = (pos - 1) >> 1;
30 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000031 cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -070032 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000033 return -1;
Antoine Pitrou44d52142013-03-04 20:30:01 +010034 if (size != PyList_GET_SIZE(heap)) {
Antoine Pitrou44d52142013-03-04 20:30:01 +010035 PyErr_SetString(PyExc_RuntimeError,
36 "list changed size during iteration");
37 return -1;
38 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 if (cmp == 0)
40 break;
Raymond Hettinger871620d2014-05-03 18:36:48 -070041 parent = PyList_GET_ITEM(heap, parentpos);
42 newitem = PyList_GET_ITEM(heap, pos);
43 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000044 PyList_SET_ITEM(heap, pos, parent);
45 pos = parentpos;
46 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 return 0;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000048}
49
50static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +000051_siftup(PyListObject *heap, Py_ssize_t pos)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000052{
Raymond Hettingerc9926082014-05-03 15:22:07 -070053 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger871620d2014-05-03 18:36:48 -070054 PyObject *tmp1, *tmp2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000055 int cmp;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 assert(PyList_Check(heap));
Raymond Hettinger871620d2014-05-03 18:36:48 -070058 endpos = PyList_GET_SIZE(heap);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000059 startpos = pos;
60 if (pos >= endpos) {
61 PyErr_SetString(PyExc_IndexError, "index out of range");
62 return -1;
63 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070066 limit = endpos / 2; /* smallest pos that has no child */
67 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -070069 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 rightpos = childpos + 1;
71 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000072 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 PyList_GET_ITEM(heap, childpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +000074 PyList_GET_ITEM(heap, rightpos),
75 Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -070076 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (cmp == 0)
79 childpos = rightpos;
Raymond Hettinger871620d2014-05-03 18:36:48 -070080 if (endpos != PyList_GET_SIZE(heap)) {
81 PyErr_SetString(PyExc_RuntimeError,
82 "list changed size during iteration");
83 return -1;
84 }
Antoine Pitrou44d52142013-03-04 20:30:01 +010085 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 /* Move the smaller child up. */
Raymond Hettinger871620d2014-05-03 18:36:48 -070087 tmp1 = PyList_GET_ITEM(heap, childpos);
88 tmp2 = PyList_GET_ITEM(heap, pos);
89 PyList_SET_ITEM(heap, childpos, tmp2);
90 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 }
Raymond Hettinger871620d2014-05-03 18:36:48 -070093 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 return _siftdown(heap, startpos, pos);
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +000095}
96
97static PyObject *
98heappush(PyObject *self, PyObject *args)
99{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 PyObject *heap, *item;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
103 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 if (!PyList_Check(heap)) {
106 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
107 return NULL;
108 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 if (PyList_Append(heap, item) == -1)
111 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
114 return NULL;
115 Py_INCREF(Py_None);
116 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000117}
118
119PyDoc_STRVAR(heappush_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800120"heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000121
122static PyObject *
123heappop(PyObject *self, PyObject *heap)
124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 PyObject *lastelt, *returnitem;
126 Py_ssize_t n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000128 if (!PyList_Check(heap)) {
129 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
130 return NULL;
131 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 /* # raises appropriate IndexError if heap is empty */
134 n = PyList_GET_SIZE(heap);
135 if (n == 0) {
136 PyErr_SetString(PyExc_IndexError, "index out of range");
137 return NULL;
138 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 lastelt = PyList_GET_ITEM(heap, n-1) ;
141 Py_INCREF(lastelt);
Victor Stinner764a46d2013-07-17 21:50:21 +0200142 if (PyList_SetSlice(heap, n-1, n, NULL) < 0) {
143 Py_DECREF(lastelt);
144 return NULL;
145 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 n--;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 if (!n)
149 return lastelt;
150 returnitem = PyList_GET_ITEM(heap, 0);
151 PyList_SET_ITEM(heap, 0, lastelt);
152 if (_siftup((PyListObject *)heap, 0) == -1) {
153 Py_DECREF(returnitem);
154 return NULL;
155 }
156 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000157}
158
159PyDoc_STRVAR(heappop_doc,
160"Pop the smallest item off the heap, maintaining the heap invariant.");
161
162static PyObject *
163heapreplace(PyObject *self, PyObject *args)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 PyObject *heap, *item, *returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
168 return NULL;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 if (!PyList_Check(heap)) {
171 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
172 return NULL;
173 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 if (PyList_GET_SIZE(heap) < 1) {
176 PyErr_SetString(PyExc_IndexError, "index out of range");
177 return NULL;
178 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000180 returnitem = PyList_GET_ITEM(heap, 0);
181 Py_INCREF(item);
182 PyList_SET_ITEM(heap, 0, item);
183 if (_siftup((PyListObject *)heap, 0) == -1) {
184 Py_DECREF(returnitem);
185 return NULL;
186 }
187 return returnitem;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000188}
189
190PyDoc_STRVAR(heapreplace_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800191"heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000192\n\
193This is more efficient than heappop() followed by heappush(), and can be\n\
194more appropriate when using a fixed-size heap. Note that the value\n\
195returned may be larger than item! That constrains reasonable uses of\n\
Raymond Hettinger8158e842004-09-06 07:04:09 +0000196this routine unless written as part of a conditional replacement:\n\n\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 if item > heap[0]:\n\
198 item = heapreplace(heap, item)\n");
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000199
200static PyObject *
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000201heappushpop(PyObject *self, PyObject *args)
202{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 PyObject *heap, *item, *returnitem;
204 int cmp;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000206 if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
207 return NULL;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (!PyList_Check(heap)) {
210 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
211 return NULL;
212 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000214 if (PyList_GET_SIZE(heap) < 1) {
215 Py_INCREF(item);
216 return item;
217 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000218
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000219 cmp = PyObject_RichCompareBool(PyList_GET_ITEM(heap, 0), item, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 if (cmp == -1)
221 return NULL;
222 if (cmp == 0) {
223 Py_INCREF(item);
224 return item;
225 }
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 returnitem = PyList_GET_ITEM(heap, 0);
228 Py_INCREF(item);
229 PyList_SET_ITEM(heap, 0, item);
230 if (_siftup((PyListObject *)heap, 0) == -1) {
231 Py_DECREF(returnitem);
232 return NULL;
233 }
234 return returnitem;
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000235}
236
237PyDoc_STRVAR(heappushpop_doc,
Raymond Hettingerbd8f2902013-01-18 17:35:25 -0800238"heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000239from the heap. The combined action runs more efficiently than\n\
240heappush() followed by a separate call to heappop().");
241
242static PyObject *
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000243heapify(PyObject *self, PyObject *heap)
244{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 Py_ssize_t i, n;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 if (!PyList_Check(heap)) {
248 PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
249 return NULL;
250 }
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 n = PyList_GET_SIZE(heap);
253 /* Transform bottom-up. The largest index there's any point to
254 looking at is the largest with a child index in-range, so must
255 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
256 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
257 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
258 and that's again n//2-1.
259 */
260 for (i=n/2-1 ; i>=0 ; i--)
261 if(_siftup((PyListObject *)heap, i) == -1)
262 return NULL;
263 Py_INCREF(Py_None);
264 return Py_None;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000265}
266
267PyDoc_STRVAR(heapify_doc,
268"Transform list into a heap, in-place, in O(len(heap)) time.");
269
Raymond Hettingerc9297662004-06-12 22:48:46 +0000270static PyObject *
271nlargest(PyObject *self, PyObject *args)
272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
274 Py_ssize_t i, n;
275 int cmp;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
278 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 it = PyObject_GetIter(iterable);
281 if (it == NULL)
282 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 heap = PyList_New(0);
285 if (heap == NULL)
286 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 for (i=0 ; i<n ; i++ ){
289 elem = PyIter_Next(it);
290 if (elem == NULL) {
291 if (PyErr_Occurred())
292 goto fail;
293 else
294 goto sortit;
295 }
296 if (PyList_Append(heap, elem) == -1) {
297 Py_DECREF(elem);
298 goto fail;
299 }
300 Py_DECREF(elem);
301 }
302 if (PyList_GET_SIZE(heap) == 0)
303 goto sortit;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000304
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 for (i=n/2-1 ; i>=0 ; i--)
306 if(_siftup((PyListObject *)heap, i) == -1)
307 goto fail;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 sol = PyList_GET_ITEM(heap, 0);
310 while (1) {
311 elem = PyIter_Next(it);
312 if (elem == NULL) {
313 if (PyErr_Occurred())
314 goto fail;
315 else
316 goto sortit;
317 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000318 cmp = PyObject_RichCompareBool(sol, elem, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (cmp == -1) {
320 Py_DECREF(elem);
321 goto fail;
322 }
323 if (cmp == 0) {
324 Py_DECREF(elem);
325 continue;
326 }
327 oldelem = PyList_GET_ITEM(heap, 0);
328 PyList_SET_ITEM(heap, 0, elem);
329 Py_DECREF(oldelem);
330 if (_siftup((PyListObject *)heap, 0) == -1)
331 goto fail;
332 sol = PyList_GET_ITEM(heap, 0);
333 }
Raymond Hettingerc9297662004-06-12 22:48:46 +0000334sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 if (PyList_Sort(heap) == -1)
336 goto fail;
337 if (PyList_Reverse(heap) == -1)
338 goto fail;
339 Py_DECREF(it);
340 return heap;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000341
342fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 Py_DECREF(it);
344 Py_XDECREF(heap);
345 return NULL;
Raymond Hettingerc9297662004-06-12 22:48:46 +0000346}
347
348PyDoc_STRVAR(nlargest_doc,
349"Find the n largest elements in a dataset.\n\
350\n\
351Equivalent to: sorted(iterable, reverse=True)[:n]\n");
352
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000353static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000354_siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000355{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 PyObject *newitem, *parent;
Raymond Hettinger90e93382014-05-03 18:45:54 -0700357 Py_ssize_t parentpos, size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 assert(PyList_Check(heap));
Raymond Hettinger90e93382014-05-03 18:45:54 -0700361 size = PyList_GET_SIZE(heap);
362 if (pos >= size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyErr_SetString(PyExc_IndexError, "index out of range");
364 return -1;
365 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 /* Follow the path to the root, moving parents down until finding
368 a place newitem fits. */
Raymond Hettinger871620d2014-05-03 18:36:48 -0700369 newitem = PyList_GET_ITEM(heap, pos);
Raymond Hettinger90e93382014-05-03 18:45:54 -0700370 while (pos > startpos) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 parentpos = (pos - 1) >> 1;
372 parent = PyList_GET_ITEM(heap, parentpos);
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000373 cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -0700374 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 return -1;
Raymond Hettinger90e93382014-05-03 18:45:54 -0700376 if (size != PyList_GET_SIZE(heap)) {
377 PyErr_SetString(PyExc_RuntimeError,
378 "list changed size during iteration");
379 return -1;
380 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 if (cmp == 0)
382 break;
Raymond Hettinger871620d2014-05-03 18:36:48 -0700383 parent = PyList_GET_ITEM(heap, parentpos);
384 newitem = PyList_GET_ITEM(heap, pos);
385 PyList_SET_ITEM(heap, parentpos, newitem);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 PyList_SET_ITEM(heap, pos, parent);
387 pos = parentpos;
388 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 return 0;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000390}
391
392static int
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000393_siftupmax(PyListObject *heap, Py_ssize_t pos)
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000394{
Raymond Hettingerc9926082014-05-03 15:22:07 -0700395 Py_ssize_t startpos, endpos, childpos, rightpos, limit;
Raymond Hettinger871620d2014-05-03 18:36:48 -0700396 PyObject *tmp1, *tmp2;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399 assert(PyList_Check(heap));
400 endpos = PyList_GET_SIZE(heap);
401 startpos = pos;
402 if (pos >= endpos) {
403 PyErr_SetString(PyExc_IndexError, "index out of range");
404 return -1;
405 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000407 /* Bubble up the smaller child until hitting a leaf. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700408 limit = endpos / 2; /* smallest pos that has no child */
409 while (pos < limit) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* Set childpos to index of smaller child. */
Raymond Hettingerc9926082014-05-03 15:22:07 -0700411 childpos = 2*pos + 1; /* leftmost child position */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 rightpos = childpos + 1;
413 if (rightpos < endpos) {
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000414 cmp = PyObject_RichCompareBool(
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 PyList_GET_ITEM(heap, rightpos),
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000416 PyList_GET_ITEM(heap, childpos),
417 Py_LT);
Raymond Hettinger871620d2014-05-03 18:36:48 -0700418 if (cmp == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 if (cmp == 0)
421 childpos = rightpos;
Raymond Hettinger871620d2014-05-03 18:36:48 -0700422 if (endpos != PyList_GET_SIZE(heap)) {
423 PyErr_SetString(PyExc_RuntimeError,
424 "list changed size during iteration");
425 return -1;
426 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 }
428 /* Move the smaller child up. */
Raymond Hettinger871620d2014-05-03 18:36:48 -0700429 tmp1 = PyList_GET_ITEM(heap, childpos);
430 tmp2 = PyList_GET_ITEM(heap, pos);
431 PyList_SET_ITEM(heap, childpos, tmp2);
432 PyList_SET_ITEM(heap, pos, tmp1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 pos = childpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000434 }
Raymond Hettinger871620d2014-05-03 18:36:48 -0700435 /* Bubble it up to its final resting place (by sifting its parents down). */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 return _siftdownmax(heap, startpos, pos);
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000437}
438
439static PyObject *
440nsmallest(PyObject *self, PyObject *args)
441{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
443 Py_ssize_t i, n;
444 int cmp;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
447 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 it = PyObject_GetIter(iterable);
450 if (it == NULL)
451 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000452
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 heap = PyList_New(0);
454 if (heap == NULL)
455 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 for (i=0 ; i<n ; i++ ){
458 elem = PyIter_Next(it);
459 if (elem == NULL) {
460 if (PyErr_Occurred())
461 goto fail;
462 else
463 goto sortit;
464 }
465 if (PyList_Append(heap, elem) == -1) {
466 Py_DECREF(elem);
467 goto fail;
468 }
469 Py_DECREF(elem);
470 }
471 n = PyList_GET_SIZE(heap);
472 if (n == 0)
473 goto sortit;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 for (i=n/2-1 ; i>=0 ; i--)
476 if(_siftupmax((PyListObject *)heap, i) == -1)
477 goto fail;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 los = PyList_GET_ITEM(heap, 0);
480 while (1) {
481 elem = PyIter_Next(it);
482 if (elem == NULL) {
483 if (PyErr_Occurred())
484 goto fail;
485 else
486 goto sortit;
487 }
Raymond Hettingerdb6b62e2010-09-05 05:26:10 +0000488 cmp = PyObject_RichCompareBool(elem, los, Py_LT);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 if (cmp == -1) {
490 Py_DECREF(elem);
491 goto fail;
492 }
493 if (cmp == 0) {
494 Py_DECREF(elem);
495 continue;
496 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 oldelem = PyList_GET_ITEM(heap, 0);
499 PyList_SET_ITEM(heap, 0, elem);
500 Py_DECREF(oldelem);
501 if (_siftupmax((PyListObject *)heap, 0) == -1)
502 goto fail;
503 los = PyList_GET_ITEM(heap, 0);
504 }
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000505
506sortit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 if (PyList_Sort(heap) == -1)
508 goto fail;
509 Py_DECREF(it);
510 return heap;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000511
512fail:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 Py_DECREF(it);
514 Py_XDECREF(heap);
515 return NULL;
Raymond Hettinger2e3dfaf2004-06-13 05:26:33 +0000516}
517
518PyDoc_STRVAR(nsmallest_doc,
519"Find the n smallest elements in a dataset.\n\
520\n\
521Equivalent to: sorted(iterable)[:n]\n");
522
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000523static PyMethodDef heapq_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 {"heappush", (PyCFunction)heappush,
525 METH_VARARGS, heappush_doc},
526 {"heappushpop", (PyCFunction)heappushpop,
527 METH_VARARGS, heappushpop_doc},
528 {"heappop", (PyCFunction)heappop,
529 METH_O, heappop_doc},
530 {"heapreplace", (PyCFunction)heapreplace,
531 METH_VARARGS, heapreplace_doc},
532 {"heapify", (PyCFunction)heapify,
533 METH_O, heapify_doc},
534 {"nlargest", (PyCFunction)nlargest,
535 METH_VARARGS, nlargest_doc},
536 {"nsmallest", (PyCFunction)nsmallest,
537 METH_VARARGS, nsmallest_doc},
538 {NULL, NULL} /* sentinel */
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000539};
540
541PyDoc_STRVAR(module_doc,
542"Heap queue algorithm (a.k.a. priority queue).\n\
543\n\
544Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
545all k, counting elements from 0. For the sake of comparison,\n\
546non-existing elements are considered to be infinite. The interesting\n\
547property of a heap is that a[0] is always its smallest element.\n\
548\n\
549Usage:\n\
550\n\
551heap = [] # creates an empty heap\n\
552heappush(heap, item) # pushes a new item on the heap\n\
553item = heappop(heap) # pops the smallest item from the heap\n\
554item = heap[0] # smallest item on the heap without popping it\n\
555heapify(x) # transforms list into a heap, in-place, in linear time\n\
556item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
557 # new item; the heap size is unchanged\n\
558\n\
559Our API differs from textbook heap algorithms as follows:\n\
560\n\
561- We use 0-based indexing. This makes the relationship between the\n\
562 index for a node and the indexes for its children slightly less\n\
563 obvious, but is more suitable since Python uses 0-based indexing.\n\
564\n\
565- Our heappop() method returns the smallest item, not the largest.\n\
566\n\
567These two make it possible to view the heap as a regular Python list\n\
568without surprises: heap[0] is the smallest item, and heap.sort()\n\
569maintains the heap invariant!\n");
570
571
572PyDoc_STRVAR(__about__,
573"Heap queues\n\
574\n\
Neal Norwitzc1786ea2007-08-23 23:58:43 +0000575[explanation by Fran\xc3\xa7ois Pinard]\n\
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000576\n\
577Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
578all k, counting elements from 0. For the sake of comparison,\n\
579non-existing elements are considered to be infinite. The interesting\n\
580property of a heap is that a[0] is always its smallest element.\n"
581"\n\
582The strange invariant above is meant to be an efficient memory\n\
583representation for a tournament. The numbers below are `k', not a[k]:\n\
584\n\
585 0\n\
586\n\
587 1 2\n\
588\n\
589 3 4 5 6\n\
590\n\
591 7 8 9 10 11 12 13 14\n\
592\n\
593 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
594\n\
595\n\
596In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
597an usual binary tournament we see in sports, each cell is the winner\n\
598over the two cells it tops, and we can trace the winner down the tree\n\
599to see all opponents s/he had. However, in many computer applications\n\
600of such tournaments, we do not need to trace the history of a winner.\n\
601To be more memory efficient, when a winner is promoted, we try to\n\
602replace it by something else at a lower level, and the rule becomes\n\
603that a cell and the two cells it tops contain three different items,\n\
604but the top cell \"wins\" over the two topped cells.\n"
605"\n\
606If this heap invariant is protected at all time, index 0 is clearly\n\
607the overall winner. The simplest algorithmic way to remove it and\n\
608find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
609diagram above) into the 0 position, and then percolate this new 0 down\n\
610the tree, exchanging values, until the invariant is re-established.\n\
611This is clearly logarithmic on the total number of items in the tree.\n\
612By iterating over all items, you get an O(n ln n) sort.\n"
613"\n\
614A nice feature of this sort is that you can efficiently insert new\n\
615items while the sort is going on, provided that the inserted items are\n\
616not \"better\" than the last 0'th element you extracted. This is\n\
617especially useful in simulation contexts, where the tree holds all\n\
618incoming events, and the \"win\" condition means the smallest scheduled\n\
619time. When an event schedule other events for execution, they are\n\
620scheduled into the future, so they can easily go into the heap. So, a\n\
621heap is a good structure for implementing schedulers (this is what I\n\
622used for my MIDI sequencer :-).\n"
623"\n\
624Various structures for implementing schedulers have been extensively\n\
625studied, and heaps are good for this, as they are reasonably speedy,\n\
626the speed is almost constant, and the worst case is not much different\n\
627than the average case. However, there are other representations which\n\
628are more efficient overall, yet the worst cases might be terrible.\n"
629"\n\
630Heaps are also very useful in big disk sorts. You most probably all\n\
631know that a big sort implies producing \"runs\" (which are pre-sorted\n\
632sequences, which size is usually related to the amount of CPU memory),\n\
633followed by a merging passes for these runs, which merging is often\n\
634very cleverly organised[1]. It is very important that the initial\n\
635sort produces the longest runs possible. Tournaments are a good way\n\
636to that. If, using all the memory available to hold a tournament, you\n\
637replace and percolate items that happen to fit the current run, you'll\n\
638produce runs which are twice the size of the memory for random input,\n\
639and much better for input fuzzily ordered.\n"
640"\n\
641Moreover, if you output the 0'th item on disk and get an input which\n\
642may not fit in the current tournament (because the value \"wins\" over\n\
643the last output value), it cannot fit in the heap, so the size of the\n\
644heap decreases. The freed memory could be cleverly reused immediately\n\
645for progressively building a second heap, which grows at exactly the\n\
646same rate the first heap is melting. When the first heap completely\n\
647vanishes, you switch heaps and start a new run. Clever and quite\n\
648effective!\n\
649\n\
650In a word, heaps are useful memory structures to know. I use them in\n\
651a few applications, and I think it is good to keep a `heap' module\n\
652around. :-)\n"
653"\n\
654--------------------\n\
655[1] The disk balancing algorithms which are current, nowadays, are\n\
656more annoying than clever, and this is a consequence of the seeking\n\
657capabilities of the disks. On devices which cannot seek, like big\n\
658tape drives, the story was quite different, and one had to be very\n\
659clever to ensure (far in advance) that each tape movement will be the\n\
660most effective possible (that is, will best participate at\n\
661\"progressing\" the merge). Some tapes were even able to read\n\
662backwards, and this was also used to avoid the rewinding time.\n\
663Believe me, real good tape sorts were quite spectacular to watch!\n\
664From all times, sorting has always been a Great Art! :-)\n");
665
Martin v. Löwis1a214512008-06-11 05:26:20 +0000666
667static struct PyModuleDef _heapqmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 PyModuleDef_HEAD_INIT,
669 "_heapq",
670 module_doc,
671 -1,
672 heapq_methods,
673 NULL,
674 NULL,
675 NULL,
676 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +0000677};
678
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000679PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +0000680PyInit__heapq(void)
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000681{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 PyObject *m, *about;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 m = PyModule_Create(&_heapqmodule);
685 if (m == NULL)
686 return NULL;
687 about = PyUnicode_DecodeUTF8(__about__, strlen(__about__), NULL);
688 PyModule_AddObject(m, "__about__", about);
689 return m;
Raymond Hettingerc46cb2a2004-04-19 19:06:21 +0000690}
691