blob: 629f516457c1c9d93a54697d6b914124a02aef12 [file] [log] [blame]
Raymond Hettingerb3af1812003-11-08 10:24:38 +00001/* Drop in replacement for heapq.py
2
3C implementation derived directly from heapq.py in Py2.3
4which was written by Kevin O'Connor, augmented by Tim Peters,
5annotated by François Pinard, and converted to C by Raymond Hettinger.
6
7*/
8
9#include "Python.h"
10
11int
12_siftdown(PyListObject *heap, int startpos, int pos)
13{
14 PyObject *newitem, *parent;
15 int cmp, parentpos;
16
17 if (pos >= PyList_GET_SIZE(heap)) {
18 PyErr_SetString(PyExc_IndexError, "index out of range");
19 return -1;
20 }
21
22 newitem = PyList_GET_ITEM(heap, pos);
23 Py_INCREF(newitem);
24 /* Follow the path to the root, moving parents down until finding
25 a place newitem fits. */
26 while (pos > startpos){
27 parentpos = (pos - 1) >> 1;
28 parent = PyList_GET_ITEM(heap, parentpos);
29 cmp = PyObject_RichCompareBool(parent, newitem, Py_LE);
30 if (cmp == -1)
31 return -1;
32 if (cmp == 1)
33 break;
34 Py_INCREF(parent);
35 Py_DECREF(PyList_GET_ITEM(heap, pos));
36 PyList_SET_ITEM(heap, pos, parent);
37 pos = parentpos;
38 }
39 Py_DECREF(PyList_GET_ITEM(heap, pos));
40 PyList_SET_ITEM(heap, pos, newitem);
41 return 0;
42}
43
44int
45_siftup(PyListObject *heap, int pos)
46{
47 int startpos, endpos, childpos, rightpos;
48 int cmp;
49 PyObject *newitem, *tmp;
50
51 endpos = PyList_GET_SIZE(heap);
52 startpos = pos;
53 if (pos >= endpos) {
54 PyErr_SetString(PyExc_IndexError, "index out of range");
55 return -1;
56 }
57 newitem = PyList_GET_ITEM(heap, pos);
58 Py_INCREF(newitem);
59
60 /* Bubble up the smaller child until hitting a leaf. */
61 childpos = 2*pos + 1; /* leftmost child position */
62 while (childpos < endpos) {
63 /* Set childpos to index of smaller child. */
64 rightpos = childpos + 1;
65 if (rightpos < endpos) {
66 cmp = PyObject_RichCompareBool(
67 PyList_GET_ITEM(heap, rightpos),
68 PyList_GET_ITEM(heap, childpos),
69 Py_LE);
70 if (cmp == -1)
71 return -1;
72 if (cmp == 1)
73 childpos = rightpos;
74 }
75 /* Move the smaller child up. */
76 tmp = PyList_GET_ITEM(heap, childpos);
77 Py_INCREF(tmp);
78 Py_DECREF(PyList_GET_ITEM(heap, pos));
79 PyList_SET_ITEM(heap, pos, tmp);
80 pos = childpos;
81 childpos = 2*pos + 1;
82 }
83
84 /* The leaf at pos is empty now. Put newitem there, and and bubble
85 it up to its final resting place (by sifting its parents down). */
86 Py_DECREF(PyList_GET_ITEM(heap, pos));
87 PyList_SET_ITEM(heap, pos, newitem);
88 return _siftdown(heap, startpos, pos);
89}
90
91PyObject *
92heappush(PyObject *self, PyObject *args)
93{
94 PyObject *heap, *item;
95
96 if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
97 return NULL;
98
99 if (!PyList_Check(heap)) {
100 PyErr_SetString(PyExc_ValueError, "heap argument must be a list");
101 return NULL;
102 }
103
104 if (PyList_Append(heap, item) == -1)
105 return NULL;
106
107 if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
108 return NULL;
109 Py_INCREF(Py_None);
110 return Py_None;
111}
112
113PyDoc_STRVAR(heappush_doc,
114"Push item onto heap, maintaining the heap invariant.");
115
116PyObject *
117heappop(PyObject *self, PyObject *heap)
118{
119 PyObject *lastelt, *returnitem;
120 int n;
121
Raymond Hettinger236a2442003-11-15 12:33:01 +0000122 if (!PyList_Check(heap)) {
123 PyErr_SetString(PyExc_ValueError, "heap argument must be a list");
124 return NULL;
125 }
126
Raymond Hettingerb3af1812003-11-08 10:24:38 +0000127 /* # raises appropriate IndexError if heap is empty */
128 n = PyList_GET_SIZE(heap);
129 if (n == 0) {
130 PyErr_SetString(PyExc_IndexError, "index out of range");
131 return NULL;
132 }
133
134 lastelt = PyList_GET_ITEM(heap, n-1) ;
135 Py_INCREF(lastelt);
136 PyList_SetSlice(heap, n-1, n, NULL);
137 n--;
138
139 if (!n)
140 return lastelt;
141 returnitem = PyList_GET_ITEM(heap, 0);
142 PyList_SET_ITEM(heap, 0, lastelt);
143 if (_siftup((PyListObject *)heap, 0) == -1) {
144 Py_DECREF(returnitem);
145 return NULL;
146 }
147 return returnitem;
148}
149
150PyDoc_STRVAR(heappop_doc,
151"Pop the smallest item off the heap, maintaining the heap invariant.");
152
153PyObject *
154heapreplace(PyObject *self, PyObject *args)
155{
156 PyObject *heap, *item, *returnitem;
157
158 if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
159 return NULL;
160
161 if (!PyList_Check(heap)) {
162 PyErr_SetString(PyExc_ValueError, "heap argument must be a list");
163 return NULL;
164 }
165
166 if (PyList_GET_SIZE(heap) < 1) {
167 PyErr_SetString(PyExc_IndexError, "index out of range");
168 return NULL;
169 }
170
171 returnitem = PyList_GET_ITEM(heap, 0);
172 Py_INCREF(item);
173 PyList_SET_ITEM(heap, 0, item);
174 if (_siftup((PyListObject *)heap, 0) == -1) {
175 Py_DECREF(returnitem);
176 return NULL;
177 }
178 return returnitem;
179}
180
181PyDoc_STRVAR(heapreplace_doc,
182"Pop and return the current smallest value, and add the new item.\n\
183\n\
184This is more efficient than heappop() followed by heappush(), and can be\n\
185more appropriate when using a fixed-size heap. Note that the value\n\
186returned may be larger than item! That constrains reasonable uses of\n\
187this routine.\n");
188
189PyObject *
190heapify(PyObject *self, PyObject *heap)
191{
192 int i, n;
193
194 if (!PyList_Check(heap)) {
195 PyErr_SetString(PyExc_ValueError, "heap argument must be a list");
196 return NULL;
197 }
198
199 n = PyList_GET_SIZE(heap);
200 /* Transform bottom-up. The largest index there's any point to
201 looking at is the largest with a child index in-range, so must
202 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
203 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
204 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
205 and that's again n//2-1.
206 */
207 for (i=n/2-1 ; i>=0 ; i--)
208 if(_siftup((PyListObject *)heap, i) == -1)
209 return NULL;
210 Py_INCREF(Py_None);
211 return Py_None;
212}
213
214PyDoc_STRVAR(heapify_doc,
215"Transform list into a heap, in-place, in O(len(heap)) time.");
216
217static PyMethodDef heapq_methods[] = {
218 {"heappush", (PyCFunction)heappush,
219 METH_VARARGS, heappush_doc},
220 {"heappop", (PyCFunction)heappop,
221 METH_O, heappop_doc},
222 {"heapreplace", (PyCFunction)heapreplace,
223 METH_VARARGS, heapreplace_doc},
224 {"heapify", (PyCFunction)heapify,
225 METH_O, heapify_doc},
226 {NULL, NULL} /* sentinel */
227};
228
229PyDoc_STRVAR(module_doc,
230"Heap queue algorithm (a.k.a. priority queue).\n\
231\n\
232Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
233all k, counting elements from 0. For the sake of comparison,\n\
234non-existing elements are considered to be infinite. The interesting\n\
235property of a heap is that a[0] is always its smallest element.\n\
236\n\
237Usage:\n\
238\n\
239heap = [] # creates an empty heap\n\
240heappush(heap, item) # pushes a new item on the heap\n\
241item = heappop(heap) # pops the smallest item from the heap\n\
242item = heap[0] # smallest item on the heap without popping it\n\
243heapify(x) # transforms list into a heap, in-place, in linear time\n\
244item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
245 # new item; the heap size is unchanged\n\
246\n\
247Our API differs from textbook heap algorithms as follows:\n\
248\n\
249- We use 0-based indexing. This makes the relationship between the\n\
250 index for a node and the indexes for its children slightly less\n\
251 obvious, but is more suitable since Python uses 0-based indexing.\n\
252\n\
253- Our heappop() method returns the smallest item, not the largest.\n\
254\n\
255These two make it possible to view the heap as a regular Python list\n\
256without surprises: heap[0] is the smallest item, and heap.sort()\n\
257maintains the heap invariant!\n");
258
259
260PyDoc_STRVAR(__about__,
261"Heap queues\n\
262\n\
263[explanation by François Pinard]\n\
264\n\
265Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
266all k, counting elements from 0. For the sake of comparison,\n\
267non-existing elements are considered to be infinite. The interesting\n\
268property of a heap is that a[0] is always its smallest element.\n"
269"\n\
270The strange invariant above is meant to be an efficient memory\n\
271representation for a tournament. The numbers below are `k', not a[k]:\n\
272\n\
273 0\n\
274\n\
275 1 2\n\
276\n\
277 3 4 5 6\n\
278\n\
279 7 8 9 10 11 12 13 14\n\
280\n\
281 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
282\n\
283\n\
284In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
285an usual binary tournament we see in sports, each cell is the winner\n\
286over the two cells it tops, and we can trace the winner down the tree\n\
287to see all opponents s/he had. However, in many computer applications\n\
288of such tournaments, we do not need to trace the history of a winner.\n\
289To be more memory efficient, when a winner is promoted, we try to\n\
290replace it by something else at a lower level, and the rule becomes\n\
291that a cell and the two cells it tops contain three different items,\n\
292but the top cell \"wins\" over the two topped cells.\n"
293"\n\
294If this heap invariant is protected at all time, index 0 is clearly\n\
295the overall winner. The simplest algorithmic way to remove it and\n\
296find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
297diagram above) into the 0 position, and then percolate this new 0 down\n\
298the tree, exchanging values, until the invariant is re-established.\n\
299This is clearly logarithmic on the total number of items in the tree.\n\
300By iterating over all items, you get an O(n ln n) sort.\n"
301"\n\
302A nice feature of this sort is that you can efficiently insert new\n\
303items while the sort is going on, provided that the inserted items are\n\
304not \"better\" than the last 0'th element you extracted. This is\n\
305especially useful in simulation contexts, where the tree holds all\n\
306incoming events, and the \"win\" condition means the smallest scheduled\n\
307time. When an event schedule other events for execution, they are\n\
308scheduled into the future, so they can easily go into the heap. So, a\n\
309heap is a good structure for implementing schedulers (this is what I\n\
310used for my MIDI sequencer :-).\n"
311"\n\
312Various structures for implementing schedulers have been extensively\n\
313studied, and heaps are good for this, as they are reasonably speedy,\n\
314the speed is almost constant, and the worst case is not much different\n\
315than the average case. However, there are other representations which\n\
316are more efficient overall, yet the worst cases might be terrible.\n"
317"\n\
318Heaps are also very useful in big disk sorts. You most probably all\n\
319know that a big sort implies producing \"runs\" (which are pre-sorted\n\
320sequences, which size is usually related to the amount of CPU memory),\n\
321followed by a merging passes for these runs, which merging is often\n\
322very cleverly organised[1]. It is very important that the initial\n\
323sort produces the longest runs possible. Tournaments are a good way\n\
324to that. If, using all the memory available to hold a tournament, you\n\
325replace and percolate items that happen to fit the current run, you'll\n\
326produce runs which are twice the size of the memory for random input,\n\
327and much better for input fuzzily ordered.\n"
328"\n\
329Moreover, if you output the 0'th item on disk and get an input which\n\
330may not fit in the current tournament (because the value \"wins\" over\n\
331the last output value), it cannot fit in the heap, so the size of the\n\
332heap decreases. The freed memory could be cleverly reused immediately\n\
333for progressively building a second heap, which grows at exactly the\n\
334same rate the first heap is melting. When the first heap completely\n\
335vanishes, you switch heaps and start a new run. Clever and quite\n\
336effective!\n\
337\n\
338In a word, heaps are useful memory structures to know. I use them in\n\
339a few applications, and I think it is good to keep a `heap' module\n\
340around. :-)\n"
341"\n\
342--------------------\n\
343[1] The disk balancing algorithms which are current, nowadays, are\n\
344more annoying than clever, and this is a consequence of the seeking\n\
345capabilities of the disks. On devices which cannot seek, like big\n\
346tape drives, the story was quite different, and one had to be very\n\
347clever to ensure (far in advance) that each tape movement will be the\n\
348most effective possible (that is, will best participate at\n\
349\"progressing\" the merge). Some tapes were even able to read\n\
350backwards, and this was also used to avoid the rewinding time.\n\
351Believe me, real good tape sorts were quite spectacular to watch!\n\
352From all times, sorting has always been a Great Art! :-)\n");
353
354PyMODINIT_FUNC
355initheapq(void)
356{
357 PyObject *m;
358
359 m = Py_InitModule3("heapq", heapq_methods, module_doc);
360 PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
361}
362