/* Drop in replacement for heapq.py

C implementation derived directly from heapq.py in Py2.3
which was written by Kevin O'Connor, augmented by Tim Peters,
annotated by François Pinard, and converted to C by Raymond Hettinger.

*/
 | 8 |  | 
 | 9 | #include "Python.h" | 
 | 10 |  | 
 | 11 | static int | 
 | 12 | _siftdown(PyListObject *heap, int startpos, int pos) | 
 | 13 | { | 
 | 14 | 	PyObject *newitem, *parent; | 
 | 15 | 	int cmp, parentpos; | 
 | 16 |  | 
 | 17 | 	assert(PyList_Check(heap)); | 
 | 18 | 	if (pos >= PyList_GET_SIZE(heap)) { | 
 | 19 | 		PyErr_SetString(PyExc_IndexError, "index out of range"); | 
 | 20 | 		return -1; | 
 | 21 | 	} | 
 | 22 |  | 
 | 23 | 	newitem = PyList_GET_ITEM(heap, pos); | 
 | 24 | 	Py_INCREF(newitem); | 
 | 25 | 	/* Follow the path to the root, moving parents down until finding | 
 | 26 | 	   a place newitem fits. */ | 
 | 27 | 	while (pos > startpos){ | 
 | 28 | 		parentpos = (pos - 1) >> 1; | 
 | 29 | 		parent = PyList_GET_ITEM(heap, parentpos); | 
 | 30 | 		cmp = PyObject_RichCompareBool(parent, newitem, Py_LE); | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 31 | 		if (cmp == -1) { | 
 | 32 | 			Py_DECREF(newitem); | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 33 | 			return -1; | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 34 | 		} | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 35 | 		if (cmp == 1) | 
 | 36 | 			break; | 
 | 37 | 		Py_INCREF(parent); | 
 | 38 | 		Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 39 | 		PyList_SET_ITEM(heap, pos, parent); | 
 | 40 | 		pos = parentpos; | 
 | 41 | 	} | 
 | 42 | 	Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 43 | 	PyList_SET_ITEM(heap, pos, newitem); | 
 | 44 | 	return 0; | 
 | 45 | } | 
 | 46 |  | 
 | 47 | static int | 
 | 48 | _siftup(PyListObject *heap, int pos) | 
 | 49 | { | 
 | 50 | 	int startpos, endpos, childpos, rightpos; | 
 | 51 | 	int cmp; | 
 | 52 | 	PyObject *newitem, *tmp; | 
 | 53 |  | 
 | 54 | 	assert(PyList_Check(heap)); | 
 | 55 | 	endpos = PyList_GET_SIZE(heap); | 
 | 56 | 	startpos = pos; | 
 | 57 | 	if (pos >= endpos) { | 
 | 58 | 		PyErr_SetString(PyExc_IndexError, "index out of range"); | 
 | 59 | 		return -1; | 
 | 60 | 	} | 
 | 61 | 	newitem = PyList_GET_ITEM(heap, pos); | 
 | 62 | 	Py_INCREF(newitem); | 
 | 63 |  | 
 | 64 | 	/* Bubble up the smaller child until hitting a leaf. */ | 
 | 65 | 	childpos = 2*pos + 1;    /* leftmost child position  */ | 
 | 66 | 	while (childpos < endpos) { | 
 | 67 | 		/* Set childpos to index of smaller child.   */ | 
 | 68 | 		rightpos = childpos + 1; | 
 | 69 | 		if (rightpos < endpos) { | 
 | 70 | 			cmp = PyObject_RichCompareBool( | 
 | 71 | 				PyList_GET_ITEM(heap, rightpos), | 
 | 72 | 				PyList_GET_ITEM(heap, childpos), | 
 | 73 | 				Py_LE); | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 74 | 			if (cmp == -1) { | 
 | 75 | 				Py_DECREF(newitem); | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 76 | 				return -1; | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 77 | 			} | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 78 | 			if (cmp == 1) | 
 | 79 | 				childpos = rightpos; | 
 | 80 | 		} | 
 | 81 | 		/* Move the smaller child up. */ | 
 | 82 | 		tmp = PyList_GET_ITEM(heap, childpos); | 
 | 83 | 		Py_INCREF(tmp); | 
 | 84 | 		Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 85 | 		PyList_SET_ITEM(heap, pos, tmp); | 
 | 86 | 		pos = childpos; | 
 | 87 | 		childpos = 2*pos + 1; | 
 | 88 | 	} | 
 | 89 |  | 
 | 90 | 	/* The leaf at pos is empty now.  Put newitem there, and and bubble | 
 | 91 | 	   it up to its final resting place (by sifting its parents down). */ | 
 | 92 | 	Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 93 | 	PyList_SET_ITEM(heap, pos, newitem); | 
 | 94 | 	return _siftdown(heap, startpos, pos); | 
 | 95 | } | 
 | 96 |  | 
 | 97 | static PyObject * | 
 | 98 | heappush(PyObject *self, PyObject *args) | 
 | 99 | { | 
 | 100 | 	PyObject *heap, *item; | 
 | 101 |  | 
 | 102 | 	if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item)) | 
 | 103 | 		return NULL; | 
 | 104 |  | 
 | 105 | 	if (!PyList_Check(heap)) { | 
 | 106 | 		PyErr_SetString(PyExc_TypeError, "heap argument must be a list"); | 
 | 107 | 		return NULL; | 
 | 108 | 	} | 
 | 109 |  | 
 | 110 | 	if (PyList_Append(heap, item) == -1) | 
 | 111 | 		return NULL; | 
 | 112 |  | 
 | 113 | 	if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1) | 
 | 114 | 		return NULL; | 
 | 115 | 	Py_INCREF(Py_None); | 
 | 116 | 	return Py_None; | 
 | 117 | } | 
 | 118 |  | 
 | 119 | PyDoc_STRVAR(heappush_doc, | 
 | 120 | "Push item onto heap, maintaining the heap invariant."); | 
 | 121 |  | 
 | 122 | static PyObject * | 
 | 123 | heappop(PyObject *self, PyObject *heap) | 
 | 124 | { | 
 | 125 | 	PyObject *lastelt, *returnitem; | 
 | 126 | 	int n; | 
 | 127 |  | 
 | 128 | 	if (!PyList_Check(heap)) { | 
 | 129 | 		PyErr_SetString(PyExc_TypeError, "heap argument must be a list"); | 
 | 130 | 		return NULL; | 
 | 131 | 	} | 
 | 132 |  | 
 | 133 | 	/* # raises appropriate IndexError if heap is empty */ | 
 | 134 | 	n = PyList_GET_SIZE(heap); | 
 | 135 | 	if (n == 0) { | 
 | 136 | 		PyErr_SetString(PyExc_IndexError, "index out of range"); | 
 | 137 | 		return NULL; | 
 | 138 | 	} | 
 | 139 |  | 
 | 140 | 	lastelt = PyList_GET_ITEM(heap, n-1) ; | 
 | 141 | 	Py_INCREF(lastelt); | 
 | 142 | 	PyList_SetSlice(heap, n-1, n, NULL); | 
 | 143 | 	n--; | 
 | 144 |  | 
 | 145 | 	if (!n)  | 
 | 146 | 		return lastelt; | 
 | 147 | 	returnitem = PyList_GET_ITEM(heap, 0); | 
 | 148 | 	PyList_SET_ITEM(heap, 0, lastelt); | 
 | 149 | 	if (_siftup((PyListObject *)heap, 0) == -1) { | 
 | 150 | 		Py_DECREF(returnitem); | 
 | 151 | 		return NULL; | 
 | 152 | 	} | 
 | 153 | 	return returnitem; | 
 | 154 | } | 
 | 155 |  | 
 | 156 | PyDoc_STRVAR(heappop_doc, | 
 | 157 | "Pop the smallest item off the heap, maintaining the heap invariant."); | 
 | 158 |  | 
 | 159 | static PyObject * | 
 | 160 | heapreplace(PyObject *self, PyObject *args) | 
 | 161 | { | 
 | 162 | 	PyObject *heap, *item, *returnitem; | 
 | 163 |  | 
 | 164 | 	if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item)) | 
 | 165 | 		return NULL; | 
 | 166 |  | 
 | 167 | 	if (!PyList_Check(heap)) { | 
 | 168 | 		PyErr_SetString(PyExc_TypeError, "heap argument must be a list"); | 
 | 169 | 		return NULL; | 
 | 170 | 	} | 
 | 171 |  | 
 | 172 | 	if (PyList_GET_SIZE(heap) < 1) { | 
 | 173 | 		PyErr_SetString(PyExc_IndexError, "index out of range"); | 
 | 174 | 		return NULL; | 
 | 175 | 	} | 
 | 176 |  | 
 | 177 | 	returnitem = PyList_GET_ITEM(heap, 0); | 
 | 178 | 	Py_INCREF(item); | 
 | 179 | 	PyList_SET_ITEM(heap, 0, item); | 
 | 180 | 	if (_siftup((PyListObject *)heap, 0) == -1) { | 
 | 181 | 		Py_DECREF(returnitem); | 
 | 182 | 		return NULL; | 
 | 183 | 	} | 
 | 184 | 	return returnitem; | 
 | 185 | } | 
 | 186 |  | 
 | 187 | PyDoc_STRVAR(heapreplace_doc, | 
 | 188 | "Pop and return the current smallest value, and add the new item.\n\ | 
 | 189 | \n\ | 
 | 190 | This is more efficient than heappop() followed by heappush(), and can be\n\ | 
 | 191 | more appropriate when using a fixed-size heap.  Note that the value\n\ | 
 | 192 | returned may be larger than item!  That constrains reasonable uses of\n\ | 
| Raymond Hettinger | 8158e84 | 2004-09-06 07:04:09 +0000 | [diff] [blame] | 193 | this routine unless written as part of a conditional replacement:\n\n\ | 
 | 194 |         if item > heap[0]:\n\ | 
 | 195 |             item = heapreplace(heap, item)\n"); | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 196 |  | 
 | 197 | static PyObject * | 
 | 198 | heapify(PyObject *self, PyObject *heap) | 
 | 199 | { | 
 | 200 | 	int i, n; | 
 | 201 |  | 
 | 202 | 	if (!PyList_Check(heap)) { | 
 | 203 | 		PyErr_SetString(PyExc_TypeError, "heap argument must be a list"); | 
 | 204 | 		return NULL; | 
 | 205 | 	} | 
 | 206 |  | 
 | 207 | 	n = PyList_GET_SIZE(heap); | 
 | 208 | 	/* Transform bottom-up.  The largest index there's any point to | 
 | 209 | 	   looking at is the largest with a child index in-range, so must | 
 | 210 | 	   have 2*i + 1 < n, or i < (n-1)/2.  If n is even = 2*j, this is | 
 | 211 | 	   (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1.  If | 
 | 212 | 	   n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest, | 
 | 213 | 	   and that's again n//2-1. | 
 | 214 | 	*/ | 
 | 215 | 	for (i=n/2-1 ; i>=0 ; i--) | 
 | 216 | 		if(_siftup((PyListObject *)heap, i) == -1) | 
 | 217 | 			return NULL; | 
 | 218 | 	Py_INCREF(Py_None); | 
 | 219 | 	return Py_None; | 
 | 220 | } | 
 | 221 |  | 
 | 222 | PyDoc_STRVAR(heapify_doc, | 
 | 223 | "Transform list into a heap, in-place, in O(len(heap)) time."); | 
 | 224 |  | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 225 | static PyObject * | 
 | 226 | nlargest(PyObject *self, PyObject *args) | 
 | 227 | { | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 228 | 	PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem; | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 229 | 	int i, n; | 
 | 230 |  | 
| Raymond Hettinger | aefde43 | 2004-06-15 23:53:35 +0000 | [diff] [blame] | 231 | 	if (!PyArg_ParseTuple(args, "iO:nlargest", &n, &iterable)) | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 232 | 		return NULL; | 
 | 233 |  | 
 | 234 | 	it = PyObject_GetIter(iterable); | 
 | 235 | 	if (it == NULL) | 
 | 236 | 		return NULL; | 
 | 237 |  | 
 | 238 | 	heap = PyList_New(0); | 
| Raymond Hettinger | de72edd | 2004-06-13 15:36:56 +0000 | [diff] [blame] | 239 | 	if (heap == NULL) | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 240 | 		goto fail; | 
 | 241 |  | 
 | 242 | 	for (i=0 ; i<n ; i++ ){ | 
 | 243 | 		elem = PyIter_Next(it); | 
| Raymond Hettinger | de72edd | 2004-06-13 15:36:56 +0000 | [diff] [blame] | 244 | 		if (elem == NULL) { | 
 | 245 | 			if (PyErr_Occurred()) | 
 | 246 | 				goto fail; | 
 | 247 | 			else | 
 | 248 | 				goto sortit; | 
 | 249 | 		} | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 250 | 		if (PyList_Append(heap, elem) == -1) { | 
 | 251 | 			Py_DECREF(elem); | 
 | 252 | 			goto fail; | 
 | 253 | 		} | 
 | 254 | 		Py_DECREF(elem); | 
 | 255 | 	} | 
 | 256 | 	if (PyList_GET_SIZE(heap) == 0) | 
 | 257 | 		goto sortit; | 
 | 258 |  | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 259 | 	for (i=n/2-1 ; i>=0 ; i--) | 
 | 260 | 		if(_siftup((PyListObject *)heap, i) == -1) | 
 | 261 | 			goto fail; | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 262 |  | 
 | 263 | 	sol = PyList_GET_ITEM(heap, 0); | 
 | 264 | 	while (1) { | 
 | 265 | 		elem = PyIter_Next(it); | 
 | 266 | 		if (elem == NULL) { | 
 | 267 | 			if (PyErr_Occurred()) | 
 | 268 | 				goto fail; | 
 | 269 | 			else | 
 | 270 | 				goto sortit; | 
 | 271 | 		} | 
 | 272 | 		if (PyObject_RichCompareBool(elem, sol, Py_LE)) { | 
 | 273 | 			Py_DECREF(elem); | 
 | 274 | 			continue; | 
 | 275 | 		} | 
 | 276 | 		oldelem = PyList_GET_ITEM(heap, 0); | 
 | 277 | 		PyList_SET_ITEM(heap, 0, elem); | 
 | 278 | 		Py_DECREF(oldelem); | 
 | 279 | 		if (_siftup((PyListObject *)heap, 0) == -1) | 
 | 280 | 			goto fail; | 
 | 281 | 		sol = PyList_GET_ITEM(heap, 0); | 
 | 282 | 	} | 
 | 283 | sortit: | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 284 | 	if (PyList_Sort(heap) == -1) | 
 | 285 | 		goto fail; | 
 | 286 | 	if (PyList_Reverse(heap) == -1) | 
 | 287 | 		goto fail; | 
| Raymond Hettinger | de72edd | 2004-06-13 15:36:56 +0000 | [diff] [blame] | 288 | 	Py_DECREF(it); | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 289 | 	return heap; | 
 | 290 |  | 
 | 291 | fail: | 
 | 292 | 	Py_DECREF(it); | 
 | 293 | 	Py_XDECREF(heap); | 
 | 294 | 	return NULL; | 
 | 295 | } | 
 | 296 |  | 
 | 297 | PyDoc_STRVAR(nlargest_doc, | 
 | 298 | "Find the n largest elements in a dataset.\n\ | 
 | 299 | \n\ | 
 | 300 | Equivalent to:  sorted(iterable, reverse=True)[:n]\n"); | 
 | 301 |  | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 302 | static int | 
 | 303 | _siftdownmax(PyListObject *heap, int startpos, int pos) | 
 | 304 | { | 
 | 305 | 	PyObject *newitem, *parent; | 
 | 306 | 	int cmp, parentpos; | 
 | 307 |  | 
 | 308 | 	assert(PyList_Check(heap)); | 
 | 309 | 	if (pos >= PyList_GET_SIZE(heap)) { | 
 | 310 | 		PyErr_SetString(PyExc_IndexError, "index out of range"); | 
 | 311 | 		return -1; | 
 | 312 | 	} | 
 | 313 |  | 
 | 314 | 	newitem = PyList_GET_ITEM(heap, pos); | 
 | 315 | 	Py_INCREF(newitem); | 
 | 316 | 	/* Follow the path to the root, moving parents down until finding | 
 | 317 | 	   a place newitem fits. */ | 
 | 318 | 	while (pos > startpos){ | 
 | 319 | 		parentpos = (pos - 1) >> 1; | 
 | 320 | 		parent = PyList_GET_ITEM(heap, parentpos); | 
 | 321 | 		cmp = PyObject_RichCompareBool(newitem, parent, Py_LE); | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 322 | 		if (cmp == -1) { | 
 | 323 | 			Py_DECREF(newitem); | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 324 | 			return -1; | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 325 | 		} | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 326 | 		if (cmp == 1) | 
 | 327 | 			break; | 
 | 328 | 		Py_INCREF(parent); | 
 | 329 | 		Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 330 | 		PyList_SET_ITEM(heap, pos, parent); | 
 | 331 | 		pos = parentpos; | 
 | 332 | 	} | 
 | 333 | 	Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 334 | 	PyList_SET_ITEM(heap, pos, newitem); | 
 | 335 | 	return 0; | 
 | 336 | } | 
 | 337 |  | 
 | 338 | static int | 
 | 339 | _siftupmax(PyListObject *heap, int pos) | 
 | 340 | { | 
 | 341 | 	int startpos, endpos, childpos, rightpos; | 
 | 342 | 	int cmp; | 
 | 343 | 	PyObject *newitem, *tmp; | 
 | 344 |  | 
 | 345 | 	assert(PyList_Check(heap)); | 
 | 346 | 	endpos = PyList_GET_SIZE(heap); | 
 | 347 | 	startpos = pos; | 
 | 348 | 	if (pos >= endpos) { | 
 | 349 | 		PyErr_SetString(PyExc_IndexError, "index out of range"); | 
 | 350 | 		return -1; | 
 | 351 | 	} | 
 | 352 | 	newitem = PyList_GET_ITEM(heap, pos); | 
 | 353 | 	Py_INCREF(newitem); | 
 | 354 |  | 
 | 355 | 	/* Bubble up the smaller child until hitting a leaf. */ | 
 | 356 | 	childpos = 2*pos + 1;    /* leftmost child position  */ | 
 | 357 | 	while (childpos < endpos) { | 
 | 358 | 		/* Set childpos to index of smaller child.   */ | 
 | 359 | 		rightpos = childpos + 1; | 
 | 360 | 		if (rightpos < endpos) { | 
 | 361 | 			cmp = PyObject_RichCompareBool( | 
 | 362 | 				PyList_GET_ITEM(heap, childpos), | 
 | 363 | 				PyList_GET_ITEM(heap, rightpos), | 
 | 364 | 				Py_LE); | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 365 | 			if (cmp == -1) { | 
 | 366 | 				Py_DECREF(newitem); | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 367 | 				return -1; | 
| Raymond Hettinger | 855d9a9 | 2004-09-28 00:03:54 +0000 | [diff] [blame] | 368 | 			} | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 369 | 			if (cmp == 1) | 
 | 370 | 				childpos = rightpos; | 
 | 371 | 		} | 
 | 372 | 		/* Move the smaller child up. */ | 
 | 373 | 		tmp = PyList_GET_ITEM(heap, childpos); | 
 | 374 | 		Py_INCREF(tmp); | 
 | 375 | 		Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 376 | 		PyList_SET_ITEM(heap, pos, tmp); | 
 | 377 | 		pos = childpos; | 
 | 378 | 		childpos = 2*pos + 1; | 
 | 379 | 	} | 
 | 380 |  | 
 | 381 | 	/* The leaf at pos is empty now.  Put newitem there, and and bubble | 
 | 382 | 	   it up to its final resting place (by sifting its parents down). */ | 
 | 383 | 	Py_DECREF(PyList_GET_ITEM(heap, pos)); | 
 | 384 | 	PyList_SET_ITEM(heap, pos, newitem); | 
 | 385 | 	return _siftdownmax(heap, startpos, pos); | 
 | 386 | } | 
 | 387 |  | 
 | 388 | static PyObject * | 
 | 389 | nsmallest(PyObject *self, PyObject *args) | 
 | 390 | { | 
 | 391 | 	PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem; | 
 | 392 | 	int i, n; | 
 | 393 |  | 
| Raymond Hettinger | aefde43 | 2004-06-15 23:53:35 +0000 | [diff] [blame] | 394 | 	if (!PyArg_ParseTuple(args, "iO:nsmallest", &n, &iterable)) | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 395 | 		return NULL; | 
 | 396 |  | 
 | 397 | 	it = PyObject_GetIter(iterable); | 
 | 398 | 	if (it == NULL) | 
 | 399 | 		return NULL; | 
 | 400 |  | 
 | 401 | 	heap = PyList_New(0); | 
| Raymond Hettinger | de72edd | 2004-06-13 15:36:56 +0000 | [diff] [blame] | 402 | 	if (heap == NULL) | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 403 | 		goto fail; | 
 | 404 |  | 
 | 405 | 	for (i=0 ; i<n ; i++ ){ | 
 | 406 | 		elem = PyIter_Next(it); | 
| Raymond Hettinger | de72edd | 2004-06-13 15:36:56 +0000 | [diff] [blame] | 407 | 		if (elem == NULL) { | 
 | 408 | 			if (PyErr_Occurred()) | 
 | 409 | 				goto fail; | 
 | 410 | 			else | 
 | 411 | 				goto sortit; | 
 | 412 | 		} | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 413 | 		if (PyList_Append(heap, elem) == -1) { | 
 | 414 | 			Py_DECREF(elem); | 
 | 415 | 			goto fail; | 
 | 416 | 		} | 
 | 417 | 		Py_DECREF(elem); | 
 | 418 | 	} | 
 | 419 | 	n = PyList_GET_SIZE(heap); | 
 | 420 | 	if (n == 0) | 
 | 421 | 		goto sortit; | 
 | 422 |  | 
 | 423 | 	for (i=n/2-1 ; i>=0 ; i--) | 
 | 424 | 		if(_siftupmax((PyListObject *)heap, i) == -1) | 
 | 425 | 			goto fail; | 
 | 426 |  | 
 | 427 | 	los = PyList_GET_ITEM(heap, 0); | 
 | 428 | 	while (1) { | 
 | 429 | 		elem = PyIter_Next(it); | 
 | 430 | 		if (elem == NULL) { | 
 | 431 | 			if (PyErr_Occurred()) | 
 | 432 | 				goto fail; | 
 | 433 | 			else | 
 | 434 | 				goto sortit; | 
 | 435 | 		} | 
 | 436 | 		if (PyObject_RichCompareBool(los, elem, Py_LE)) { | 
 | 437 | 			Py_DECREF(elem); | 
 | 438 | 			continue; | 
 | 439 | 		} | 
 | 440 |  | 
 | 441 | 		oldelem = PyList_GET_ITEM(heap, 0); | 
 | 442 | 		PyList_SET_ITEM(heap, 0, elem); | 
 | 443 | 		Py_DECREF(oldelem); | 
 | 444 | 		if (_siftupmax((PyListObject *)heap, 0) == -1) | 
 | 445 | 			goto fail; | 
 | 446 | 		los = PyList_GET_ITEM(heap, 0); | 
 | 447 | 	} | 
 | 448 |  | 
 | 449 | sortit: | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 450 | 	if (PyList_Sort(heap) == -1) | 
 | 451 | 		goto fail; | 
| Raymond Hettinger | de72edd | 2004-06-13 15:36:56 +0000 | [diff] [blame] | 452 | 	Py_DECREF(it); | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 453 | 	return heap; | 
 | 454 |  | 
 | 455 | fail: | 
 | 456 | 	Py_DECREF(it); | 
 | 457 | 	Py_XDECREF(heap); | 
 | 458 | 	return NULL; | 
 | 459 | } | 
 | 460 |  | 
 | 461 | PyDoc_STRVAR(nsmallest_doc, | 
 | 462 | "Find the n smallest elements in a dataset.\n\ | 
 | 463 | \n\ | 
 | 464 | Equivalent to:  sorted(iterable)[:n]\n"); | 
 | 465 |  | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 466 | static PyMethodDef heapq_methods[] = { | 
 | 467 | 	{"heappush",	(PyCFunction)heappush,		 | 
 | 468 | 		METH_VARARGS,	heappush_doc}, | 
 | 469 | 	{"heappop",	(PyCFunction)heappop, | 
 | 470 | 		METH_O,		heappop_doc}, | 
 | 471 | 	{"heapreplace",	(PyCFunction)heapreplace, | 
 | 472 | 		METH_VARARGS,	heapreplace_doc}, | 
 | 473 | 	{"heapify",	(PyCFunction)heapify, | 
 | 474 | 		METH_O,		heapify_doc}, | 
| Raymond Hettinger | c929766 | 2004-06-12 22:48:46 +0000 | [diff] [blame] | 475 | 	{"nlargest",	(PyCFunction)nlargest, | 
 | 476 | 		METH_VARARGS,	nlargest_doc}, | 
| Raymond Hettinger | 2e3dfaf | 2004-06-13 05:26:33 +0000 | [diff] [blame] | 477 | 	{"nsmallest",	(PyCFunction)nsmallest, | 
 | 478 | 		METH_VARARGS,	nsmallest_doc}, | 
| Raymond Hettinger | c46cb2a | 2004-04-19 19:06:21 +0000 | [diff] [blame] | 479 | 	{NULL,		NULL}		/* sentinel */ | 
 | 480 | }; | 
 | 481 |  | 
 | 482 | PyDoc_STRVAR(module_doc, | 
 | 483 | "Heap queue algorithm (a.k.a. priority queue).\n\ | 
 | 484 | \n\ | 
 | 485 | Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\ | 
 | 486 | all k, counting elements from 0.  For the sake of comparison,\n\ | 
 | 487 | non-existing elements are considered to be infinite.  The interesting\n\ | 
 | 488 | property of a heap is that a[0] is always its smallest element.\n\ | 
 | 489 | \n\ | 
 | 490 | Usage:\n\ | 
 | 491 | \n\ | 
 | 492 | heap = []            # creates an empty heap\n\ | 
 | 493 | heappush(heap, item) # pushes a new item on the heap\n\ | 
 | 494 | item = heappop(heap) # pops the smallest item from the heap\n\ | 
 | 495 | item = heap[0]       # smallest item on the heap without popping it\n\ | 
 | 496 | heapify(x)           # transforms list into a heap, in-place, in linear time\n\ | 
 | 497 | item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\ | 
 | 498 |                                # new item; the heap size is unchanged\n\ | 
 | 499 | \n\ | 
 | 500 | Our API differs from textbook heap algorithms as follows:\n\ | 
 | 501 | \n\ | 
 | 502 | - We use 0-based indexing.  This makes the relationship between the\n\ | 
 | 503 |   index for a node and the indexes for its children slightly less\n\ | 
 | 504 |   obvious, but is more suitable since Python uses 0-based indexing.\n\ | 
 | 505 | \n\ | 
 | 506 | - Our heappop() method returns the smallest item, not the largest.\n\ | 
 | 507 | \n\ | 
 | 508 | These two make it possible to view the heap as a regular Python list\n\ | 
 | 509 | without surprises: heap[0] is the smallest item, and heap.sort()\n\ | 
 | 510 | maintains the heap invariant!\n"); | 
 | 511 |  | 
 | 512 |  | 
 | 513 | PyDoc_STRVAR(__about__, | 
 | 514 | "Heap queues\n\ | 
 | 515 | \n\ | 
 | 516 | [explanation by François Pinard]\n\ | 
 | 517 | \n\ | 
 | 518 | Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\ | 
 | 519 | all k, counting elements from 0.  For the sake of comparison,\n\ | 
 | 520 | non-existing elements are considered to be infinite.  The interesting\n\ | 
 | 521 | property of a heap is that a[0] is always its smallest element.\n" | 
 | 522 | "\n\ | 
 | 523 | The strange invariant above is meant to be an efficient memory\n\ | 
 | 524 | representation for a tournament.  The numbers below are `k', not a[k]:\n\ | 
 | 525 | \n\ | 
 | 526 |                                    0\n\ | 
 | 527 | \n\ | 
 | 528 |                   1                                 2\n\ | 
 | 529 | \n\ | 
 | 530 |           3               4                5               6\n\ | 
 | 531 | \n\ | 
 | 532 |       7       8       9       10      11      12      13      14\n\ | 
 | 533 | \n\ | 
 | 534 |     15 16   17 18   19 20   21 22   23 24   25 26   27 28   29 30\n\ | 
 | 535 | \n\ | 
 | 536 | \n\ | 
 | 537 | In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'.  In\n\ | 
 | 538 | an usual binary tournament we see in sports, each cell is the winner\n\ | 
 | 539 | over the two cells it tops, and we can trace the winner down the tree\n\ | 
 | 540 | to see all opponents s/he had.  However, in many computer applications\n\ | 
 | 541 | of such tournaments, we do not need to trace the history of a winner.\n\ | 
 | 542 | To be more memory efficient, when a winner is promoted, we try to\n\ | 
 | 543 | replace it by something else at a lower level, and the rule becomes\n\ | 
 | 544 | that a cell and the two cells it tops contain three different items,\n\ | 
 | 545 | but the top cell \"wins\" over the two topped cells.\n" | 
 | 546 | "\n\ | 
 | 547 | If this heap invariant is protected at all time, index 0 is clearly\n\ | 
 | 548 | the overall winner.  The simplest algorithmic way to remove it and\n\ | 
 | 549 | find the \"next\" winner is to move some loser (let's say cell 30 in the\n\ | 
 | 550 | diagram above) into the 0 position, and then percolate this new 0 down\n\ | 
 | 551 | the tree, exchanging values, until the invariant is re-established.\n\ | 
 | 552 | This is clearly logarithmic on the total number of items in the tree.\n\ | 
 | 553 | By iterating over all items, you get an O(n ln n) sort.\n" | 
 | 554 | "\n\ | 
 | 555 | A nice feature of this sort is that you can efficiently insert new\n\ | 
 | 556 | items while the sort is going on, provided that the inserted items are\n\ | 
 | 557 | not \"better\" than the last 0'th element you extracted.  This is\n\ | 
 | 558 | especially useful in simulation contexts, where the tree holds all\n\ | 
 | 559 | incoming events, and the \"win\" condition means the smallest scheduled\n\ | 
 | 560 | time.  When an event schedule other events for execution, they are\n\ | 
 | 561 | scheduled into the future, so they can easily go into the heap.  So, a\n\ | 
 | 562 | heap is a good structure for implementing schedulers (this is what I\n\ | 
 | 563 | used for my MIDI sequencer :-).\n" | 
 | 564 | "\n\ | 
 | 565 | Various structures for implementing schedulers have been extensively\n\ | 
 | 566 | studied, and heaps are good for this, as they are reasonably speedy,\n\ | 
 | 567 | the speed is almost constant, and the worst case is not much different\n\ | 
 | 568 | than the average case.  However, there are other representations which\n\ | 
 | 569 | are more efficient overall, yet the worst cases might be terrible.\n" | 
 | 570 | "\n\ | 
 | 571 | Heaps are also very useful in big disk sorts.  You most probably all\n\ | 
 | 572 | know that a big sort implies producing \"runs\" (which are pre-sorted\n\ | 
 | 573 | sequences, which size is usually related to the amount of CPU memory),\n\ | 
 | 574 | followed by a merging passes for these runs, which merging is often\n\ | 
 | 575 | very cleverly organised[1].  It is very important that the initial\n\ | 
 | 576 | sort produces the longest runs possible.  Tournaments are a good way\n\ | 
 | 577 | to that.  If, using all the memory available to hold a tournament, you\n\ | 
 | 578 | replace and percolate items that happen to fit the current run, you'll\n\ | 
 | 579 | produce runs which are twice the size of the memory for random input,\n\ | 
 | 580 | and much better for input fuzzily ordered.\n" | 
 | 581 | "\n\ | 
 | 582 | Moreover, if you output the 0'th item on disk and get an input which\n\ | 
 | 583 | may not fit in the current tournament (because the value \"wins\" over\n\ | 
 | 584 | the last output value), it cannot fit in the heap, so the size of the\n\ | 
 | 585 | heap decreases.  The freed memory could be cleverly reused immediately\n\ | 
 | 586 | for progressively building a second heap, which grows at exactly the\n\ | 
 | 587 | same rate the first heap is melting.  When the first heap completely\n\ | 
 | 588 | vanishes, you switch heaps and start a new run.  Clever and quite\n\ | 
 | 589 | effective!\n\ | 
 | 590 | \n\ | 
 | 591 | In a word, heaps are useful memory structures to know.  I use them in\n\ | 
 | 592 | a few applications, and I think it is good to keep a `heap' module\n\ | 
 | 593 | around. :-)\n" | 
 | 594 | "\n\ | 
 | 595 | --------------------\n\ | 
 | 596 | [1] The disk balancing algorithms which are current, nowadays, are\n\ | 
 | 597 | more annoying than clever, and this is a consequence of the seeking\n\ | 
 | 598 | capabilities of the disks.  On devices which cannot seek, like big\n\ | 
 | 599 | tape drives, the story was quite different, and one had to be very\n\ | 
 | 600 | clever to ensure (far in advance) that each tape movement will be the\n\ | 
 | 601 | most effective possible (that is, will best participate at\n\ | 
 | 602 | \"progressing\" the merge).  Some tapes were even able to read\n\ | 
 | 603 | backwards, and this was also used to avoid the rewinding time.\n\ | 
 | 604 | Believe me, real good tape sorts were quite spectacular to watch!\n\ | 
 | 605 | From all times, sorting has always been a Great Art! :-)\n"); | 
 | 606 |  | 
 | 607 | PyMODINIT_FUNC | 
 | 608 | init_heapq(void) | 
 | 609 | { | 
 | 610 | 	PyObject *m; | 
 | 611 |  | 
 | 612 | 	m = Py_InitModule3("_heapq", heapq_methods, module_doc); | 
 | 613 | 	PyModule_AddObject(m, "__about__", PyString_FromString(__about__)); | 
 | 614 | } | 
 | 615 |  |