blob: 695d45d9b1482f5b1b744f1458e7b03a782a92d2 [file] [log] [blame]
njne1b2b962005-08-14 22:13:00 +00001
2/*--------------------------------------------------------------------*/
3/*--- An ordered set implemented using an AVL tree. m_oset.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2005 Nicholas Nethercote
11 njn@valgrind.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31//----------------------------------------------------------------------
32// This file is based on:
33//
34// ANSI C Library for maintainance of AVL Balanced Trees
35// (C) 2000 Daniel Nagy, Budapest University of Technology and Economics
36// Released under GNU General Public License (GPL) version 2
37//----------------------------------------------------------------------
38
39// This file implements a generic ordered set using an AVL tree.
40//
41// Each node in the tree has two parts.
42// - First is the AVL metadata, which is three words: a left pointer, a
43// right pointer, and a word containing balancing information and a
44// "magic" value which provides some checking that the user has not
45// corrupted the metadata.
46// - Second is the user's data. This can be anything. Note that because it
47// comes after the metadata, it will only be word-aligned, even if the
48// user data is a struct that would normally be doubleword-aligned.
49//
50// AvlNode* node -> +---------------+ V
51// | struct |
52// | AvlNode |
53// void* element -> +---------------+ ^
54// | element | |
55// keyOff -> | key | elemSize
56// +---------------+ v
57//
58// Users have to allocate AvlNodes with OSet_AllocNode(), which allocates
59// space for the metadata.
60//
61// The terminology used throughout this file:
62// - a "node", usually called "n", is a pointer to the metadata.
63// - an "element", usually called "e", is a pointer to the user data.
64// - a "key", usually called "k", is a pointer to a key.
65//
66// The helper functions elem_of_node and node_of_elem do the pointer
67// arithmetic to switch between the node and the element. The node magic is
68// checked after each operation to make sure that we're really operating on
69// an AvlNode.
70//
71// Each tree also has an iterator. Note that we cannot use the iterator
72// internally within this file (eg. we could implement OSet_Size() by
73// stepping through with the iterator and counting nodes) because it's
74// non-reentrant -- the user might be using it themselves, and the
75// concurrent uses would screw things up.
76
77#include "pub_core_basics.h"
78#include "pub_core_libcbase.h"
79#include "pub_core_libcassert.h"
80#include "pub_core_libcprint.h"
81#include "pub_core_oset.h"
82
83/*--------------------------------------------------------------------*/
84/*--- Types and constants ---*/
85/*--------------------------------------------------------------------*/
86
87// Internal names for the OSet types.
88typedef OSet AvlTree;
89typedef OSetNode AvlNode;
90
91// The padding ensures that magic is right at the end of the node,
92// regardless of the machine's word size, so that any overwrites will be
93// detected earlier.
94struct _OSetNode {
95 AvlNode* left;
96 AvlNode* right;
97 Char balance;
98 Char padding[sizeof(void*)-3];
99 Short magic;
100};
101
102#define STACK_MAX 32 // At most 2**32 entries can be iterated over
103#define OSET_MAGIC 0x5b1f
104
105// An OSet (AVL tree). If cmp is NULL, the key must be a UWord, and must
106// be the first word in the element. If cmp is set, arbitrary keys in
107// arbitrary positions can be used.
108struct _OSet {
109 SizeT keyOff; // key offset
110 OSetCmp_t cmp; // compare a key and an element, or NULL
111 OSetAlloc_t alloc; // allocator
112 OSetFree_t free; // deallocator
113 Int nElems; // number of elements in the tree
114 AvlNode* root; // root node
115
116 AvlNode* nodeStack[STACK_MAX]; // Iterator node stack
117 Int numStack[STACK_MAX]; // Iterator num stack
118 Int stackTop; // Iterator stack pointer, one past end
119};
120
121/*--------------------------------------------------------------------*/
122/*--- Helper operations ---*/
123/*--------------------------------------------------------------------*/
124
125// Given a pointer to the node's element, return the pointer to the AvlNode
126// structure. If the node has a bad magic number, it will die with an
127// assertion failure.
128static inline
129AvlNode* node_of_elem(const void *elem)
130{
131 AvlNode* n = (AvlNode*)((Addr)elem - sizeof(AvlNode));
132 vg_assert2(n->magic == OSET_MAGIC,
133 "bad magic on node %p = %x (expected %x)\n"
134 "possible causes:\n"
135 " - node not allocated with VG_(OSet_AllocNode)()?\n"
136 " - node metadata corrupted by underwriting start of element?\n",
137 n, n->magic, OSET_MAGIC);
138 return n;
139}
140
141// Given an AvlNode, return the pointer to the element.
142static inline
143void* elem_of_node(const AvlNode *n)
144{
145 vg_assert2(n->magic == OSET_MAGIC,
146 "bad magic on node %p = %x (expected %x)\n"
147 "possible causes:\n"
148 " - node metadata corrupted by overwriting end of element?\n",
149 n, n->magic, OSET_MAGIC);
150 return (void*)((Addr)n + sizeof(AvlNode));
151}
152
153// Like elem_of_node, but no magic checking.
154static inline
155void* elem_of_node_no_check(const AvlNode *n)
156{
157 return (void*)((Addr)n + sizeof(AvlNode));
158}
159
160static inline
161void* slow_key_of_node(AvlTree* t, AvlNode* n)
162{
163 return (void*)((Addr)elem_of_node(n) + t->keyOff);
164}
165
166static inline
167void* fast_key_of_node(AvlNode* n)
168{
169 return elem_of_node(n);
170}
171
172// Compare the first word of each element. Inlining is *crucial*.
173static inline Int fast_cmp(void* k, AvlNode* n)
174{
175 return ( *(Int*)k - *(Int*)elem_of_node(n) );
176}
177
178// Compare a key and an element. Inlining is *crucial*.
179static inline Int slow_cmp(AvlTree* t, void* k, AvlNode* n)
180{
181 return t->cmp(k, elem_of_node(n));
182}
183
184
185// Swing to the left. Warning: no balance maintainance.
186static void avl_swl ( AvlNode** root )
187{
188 AvlNode* a = *root;
189 AvlNode* b = a->right;
190 *root = b;
191 a->right = b->left;
192 b->left = a;
193}
194
195// Swing to the right. Warning: no balance maintainance.
196static void avl_swr ( AvlNode** root )
197{
198 AvlNode* a = *root;
199 AvlNode* b = a->left;
200 *root = b;
201 a->left = b->right;
202 b->right = a;
203}
204
205// Balance maintainance after especially nasty swings.
206static void avl_nasty ( AvlNode* root )
207{
208 switch (root->balance) {
209 case -1:
210 root->left->balance = 0;
211 root->right->balance = 1;
212 break;
213 case 1:
214 root->left->balance =-1;
215 root->right->balance = 0;
216 break;
217 case 0:
218 root->left->balance = 0;
219 root->right->balance = 0;
220 }
221 root->balance = 0;
222}
223
224
225// Clear the iterator stack.
226static void stackClear(AvlTree* t)
227{
228 Int i;
229 vg_assert(t);
230 for (i = 0; i < STACK_MAX; i++) {
231 t->nodeStack[i] = NULL;
232 t->numStack[i] = 0;
233 }
234 t->stackTop = 0;
235}
236
237// Push onto the iterator stack.
238static void stackPush(AvlTree* t, AvlNode* n, Int i)
239{
240 vg_assert(t->stackTop < STACK_MAX);
241 vg_assert(1 <= i && i <= 3);
242 t->nodeStack[t->stackTop] = n;
243 t-> numStack[t->stackTop] = i;
244 t->stackTop++;
245}
246
247// Pop from the iterator stack.
248static Bool stackPop(AvlTree* t, AvlNode** n, Int* i)
249{
250 vg_assert(t->stackTop <= STACK_MAX);
251
252 if (t->stackTop > 0) {
253 t->stackTop--;
254 *n = t->nodeStack[t->stackTop];
255 *i = t-> numStack[t->stackTop];
256 vg_assert(1 <= *i && *i <= 3);
257 t->nodeStack[t->stackTop] = NULL;
258 t-> numStack[t->stackTop] = 0;
259 return True;
260 } else {
261 return False;
262 }
263}
264
265/*--------------------------------------------------------------------*/
266/*--- Creating and destroying AvlTrees and AvlNodes ---*/
267/*--------------------------------------------------------------------*/
268
269// The underscores avoid GCC complaints about overshadowing global names.
270AvlTree* VG_(OSet_Create)(OffT _keyOff, OSetCmp_t _cmp,
271 OSetAlloc_t _alloc, OSetFree_t _free)
272{
273 AvlTree* t;
274
275 // Check the padding is right and the AvlNode is the expected size.
276 vg_assert(sizeof(AvlNode) == 3*sizeof(void*));
277
278 // Sanity check args
279 vg_assert(_alloc);
280 vg_assert(_free);
281 if (!_cmp) vg_assert(0 == _keyOff); // If no cmp, offset must be zero
282
283 t = _alloc(sizeof(AvlTree));
284 t->keyOff = _keyOff;
285 t->cmp = _cmp;
286 t->alloc = _alloc;
287 t->free = _free;
288 t->nElems = 0;
289 t->root = NULL;
290 stackClear(t);
291
292 return t;
293}
294
295// Destructor, frees up all memory held by remaining nodes.
296void VG_(OSet_Destroy)(AvlTree* t)
297{
298 AvlNode* n;
299 Int i, sz = 0;
300
301 vg_assert(t);
302 stackClear(t);
303 if (t->root)
304 stackPush(t, t->root, 1);
305
306 // Free all the AvlNodes. This is a post-order traversal, because we
307 // must free all children of a node before the node itself.
308 while (stackPop(t, &n, &i)) {
309 switch (i) {
310 case 1:
311 stackPush(t, n, 2);
312 if (n->left) stackPush(t, n->left, 1);
313 break;
314 case 2:
315 stackPush(t, n, 3);
316 if (n->right) stackPush(t, n->right, 1);
317 break;
318 case 3:
319 t->free(n);
320 sz++;
321 break;
322 }
323 }
324 vg_assert(sz == t->nElems);
325
326 // Free the AvlTree itself.
327 t->free(t);
328}
329
330// Allocate and initialise a new node.
331void* VG_(OSet_AllocNode)(AvlTree* t, SizeT elemSize)
332{
333 Int nodeSize = sizeof(AvlNode) + elemSize;
334 AvlNode* n = t->alloc( nodeSize );
335 vg_assert(elemSize > 0);
336 VG_(memset)(n, 0, nodeSize);
337 n->magic = OSET_MAGIC;
338 return elem_of_node(n);
339}
340
341void VG_(OSet_FreeNode)(AvlTree* t, void* e)
342{
343 t->free( node_of_elem(e) );
344}
345
346/*--------------------------------------------------------------------*/
347/*--- Insertion ---*/
348/*--------------------------------------------------------------------*/
349
350static inline Int cmp_key_root(AvlTree* t, AvlNode* n)
351{
352 return t->cmp
353 ? slow_cmp(t, slow_key_of_node(t, n), t->root)
354 : fast_cmp( fast_key_of_node( n), t->root);
355}
356
357// Insert element e into the non-empty AVL tree t.
358// Returns True if the depth of the tree has grown.
359static Bool avl_insert(AvlTree* t, AvlNode* n)
360{
njn6db34702005-08-14 23:00:57 +0000361 Int cmpres = cmp_key_root(t, n);
njne1b2b962005-08-14 22:13:00 +0000362
363 if (cmpres < 0) {
364 // Insert into the left subtree.
365 if (t->root->left) {
366 // Only need to set the used fields in the subtree.
367 AvlTree left_subtree;
368 left_subtree.root = t->root->left;
369 left_subtree.cmp = t->cmp;
370 left_subtree.keyOff = t->keyOff;
371 if (avl_insert(&left_subtree, n)) {
372 switch (t->root->balance--) {
373 case 1: return False;
374 case 0: return True;
375 }
376 if (t->root->left->balance < 0) {
377 avl_swr(&(t->root));
378 t->root->balance = 0;
379 t->root->right->balance = 0;
380 } else {
381 avl_swl(&(t->root->left));
382 avl_swr(&(t->root));
383 avl_nasty(t->root);
384 }
385 } else {
386 t->root->left=left_subtree.root;
387 }
388 return False;
389 } else {
390 t->root->left = n;
391 if (t->root->balance--) return False;
392 return True;
393 }
394
395 } else if (cmpres > 0) {
396 // Insert into the right subtree
397 if (t->root->right) {
398 // Only need to set the used fields in the subtree.
399 AvlTree right_subtree;
400 right_subtree.root = t->root->right;
401 right_subtree.cmp = t->cmp;
402 right_subtree.keyOff = t->keyOff;
403 if (avl_insert(&right_subtree, n)) {
404 switch (t->root->balance++) {
405 case -1: return False;
406 case 0: return True;
407 }
408 if (t->root->right->balance > 0) {
409 avl_swl(&(t->root));
410 t->root->balance = 0;
411 t->root->left->balance = 0;
412 } else {
413 avl_swr(&(t->root->right));
414 avl_swl(&(t->root));
415 avl_nasty(t->root);
416 }
417 } else {
418 t->root->right=right_subtree.root;
419 }
420 return False;
421 } else {
422 t->root->right = n;
423 if (t->root->balance++) return False;
424 return True;
425 }
426
427 } else {
428 vg_assert2(0, "OSet_Insert: duplicate element added");
429 }
430}
431
432// Insert element e into the AVL tree t. This is just a wrapper for
433// avl_insert() which doesn't return a Bool.
434void VG_(OSet_Insert)(AvlTree* t, void* e)
435{
tom60a4b0b2005-10-12 10:45:27 +0000436 AvlNode* n;
437
njne1b2b962005-08-14 22:13:00 +0000438 vg_assert(t);
439
440 // Initialise. Even though OSet_AllocNode zeroes these fields, we should
441 // do it again in case a node is removed and then re-added to the tree.
tom60a4b0b2005-10-12 10:45:27 +0000442 n = node_of_elem(e);
njne1b2b962005-08-14 22:13:00 +0000443 n->left = 0;
444 n->right = 0;
445 n->balance = 0;
446
447 // Insert into an empty tree
448 if (!t->root) {
449 t->root = n;
450 } else {
451 avl_insert(t, n);
452 }
453
454 t->nElems++;
455 t->stackTop = 0; // So the iterator can't get out of sync
456}
457
458/*--------------------------------------------------------------------*/
459/*--- Lookup ---*/
460/*--------------------------------------------------------------------*/
461
462// Find the *node* in t matching k, or NULL if not found.
463static AvlNode* avl_lookup(AvlTree* t, void* k)
464{
465 Int cmpres;
njn0684dd42005-08-15 02:05:21 +0000466 AvlNode* curr = t->root;
njne1b2b962005-08-14 22:13:00 +0000467
468 if (t->cmp) {
469 // General case
470 while (True) {
471 if (curr == NULL) return NULL;
472 cmpres = slow_cmp(t, k, curr);
473 if (cmpres < 0) curr = curr->left; else
474 if (cmpres > 0) curr = curr->right; else
475 return curr;
476 }
477 } else {
478 // Fast-track special case. We use the no-check version of
479 // elem_of_node because it saves about 10% on lookup time. This
480 // shouldn't be very dangerous because each node will have been
481 // checked on insertion.
482 Int kk = *(Int*)k;
483 while (True) {
484 if (curr == NULL) return NULL;
485 cmpres = kk - *(Int*)elem_of_node_no_check(curr);
486 if (cmpres < 0) curr = curr->left; else
487 if (cmpres > 0) curr = curr->right; else
488 return curr;
489 }
490 }
491}
492
493// Find the *element* in t matching k, or NULL if not found.
494void* VG_(OSet_Lookup)(AvlTree* t, void* k)
495{
njn0684dd42005-08-15 02:05:21 +0000496 AvlNode* n;
497 vg_assert(t);
498 n = avl_lookup(t, k);
njne1b2b962005-08-14 22:13:00 +0000499 return ( n ? elem_of_node(n) : NULL );
500}
501
njnaa260e82005-08-17 21:06:07 +0000502// Find the *element* in t matching k, or NULL if not found; use the given
503// comparison function rather than the standard one.
504void* VG_(OSet_LookupWithCmp)(AvlTree* t, void* k, OSetCmp_t cmp)
505{
506 // Save the normal one to the side, then restore once we're done.
507 void* e;
508 OSetCmp_t tmpcmp;
509 vg_assert(t);
510 tmpcmp = t->cmp;
511 t->cmp = cmp;
512 e = VG_(OSet_Lookup)(t, k);
513 t->cmp = tmpcmp;
514 return e;
515}
516
njne1b2b962005-08-14 22:13:00 +0000517// Is there an element matching k?
518Bool VG_(OSet_Contains)(AvlTree* t, void* k)
519{
520 return (NULL != VG_(OSet_Lookup)(t, k));
521}
522
523/*--------------------------------------------------------------------*/
524/*--- Deletion ---*/
525/*--------------------------------------------------------------------*/
526
527static Bool avl_removeroot(AvlTree* t);
528
529// Remove an already-selected node n from the AVL tree t.
530// Returns True if the depth of the tree has shrunk.
531static Bool avl_remove(AvlTree* t, AvlNode* n)
532{
533 Bool ch;
njn6db34702005-08-14 23:00:57 +0000534 Int cmpres = cmp_key_root(t, n);
njne1b2b962005-08-14 22:13:00 +0000535
536 if (cmpres < 0) {
tom60a4b0b2005-10-12 10:45:27 +0000537 AvlTree left_subtree;
njne1b2b962005-08-14 22:13:00 +0000538 // Remove from the left subtree
539 vg_assert(t->root->left);
njne1b2b962005-08-14 22:13:00 +0000540 // Only need to set the used fields in the subtree.
541 left_subtree.root = t->root->left;
542 left_subtree.cmp = t->cmp;
543 left_subtree.keyOff = t->keyOff;
544 ch = avl_remove(&left_subtree, n);
545 t->root->left = left_subtree.root;
546 if (ch) {
547 switch (t->root->balance++) {
548 case -1: return True;
549 case 0: return False;
550 }
551 switch (t->root->right->balance) {
552 case 0:
553 avl_swl(&(t->root));
554 t->root->balance = -1;
555 t->root->left->balance = 1;
556 return False;
557 case 1:
558 avl_swl(&(t->root));
559 t->root->balance = 0;
560 t->root->left->balance = 0;
561 return True;
562 }
563 avl_swr(&(t->root->right));
564 avl_swl(&(t->root));
565 avl_nasty(t->root);
566 return True;
567 } else {
568 return False;
569 }
570
571 } else if (cmpres > 0) {
572 // Remove from the right subtree
573 AvlTree right_subtree;
574 vg_assert(t->root->right);
575 // Only need to set the used fields in the subtree.
576 right_subtree.root = t->root->right;
577 right_subtree.cmp = t->cmp;
578 right_subtree.keyOff = t->keyOff;
579 ch = avl_remove(&right_subtree, n);
580 t->root->right = right_subtree.root;
581 if (ch) {
582 switch (t->root->balance--) {
583 case 1: return True;
584 case 0: return False;
585 }
586 switch (t->root->left->balance) {
587 case 0:
588 avl_swr(&(t->root));
589 t->root->balance = 1;
590 t->root->right->balance = -1;
591 return False;
592 case -1:
593 avl_swr(&(t->root));
594 t->root->balance = 0;
595 t->root->right->balance = 0;
596 return True;
597 }
598 avl_swl(&(t->root->left));
599 avl_swr(&(t->root));
600 avl_nasty(t->root);
601 return True;
602 } else {
603 return False;
604 }
605
606 } else {
607 // Found the node to be removed.
608 vg_assert(t->root == n);
609 return avl_removeroot(t);
610 }
611}
612
613// Remove the root of the AVL tree t.
614// Returns True if the depth of the tree has shrunk.
615static Bool avl_removeroot(AvlTree* t)
616{
617 Int ch;
618 AvlNode* n;
619
njne1b2b962005-08-14 22:13:00 +0000620 if (!t->root->left) {
621 if (!t->root->right) {
622 t->root = NULL;
623 return True;
624 }
625 t->root = t->root->right;
626 return True;
627 }
628 if (!t->root->right) {
629 t->root = t->root->left;
630 return True;
631 }
632 if (t->root->balance < 0) {
633 // Remove from the left subtree
634 n = t->root->left;
635 while (n->right) n = n->right;
636 } else {
637 // Remove from the right subtree
638 n = t->root->right;
639 while (n->left) n = n->left;
640 }
641 ch = avl_remove(t, n);
642 n->left = t->root->left;
643 n->right = t->root->right;
644 n->balance = t->root->balance;
645 t->root = n;
646 if (n->balance == 0) return ch;
647 return False;
648}
649
650// Remove and return the element matching the key 'k', or NULL if not present.
651void* VG_(OSet_Remove)(AvlTree* t, void* k)
652{
653 // Have to find the node first, then remove it.
654 AvlNode* n = avl_lookup(t, k);
655 if (n) {
656 avl_remove(t, n);
657 t->nElems--;
658 t->stackTop = 0; // So the iterator can't get out of sync
659 return elem_of_node(n);
660 } else {
661 return NULL;
662 }
663}
664
665/*--------------------------------------------------------------------*/
666/*--- Iterator ---*/
667/*--------------------------------------------------------------------*/
668
669// The iterator is implemented using in-order traversal with an explicit
670// stack, which lets us do the traversal one step at a time and remember
671// where we are between each call to OSet_Next().
672
673void VG_(OSet_ResetIter)(AvlTree* t)
674{
675 vg_assert(t);
676 stackClear(t);
677 if (t->root)
678 stackPush(t, t->root, 1);
679}
680
681void* VG_(OSet_Next)(AvlTree* t)
682{
683 Int i;
684 OSetNode* n;
685
686 vg_assert(t);
687
688 // This in-order traversal requires each node to be pushed and popped
689 // three times. These could be avoided by updating nodes in-situ on the
690 // top of the stack, but the push/pop cost is so small that it's worth
691 // keeping this loop in this simpler form.
692 while (stackPop(t, &n, &i)) {
693 switch (i) {
694 case 1:
695 stackPush(t, n, 2);
696 if (n->left) stackPush(t, n->left, 1);
697 break;
698 case 2:
699 stackPush(t, n, 3);
700 return elem_of_node(n);
701 case 3:
702 if (n->right) stackPush(t, n->right, 1);
703 break;
704 }
705 }
706
707 // Stack empty, iterator is exhausted, return NULL
708 return NULL;
709}
710
711/*--------------------------------------------------------------------*/
712/*--- Miscellaneous operations ---*/
713/*--------------------------------------------------------------------*/
714
715Int VG_(OSet_Size)(AvlTree* t)
716{
717 vg_assert(t);
718 return t->nElems;
719}
720
721static void OSet_Print2( AvlTree* t, AvlNode* n,
722 Char*(*strElem)(void *), Int p )
723{
724 // This is a recursive in-order traversal.
725 Int q = p;
726 if (NULL == n) return;
727 if (n->right) OSet_Print2(t, n->right, strElem, p+1);
728 while (q--) VG_(printf)(".. ");
729 VG_(printf)("%s\n", strElem(elem_of_node(n)));
730 if (n->left) OSet_Print2(t, n->left, strElem, p+1);
731}
732
733__attribute__((unused))
734static void OSet_Print( AvlTree* t, const HChar *where, Char*(*strElem)(void *) )
735{
736 VG_(printf)("-- start %s ----------------\n", where);
737 OSet_Print2(t, t->root, strElem, 0);
738 VG_(printf)("-- end %s ----------------\n", where);
739}
740
741/*--------------------------------------------------------------------*/
742/*--- end ---*/
743/*--------------------------------------------------------------------*/