blob: 0730ac60ddc80fdcdf1fbfdd69dd36ffb2e74afd [file] [log] [blame]
njne1b2b962005-08-14 22:13:00 +00001
2/*--------------------------------------------------------------------*/
3/*--- An ordered set implemented using an AVL tree. m_oset.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2005 Nicholas Nethercote
11 njn@valgrind.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31//----------------------------------------------------------------------
32// This file is based on:
33//
34// ANSI C Library for maintainance of AVL Balanced Trees
35// (C) 2000 Daniel Nagy, Budapest University of Technology and Economics
36// Released under GNU General Public License (GPL) version 2
37//----------------------------------------------------------------------
38
39// This file implements a generic ordered set using an AVL tree.
40//
41// Each node in the tree has two parts.
42// - First is the AVL metadata, which is three words: a left pointer, a
43// right pointer, and a word containing balancing information and a
44// "magic" value which provides some checking that the user has not
njna527a492005-12-16 17:06:37 +000045// corrupted the metadata. So the overhead is 12 bytes on 32-bit
46// platforms and 24 bytes on 64-bit platforms.
njne1b2b962005-08-14 22:13:00 +000047// - Second is the user's data. This can be anything. Note that because it
48// comes after the metadata, it will only be word-aligned, even if the
49// user data is a struct that would normally be doubleword-aligned.
50//
51// AvlNode* node -> +---------------+ V
52// | struct |
53// | AvlNode |
54// void* element -> +---------------+ ^
55// | element | |
56// keyOff -> | key | elemSize
57// +---------------+ v
58//
59// Users have to allocate AvlNodes with OSet_AllocNode(), which allocates
60// space for the metadata.
61//
62// The terminology used throughout this file:
63// - a "node", usually called "n", is a pointer to the metadata.
64// - an "element", usually called "e", is a pointer to the user data.
65// - a "key", usually called "k", is a pointer to a key.
66//
67// The helper functions elem_of_node and node_of_elem do the pointer
68// arithmetic to switch between the node and the element. The node magic is
69// checked after each operation to make sure that we're really operating on
70// an AvlNode.
71//
72// Each tree also has an iterator. Note that we cannot use the iterator
73// internally within this file (eg. we could implement OSet_Size() by
74// stepping through with the iterator and counting nodes) because it's
75// non-reentrant -- the user might be using it themselves, and the
76// concurrent uses would screw things up.
77
78#include "pub_core_basics.h"
79#include "pub_core_libcbase.h"
80#include "pub_core_libcassert.h"
81#include "pub_core_libcprint.h"
82#include "pub_core_oset.h"
83
84/*--------------------------------------------------------------------*/
85/*--- Types and constants ---*/
86/*--------------------------------------------------------------------*/
87
88// Internal names for the OSet types.
89typedef OSet AvlTree;
90typedef OSetNode AvlNode;
91
92// The padding ensures that magic is right at the end of the node,
93// regardless of the machine's word size, so that any overwrites will be
94// detected earlier.
95struct _OSetNode {
96 AvlNode* left;
97 AvlNode* right;
98 Char balance;
99 Char padding[sizeof(void*)-3];
100 Short magic;
101};
102
103#define STACK_MAX 32 // At most 2**32 entries can be iterated over
104#define OSET_MAGIC 0x5b1f
105
106// An OSet (AVL tree). If cmp is NULL, the key must be a UWord, and must
107// be the first word in the element. If cmp is set, arbitrary keys in
108// arbitrary positions can be used.
109struct _OSet {
110 SizeT keyOff; // key offset
111 OSetCmp_t cmp; // compare a key and an element, or NULL
112 OSetAlloc_t alloc; // allocator
113 OSetFree_t free; // deallocator
114 Int nElems; // number of elements in the tree
115 AvlNode* root; // root node
116
117 AvlNode* nodeStack[STACK_MAX]; // Iterator node stack
118 Int numStack[STACK_MAX]; // Iterator num stack
119 Int stackTop; // Iterator stack pointer, one past end
120};
121
122/*--------------------------------------------------------------------*/
123/*--- Helper operations ---*/
124/*--------------------------------------------------------------------*/
125
126// Given a pointer to the node's element, return the pointer to the AvlNode
127// structure. If the node has a bad magic number, it will die with an
128// assertion failure.
129static inline
130AvlNode* node_of_elem(const void *elem)
131{
132 AvlNode* n = (AvlNode*)((Addr)elem - sizeof(AvlNode));
133 vg_assert2(n->magic == OSET_MAGIC,
134 "bad magic on node %p = %x (expected %x)\n"
135 "possible causes:\n"
136 " - node not allocated with VG_(OSet_AllocNode)()?\n"
137 " - node metadata corrupted by underwriting start of element?\n",
138 n, n->magic, OSET_MAGIC);
139 return n;
140}
141
142// Given an AvlNode, return the pointer to the element.
143static inline
144void* elem_of_node(const AvlNode *n)
145{
146 vg_assert2(n->magic == OSET_MAGIC,
147 "bad magic on node %p = %x (expected %x)\n"
148 "possible causes:\n"
149 " - node metadata corrupted by overwriting end of element?\n",
150 n, n->magic, OSET_MAGIC);
151 return (void*)((Addr)n + sizeof(AvlNode));
152}
153
154// Like elem_of_node, but no magic checking.
155static inline
156void* elem_of_node_no_check(const AvlNode *n)
157{
158 return (void*)((Addr)n + sizeof(AvlNode));
159}
160
161static inline
162void* slow_key_of_node(AvlTree* t, AvlNode* n)
163{
164 return (void*)((Addr)elem_of_node(n) + t->keyOff);
165}
166
167static inline
168void* fast_key_of_node(AvlNode* n)
169{
170 return elem_of_node(n);
171}
172
173// Compare the first word of each element. Inlining is *crucial*.
174static inline Int fast_cmp(void* k, AvlNode* n)
175{
176 return ( *(Int*)k - *(Int*)elem_of_node(n) );
177}
178
179// Compare a key and an element. Inlining is *crucial*.
180static inline Int slow_cmp(AvlTree* t, void* k, AvlNode* n)
181{
182 return t->cmp(k, elem_of_node(n));
183}
184
185
186// Swing to the left. Warning: no balance maintainance.
187static void avl_swl ( AvlNode** root )
188{
189 AvlNode* a = *root;
190 AvlNode* b = a->right;
191 *root = b;
192 a->right = b->left;
193 b->left = a;
194}
195
196// Swing to the right. Warning: no balance maintainance.
197static void avl_swr ( AvlNode** root )
198{
199 AvlNode* a = *root;
200 AvlNode* b = a->left;
201 *root = b;
202 a->left = b->right;
203 b->right = a;
204}
205
206// Balance maintainance after especially nasty swings.
207static void avl_nasty ( AvlNode* root )
208{
209 switch (root->balance) {
210 case -1:
211 root->left->balance = 0;
212 root->right->balance = 1;
213 break;
214 case 1:
215 root->left->balance =-1;
216 root->right->balance = 0;
217 break;
218 case 0:
219 root->left->balance = 0;
220 root->right->balance = 0;
221 }
222 root->balance = 0;
223}
224
225
226// Clear the iterator stack.
227static void stackClear(AvlTree* t)
228{
229 Int i;
230 vg_assert(t);
231 for (i = 0; i < STACK_MAX; i++) {
232 t->nodeStack[i] = NULL;
233 t->numStack[i] = 0;
234 }
235 t->stackTop = 0;
236}
237
238// Push onto the iterator stack.
239static void stackPush(AvlTree* t, AvlNode* n, Int i)
240{
241 vg_assert(t->stackTop < STACK_MAX);
242 vg_assert(1 <= i && i <= 3);
243 t->nodeStack[t->stackTop] = n;
244 t-> numStack[t->stackTop] = i;
245 t->stackTop++;
246}
247
248// Pop from the iterator stack.
249static Bool stackPop(AvlTree* t, AvlNode** n, Int* i)
250{
251 vg_assert(t->stackTop <= STACK_MAX);
252
253 if (t->stackTop > 0) {
254 t->stackTop--;
255 *n = t->nodeStack[t->stackTop];
256 *i = t-> numStack[t->stackTop];
257 vg_assert(1 <= *i && *i <= 3);
258 t->nodeStack[t->stackTop] = NULL;
259 t-> numStack[t->stackTop] = 0;
260 return True;
261 } else {
262 return False;
263 }
264}
265
266/*--------------------------------------------------------------------*/
267/*--- Creating and destroying AvlTrees and AvlNodes ---*/
268/*--------------------------------------------------------------------*/
269
270// The underscores avoid GCC complaints about overshadowing global names.
271AvlTree* VG_(OSet_Create)(OffT _keyOff, OSetCmp_t _cmp,
272 OSetAlloc_t _alloc, OSetFree_t _free)
273{
274 AvlTree* t;
275
276 // Check the padding is right and the AvlNode is the expected size.
277 vg_assert(sizeof(AvlNode) == 3*sizeof(void*));
278
279 // Sanity check args
280 vg_assert(_alloc);
281 vg_assert(_free);
282 if (!_cmp) vg_assert(0 == _keyOff); // If no cmp, offset must be zero
283
284 t = _alloc(sizeof(AvlTree));
285 t->keyOff = _keyOff;
286 t->cmp = _cmp;
287 t->alloc = _alloc;
288 t->free = _free;
289 t->nElems = 0;
290 t->root = NULL;
291 stackClear(t);
292
293 return t;
294}
295
296// Destructor, frees up all memory held by remaining nodes.
297void VG_(OSet_Destroy)(AvlTree* t)
298{
299 AvlNode* n;
300 Int i, sz = 0;
301
302 vg_assert(t);
303 stackClear(t);
304 if (t->root)
305 stackPush(t, t->root, 1);
306
307 // Free all the AvlNodes. This is a post-order traversal, because we
308 // must free all children of a node before the node itself.
309 while (stackPop(t, &n, &i)) {
310 switch (i) {
311 case 1:
312 stackPush(t, n, 2);
313 if (n->left) stackPush(t, n->left, 1);
314 break;
315 case 2:
316 stackPush(t, n, 3);
317 if (n->right) stackPush(t, n->right, 1);
318 break;
319 case 3:
320 t->free(n);
321 sz++;
322 break;
323 }
324 }
325 vg_assert(sz == t->nElems);
326
327 // Free the AvlTree itself.
328 t->free(t);
329}
330
331// Allocate and initialise a new node.
332void* VG_(OSet_AllocNode)(AvlTree* t, SizeT elemSize)
333{
334 Int nodeSize = sizeof(AvlNode) + elemSize;
335 AvlNode* n = t->alloc( nodeSize );
336 vg_assert(elemSize > 0);
337 VG_(memset)(n, 0, nodeSize);
338 n->magic = OSET_MAGIC;
339 return elem_of_node(n);
340}
341
342void VG_(OSet_FreeNode)(AvlTree* t, void* e)
343{
344 t->free( node_of_elem(e) );
345}
346
347/*--------------------------------------------------------------------*/
348/*--- Insertion ---*/
349/*--------------------------------------------------------------------*/
350
351static inline Int cmp_key_root(AvlTree* t, AvlNode* n)
352{
353 return t->cmp
354 ? slow_cmp(t, slow_key_of_node(t, n), t->root)
355 : fast_cmp( fast_key_of_node( n), t->root);
356}
357
358// Insert element e into the non-empty AVL tree t.
359// Returns True if the depth of the tree has grown.
360static Bool avl_insert(AvlTree* t, AvlNode* n)
361{
njn6db34702005-08-14 23:00:57 +0000362 Int cmpres = cmp_key_root(t, n);
njne1b2b962005-08-14 22:13:00 +0000363
364 if (cmpres < 0) {
365 // Insert into the left subtree.
366 if (t->root->left) {
367 // Only need to set the used fields in the subtree.
368 AvlTree left_subtree;
369 left_subtree.root = t->root->left;
370 left_subtree.cmp = t->cmp;
371 left_subtree.keyOff = t->keyOff;
372 if (avl_insert(&left_subtree, n)) {
373 switch (t->root->balance--) {
374 case 1: return False;
375 case 0: return True;
376 }
377 if (t->root->left->balance < 0) {
378 avl_swr(&(t->root));
379 t->root->balance = 0;
380 t->root->right->balance = 0;
381 } else {
382 avl_swl(&(t->root->left));
383 avl_swr(&(t->root));
384 avl_nasty(t->root);
385 }
386 } else {
387 t->root->left=left_subtree.root;
388 }
389 return False;
390 } else {
391 t->root->left = n;
392 if (t->root->balance--) return False;
393 return True;
394 }
395
396 } else if (cmpres > 0) {
397 // Insert into the right subtree
398 if (t->root->right) {
399 // Only need to set the used fields in the subtree.
400 AvlTree right_subtree;
401 right_subtree.root = t->root->right;
402 right_subtree.cmp = t->cmp;
403 right_subtree.keyOff = t->keyOff;
404 if (avl_insert(&right_subtree, n)) {
405 switch (t->root->balance++) {
406 case -1: return False;
407 case 0: return True;
408 }
409 if (t->root->right->balance > 0) {
410 avl_swl(&(t->root));
411 t->root->balance = 0;
412 t->root->left->balance = 0;
413 } else {
414 avl_swr(&(t->root->right));
415 avl_swl(&(t->root));
416 avl_nasty(t->root);
417 }
418 } else {
419 t->root->right=right_subtree.root;
420 }
421 return False;
422 } else {
423 t->root->right = n;
424 if (t->root->balance++) return False;
425 return True;
426 }
427
428 } else {
429 vg_assert2(0, "OSet_Insert: duplicate element added");
430 }
431}
432
433// Insert element e into the AVL tree t. This is just a wrapper for
434// avl_insert() which doesn't return a Bool.
435void VG_(OSet_Insert)(AvlTree* t, void* e)
436{
tom60a4b0b2005-10-12 10:45:27 +0000437 AvlNode* n;
438
njne1b2b962005-08-14 22:13:00 +0000439 vg_assert(t);
440
441 // Initialise. Even though OSet_AllocNode zeroes these fields, we should
442 // do it again in case a node is removed and then re-added to the tree.
tom60a4b0b2005-10-12 10:45:27 +0000443 n = node_of_elem(e);
njne1b2b962005-08-14 22:13:00 +0000444 n->left = 0;
445 n->right = 0;
446 n->balance = 0;
447
448 // Insert into an empty tree
449 if (!t->root) {
450 t->root = n;
451 } else {
452 avl_insert(t, n);
453 }
454
455 t->nElems++;
456 t->stackTop = 0; // So the iterator can't get out of sync
457}
458
459/*--------------------------------------------------------------------*/
460/*--- Lookup ---*/
461/*--------------------------------------------------------------------*/
462
463// Find the *node* in t matching k, or NULL if not found.
464static AvlNode* avl_lookup(AvlTree* t, void* k)
465{
466 Int cmpres;
njn0684dd42005-08-15 02:05:21 +0000467 AvlNode* curr = t->root;
njne1b2b962005-08-14 22:13:00 +0000468
469 if (t->cmp) {
470 // General case
471 while (True) {
472 if (curr == NULL) return NULL;
473 cmpres = slow_cmp(t, k, curr);
474 if (cmpres < 0) curr = curr->left; else
475 if (cmpres > 0) curr = curr->right; else
476 return curr;
477 }
478 } else {
479 // Fast-track special case. We use the no-check version of
480 // elem_of_node because it saves about 10% on lookup time. This
481 // shouldn't be very dangerous because each node will have been
482 // checked on insertion.
483 Int kk = *(Int*)k;
484 while (True) {
485 if (curr == NULL) return NULL;
486 cmpres = kk - *(Int*)elem_of_node_no_check(curr);
487 if (cmpres < 0) curr = curr->left; else
488 if (cmpres > 0) curr = curr->right; else
489 return curr;
490 }
491 }
492}
493
494// Find the *element* in t matching k, or NULL if not found.
495void* VG_(OSet_Lookup)(AvlTree* t, void* k)
496{
njn0684dd42005-08-15 02:05:21 +0000497 AvlNode* n;
498 vg_assert(t);
499 n = avl_lookup(t, k);
njne1b2b962005-08-14 22:13:00 +0000500 return ( n ? elem_of_node(n) : NULL );
501}
502
njnaa260e82005-08-17 21:06:07 +0000503// Find the *element* in t matching k, or NULL if not found; use the given
504// comparison function rather than the standard one.
505void* VG_(OSet_LookupWithCmp)(AvlTree* t, void* k, OSetCmp_t cmp)
506{
507 // Save the normal one to the side, then restore once we're done.
508 void* e;
509 OSetCmp_t tmpcmp;
510 vg_assert(t);
511 tmpcmp = t->cmp;
512 t->cmp = cmp;
513 e = VG_(OSet_Lookup)(t, k);
514 t->cmp = tmpcmp;
515 return e;
516}
517
njne1b2b962005-08-14 22:13:00 +0000518// Is there an element matching k?
519Bool VG_(OSet_Contains)(AvlTree* t, void* k)
520{
521 return (NULL != VG_(OSet_Lookup)(t, k));
522}
523
524/*--------------------------------------------------------------------*/
525/*--- Deletion ---*/
526/*--------------------------------------------------------------------*/
527
528static Bool avl_removeroot(AvlTree* t);
529
530// Remove an already-selected node n from the AVL tree t.
531// Returns True if the depth of the tree has shrunk.
532static Bool avl_remove(AvlTree* t, AvlNode* n)
533{
534 Bool ch;
njn6db34702005-08-14 23:00:57 +0000535 Int cmpres = cmp_key_root(t, n);
njne1b2b962005-08-14 22:13:00 +0000536
537 if (cmpres < 0) {
tom60a4b0b2005-10-12 10:45:27 +0000538 AvlTree left_subtree;
njne1b2b962005-08-14 22:13:00 +0000539 // Remove from the left subtree
540 vg_assert(t->root->left);
njne1b2b962005-08-14 22:13:00 +0000541 // Only need to set the used fields in the subtree.
542 left_subtree.root = t->root->left;
543 left_subtree.cmp = t->cmp;
544 left_subtree.keyOff = t->keyOff;
545 ch = avl_remove(&left_subtree, n);
546 t->root->left = left_subtree.root;
547 if (ch) {
548 switch (t->root->balance++) {
549 case -1: return True;
550 case 0: return False;
551 }
552 switch (t->root->right->balance) {
553 case 0:
554 avl_swl(&(t->root));
555 t->root->balance = -1;
556 t->root->left->balance = 1;
557 return False;
558 case 1:
559 avl_swl(&(t->root));
560 t->root->balance = 0;
561 t->root->left->balance = 0;
562 return True;
563 }
564 avl_swr(&(t->root->right));
565 avl_swl(&(t->root));
566 avl_nasty(t->root);
567 return True;
568 } else {
569 return False;
570 }
571
572 } else if (cmpres > 0) {
573 // Remove from the right subtree
574 AvlTree right_subtree;
575 vg_assert(t->root->right);
576 // Only need to set the used fields in the subtree.
577 right_subtree.root = t->root->right;
578 right_subtree.cmp = t->cmp;
579 right_subtree.keyOff = t->keyOff;
580 ch = avl_remove(&right_subtree, n);
581 t->root->right = right_subtree.root;
582 if (ch) {
583 switch (t->root->balance--) {
584 case 1: return True;
585 case 0: return False;
586 }
587 switch (t->root->left->balance) {
588 case 0:
589 avl_swr(&(t->root));
590 t->root->balance = 1;
591 t->root->right->balance = -1;
592 return False;
593 case -1:
594 avl_swr(&(t->root));
595 t->root->balance = 0;
596 t->root->right->balance = 0;
597 return True;
598 }
599 avl_swl(&(t->root->left));
600 avl_swr(&(t->root));
601 avl_nasty(t->root);
602 return True;
603 } else {
604 return False;
605 }
606
607 } else {
608 // Found the node to be removed.
609 vg_assert(t->root == n);
610 return avl_removeroot(t);
611 }
612}
613
614// Remove the root of the AVL tree t.
615// Returns True if the depth of the tree has shrunk.
616static Bool avl_removeroot(AvlTree* t)
617{
618 Int ch;
619 AvlNode* n;
620
njne1b2b962005-08-14 22:13:00 +0000621 if (!t->root->left) {
622 if (!t->root->right) {
623 t->root = NULL;
624 return True;
625 }
626 t->root = t->root->right;
627 return True;
628 }
629 if (!t->root->right) {
630 t->root = t->root->left;
631 return True;
632 }
633 if (t->root->balance < 0) {
634 // Remove from the left subtree
635 n = t->root->left;
636 while (n->right) n = n->right;
637 } else {
638 // Remove from the right subtree
639 n = t->root->right;
640 while (n->left) n = n->left;
641 }
642 ch = avl_remove(t, n);
643 n->left = t->root->left;
644 n->right = t->root->right;
645 n->balance = t->root->balance;
646 t->root = n;
647 if (n->balance == 0) return ch;
648 return False;
649}
650
651// Remove and return the element matching the key 'k', or NULL if not present.
652void* VG_(OSet_Remove)(AvlTree* t, void* k)
653{
654 // Have to find the node first, then remove it.
655 AvlNode* n = avl_lookup(t, k);
656 if (n) {
657 avl_remove(t, n);
658 t->nElems--;
659 t->stackTop = 0; // So the iterator can't get out of sync
660 return elem_of_node(n);
661 } else {
662 return NULL;
663 }
664}
665
666/*--------------------------------------------------------------------*/
667/*--- Iterator ---*/
668/*--------------------------------------------------------------------*/
669
670// The iterator is implemented using in-order traversal with an explicit
671// stack, which lets us do the traversal one step at a time and remember
672// where we are between each call to OSet_Next().
673
674void VG_(OSet_ResetIter)(AvlTree* t)
675{
676 vg_assert(t);
677 stackClear(t);
678 if (t->root)
679 stackPush(t, t->root, 1);
680}
681
682void* VG_(OSet_Next)(AvlTree* t)
683{
684 Int i;
685 OSetNode* n;
686
687 vg_assert(t);
688
689 // This in-order traversal requires each node to be pushed and popped
690 // three times. These could be avoided by updating nodes in-situ on the
691 // top of the stack, but the push/pop cost is so small that it's worth
692 // keeping this loop in this simpler form.
693 while (stackPop(t, &n, &i)) {
694 switch (i) {
695 case 1:
696 stackPush(t, n, 2);
697 if (n->left) stackPush(t, n->left, 1);
698 break;
699 case 2:
700 stackPush(t, n, 3);
701 return elem_of_node(n);
702 case 3:
703 if (n->right) stackPush(t, n->right, 1);
704 break;
705 }
706 }
707
708 // Stack empty, iterator is exhausted, return NULL
709 return NULL;
710}
711
712/*--------------------------------------------------------------------*/
713/*--- Miscellaneous operations ---*/
714/*--------------------------------------------------------------------*/
715
716Int VG_(OSet_Size)(AvlTree* t)
717{
718 vg_assert(t);
719 return t->nElems;
720}
721
722static void OSet_Print2( AvlTree* t, AvlNode* n,
723 Char*(*strElem)(void *), Int p )
724{
725 // This is a recursive in-order traversal.
726 Int q = p;
727 if (NULL == n) return;
728 if (n->right) OSet_Print2(t, n->right, strElem, p+1);
729 while (q--) VG_(printf)(".. ");
730 VG_(printf)("%s\n", strElem(elem_of_node(n)));
731 if (n->left) OSet_Print2(t, n->left, strElem, p+1);
732}
733
734__attribute__((unused))
735static void OSet_Print( AvlTree* t, const HChar *where, Char*(*strElem)(void *) )
736{
737 VG_(printf)("-- start %s ----------------\n", where);
738 OSet_Print2(t, t->root, strElem, 0);
739 VG_(printf)("-- end %s ----------------\n", where);
740}
741
742/*--------------------------------------------------------------------*/
743/*--- end ---*/
744/*--------------------------------------------------------------------*/