blob: 323ac57f3c40beced3798b3282d72dcea45a5d78 [file] [log] [blame]
sewardjb4112022007-11-09 22:49:28 +00001
2/*--------------------------------------------------------------------*/
3/*--- An AVL tree based finite map for word keys and word values. ---*/
4/*--- Inspired by Haskell's "FiniteMap" library. ---*/
sewardj896f6f92008-08-19 08:38:52 +00005/*--- m_wordfm.c ---*/
sewardjb4112022007-11-09 22:49:28 +00006/*--------------------------------------------------------------------*/
7
8/*
sewardj896f6f92008-08-19 08:38:52 +00009 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
sewardjb4112022007-11-09 22:49:28 +000011
Elliott Hughesed398002017-06-21 14:41:24 -070012 Copyright (C) 2007-2017 Julian Seward
sewardjb4112022007-11-09 22:49:28 +000013 jseward@acm.org
14
15 This code is based on previous work by Nicholas Nethercote
16 (coregrind/m_oset.c) which is
17
Elliott Hughesed398002017-06-21 14:41:24 -070018 Copyright (C) 2005-2017 Nicholas Nethercote
sewardjb4112022007-11-09 22:49:28 +000019 njn@valgrind.org
20
21 which in turn was derived partially from:
22
23 AVL C library
24 Copyright (C) 2000,2002 Daniel Nagy
25
26 This program is free software; you can redistribute it and/or
27 modify it under the terms of the GNU General Public License as
28 published by the Free Software Foundation; either version 2 of
29 the License, or (at your option) any later version.
30 [...]
31
32 (taken from libavl-0.4/debian/copyright)
33
34 This program is free software; you can redistribute it and/or
35 modify it under the terms of the GNU General Public License as
36 published by the Free Software Foundation; either version 2 of the
37 License, or (at your option) any later version.
38
39 This program is distributed in the hope that it will be useful, but
40 WITHOUT ANY WARRANTY; without even the implied warranty of
41 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42 General Public License for more details.
43
44 You should have received a copy of the GNU General Public License
45 along with this program; if not, write to the Free Software
46 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
47 02111-1307, USA.
48
49 The GNU General Public License is contained in the file COPYING.
50*/
51
sewardj896f6f92008-08-19 08:38:52 +000052#include "pub_core_basics.h"
53#include "pub_core_libcassert.h"
54#include "pub_core_libcbase.h"
55#include "pub_core_wordfm.h" /* self */
sewardjb4112022007-11-09 22:49:28 +000056
sewardjae5137e2008-01-17 23:19:54 +000057
sewardjb4112022007-11-09 22:49:28 +000058//------------------------------------------------------------------//
59//--- WordFM ---//
60//--- Implementation ---//
61//------------------------------------------------------------------//
62
63/* One element of the AVL tree */
64typedef
65 struct _AvlNode {
sewardj250ec2e2008-02-15 22:02:30 +000066 UWord key;
67 UWord val;
sewardjb4112022007-11-09 22:49:28 +000068 struct _AvlNode* child[2]; /* [0] is left subtree, [1] is right */
69 Char balance; /* do not make this unsigned */
70 }
71 AvlNode;
72
73typedef
74 struct {
sewardj250ec2e2008-02-15 22:02:30 +000075 UWord w;
sewardjb4112022007-11-09 22:49:28 +000076 Bool b;
77 }
78 MaybeWord;
79
80#define WFM_STKMAX 32 // At most 2**32 entries can be iterated over
81
82struct _WordFM {
83 AvlNode* root;
florian54fe2022012-10-27 23:07:42 +000084 void* (*alloc_nofail)( const HChar*, SizeT );
85 const HChar* cc;
sewardjb4112022007-11-09 22:49:28 +000086 void (*dealloc)(void*);
sewardj250ec2e2008-02-15 22:02:30 +000087 Word (*kCmp)(UWord,UWord);
sewardjb4112022007-11-09 22:49:28 +000088 AvlNode* nodeStack[WFM_STKMAX]; // Iterator node stack
89 Int numStack[WFM_STKMAX]; // Iterator num stack
90 Int stackTop; // Iterator stack pointer, one past end
91};
92
93/* forward */
sewardj250ec2e2008-02-15 22:02:30 +000094static Bool avl_removeroot_wrk(AvlNode** t, Word(*kCmp)(UWord,UWord));
sewardjb4112022007-11-09 22:49:28 +000095
Elliott Hughesed398002017-06-21 14:41:24 -070096/* Swing to the left. Warning: no balance maintenance. */
sewardjb4112022007-11-09 22:49:28 +000097static void avl_swl ( AvlNode** root )
98{
99 AvlNode* a = *root;
100 AvlNode* b = a->child[1];
101 *root = b;
102 a->child[1] = b->child[0];
103 b->child[0] = a;
104}
105
Elliott Hughesed398002017-06-21 14:41:24 -0700106/* Swing to the right. Warning: no balance maintenance. */
sewardjb4112022007-11-09 22:49:28 +0000107static void avl_swr ( AvlNode** root )
108{
109 AvlNode* a = *root;
110 AvlNode* b = a->child[0];
111 *root = b;
112 a->child[0] = b->child[1];
113 b->child[1] = a;
114}
115
Elliott Hughesed398002017-06-21 14:41:24 -0700116/* Balance maintenance after especially nasty swings. */
sewardjb4112022007-11-09 22:49:28 +0000117static void avl_nasty ( AvlNode* root )
118{
119 switch (root->balance) {
120 case -1:
121 root->child[0]->balance = 0;
122 root->child[1]->balance = 1;
123 break;
124 case 1:
125 root->child[0]->balance = -1;
126 root->child[1]->balance = 0;
127 break;
128 case 0:
129 root->child[0]->balance = 0;
130 root->child[1]->balance = 0;
131 break;
132 default:
floriane2800c92014-09-15 20:57:45 +0000133 vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000134 }
135 root->balance=0;
136}
137
138/* Find size of a non-NULL tree. */
florianee0d0e92014-10-18 16:17:13 +0000139static UWord size_avl_nonNull ( const AvlNode* nd )
sewardjb4112022007-11-09 22:49:28 +0000140{
141 return 1 + (nd->child[0] ? size_avl_nonNull(nd->child[0]) : 0)
142 + (nd->child[1] ? size_avl_nonNull(nd->child[1]) : 0);
143}
144
sewardj250ec2e2008-02-15 22:02:30 +0000145/* Unsignedly compare w1 and w2. If w1 < w2, produce a negative
146 number; if w1 > w2 produce a positive number, and if w1 == w2
147 produce zero. */
148static inline Word cmp_unsigned_Words ( UWord w1, UWord w2 ) {
sewardj95e79002007-12-09 02:14:35 +0000149 if (w1 < w2) return -1;
150 if (w1 > w2) return 1;
151 return 0;
152}
153
sewardjb4112022007-11-09 22:49:28 +0000154/* Insert element a into the AVL tree t. Returns True if the depth of
155 the tree has grown. If element with that key is already present,
156 just copy a->val to existing node, first returning old ->val field
157 of existing node in *oldV, so that the caller can finalize it
158 however it wants.
159*/
160static
161Bool avl_insert_wrk ( AvlNode** rootp,
162 /*OUT*/MaybeWord* oldV,
163 AvlNode* a,
sewardj250ec2e2008-02-15 22:02:30 +0000164 Word (*kCmp)(UWord,UWord) )
sewardjb4112022007-11-09 22:49:28 +0000165{
166 Word cmpres;
167
168 /* initialize */
169 a->child[0] = 0;
170 a->child[1] = 0;
171 a->balance = 0;
172 oldV->b = False;
173
174 /* insert into an empty tree? */
175 if (!(*rootp)) {
176 (*rootp) = a;
177 return True;
178 }
179
180 cmpres = kCmp ? /*boxed*/ kCmp( (*rootp)->key, a->key )
sewardj250ec2e2008-02-15 22:02:30 +0000181 : /*unboxed*/ cmp_unsigned_Words( (UWord)(*rootp)->key,
182 (UWord)a->key );
sewardjb4112022007-11-09 22:49:28 +0000183
184 if (cmpres > 0) {
185 /* insert into the left subtree */
186 if ((*rootp)->child[0]) {
187 AvlNode* left_subtree = (*rootp)->child[0];
188 if (avl_insert_wrk(&left_subtree, oldV, a, kCmp)) {
189 switch ((*rootp)->balance--) {
190 case 1: return False;
191 case 0: return True;
192 case -1: break;
floriane2800c92014-09-15 20:57:45 +0000193 default: vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000194 }
195 if ((*rootp)->child[0]->balance < 0) {
196 avl_swr( rootp );
197 (*rootp)->balance = 0;
198 (*rootp)->child[1]->balance = 0;
199 } else {
200 avl_swl( &((*rootp)->child[0]) );
201 avl_swr( rootp );
202 avl_nasty( *rootp );
203 }
204 } else {
205 (*rootp)->child[0] = left_subtree;
206 }
207 return False;
208 } else {
209 (*rootp)->child[0] = a;
210 if ((*rootp)->balance--)
211 return False;
212 return True;
213 }
floriane2800c92014-09-15 20:57:45 +0000214 vg_assert(0);/*NOTREACHED*/
sewardjb4112022007-11-09 22:49:28 +0000215 }
216 else
217 if (cmpres < 0) {
218 /* insert into the right subtree */
219 if ((*rootp)->child[1]) {
220 AvlNode* right_subtree = (*rootp)->child[1];
221 if (avl_insert_wrk(&right_subtree, oldV, a, kCmp)) {
222 switch((*rootp)->balance++){
223 case -1: return False;
224 case 0: return True;
225 case 1: break;
floriane2800c92014-09-15 20:57:45 +0000226 default: vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000227 }
228 if ((*rootp)->child[1]->balance > 0) {
229 avl_swl( rootp );
230 (*rootp)->balance = 0;
231 (*rootp)->child[0]->balance = 0;
232 } else {
233 avl_swr( &((*rootp)->child[1]) );
234 avl_swl( rootp );
235 avl_nasty( *rootp );
236 }
237 } else {
238 (*rootp)->child[1] = right_subtree;
239 }
240 return False;
241 } else {
242 (*rootp)->child[1] = a;
243 if ((*rootp)->balance++)
244 return False;
245 return True;
246 }
floriane2800c92014-09-15 20:57:45 +0000247 vg_assert(0);/*NOTREACHED*/
sewardjb4112022007-11-09 22:49:28 +0000248 }
249 else {
250 /* cmpres == 0, a duplicate - replace the val, but don't
251 incorporate the node in the tree */
252 oldV->b = True;
253 oldV->w = (*rootp)->val;
254 (*rootp)->val = a->val;
255 return False;
256 }
257}
258
259/* Remove an element a from the AVL tree t. a must be part of
260 the tree. Returns True if the depth of the tree has shrunk.
261*/
262static
263Bool avl_remove_wrk ( AvlNode** rootp,
264 AvlNode* a,
sewardj250ec2e2008-02-15 22:02:30 +0000265 Word(*kCmp)(UWord,UWord) )
sewardjb4112022007-11-09 22:49:28 +0000266{
267 Bool ch;
268 Word cmpres;
269 cmpres = kCmp ? /*boxed*/ kCmp( (*rootp)->key, a->key )
sewardj250ec2e2008-02-15 22:02:30 +0000270 : /*unboxed*/ cmp_unsigned_Words( (UWord)(*rootp)->key,
271 (UWord)a->key );
sewardjb4112022007-11-09 22:49:28 +0000272
273 if (cmpres > 0){
274 /* remove from the left subtree */
275 AvlNode* left_subtree = (*rootp)->child[0];
floriane2800c92014-09-15 20:57:45 +0000276 vg_assert(left_subtree);
sewardjb4112022007-11-09 22:49:28 +0000277 ch = avl_remove_wrk(&left_subtree, a, kCmp);
278 (*rootp)->child[0]=left_subtree;
279 if (ch) {
280 switch ((*rootp)->balance++) {
281 case -1: return True;
282 case 0: return False;
283 case 1: break;
floriane2800c92014-09-15 20:57:45 +0000284 default: vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000285 }
286 switch ((*rootp)->child[1]->balance) {
287 case 0:
288 avl_swl( rootp );
289 (*rootp)->balance = -1;
290 (*rootp)->child[0]->balance = 1;
291 return False;
292 case 1:
293 avl_swl( rootp );
294 (*rootp)->balance = 0;
295 (*rootp)->child[0]->balance = 0;
296 return True;
297 case -1:
298 break;
299 default:
floriane2800c92014-09-15 20:57:45 +0000300 vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000301 }
302 avl_swr( &((*rootp)->child[1]) );
303 avl_swl( rootp );
304 avl_nasty( *rootp );
305 return True;
306 }
307 }
308 else
309 if (cmpres < 0) {
310 /* remove from the right subtree */
311 AvlNode* right_subtree = (*rootp)->child[1];
floriane2800c92014-09-15 20:57:45 +0000312 vg_assert(right_subtree);
sewardjb4112022007-11-09 22:49:28 +0000313 ch = avl_remove_wrk(&right_subtree, a, kCmp);
314 (*rootp)->child[1] = right_subtree;
315 if (ch) {
316 switch ((*rootp)->balance--) {
317 case 1: return True;
318 case 0: return False;
319 case -1: break;
floriane2800c92014-09-15 20:57:45 +0000320 default: vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000321 }
322 switch ((*rootp)->child[0]->balance) {
323 case 0:
324 avl_swr( rootp );
325 (*rootp)->balance = 1;
326 (*rootp)->child[1]->balance = -1;
327 return False;
328 case -1:
329 avl_swr( rootp );
330 (*rootp)->balance = 0;
331 (*rootp)->child[1]->balance = 0;
332 return True;
333 case 1:
334 break;
335 default:
floriane2800c92014-09-15 20:57:45 +0000336 vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000337 }
338 avl_swl( &((*rootp)->child[0]) );
339 avl_swr( rootp );
340 avl_nasty( *rootp );
341 return True;
342 }
343 }
344 else {
floriane2800c92014-09-15 20:57:45 +0000345 vg_assert(cmpres == 0);
346 vg_assert((*rootp)==a);
sewardjb4112022007-11-09 22:49:28 +0000347 return avl_removeroot_wrk(rootp, kCmp);
348 }
349 return 0;
350}
351
352/* Remove the root of the AVL tree *rootp.
353 * Warning: dumps core if *rootp is empty
354 */
355static
356Bool avl_removeroot_wrk ( AvlNode** rootp,
sewardj250ec2e2008-02-15 22:02:30 +0000357 Word(*kCmp)(UWord,UWord) )
sewardjb4112022007-11-09 22:49:28 +0000358{
359 Bool ch;
360 AvlNode* a;
361 if (!(*rootp)->child[0]) {
362 if (!(*rootp)->child[1]) {
363 (*rootp) = 0;
364 return True;
365 }
366 (*rootp) = (*rootp)->child[1];
367 return True;
368 }
369 if (!(*rootp)->child[1]) {
370 (*rootp) = (*rootp)->child[0];
371 return True;
372 }
373 if ((*rootp)->balance < 0) {
374 /* remove from the left subtree */
375 a = (*rootp)->child[0];
376 while (a->child[1]) a = a->child[1];
377 } else {
378 /* remove from the right subtree */
379 a = (*rootp)->child[1];
380 while (a->child[0]) a = a->child[0];
381 }
382 ch = avl_remove_wrk(rootp, a, kCmp);
383 a->child[0] = (*rootp)->child[0];
384 a->child[1] = (*rootp)->child[1];
385 a->balance = (*rootp)->balance;
386 (*rootp) = a;
387 if(a->balance == 0) return ch;
388 return False;
389}
390
391static
sewardj250ec2e2008-02-15 22:02:30 +0000392AvlNode* avl_find_node ( AvlNode* t, Word k, Word(*kCmp)(UWord,UWord) )
sewardjb4112022007-11-09 22:49:28 +0000393{
394 if (kCmp) {
395 /* Boxed comparisons */
sewardj95e79002007-12-09 02:14:35 +0000396 Word cmpresS;
sewardjb4112022007-11-09 22:49:28 +0000397 while (True) {
398 if (t == NULL) return NULL;
sewardj95e79002007-12-09 02:14:35 +0000399 cmpresS = kCmp(t->key, k);
400 if (cmpresS > 0) t = t->child[0]; else
401 if (cmpresS < 0) t = t->child[1]; else
sewardjb4112022007-11-09 22:49:28 +0000402 return t;
403 }
404 } else {
405 /* Unboxed comparisons */
sewardj95e79002007-12-09 02:14:35 +0000406 Word cmpresS; /* signed */
sewardjb4112022007-11-09 22:49:28 +0000407 UWord cmpresU; /* unsigned */
408 while (True) {
409 if (t == NULL) return NULL; /* unlikely ==> predictable */
sewardj250ec2e2008-02-15 22:02:30 +0000410 cmpresS = cmp_unsigned_Words( (UWord)t->key, (UWord)k );
sewardj95e79002007-12-09 02:14:35 +0000411 if (cmpresS == 0) return t; /* unlikely ==> predictable */
412 cmpresU = (UWord)cmpresS;
sewardjefd3b4d2007-12-02 02:05:23 +0000413 cmpresU >>=/*unsigned*/ (8 * sizeof(cmpresU) - 1);
sewardjb4112022007-11-09 22:49:28 +0000414 t = t->child[cmpresU];
415 }
416 }
417}
418
sewardj9c606bd2008-09-18 18:12:50 +0000419static
florianee0d0e92014-10-18 16:17:13 +0000420Bool avl_find_bounds ( const AvlNode* t,
sewardj95208452008-10-18 19:55:31 +0000421 /*OUT*/UWord* kMinP, /*OUT*/UWord* vMinP,
422 /*OUT*/UWord* kMaxP, /*OUT*/UWord* vMaxP,
423 UWord minKey, UWord minVal,
424 UWord maxKey, UWord maxVal,
425 UWord key,
sewardj9c606bd2008-09-18 18:12:50 +0000426 Word(*kCmp)(UWord,UWord) )
427{
sewardj95208452008-10-18 19:55:31 +0000428 UWord kLowerBound = minKey;
429 UWord vLowerBound = minVal;
430 UWord kUpperBound = maxKey;
431 UWord vUpperBound = maxVal;
sewardj9c606bd2008-09-18 18:12:50 +0000432 while (t) {
433 Word cmpresS = kCmp ? kCmp(t->key, key)
434 : cmp_unsigned_Words(t->key, key);
435 if (cmpresS < 0) {
sewardj95208452008-10-18 19:55:31 +0000436 kLowerBound = t->key;
437 vLowerBound = t->val;
sewardj9c606bd2008-09-18 18:12:50 +0000438 t = t->child[1];
439 continue;
440 }
441 if (cmpresS > 0) {
sewardj95208452008-10-18 19:55:31 +0000442 kUpperBound = t->key;
443 vUpperBound = t->val;
sewardj9c606bd2008-09-18 18:12:50 +0000444 t = t->child[0];
445 continue;
446 }
447 /* We should never get here. If we do, it means the given key
448 is actually present in the tree, which means the original
449 call was invalid -- an error on the caller's part, and we
450 cannot give any meaningful values for the bounds. (Well,
451 maybe we could, but we're not gonna. Ner!) */
452 return False;
453 }
sewardj95208452008-10-18 19:55:31 +0000454 if (kMinP) *kMinP = kLowerBound;
455 if (vMinP) *vMinP = vLowerBound;
456 if (kMaxP) *kMaxP = kUpperBound;
457 if (vMaxP) *vMaxP = vUpperBound;
sewardj9c606bd2008-09-18 18:12:50 +0000458 return True;
459}
460
sewardjb4112022007-11-09 22:49:28 +0000461// Clear the iterator stack.
462static void stackClear(WordFM* fm)
463{
464 Int i;
floriane2800c92014-09-15 20:57:45 +0000465 vg_assert(fm);
sewardjb4112022007-11-09 22:49:28 +0000466 for (i = 0; i < WFM_STKMAX; i++) {
467 fm->nodeStack[i] = NULL;
468 fm->numStack[i] = 0;
469 }
470 fm->stackTop = 0;
471}
472
473// Push onto the iterator stack.
474static inline void stackPush(WordFM* fm, AvlNode* n, Int i)
475{
floriane2800c92014-09-15 20:57:45 +0000476 vg_assert(fm->stackTop < WFM_STKMAX);
477 vg_assert(1 <= i && i <= 3);
sewardjb4112022007-11-09 22:49:28 +0000478 fm->nodeStack[fm->stackTop] = n;
479 fm-> numStack[fm->stackTop] = i;
480 fm->stackTop++;
481}
482
483// Pop from the iterator stack.
484static inline Bool stackPop(WordFM* fm, AvlNode** n, Int* i)
485{
floriane2800c92014-09-15 20:57:45 +0000486 vg_assert(fm->stackTop <= WFM_STKMAX);
sewardjb4112022007-11-09 22:49:28 +0000487
488 if (fm->stackTop > 0) {
489 fm->stackTop--;
490 *n = fm->nodeStack[fm->stackTop];
491 *i = fm-> numStack[fm->stackTop];
floriane2800c92014-09-15 20:57:45 +0000492 vg_assert(1 <= *i && *i <= 3);
sewardjb4112022007-11-09 22:49:28 +0000493 fm->nodeStack[fm->stackTop] = NULL;
494 fm-> numStack[fm->stackTop] = 0;
495 return True;
496 } else {
497 return False;
498 }
499}
500
501static
florianee0d0e92014-10-18 16:17:13 +0000502AvlNode* avl_dopy ( const AvlNode* nd,
sewardj250ec2e2008-02-15 22:02:30 +0000503 UWord(*dopyK)(UWord),
504 UWord(*dopyV)(UWord),
florian54fe2022012-10-27 23:07:42 +0000505 void*(alloc_nofail)(const HChar*,SizeT),
506 const HChar* cc )
sewardjb4112022007-11-09 22:49:28 +0000507{
508 AvlNode* nyu;
florian6aa8bd02014-09-14 22:11:44 +0000509
510 vg_assert(nd != NULL);
511
sewardj9c606bd2008-09-18 18:12:50 +0000512 nyu = alloc_nofail(cc, sizeof(AvlNode));
sewardjb4112022007-11-09 22:49:28 +0000513
514 nyu->child[0] = nd->child[0];
515 nyu->child[1] = nd->child[1];
516 nyu->balance = nd->balance;
517
518 /* Copy key */
519 if (dopyK) {
520 nyu->key = dopyK( nd->key );
sewardjb4112022007-11-09 22:49:28 +0000521 } else {
522 /* copying assumedly unboxed keys */
523 nyu->key = nd->key;
524 }
525
526 /* Copy val */
527 if (dopyV) {
528 nyu->val = dopyV( nd->val );
sewardjb4112022007-11-09 22:49:28 +0000529 } else {
530 /* copying assumedly unboxed vals */
531 nyu->val = nd->val;
532 }
533
534 /* Copy subtrees */
535 if (nyu->child[0]) {
sewardj9c606bd2008-09-18 18:12:50 +0000536 nyu->child[0] = avl_dopy( nyu->child[0], dopyK, dopyV,
537 alloc_nofail, cc );
sewardjb4112022007-11-09 22:49:28 +0000538 }
539 if (nyu->child[1]) {
sewardj9c606bd2008-09-18 18:12:50 +0000540 nyu->child[1] = avl_dopy( nyu->child[1], dopyK, dopyV,
541 alloc_nofail, cc );
sewardjb4112022007-11-09 22:49:28 +0000542 }
543
544 return nyu;
545}
546
547/* Initialise a WordFM. */
548static void initFM ( WordFM* fm,
florian54fe2022012-10-27 23:07:42 +0000549 void* (*alloc_nofail)( const HChar*, SizeT ),
550 const HChar* cc,
sewardjb4112022007-11-09 22:49:28 +0000551 void (*dealloc)(void*),
sewardj250ec2e2008-02-15 22:02:30 +0000552 Word (*kCmp)(UWord,UWord) )
sewardjb4112022007-11-09 22:49:28 +0000553{
554 fm->root = 0;
555 fm->kCmp = kCmp;
556 fm->alloc_nofail = alloc_nofail;
sewardj9c606bd2008-09-18 18:12:50 +0000557 fm->cc = cc;
sewardjb4112022007-11-09 22:49:28 +0000558 fm->dealloc = dealloc;
559 fm->stackTop = 0;
560}
561
562/* --- Public interface functions --- */
563
sewardj250ec2e2008-02-15 22:02:30 +0000564/* Allocate and initialise a WordFM. If kCmp is non-NULL, elements in
565 the set are ordered according to the ordering specified by kCmp,
566 which becomes obvious if you use VG_(initIterFM),
567 VG_(initIterAtFM), VG_(nextIterFM), VG_(doneIterFM) to iterate over
sewardjfc4b63a2008-02-17 11:46:58 +0000568 sections of the map, or the whole thing. If kCmp is NULL then the
569 ordering used is unsigned word ordering (UWord) on the key
570 values. */
florian54fe2022012-10-27 23:07:42 +0000571WordFM* VG_(newFM) ( void* (*alloc_nofail)( const HChar*, SizeT ),
572 const HChar* cc,
sewardjb4112022007-11-09 22:49:28 +0000573 void (*dealloc)(void*),
sewardj250ec2e2008-02-15 22:02:30 +0000574 Word (*kCmp)(UWord,UWord) )
sewardjb4112022007-11-09 22:49:28 +0000575{
sewardj9c606bd2008-09-18 18:12:50 +0000576 WordFM* fm = alloc_nofail(cc, sizeof(WordFM));
sewardj9c606bd2008-09-18 18:12:50 +0000577 initFM(fm, alloc_nofail, cc, dealloc, kCmp);
sewardjb4112022007-11-09 22:49:28 +0000578 return fm;
579}
580
581static void avl_free ( AvlNode* nd,
sewardj250ec2e2008-02-15 22:02:30 +0000582 void(*kFin)(UWord),
583 void(*vFin)(UWord),
sewardjb4112022007-11-09 22:49:28 +0000584 void(*dealloc)(void*) )
585{
586 if (!nd)
587 return;
588 if (nd->child[0])
589 avl_free(nd->child[0], kFin, vFin, dealloc);
590 if (nd->child[1])
591 avl_free(nd->child[1], kFin, vFin, dealloc);
592 if (kFin)
593 kFin( nd->key );
594 if (vFin)
595 vFin( nd->val );
596 VG_(memset)(nd, 0, sizeof(AvlNode));
597 dealloc(nd);
598}
599
600/* Free up the FM. If kFin is non-NULL, it is applied to keys
601 before the FM is deleted; ditto with vFin for vals. */
sewardj896f6f92008-08-19 08:38:52 +0000602void VG_(deleteFM) ( WordFM* fm, void(*kFin)(UWord), void(*vFin)(UWord) )
sewardjb4112022007-11-09 22:49:28 +0000603{
604 void(*dealloc)(void*) = fm->dealloc;
605 avl_free( fm->root, kFin, vFin, dealloc );
606 VG_(memset)(fm, 0, sizeof(WordFM) );
607 dealloc(fm);
608}
609
610/* Add (k,v) to fm. */
sewardj9c606bd2008-09-18 18:12:50 +0000611Bool VG_(addToFM) ( WordFM* fm, UWord k, UWord v )
sewardjb4112022007-11-09 22:49:28 +0000612{
613 MaybeWord oldV;
614 AvlNode* node;
sewardjd86e3a22008-12-03 11:39:37 +0000615 node = fm->alloc_nofail( fm->cc, sizeof(AvlNode) );
sewardjb4112022007-11-09 22:49:28 +0000616 node->key = k;
617 node->val = v;
618 oldV.b = False;
619 oldV.w = 0;
620 avl_insert_wrk( &fm->root, &oldV, node, fm->kCmp );
621 //if (oldV.b && fm->vFin)
622 // fm->vFin( oldV.w );
623 if (oldV.b)
624 fm->dealloc(node);
sewardj9c606bd2008-09-18 18:12:50 +0000625 return oldV.b;
sewardjb4112022007-11-09 22:49:28 +0000626}
627
628// Delete key from fm, returning associated key and val if found
sewardj896f6f92008-08-19 08:38:52 +0000629Bool VG_(delFromFM) ( WordFM* fm,
sewardj250ec2e2008-02-15 22:02:30 +0000630 /*OUT*/UWord* oldK, /*OUT*/UWord* oldV, UWord key )
sewardjb4112022007-11-09 22:49:28 +0000631{
632 AvlNode* node = avl_find_node( fm->root, key, fm->kCmp );
633 if (node) {
634 avl_remove_wrk( &fm->root, node, fm->kCmp );
635 if (oldK)
636 *oldK = node->key;
637 if (oldV)
638 *oldV = node->val;
639 fm->dealloc(node);
640 return True;
641 } else {
642 return False;
643 }
644}
645
646// Look up in fm, assigning found key & val at spec'd addresses
florianee0d0e92014-10-18 16:17:13 +0000647Bool VG_(lookupFM) ( const WordFM* fm,
sewardj250ec2e2008-02-15 22:02:30 +0000648 /*OUT*/UWord* keyP, /*OUT*/UWord* valP, UWord key )
sewardjb4112022007-11-09 22:49:28 +0000649{
650 AvlNode* node = avl_find_node( fm->root, key, fm->kCmp );
651 if (node) {
652 if (keyP)
653 *keyP = node->key;
654 if (valP)
655 *valP = node->val;
656 return True;
657 } else {
658 return False;
659 }
660}
661
sewardj9c606bd2008-09-18 18:12:50 +0000662// See comment in pub_tool_wordfm.h for explanation
florianee0d0e92014-10-18 16:17:13 +0000663Bool VG_(findBoundsFM)( const WordFM* fm,
sewardj95208452008-10-18 19:55:31 +0000664 /*OUT*/UWord* kMinP, /*OUT*/UWord* vMinP,
665 /*OUT*/UWord* kMaxP, /*OUT*/UWord* vMaxP,
666 UWord minKey, UWord minVal,
667 UWord maxKey, UWord maxVal,
668 UWord key )
sewardj9c606bd2008-09-18 18:12:50 +0000669{
sewardjf8aecf42009-01-29 08:46:15 +0000670 /* really we should assert that minKey <= key <= maxKey,
671 where <= is as defined by fm->kCmp. */
sewardj95208452008-10-18 19:55:31 +0000672 return avl_find_bounds( fm->root, kMinP, vMinP,
673 kMaxP, vMaxP,
674 minKey, minVal,
675 maxKey, maxVal,
sewardj9c606bd2008-09-18 18:12:50 +0000676 key, fm->kCmp );
677}
678
sewardj0b5f9fc2008-11-13 13:17:06 +0000679// See comment in pub_tool_wordfm.h for performance warning
florianee0d0e92014-10-18 16:17:13 +0000680UWord VG_(sizeFM) ( const WordFM* fm )
sewardjb4112022007-11-09 22:49:28 +0000681{
682 // Hmm, this is a bad way to do this
683 return fm->root ? size_avl_nonNull( fm->root ) : 0;
684}
685
sewardj0b5f9fc2008-11-13 13:17:06 +0000686// NB UNTESTED! TEST BEFORE USE!
florianee0d0e92014-10-18 16:17:13 +0000687//Bool VG_(isEmptyFM)( const WordFM* fm )
sewardj0b5f9fc2008-11-13 13:17:06 +0000688//{
689// return fm->root ? False : True;
690//}
691
sewardjb4112022007-11-09 22:49:28 +0000692// set up FM for iteration
sewardj896f6f92008-08-19 08:38:52 +0000693void VG_(initIterFM) ( WordFM* fm )
sewardjb4112022007-11-09 22:49:28 +0000694{
floriane2800c92014-09-15 20:57:45 +0000695 vg_assert(fm);
sewardjb4112022007-11-09 22:49:28 +0000696 stackClear(fm);
697 if (fm->root)
698 stackPush(fm, fm->root, 1);
699}
700
sewardj250ec2e2008-02-15 22:02:30 +0000701// set up FM for iteration so that the first key subsequently produced
sewardj896f6f92008-08-19 08:38:52 +0000702// by VG_(nextIterFM) is the smallest key in the map >= start_at.
sewardj250ec2e2008-02-15 22:02:30 +0000703// Naturally ">=" is defined by the comparison function supplied to
sewardj896f6f92008-08-19 08:38:52 +0000704// VG_(newFM), as documented above.
705void VG_(initIterAtFM) ( WordFM* fm, UWord start_at )
sewardjae5137e2008-01-17 23:19:54 +0000706{
707 Int i;
708 AvlNode *n, *t;
709 Word cmpresS; /* signed */
710 UWord cmpresU; /* unsigned */
711
floriane2800c92014-09-15 20:57:45 +0000712 vg_assert(fm);
sewardjae5137e2008-01-17 23:19:54 +0000713 stackClear(fm);
714
715 if (!fm->root)
716 return;
717
718 n = NULL;
719 // We need to do regular search and fill in the stack.
720 t = fm->root;
721
722 while (True) {
723 if (t == NULL) return;
724
725 cmpresS
726 = fm->kCmp ? /*boxed*/ fm->kCmp( t->key, start_at )
sewardj250ec2e2008-02-15 22:02:30 +0000727 : /*unboxed*/ cmp_unsigned_Words( t->key, start_at );
sewardjae5137e2008-01-17 23:19:54 +0000728
729 if (cmpresS == 0) {
730 // We found the exact key -- we are done.
731 // The iteration should start with this node.
732 stackPush(fm, t, 2);
733 // The stack now looks like {2, 2, ... ,2, 2}
734 return;
735 }
736 cmpresU = (UWord)cmpresS;
737 cmpresU >>=/*unsigned*/ (8 * sizeof(cmpresU) - 1);
738 if (!cmpresU) {
739 // Push this node only if we go to the left child.
740 stackPush(fm, t, 2);
741 }
742 t = t->child[cmpresU];
743 }
744 if (stackPop(fm, &n, &i)) {
745 // If we've pushed something to stack and did not find the exact key,
746 // we must fix the top element of stack.
floriane2800c92014-09-15 20:57:45 +0000747 vg_assert(i == 2);
sewardjae5137e2008-01-17 23:19:54 +0000748 stackPush(fm, n, 3);
749 // the stack looks like {2, 2, ..., 2, 3}
750 }
751}
752
floriane2800c92014-09-15 20:57:45 +0000753// get next key/val pair. Will vg_assert if fm has been modified
sewardjae5137e2008-01-17 23:19:54 +0000754// or looked up in since initIter{,At}FM was called.
sewardj896f6f92008-08-19 08:38:52 +0000755Bool VG_(nextIterFM) ( WordFM* fm, /*OUT*/UWord* pKey, /*OUT*/UWord* pVal )
sewardjb4112022007-11-09 22:49:28 +0000756{
757 Int i = 0;
758 AvlNode* n = NULL;
759
floriane2800c92014-09-15 20:57:45 +0000760 vg_assert(fm);
sewardjb4112022007-11-09 22:49:28 +0000761
762 // This in-order traversal requires each node to be pushed and popped
763 // three times. These could be avoided by updating nodes in-situ on the
764 // top of the stack, but the push/pop cost is so small that it's worth
765 // keeping this loop in this simpler form.
766 while (stackPop(fm, &n, &i)) {
767 switch (i) {
768 case 1: case_1:
769 stackPush(fm, n, 2);
770 /* if (n->child[0]) stackPush(fm, n->child[0], 1); */
771 if (n->child[0]) { n = n->child[0]; goto case_1; }
772 break;
773 case 2:
774 stackPush(fm, n, 3);
775 if (pKey) *pKey = n->key;
776 if (pVal) *pVal = n->val;
777 return True;
778 case 3:
779 /* if (n->child[1]) stackPush(fm, n->child[1], 1); */
780 if (n->child[1]) { n = n->child[1]; goto case_1; }
781 break;
782 default:
floriane2800c92014-09-15 20:57:45 +0000783 vg_assert(0);
sewardjb4112022007-11-09 22:49:28 +0000784 }
785 }
786
787 // Stack empty, iterator is exhausted, return NULL
788 return False;
789}
790
florian6aa8bd02014-09-14 22:11:44 +0000791// Finish an FM iteration
sewardj896f6f92008-08-19 08:38:52 +0000792void VG_(doneIterFM) ( WordFM* fm )
sewardjb4112022007-11-09 22:49:28 +0000793{
794}
795
florianee0d0e92014-10-18 16:17:13 +0000796WordFM* VG_(dopyFM) ( WordFM* fm, UWord(*dopyK)(UWord),
797 UWord(*dopyV)(UWord) )
sewardjb4112022007-11-09 22:49:28 +0000798{
799 WordFM* nyu;
800
801 /* can't clone the fm whilst iterating on it */
floriane2800c92014-09-15 20:57:45 +0000802 vg_assert(fm->stackTop == 0);
sewardjb4112022007-11-09 22:49:28 +0000803
sewardj9c606bd2008-09-18 18:12:50 +0000804 nyu = fm->alloc_nofail( fm->cc, sizeof(WordFM) );
sewardjb4112022007-11-09 22:49:28 +0000805
806 *nyu = *fm;
807
808 fm->stackTop = 0;
809 VG_(memset)(fm->nodeStack, 0, sizeof(fm->nodeStack));
810 VG_(memset)(fm->numStack, 0, sizeof(fm->numStack));
811
812 if (nyu->root) {
sewardj9c606bd2008-09-18 18:12:50 +0000813 nyu->root = avl_dopy( nyu->root, dopyK, dopyV,
814 fm->alloc_nofail, fm->cc );
sewardjb4112022007-11-09 22:49:28 +0000815 if (! nyu->root)
816 return NULL;
817 }
818
819 return nyu;
820}
821
822//------------------------------------------------------------------//
823//--- end WordFM ---//
824//--- Implementation ---//
825//------------------------------------------------------------------//
826
827//------------------------------------------------------------------//
828//--- WordBag (unboxed words only) ---//
829//--- Implementation ---//
830//------------------------------------------------------------------//
831
832/* A trivial container, to make it opaque. */
833struct _WordBag {
834 WordFM* fm;
835};
836
florian54fe2022012-10-27 23:07:42 +0000837WordBag* VG_(newBag) ( void* (*alloc_nofail)( const HChar*, SizeT ),
838 const HChar* cc,
sewardjb4112022007-11-09 22:49:28 +0000839 void (*dealloc)(void*) )
840{
sewardj9c606bd2008-09-18 18:12:50 +0000841 WordBag* bag = alloc_nofail(cc, sizeof(WordBag));
842 bag->fm = VG_(newFM)( alloc_nofail, cc, dealloc, NULL );
sewardjb4112022007-11-09 22:49:28 +0000843 return bag;
844}
845
sewardj896f6f92008-08-19 08:38:52 +0000846void VG_(deleteBag) ( WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000847{
848 void (*dealloc)(void*) = bag->fm->dealloc;
sewardj896f6f92008-08-19 08:38:52 +0000849 VG_(deleteFM)( bag->fm, NULL, NULL );
sewardjb4112022007-11-09 22:49:28 +0000850 VG_(memset)(bag, 0, sizeof(WordBag));
851 dealloc(bag);
852}
853
sewardj896f6f92008-08-19 08:38:52 +0000854void VG_(addToBag)( WordBag* bag, UWord w )
sewardjb4112022007-11-09 22:49:28 +0000855{
sewardj250ec2e2008-02-15 22:02:30 +0000856 UWord key, count;
sewardj896f6f92008-08-19 08:38:52 +0000857 if (VG_(lookupFM)(bag->fm, &key, &count, w)) {
floriane2800c92014-09-15 20:57:45 +0000858 vg_assert(key == w);
859 vg_assert(count >= 1);
sewardj896f6f92008-08-19 08:38:52 +0000860 VG_(addToFM)(bag->fm, w, count+1);
sewardjb4112022007-11-09 22:49:28 +0000861 } else {
sewardj896f6f92008-08-19 08:38:52 +0000862 VG_(addToFM)(bag->fm, w, 1);
sewardjb4112022007-11-09 22:49:28 +0000863 }
864}
865
florianee0d0e92014-10-18 16:17:13 +0000866UWord VG_(elemBag) ( const WordBag* bag, UWord w )
sewardjb4112022007-11-09 22:49:28 +0000867{
sewardj250ec2e2008-02-15 22:02:30 +0000868 UWord key, count;
sewardj896f6f92008-08-19 08:38:52 +0000869 if (VG_(lookupFM)( bag->fm, &key, &count, w)) {
floriane2800c92014-09-15 20:57:45 +0000870 vg_assert(key == w);
871 vg_assert(count >= 1);
sewardjb4112022007-11-09 22:49:28 +0000872 return count;
873 } else {
874 return 0;
875 }
876}
877
florianee0d0e92014-10-18 16:17:13 +0000878UWord VG_(sizeUniqueBag) ( const WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000879{
sewardj896f6f92008-08-19 08:38:52 +0000880 return VG_(sizeFM)( bag->fm );
sewardjb4112022007-11-09 22:49:28 +0000881}
882
florianee0d0e92014-10-18 16:17:13 +0000883static UWord sizeTotalBag_wrk ( const AvlNode* nd )
sewardjb4112022007-11-09 22:49:28 +0000884{
885 /* unchecked pre: nd is non-NULL */
sewardj250ec2e2008-02-15 22:02:30 +0000886 UWord w = nd->val;
floriane2800c92014-09-15 20:57:45 +0000887 vg_assert(w >= 1);
sewardjb4112022007-11-09 22:49:28 +0000888 if (nd->child[0])
889 w += sizeTotalBag_wrk(nd->child[0]);
890 if (nd->child[1])
891 w += sizeTotalBag_wrk(nd->child[1]);
892 return w;
893}
florianee0d0e92014-10-18 16:17:13 +0000894UWord VG_(sizeTotalBag)( const WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000895{
896 if (bag->fm->root)
897 return sizeTotalBag_wrk(bag->fm->root);
898 else
899 return 0;
900}
901
sewardj896f6f92008-08-19 08:38:52 +0000902Bool VG_(delFromBag)( WordBag* bag, UWord w )
sewardjb4112022007-11-09 22:49:28 +0000903{
sewardj250ec2e2008-02-15 22:02:30 +0000904 UWord key, count;
sewardj896f6f92008-08-19 08:38:52 +0000905 if (VG_(lookupFM)(bag->fm, &key, &count, w)) {
floriane2800c92014-09-15 20:57:45 +0000906 vg_assert(key == w);
907 vg_assert(count >= 1);
sewardjb4112022007-11-09 22:49:28 +0000908 if (count > 1) {
sewardj896f6f92008-08-19 08:38:52 +0000909 VG_(addToFM)(bag->fm, w, count-1);
sewardjb4112022007-11-09 22:49:28 +0000910 } else {
floriane2800c92014-09-15 20:57:45 +0000911 vg_assert(count == 1);
sewardj896f6f92008-08-19 08:38:52 +0000912 VG_(delFromFM)( bag->fm, NULL, NULL, w );
sewardjb4112022007-11-09 22:49:28 +0000913 }
914 return True;
915 } else {
916 return False;
917 }
918}
919
florianee0d0e92014-10-18 16:17:13 +0000920Bool VG_(isEmptyBag)( const WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000921{
sewardj896f6f92008-08-19 08:38:52 +0000922 return VG_(sizeFM)(bag->fm) == 0;
sewardjb4112022007-11-09 22:49:28 +0000923}
924
florianee0d0e92014-10-18 16:17:13 +0000925Bool VG_(isSingletonTotalBag)( const WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000926{
927 AvlNode* nd;
sewardj896f6f92008-08-19 08:38:52 +0000928 if (VG_(sizeFM)(bag->fm) != 1)
sewardjb4112022007-11-09 22:49:28 +0000929 return False;
930 nd = bag->fm->root;
floriane2800c92014-09-15 20:57:45 +0000931 vg_assert(nd);
932 vg_assert(!nd->child[0]);
933 vg_assert(!nd->child[1]);
sewardjb4112022007-11-09 22:49:28 +0000934 return nd->val == 1;
935}
936
florianee0d0e92014-10-18 16:17:13 +0000937UWord VG_(anyElementOfBag)( const WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000938{
939 /* Return an arbitrarily chosen element in the bag. We might as
940 well return the one at the root of the underlying AVL tree. */
941 AvlNode* nd = bag->fm->root;
floriane2800c92014-09-15 20:57:45 +0000942 vg_assert(nd); /* if this fails, 'bag' is empty - caller is in error. */
943 vg_assert(nd->val >= 1);
sewardjb4112022007-11-09 22:49:28 +0000944 return nd->key;
945}
946
sewardj896f6f92008-08-19 08:38:52 +0000947void VG_(initIterBag)( WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000948{
sewardj896f6f92008-08-19 08:38:52 +0000949 VG_(initIterFM)(bag->fm);
sewardjb4112022007-11-09 22:49:28 +0000950}
951
sewardj896f6f92008-08-19 08:38:52 +0000952Bool VG_(nextIterBag)( WordBag* bag, /*OUT*/UWord* pVal, /*OUT*/UWord* pCount )
sewardjb4112022007-11-09 22:49:28 +0000953{
sewardj896f6f92008-08-19 08:38:52 +0000954 return VG_(nextIterFM)( bag->fm, pVal, pCount );
sewardjb4112022007-11-09 22:49:28 +0000955}
956
sewardj896f6f92008-08-19 08:38:52 +0000957void VG_(doneIterBag)( WordBag* bag )
sewardjb4112022007-11-09 22:49:28 +0000958{
sewardj896f6f92008-08-19 08:38:52 +0000959 VG_(doneIterFM)( bag->fm );
sewardjb4112022007-11-09 22:49:28 +0000960}
961
962//------------------------------------------------------------------//
963//--- end WordBag (unboxed words only) ---//
964//--- Implementation ---//
965//------------------------------------------------------------------//
966
sewardjb4112022007-11-09 22:49:28 +0000967/*--------------------------------------------------------------------*/
sewardj896f6f92008-08-19 08:38:52 +0000968/*--- end m_wordfm.c ---*/
sewardjb4112022007-11-09 22:49:28 +0000969/*--------------------------------------------------------------------*/