blob: ef03298f9d2d33136fb239911f32bce79abe00a3 [file] [log] [blame]
njn3e884182003-04-15 13:03:23 +00001
2/*--------------------------------------------------------------------*/
njnf69f9452005-07-03 17:53:11 +00003/*--- A separately-chained hash table. m_hashtable.c ---*/
njn3e884182003-04-15 13:03:23 +00004/*--------------------------------------------------------------------*/
5
6/*
njnb9c427c2004-12-01 14:14:42 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
njn3e884182003-04-15 13:03:23 +00009
Elliott Hughesed398002017-06-21 14:41:24 -070010 Copyright (C) 2005-2017 Nicholas Nethercote
sewardj0c24f8a2006-10-17 02:11:55 +000011 njn@valgrind.org
njn3e884182003-04-15 13:03:23 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
njnc7561b92005-06-19 01:24:32 +000031#include "pub_core_basics.h"
sewardj3f94a7d2007-08-25 07:19:08 +000032#include "pub_core_debuglog.h"
njn81c00df2005-05-14 21:28:43 +000033#include "pub_core_hashtable.h"
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_core_libcassert.h"
philippe64ba5742014-06-19 20:33:27 +000035#include "pub_core_libcbase.h"
philippe7293d252014-06-14 16:30:09 +000036#include "pub_core_libcprint.h"
njnaf1d7df2005-06-11 01:31:52 +000037#include "pub_core_mallocfree.h"
njn3e884182003-04-15 13:03:23 +000038
39/*--------------------------------------------------------------------*/
40/*--- Declarations ---*/
41/*--------------------------------------------------------------------*/
42
njnb8824022005-07-03 17:10:04 +000043#define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
44
njnf69f9452005-07-03 17:53:11 +000045struct _VgHashTable {
sewardj3f94a7d2007-08-25 07:19:08 +000046 UInt n_chains; // should be prime
47 UInt n_elements;
48 VgHashNode* iterNode; // current iterator node
49 UInt iterChain; // next chain to be traversed by the iterator
50 VgHashNode** chains; // expanding array of hash chains
51 Bool iterOK; // table safe to iterate over?
floriandbb35842012-10-27 18:39:11 +000052 const HChar* name; // name of table (for debugging only)
sewardj3f94a7d2007-08-25 07:19:08 +000053};
54
55#define N_HASH_PRIMES 20
56
florian09a4c792014-10-18 10:58:05 +000057static const SizeT primes[N_HASH_PRIMES] = {
sewardj3f94a7d2007-08-25 07:19:08 +000058 769UL, 1543UL, 3079UL, 6151UL,
59 12289UL, 24593UL, 49157UL, 98317UL,
60 196613UL, 393241UL, 786433UL, 1572869UL,
61 3145739UL, 6291469UL, 12582917UL, 25165843UL,
62 50331653UL, 100663319UL, 201326611UL, 402653189UL
njnf69f9452005-07-03 17:53:11 +000063};
njn3e884182003-04-15 13:03:23 +000064
65/*--------------------------------------------------------------------*/
66/*--- Functions ---*/
67/*--------------------------------------------------------------------*/
68
florian09a4c792014-10-18 10:58:05 +000069VgHashTable *VG_(HT_construct) ( const HChar* name )
njn3e884182003-04-15 13:03:23 +000070{
njnce46c2a2003-07-21 10:52:07 +000071 /* Initialises to zero, ie. all entries NULL */
sewardj3f94a7d2007-08-25 07:19:08 +000072 SizeT n_chains = primes[0];
73 SizeT sz = n_chains * sizeof(VgHashNode*);
florian09a4c792014-10-18 10:58:05 +000074 VgHashTable *table = VG_(calloc)("hashtable.Hc.1",
sewardj9c606bd2008-09-18 18:12:50 +000075 1, sizeof(struct _VgHashTable));
76 table->chains = VG_(calloc)("hashtable.Hc.2", 1, sz);
sewardj3f94a7d2007-08-25 07:19:08 +000077 table->n_chains = n_chains;
78 table->n_elements = 0;
79 table->iterOK = True;
80 table->name = name;
81 vg_assert(name);
njnf69f9452005-07-03 17:53:11 +000082 return table;
njn3e884182003-04-15 13:03:23 +000083}
84
floriana6a6d922015-08-05 11:26:10 +000085UInt VG_(HT_count_nodes) ( const VgHashTable *table )
njn3e884182003-04-15 13:03:23 +000086{
sewardj3f94a7d2007-08-25 07:19:08 +000087 return table->n_elements;
88}
njn3e884182003-04-15 13:03:23 +000089
florian09a4c792014-10-18 10:58:05 +000090static void resize ( VgHashTable *table )
sewardj3f94a7d2007-08-25 07:19:08 +000091{
92 Int i;
93 SizeT sz;
94 SizeT old_chains = table->n_chains;
95 SizeT new_chains = old_chains + 1;
96 VgHashNode** chains;
97 VgHashNode * node;
98
99 /* If we've run out of primes, do nothing. */
100 if (old_chains == primes[N_HASH_PRIMES-1])
101 return;
102
103 vg_assert(old_chains >= primes[0]
104 && old_chains < primes[N_HASH_PRIMES-1]);
105
106 for (i = 0; i < N_HASH_PRIMES; i++) {
107 if (primes[i] > new_chains) {
108 new_chains = primes[i];
109 break;
110 }
111 }
112
113 vg_assert(new_chains > old_chains);
114 vg_assert(new_chains > primes[0]
115 && new_chains <= primes[N_HASH_PRIMES-1]);
116
117 VG_(debugLog)(
118 1, "hashtable",
119 "resizing table `%s' from %lu to %lu (total elems %lu)\n",
120 table->name, (UWord)old_chains, (UWord)new_chains,
121 (UWord)table->n_elements );
122
123 table->n_chains = new_chains;
124 sz = new_chains * sizeof(VgHashNode*);
sewardj9c606bd2008-09-18 18:12:50 +0000125 chains = VG_(calloc)("hashtable.resize.1", 1, sz);
sewardj3f94a7d2007-08-25 07:19:08 +0000126
127 for (i = 0; i < old_chains; i++) {
128 node = table->chains[i];
129 while (node != NULL) {
130 VgHashNode* next = node->next;
131 UWord chain = CHAIN_NO(node->key, table);
132 node->next = chains[chain];
133 chains[chain] = node;
134 node = next;
135 }
136 }
137
138 VG_(free)(table->chains);
139 table->chains = chains;
njn3e884182003-04-15 13:03:23 +0000140}
141
njnf69f9452005-07-03 17:53:11 +0000142/* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends
njnb965efb2009-08-10 07:36:54 +0000143 the node to the appropriate chain. No duplicate key detection is done. */
florian09a4c792014-10-18 10:58:05 +0000144void VG_(HT_add_node) ( VgHashTable *table, void* vnode )
njn3e884182003-04-15 13:03:23 +0000145{
njn246a9d22005-08-14 06:24:20 +0000146 VgHashNode* node = (VgHashNode*)vnode;
sewardj3f94a7d2007-08-25 07:19:08 +0000147 UWord chain = CHAIN_NO(node->key, table);
njnf69f9452005-07-03 17:53:11 +0000148 node->next = table->chains[chain];
149 table->chains[chain] = node;
sewardj3f94a7d2007-08-25 07:19:08 +0000150 table->n_elements++;
151 if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
152 resize(table);
njn3e884182003-04-15 13:03:23 +0000153 }
154
sewardj3f94a7d2007-08-25 07:19:08 +0000155 /* Table has been modified; hence HT_Next should assert. */
156 table->iterOK = False;
njn3e884182003-04-15 13:03:23 +0000157}
158
philippe7293d252014-06-14 16:30:09 +0000159/* Looks up a VgHashNode by key in the table. Returns NULL if not found. */
florian09a4c792014-10-18 10:58:05 +0000160void* VG_(HT_lookup) ( const VgHashTable *table, UWord key )
njn34582fb2005-08-11 00:06:36 +0000161{
162 VgHashNode* curr = table->chains[ CHAIN_NO(key, table) ];
163
164 while (curr) {
165 if (key == curr->key) {
166 return curr;
167 }
168 curr = curr->next;
169 }
170 return NULL;
171}
172
philippe7293d252014-06-14 16:30:09 +0000173/* Looks up a VgHashNode by node in the table. Returns NULL if not found.
174 GEN!!! marks the lines that differs from VG_(HT_lookup). */
florian09a4c792014-10-18 10:58:05 +0000175void* VG_(HT_gen_lookup) ( const VgHashTable *table, const void* node,
176 HT_Cmp_t cmp )
philippe7293d252014-06-14 16:30:09 +0000177{
florian09a4c792014-10-18 10:58:05 +0000178 const VgHashNode* hnode = node; // GEN!!!
philippe7293d252014-06-14 16:30:09 +0000179 VgHashNode* curr = table->chains[ CHAIN_NO(hnode->key, table) ]; // GEN!!!
180
181 while (curr) {
philippee4374272015-05-20 15:08:09 +0000182 if (hnode->key == curr->key && cmp (hnode, curr) == 0) { // GEN!!!
philippe7293d252014-06-14 16:30:09 +0000183 return curr;
184 }
185 curr = curr->next;
186 }
187 return NULL;
188}
189
njn34582fb2005-08-11 00:06:36 +0000190/* Removes a VgHashNode from the table. Returns NULL if not found. */
florian09a4c792014-10-18 10:58:05 +0000191void* VG_(HT_remove) ( VgHashTable *table, UWord key )
njn34582fb2005-08-11 00:06:36 +0000192{
sewardj3f94a7d2007-08-25 07:19:08 +0000193 UWord chain = CHAIN_NO(key, table);
njn34582fb2005-08-11 00:06:36 +0000194 VgHashNode* curr = table->chains[chain];
195 VgHashNode** prev_next_ptr = &(table->chains[chain]);
196
sewardj3f94a7d2007-08-25 07:19:08 +0000197 /* Table has been modified; hence HT_Next should assert. */
198 table->iterOK = False;
199
njn34582fb2005-08-11 00:06:36 +0000200 while (curr) {
201 if (key == curr->key) {
202 *prev_next_ptr = curr->next;
sewardj3f94a7d2007-08-25 07:19:08 +0000203 table->n_elements--;
njn34582fb2005-08-11 00:06:36 +0000204 return curr;
205 }
206 prev_next_ptr = &(curr->next);
207 curr = curr->next;
208 }
209 return NULL;
210}
211
philippe7293d252014-06-14 16:30:09 +0000212/* Removes a VgHashNode by node from the table. Returns NULL if not found.
213 GEN!!! marks the lines that differs from VG_(HT_remove). */
florian09a4c792014-10-18 10:58:05 +0000214void* VG_(HT_gen_remove) ( VgHashTable *table, const void* node, HT_Cmp_t cmp )
philippe7293d252014-06-14 16:30:09 +0000215{
florian09a4c792014-10-18 10:58:05 +0000216 const VgHashNode* hnode = node; // GEN!!!
philippe7293d252014-06-14 16:30:09 +0000217 UWord chain = CHAIN_NO(hnode->key, table); // GEN!!!
218 VgHashNode* curr = table->chains[chain];
219 VgHashNode** prev_next_ptr = &(table->chains[chain]);
220
221 /* Table has been modified; hence HT_Next should assert. */
222 table->iterOK = False;
223
224 while (curr) {
philippee4374272015-05-20 15:08:09 +0000225 if (hnode->key == curr->key && cmp(hnode, curr) == 0) { // GEN!!!
philippe7293d252014-06-14 16:30:09 +0000226 *prev_next_ptr = curr->next;
227 table->n_elements--;
228 return curr;
229 }
230 prev_next_ptr = &(curr->next);
231 curr = curr->next;
232 }
233 return NULL;
234}
235
florian09a4c792014-10-18 10:58:05 +0000236void VG_(HT_print_stats) ( const VgHashTable *table, HT_Cmp_t cmp )
philippe7293d252014-06-14 16:30:09 +0000237{
238 #define MAXOCCUR 20
philippe64ba5742014-06-19 20:33:27 +0000239 UInt elt_occurences[MAXOCCUR+1];
240 UInt key_occurences[MAXOCCUR+1];
241 UInt cno_occurences[MAXOCCUR+1];
philippe7293d252014-06-14 16:30:09 +0000242 /* Key occurence : how many ht elements have the same key.
243 elt_occurences : how many elements are inserted multiple time.
244 cno_occurences : how many chains have that length.
philippe64ba5742014-06-19 20:33:27 +0000245 The last entry in these arrays collects all occurences >= MAXOCCUR. */
246 #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
philippe7293d252014-06-14 16:30:09 +0000247 UInt i;
248 UInt nkey, nelt, ncno;
249 VgHashNode *cnode, *node;
250
philippe64ba5742014-06-19 20:33:27 +0000251 VG_(memset)(key_occurences, 0, sizeof(key_occurences));
252 VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
253 VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
philippe7293d252014-06-14 16:30:09 +0000254
255 // Note that the below algorithm is quadractic in nr of elements in a chain
256 // but if that happens, the hash table/function is really bad and that
257 // should be fixed.
258 for (i = 0; i < table->n_chains; i++) {
259 ncno = 0;
260 for (cnode = table->chains[i]; cnode != NULL; cnode = cnode->next) {
261 ncno++;
262
263 nkey = 0;
264 // Is the same cnode->key existing before cnode ?
265 for (node = table->chains[i]; node != cnode; node = node->next) {
266 if (node->key == cnode->key)
267 nkey++;
268 }
269 // If cnode->key not in a previous node, count occurences of key.
270 if (nkey == 0) {
271 for (node = cnode; node != NULL; node = node->next) {
272 if (node->key == cnode->key)
273 nkey++;
274 }
275 INCOCCUR(key_occurences, nkey);
276 }
277
278 nelt = 0;
279 // Is the same cnode element existing before cnode ?
280 for (node = table->chains[i]; node != cnode; node = node->next) {
philippe6debd852015-05-21 22:01:19 +0000281 if (node->key == cnode->key
282 && (cmp == NULL || cmp (node, cnode) == 0)) {
283 nelt++;
284 }
philippe7293d252014-06-14 16:30:09 +0000285 }
286 // If cnode element not in a previous node, count occurences of elt.
287 if (nelt == 0) {
288 for (node = cnode; node != NULL; node = node->next) {
philippe6debd852015-05-21 22:01:19 +0000289 if (node->key == cnode->key
290 && (cmp == NULL || cmp (node, cnode) == 0)) {
291 nelt++;
292 }
philippe7293d252014-06-14 16:30:09 +0000293 }
294 INCOCCUR(elt_occurences, nelt);
295 }
296 }
297 INCOCCUR(cno_occurences, ncno);
298 }
299
300 VG_(message)(Vg_DebugMsg,
301 "nr occurences of"
302 " chains of len N,"
303 " N-plicated keys,"
304 " N-plicated elts\n");
305 nkey = nelt = ncno = 0;
philippe64ba5742014-06-19 20:33:27 +0000306 for (i = 0; i <= MAXOCCUR; i++) {
307 if (elt_occurences[i] > 0
308 || key_occurences[i] > 0
309 || cno_occurences[i] > 0)
310 VG_(message)(Vg_DebugMsg,
floriana5e06c32015-08-05 21:16:09 +0000311 "%s=%2u : nr chain %6u, nr keys %6u, nr elts %6u\n",
philippe64ba5742014-06-19 20:33:27 +0000312 i == MAXOCCUR ? ">" : "N", i,
313 cno_occurences[i], key_occurences[i], elt_occurences[i]);
philippe7293d252014-06-14 16:30:09 +0000314 nkey += key_occurences[i];
315 nelt += elt_occurences[i];
316 ncno += cno_occurences[i];
317 }
philippe64ba5742014-06-19 20:33:27 +0000318 VG_(message)(Vg_DebugMsg,
floriana5e06c32015-08-05 21:16:09 +0000319 "total nr of unique slots: %6u, keys %6u, elts %6u."
philippe47124e92015-04-25 14:00:24 +0000320 " Avg chain len %3.1f\n",
321 ncno, nkey, nelt,
322 (Double)nelt/(Double)(ncno == cno_occurences[0] ?
323 1 : ncno - cno_occurences[0]));
philippe7293d252014-06-14 16:30:09 +0000324}
325
326
njn29a5c012009-05-06 06:15:55 +0000327/* Allocates a suitably-sized array, copies pointers to all the hashtable
328 elements into it, then returns both the array and the size of it. The
329 array must be freed with VG_(free).
njn3e884182003-04-15 13:03:23 +0000330*/
florian09a4c792014-10-18 10:58:05 +0000331VgHashNode** VG_(HT_to_array) (const VgHashTable *table, /*OUT*/ UInt* n_elems)
njn3e884182003-04-15 13:03:23 +0000332{
333 UInt i, j;
334 VgHashNode** arr;
335 VgHashNode* node;
336
njn29a5c012009-05-06 06:15:55 +0000337 *n_elems = table->n_elements;
sewardj3f94a7d2007-08-25 07:19:08 +0000338 if (*n_elems == 0)
njn3e884182003-04-15 13:03:23 +0000339 return NULL;
340
sewardj9c606bd2008-09-18 18:12:50 +0000341 arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
njn3e884182003-04-15 13:03:23 +0000342
343 j = 0;
njnb8824022005-07-03 17:10:04 +0000344 for (i = 0; i < table->n_chains; i++) {
345 for (node = table->chains[i]; node != NULL; node = node->next) {
njn3e884182003-04-15 13:03:23 +0000346 arr[j++] = node;
347 }
348 }
sewardj3f94a7d2007-08-25 07:19:08 +0000349 vg_assert(j == *n_elems);
njn3e884182003-04-15 13:03:23 +0000350
njn3e884182003-04-15 13:03:23 +0000351 return arr;
352}
353
florian09a4c792014-10-18 10:58:05 +0000354void VG_(HT_ResetIter)(VgHashTable *table)
njn1af79722005-08-14 17:42:35 +0000355{
356 vg_assert(table);
357 table->iterNode = NULL;
358 table->iterChain = 0;
sewardj3f94a7d2007-08-25 07:19:08 +0000359 table->iterOK = True;
njn1af79722005-08-14 17:42:35 +0000360}
361
florian09a4c792014-10-18 10:58:05 +0000362void* VG_(HT_Next)(VgHashTable *table)
njn1af79722005-08-14 17:42:35 +0000363{
364 Int i;
365 vg_assert(table);
sewardj3f94a7d2007-08-25 07:19:08 +0000366 /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
367 In short if this fails, it means the caller tried to modify the
Elliott Hughesa0664b92017-04-18 17:46:52 -0700368 table whilst iterating over it, which is a bug.
369 One exception: HT_remove_at_Iter can remove the current entry and
370 leave the iterator in a valid state for HT_Next. */
sewardj3f94a7d2007-08-25 07:19:08 +0000371 vg_assert(table->iterOK);
372
njn1af79722005-08-14 17:42:35 +0000373 if (table->iterNode && table->iterNode->next) {
374 table->iterNode = table->iterNode->next;
375 return table->iterNode;
376 }
377
378 for (i = table->iterChain; i < table->n_chains; i++) {
379 if (table->chains[i]) {
380 table->iterNode = table->chains[i];
381 table->iterChain = i + 1; // Next chain to be traversed
382 return table->iterNode;
383 }
384 }
385 return NULL;
386}
387
Elliott Hughesa0664b92017-04-18 17:46:52 -0700388void VG_(HT_remove_at_Iter)(VgHashTable *table)
389{
390 vg_assert(table);
391 vg_assert(table->iterOK);
392 vg_assert(table->iterNode);
393
394 const UInt curChain = table->iterChain - 1; // chain of iterNode.
395
396
397 if (table->chains[curChain] == table->iterNode) {
398 /* iterNode is the first of its chain -> remove it from the chain. */
399 table->chains[curChain] = table->iterNode->next;
400 /* Setup the iterator to visit first node of curChain: */
401 table->iterNode = NULL;
402 table->iterChain = curChain;
403 } else {
404 /* iterNode is somewhere inside curChain chain */
405 VgHashNode* prev = table->chains[curChain];
406
407 while (prev->next != table->iterNode)
408 prev = prev->next;
409 /* Remove iterNode from the chain. */
410 prev->next = table->iterNode->next;
411 /* Setup the iterator to visit prev->next, which is the node
412 that was after the deleted node. */
413 table->iterNode = prev;
414 }
415
416 table->n_elements--;
417}
418
florian09a4c792014-10-18 10:58:05 +0000419void VG_(HT_destruct)(VgHashTable *table, void(*freenode_fn)(void*))
njn3e884182003-04-15 13:03:23 +0000420{
sewardj755e1262005-12-24 15:33:32 +0000421 UInt i;
422 VgHashNode *node, *node_next;
sewardj3f94a7d2007-08-25 07:19:08 +0000423
njnb8824022005-07-03 17:10:04 +0000424 for (i = 0; i < table->n_chains; i++) {
sewardj755e1262005-12-24 15:33:32 +0000425 for (node = table->chains[i]; node != NULL; node = node_next) {
426 node_next = node->next;
philippe6643e962012-01-17 21:16:30 +0000427 freenode_fn(node);
njn3e884182003-04-15 13:03:23 +0000428 }
429 }
sewardj3f94a7d2007-08-25 07:19:08 +0000430 VG_(free)(table->chains);
njn3e884182003-04-15 13:03:23 +0000431 VG_(free)(table);
432}
433
434/*--------------------------------------------------------------------*/
njn81c00df2005-05-14 21:28:43 +0000435/*--- end ---*/
njn3e884182003-04-15 13:03:23 +0000436/*--------------------------------------------------------------------*/