blob: 38e3daa2497871216ae850c20c3829bca2bccfcc [file] [log] [blame]
Petr Machataa11cbed2012-05-05 00:07:38 +02001/*
2 * This file is part of ltrace.
Petr Machatad7e4ca82012-11-28 03:38:47 +01003 * Copyright (C) 2012 Petr Machata, Red Hat Inc.
Petr Machataa11cbed2012-05-05 00:07:38 +02004 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 * 02110-1301 USA
19 */
20
Juan Cespedes7186e2a2003-01-31 19:56:34 +010021#include <string.h>
Petr Machatad7e4ca82012-11-28 03:38:47 +010022#include <stdlib.h>
23#include <stdio.h>
24#include "dict.h"
Juan Cespedescac15c32003-01-31 18:58:58 +010025
Petr Machatad7e4ca82012-11-28 03:38:47 +010026struct status_bits {
27 unsigned char taken : 1;
28 unsigned char erased : 1;
Juan Cespedescac15c32003-01-31 18:58:58 +010029};
30
Petr Machatad7e4ca82012-11-28 03:38:47 +010031static struct status_bits *
32bitp(struct dict *dict, size_t n)
Petr Machata345c9b52012-02-10 13:10:03 +010033{
Petr Machatad7e4ca82012-11-28 03:38:47 +010034 return VECT_ELEMENT(&dict->status, struct status_bits, n);
Juan Cespedescac15c32003-01-31 18:58:58 +010035}
36
Juan Cespedesf1350522008-12-16 18:19:58 +010037void
Petr Machatad7e4ca82012-11-28 03:38:47 +010038dict_init(struct dict *dict,
39 size_t key_size, size_t value_size,
40 size_t (*hash1)(const void *),
41 int (*eq)(const void *, const void *),
42 size_t (*hash2)(size_t))
43{
44 assert(hash1 != NULL);
45 assert(eq != NULL);
Juan Cespedescac15c32003-01-31 18:58:58 +010046
Petr Machatad7e4ca82012-11-28 03:38:47 +010047 vect_init(&dict->keys, key_size);
48 vect_init(&dict->values, value_size);
49 VECT_INIT(&dict->status, struct status_bits);
50 dict->size = 0;
51
52 dict->hash1 = hash1;
53 dict->hash2 = hash2;
54 dict->eq = eq;
55}
56
57struct clone_data {
58 struct dict *target;
59 int (*clone_key)(void *tgt, const void *src, void *data);
60 int (*clone_value)(void *tgt, const void *src, void *data);
61 void (*dtor_key)(void *tgt, void *data);
62 void (*dtor_value)(void *tgt, void *data);
63 void *data;
64};
65
66enum callback_status
67clone_cb(void *key, void *value, void *data)
68{
69 struct clone_data *clone_data = data;
70
71 char nkey[clone_data->target->keys.elt_size];
72 if (clone_data->clone_key == NULL)
73 memmove(nkey, key, sizeof(nkey));
74 else if (clone_data->clone_key(&nkey, key, clone_data->data) < 0)
75 return CBS_STOP;
76
77 char nvalue[clone_data->target->values.elt_size];
78 if (clone_data->clone_value == NULL) {
79 memmove(nvalue, value, sizeof(nvalue));
80 } else if (clone_data->clone_value(&nvalue, value,
81 clone_data->data) < 0) {
82 fail:
83 if (clone_data->clone_key != NULL)
84 clone_data->dtor_key(&nkey, clone_data->data);
85 return CBS_STOP;
Juan Cespedescac15c32003-01-31 18:58:58 +010086 }
Petr Machatad7e4ca82012-11-28 03:38:47 +010087
88 if (dict_insert(clone_data->target, nkey, nvalue) < 0) {
89 if (clone_data->clone_value != NULL)
90 clone_data->dtor_value(&nvalue, clone_data->data);
91 goto fail;
92 }
93
94 return CBS_CONT;
Juan Cespedescac15c32003-01-31 18:58:58 +010095}
96
Juan Cespedesf1350522008-12-16 18:19:58 +010097int
Petr Machatad7e4ca82012-11-28 03:38:47 +010098dict_clone(struct dict *target, const struct dict *source,
99 int (*clone_key)(void *tgt, const void *src, void *data),
100 void (*dtor_key)(void *tgt, void *data),
101 int (*clone_value)(void *tgt, const void *src, void *data),
102 void (*dtor_value)(void *tgt, void *data),
103 void *data)
104{
105 assert((clone_key != NULL) == (dtor_key != NULL));
106 assert((clone_value != NULL) == (dtor_value != NULL));
Juan Cespedescd8976d2009-05-14 13:47:58 +0200107
Petr Machatad7e4ca82012-11-28 03:38:47 +0100108 dict_init(target, source->keys.elt_size, source->values.elt_size,
109 source->hash1, source->eq, source->hash2);
110 struct clone_data clone_data = {
111 target, clone_key, clone_value, dtor_key, dtor_value, data
112 };
113 if (dict_each((struct dict *)source, NULL,
114 clone_cb, &clone_data) != NULL) {
115 dict_destroy(target, dtor_key, dtor_value, data);
116 return -1;
117 }
118 return 0;
119}
Juan Cespedescd8976d2009-05-14 13:47:58 +0200120
Petr Machatad7e4ca82012-11-28 03:38:47 +0100121size_t
122dict_size(const struct dict *dict)
123{
124 return dict->size;
125}
Juan Cespedescac15c32003-01-31 18:58:58 +0100126
Petr Machatad7e4ca82012-11-28 03:38:47 +0100127int
128dict_empty(const struct dict *dict)
129{
130 return dict->size == 0;
131}
132
133struct destroy_data {
134 void (*dtor_key)(void *tgt, void *data);
135 void (*dtor_value)(void *tgt, void *data);
136 void *data;
137};
138
139enum callback_status
140destroy_cb(void *key, void *value, void *data)
141{
142 struct destroy_data *destroy_data = data;
143 if (destroy_data->dtor_key)
144 destroy_data->dtor_key(key, destroy_data->data);
145 if (destroy_data->dtor_value)
146 destroy_data->dtor_value(value, destroy_data->data);
147 return CBS_CONT;
148}
149
150void
151dict_destroy(struct dict *dict,
152 void (*dtor_key)(void *tgt, void *data),
153 void (*dtor_value)(void *tgt, void *data),
154 void *data)
155{
156 /* Some keys and values are not initialized, so we can't call
157 * dtors for them. Iterate DICT instead. */
158 if (dtor_key != NULL || dtor_value != NULL) {
159 struct destroy_data destroy_data = {
160 dtor_key, dtor_value, data
161 };
162 dict_each(dict, NULL, destroy_cb, &destroy_data);
Juan Cespedescac15c32003-01-31 18:58:58 +0100163 }
164
Petr Machatad7e4ca82012-11-28 03:38:47 +0100165 vect_destroy(&dict->keys, NULL, NULL);
166 vect_destroy(&dict->values, NULL, NULL);
167 vect_destroy(&dict->status, NULL, NULL);
168}
Juan Cespedescac15c32003-01-31 18:58:58 +0100169
Petr Machatad7e4ca82012-11-28 03:38:47 +0100170static size_t
171default_secondary_hash(size_t pos)
172{
173 return pos % 97 + 1;
174}
Juan Cespedescac15c32003-01-31 18:58:58 +0100175
Petr Machatad7e4ca82012-11-28 03:38:47 +0100176static inline size_t
177n(struct dict *dict)
178{
179 return vect_size(&dict->keys);
180}
181
182static inline size_t (*
183hash2(struct dict *dict))(size_t)
184{
185 if (dict->hash2 != NULL)
186 return dict->hash2;
Ian Wienand2d45b1a2006-02-20 22:48:07 +0100187 else
Petr Machatad7e4ca82012-11-28 03:38:47 +0100188 return default_secondary_hash;
189}
Juan Cespedescac15c32003-01-31 18:58:58 +0100190
Petr Machatad7e4ca82012-11-28 03:38:47 +0100191static void *
192getkey(struct dict *dict, size_t pos)
193{
194 return ((unsigned char *)dict->keys.data)
195 + dict->keys.elt_size * pos;
196}
197
198static void *
199getvalue(struct dict *dict, size_t pos)
200{
201 return ((unsigned char *)dict->values.data)
202 + dict->values.elt_size * pos;
203}
204
205static size_t
206find_slot(struct dict *dict, const void *key,
207 int *foundp, int *should_rehash, size_t *pi)
208{
209 size_t pos = dict->hash1(key) % n(dict);
210 size_t pos0 = -1;
211 size_t d = hash2(dict)(pos);
212 size_t i = 0;
213 *foundp = 0;
214
215 /* We skip over any taken or erased slots. But we remember
216 * the first erased that we find, and if we don't find the key
217 * later, we return that position. */
218 for (; bitp(dict, pos)->taken || bitp(dict, pos)->erased;
219 pos = (pos + d) % n(dict)) {
220
221 if (pos0 == (size_t)-1 && bitp(dict, pos)->erased)
222 pos0 = pos;
223
224 if (++i > dict->size)
225 break;
226
227 if (bitp(dict, pos)->taken
228 && dict->eq(getkey(dict, pos), key)) {
229 *foundp = 1;
230 break;
231 }
232 }
233
234 if (!*foundp && pos0 != (size_t)-1)
235 pos = pos0;
236
237 /* If the hash table degraded into a linked list, request a
238 * rehash. */
239 if (should_rehash != NULL)
240 *should_rehash = i > 10 && i > n(dict) / 10;
241
242 if (pi != NULL)
243 *pi = i;
244 return pos;
245}
246
247enum callback_status
248rehash_move(void *key, void *value, void *data)
249{
250 if (dict_insert(data, key, value) < 0)
251 return CBS_STOP;
252 else
253 return CBS_CONT;
254}
255
256int
257rehash(struct dict *dict, size_t nn)
258{
259 int ret = -1;
260
261 struct dict tmp;
262 dict_init(&tmp, dict->keys.elt_size, dict->values.elt_size,
263 dict->hash1, dict->eq, dict->hash2);
264
265 /* To honor all invariants (so that we can safely call
266 * dict_destroy), we first make a request to _reserve_ enough
267 * room in all vectors. This has no observable effect on
268 * contents of vectors. */
269 if (vect_reserve(&tmp.keys, nn) < 0
270 || vect_reserve(&tmp.values, nn) < 0
271 || vect_reserve(&tmp.status, nn) < 0)
272 goto done;
273
274 /* Now that we know that there is enough size in vectors, we
275 * simply bump the size. */
276 tmp.keys.size = nn;
277 tmp.values.size = nn;
278 size_t old_size = tmp.status.size;
279 tmp.status.size = nn;
280 memset(VECT_ELEMENT(&tmp.status, struct status_bits, old_size),
281 0, (tmp.status.size - old_size) * tmp.status.elt_size);
282
283 /* At this point, TMP is once more an empty dictionary with NN
284 * slots. Now move stuff from DICT to TMP. */
285 if (dict_each(dict, NULL, rehash_move, &tmp) != NULL)
286 goto done;
287
288 /* And now swap contents of DICT and TMP, and we are done. */
289 {
290 struct dict tmp2 = *dict;
291 *dict = tmp;
292 tmp = tmp2;
293 }
294
295 ret = 0;
296
297done:
298 /* We only want to release the containers, not the actual data
299 * that they hold, so it's fine if we don't pass any dtor. */
300 dict_destroy(&tmp, NULL, NULL, NULL);
301 return ret;
302
303}
304
305static const size_t primes[] = {
306 13, 31, 61, 127, 251, 509, 1021, 2039, 4093,
307 8191, 16381, 32749, 65521, 130981, 0
308};
309
310static size_t
311larger_size(size_t current)
312{
313 if (current == 0)
314 return primes[0];
315
316 if (current < primes[sizeof(primes)/sizeof(*primes) - 2]) {
317 size_t i;
318 for (i = 0; primes[i] != 0; ++i)
319 if (primes[i] > current)
320 return primes[i];
321 abort();
322 }
323
324 /* We ran out of primes, so invent a new one. The following
325 * gives primes until about 17M elements (and then some more
326 * later). */
327 return 2 * current + 6585;
328}
329
330static size_t
331smaller_size(size_t current)
332{
333 if (current <= primes[0])
334 return primes[0];
335
336 if (current <= primes[sizeof(primes)/sizeof(*primes) - 2]) {
337 size_t i;
338 size_t prev = 0;
339 for (i = 0; primes[i] != 0; ++i) {
340 if (primes[i] >= current)
341 return prev;
342 prev = primes[i];
343 }
344 abort();
345 }
346
347 return (current - 6585) / 2;
348}
349
350int
351dict_insert(struct dict *dict, void *key, void *value)
352{
353 if (n(dict) == 0 || dict->size > 0.7 * n(dict))
354 rehash:
355 if (rehash(dict, larger_size(n(dict))) < 0)
356 return -1;
357
358 int found;
359 int should_rehash;
360 size_t slot_n = find_slot(dict, key, &found, &should_rehash, NULL);
361
362 if (found)
363 return 1;
364
365 /* If rehash was requested, do that, and retry. But just live
366 * with it for apparently sparse tables. No resizing can fix
367 * a rubbish hash. */
368 if (should_rehash && dict->size > 0.3 * n(dict))
369 goto rehash;
370
371 memmove(getkey(dict, slot_n), key, dict->keys.elt_size);
372 memmove(getvalue(dict, slot_n), value, dict->values.elt_size);
373
374 bitp(dict, slot_n)->taken = 1;
375 bitp(dict, slot_n)->erased = 0;
376 ++dict->size;
377
Juan Cespedescac15c32003-01-31 18:58:58 +0100378 return 0;
379}
380
Juan Cespedesf1350522008-12-16 18:19:58 +0100381void *
Petr Machatad7e4ca82012-11-28 03:38:47 +0100382dict_find(struct dict *dict, const void *key)
Petr Machata77fbb8f2012-04-19 16:57:57 +0200383{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100384 if (dict->size == 0)
385 return NULL;
Petr Machata77fbb8f2012-04-19 16:57:57 +0200386
Petr Machatad7e4ca82012-11-28 03:38:47 +0100387 int found;
388 size_t slot_n = find_slot(dict, key, &found, NULL, NULL);
389 if (found)
390 return getvalue(dict, slot_n);
391 else
392 return NULL;
393}
Petr Machata77fbb8f2012-04-19 16:57:57 +0200394
Petr Machatad7e4ca82012-11-28 03:38:47 +0100395int
396dict_erase(struct dict *dict, const void *key,
397 void (*dtor_key)(void *tgt, void *data),
398 void (*dtor_value)(void *tgt, void *data),
399 void *data)
400{
401 int found;
402 size_t i;
403 size_t slot_n = find_slot(dict, key, &found, NULL, &i);
404 if (!found)
405 return -1;
406
407 if (dtor_key != NULL)
408 dtor_key(getkey(dict, slot_n), data);
409 if (dtor_value != NULL)
410 dtor_value(getvalue(dict, slot_n), data);
411
412 bitp(dict, slot_n)->taken = 0;
413 bitp(dict, slot_n)->erased = 1;
414 --dict->size;
415
416 if (dict->size < 0.3 * n(dict)) {
417 /* Don't mind if it fails when shrinking. */
418 rehash(dict, smaller_size(n(dict)));
Petr Machata77fbb8f2012-04-19 16:57:57 +0200419 }
Petr Machatad7e4ca82012-11-28 03:38:47 +0100420
421 return 0;
Petr Machata77fbb8f2012-04-19 16:57:57 +0200422}
423
424void *
Petr Machatad7e4ca82012-11-28 03:38:47 +0100425dict_each(struct dict *dict, void *start_after,
426 enum callback_status (*cb)(void *, void *, void *), void *data)
Petr Machata345c9b52012-02-10 13:10:03 +0100427{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100428 size_t i;
429 if (start_after != NULL)
430 i = ((start_after - dict->keys.data) / dict->keys.elt_size) + 1;
431 else
432 i = 0;
Juan Cespedescac15c32003-01-31 18:58:58 +0100433
Petr Machatad7e4ca82012-11-28 03:38:47 +0100434 for (; i < dict->keys.size; ++i)
435 if (bitp(dict, i)->taken && !bitp(dict, i)->erased) {
436 void *key = getkey(dict, i);
437 if (cb(key, getvalue(dict, i), data) != CBS_CONT)
438 return key;
Juan Cespedescac15c32003-01-31 18:58:58 +0100439 }
Petr Machatad7e4ca82012-11-28 03:38:47 +0100440
441 return NULL;
Juan Cespedescac15c32003-01-31 18:58:58 +0100442}
443
Petr Machatad7e4ca82012-11-28 03:38:47 +0100444size_t
445dict_hash_int(const int *key)
Petr Machata345c9b52012-02-10 13:10:03 +0100446{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100447 return (size_t)(*key * 2654435761);
Juan Cespedescac15c32003-01-31 18:58:58 +0100448}
449
Juan Cespedesf1350522008-12-16 18:19:58 +0100450int
Petr Machatad7e4ca82012-11-28 03:38:47 +0100451dict_eq_int(const int *key1, const int *key2)
Petr Machata345c9b52012-02-10 13:10:03 +0100452{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100453 return *key1 == *key2;
Juan Cespedescac15c32003-01-31 18:58:58 +0100454}
455
Petr Machatad7e4ca82012-11-28 03:38:47 +0100456size_t
457dict_hash_string(const char **key)
Petr Machata345c9b52012-02-10 13:10:03 +0100458{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100459 size_t h = 5381;
460 const char *str = *key;
461 while (*str != 0)
462 h = h * 33 ^ *str++;
463 return h;
Juan Cespedescac15c32003-01-31 18:58:58 +0100464}
465
Juan Cespedesf1350522008-12-16 18:19:58 +0100466int
Petr Machatad7e4ca82012-11-28 03:38:47 +0100467dict_eq_string(const char **key1, const char **key2)
Petr Machata345c9b52012-02-10 13:10:03 +0100468{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100469 return strcmp(*key1, *key2) == 0;
Juan Cespedescac15c32003-01-31 18:58:58 +0100470}
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200471
Petr Machatad7e4ca82012-11-28 03:38:47 +0100472#ifdef TEST
473static enum callback_status
474dump(int *key, int *value, void *data)
Petr Machata534e00f2011-09-27 17:58:38 +0200475{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100476 char *seen = data;
477 assert(seen[*key] == 0);
478 seen[*key] = 1;
479 assert(*value == *key * 2 + 1);
480 return CBS_STOP;
481}
482
483static size_t
484dict_hash_int_silly(const int *key)
485{
486 return *key % 10;
487}
488
489static void
490verify(struct dict *di, size_t len, char *seen)
491{
492 size_t ct = 0;
493 int *it;
494 for (it = NULL; (it = DICT_EACH(di, int, int, it, dump, seen)) != NULL;)
495 ct++;
496 assert(ct == len);
497 memset(seen, 0, len);
498}
499
500static enum callback_status
501fill_keys(int *key, int *value, void *data)
502{
503 int *array = data;
504 array[++array[0]] = *key;
505 return CBS_CONT;
506}
507
508static void
509test1(void)
510{
511 struct dict di;
512 DICT_INIT(&di, int, int, dict_hash_int, dict_eq_int, NULL);
513
514 char seen[100000] = {};
515 size_t i;
516 for (i = 0; i < sizeof(seen); ++i) {
517 int key = i;
518 int value = 2 * i + 1;
519 DICT_INSERT(&di, &key, &value);
520 int *valp = DICT_FIND(&di, &key, int);
521 assert(valp != NULL);
522 assert(*valp == value);
523 assert(dict_size(&di) == i + 1);
524 }
525
526 verify(&di, sizeof(seen), seen);
527
528 struct dict d2;
529 DICT_CLONE(&d2, &di, int, int, NULL, NULL, NULL, NULL, NULL);
530 DICT_DESTROY(&di, int, int, NULL, NULL, NULL);
531 verify(&d2, sizeof(seen), seen);
532
533 /* Now we try to gradually erase all elements. We can't erase
534 * inside a DICT_EACH call, so copy first keys to a separate
535 * memory area first. */
536 int keys[d2.size + 1];
537 size_t ct = 0;
538 keys[0] = 0;
539 DICT_EACH(&d2, int, int, NULL, fill_keys, keys);
540 for (i = 0; i < (size_t)keys[0]; ++i) {
541 assert(DICT_ERASE(&d2, &keys[i + 1], int,
542 NULL, NULL, NULL) == 0);
543 ++ct;
544 }
545 assert(ct == sizeof(seen));
546 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
547}
548
549static void
550test_erase(void)
551{
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200552 int i;
553
Petr Machatad7e4ca82012-11-28 03:38:47 +0100554 /* To test erase, we need a relatively bad hash function, so
555 * that there are some overlapping chains in the table. */
556 struct dict d2;
557 DICT_INIT(&d2, int, int, dict_hash_int_silly, dict_eq_int, NULL);
558 const int limit = 500;
559 for (i = 0; i < limit; ++i) {
560 int key = 2 * i + 1;
561 int value = 2 * key + 1;
562 DICT_INSERT(&d2, &key, &value);
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200563 }
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200564
Petr Machatad7e4ca82012-11-28 03:38:47 +0100565 /* Now we try to delete each of the keys, and verify that none
566 * of the chains was broken. */
567 for (i = 0; i < limit; ++i) {
568 struct dict copy;
569 DICT_CLONE(&copy, &d2, int, int, NULL, NULL, NULL, NULL, NULL);
570 int key = 2 * i + 1;
571 DICT_ERASE(&copy, &key, int, NULL, NULL, NULL);
572 assert(dict_size(&copy) == dict_size(&d2) - 1);
573
574 int j;
575 for (j = 0; j < limit; ++j) {
576 key = 2 * j + 1;
577 int *valp = DICT_FIND(&copy, &key, int);
578 if (i != j) {
579 assert(valp != NULL);
580 assert(*valp == 2 * key + 1);
581 } else {
582 assert(valp == NULL);
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200583 }
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200584 }
Petr Machatad7e4ca82012-11-28 03:38:47 +0100585
586 DICT_DESTROY(&copy, int, int, NULL, NULL, NULL);
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200587 }
Petr Machatad7e4ca82012-11-28 03:38:47 +0100588 DICT_DESTROY(&d2, int, int, NULL, NULL, NULL);
Juan Cespedesbc8caf02009-05-07 19:38:38 +0200589}
Petr Machata534e00f2011-09-27 17:58:38 +0200590
Petr Machatad7e4ca82012-11-28 03:38:47 +0100591int main(int argc, char *argv[])
Petr Machata534e00f2011-09-27 17:58:38 +0200592{
Petr Machatad7e4ca82012-11-28 03:38:47 +0100593 test1();
594 test_erase();
595 return 0;
Petr Machata534e00f2011-09-27 17:58:38 +0200596}
597
Petr Machatad7e4ca82012-11-28 03:38:47 +0100598#endif