blob: 221ed53b9f604ecdd2cf9944f1d86034d670868d [file] [log] [blame]
Victor Stinnered3b0bc2013-11-23 12:27:24 +01001/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
2 project:
3 http://sourceforge.net/projects/libcfu/
4
5 Copyright of cfuhash:
6 ----------------------------------
7 Creation date: 2005-06-24 21:22:40
8 Authors: Don
9 Change log:
10
11 Copyright (c) 2005 Don Owens
12 All rights reserved.
13
14 This code is released under the BSD license:
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19
20 * Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above
24 copyright notice, this list of conditions and the following
25 disclaimer in the documentation and/or other materials provided
26 with the distribution.
27
28 * Neither the name of the author nor the names of its
29 contributors may be used to endorse or promote products derived
30 from this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 OF THE POSSIBILITY OF SUCH DAMAGE.
44 ----------------------------------
45*/
46
47#include "Python.h"
48#include "hashtable.h"
49
50#define HASHTABLE_MIN_SIZE 16
51#define HASHTABLE_HIGH 0.50
52#define HASHTABLE_LOW 0.10
53#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55#define BUCKETS_HEAD(SLIST) \
56 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57#define TABLE_HEAD(HT, BUCKET) \
58 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59#define ENTRY_NEXT(ENTRY) \
60 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61#define HASHTABLE_ITEM_SIZE(HT) \
62 (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
63
64/* Forward declaration */
65static void hashtable_rehash(_Py_hashtable_t *ht);
66
67static void
68_Py_slist_init(_Py_slist_t *list)
69{
70 list->head = NULL;
71}
72
73static void
74_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
75{
76 item->next = list->head;
77 list->head = item;
78}
79
80static void
81_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82 _Py_slist_item_t *item)
83{
84 if (previous != NULL)
85 previous->next = item->next;
86 else
87 list->head = item->next;
88}
89
90Py_uhash_t
91_Py_hashtable_hash_int(const void *key)
92{
93 return (Py_uhash_t)key;
94}
95
96Py_uhash_t
97_Py_hashtable_hash_ptr(const void *key)
98{
99 return (Py_uhash_t)_Py_HashPointer((void *)key);
100}
101
102int
103_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
104{
105 return entry->key == key;
106}
107
108/* makes sure the real size of the buckets array is a power of 2 */
109static size_t
110round_size(size_t s)
111{
112 size_t i;
113 if (s < HASHTABLE_MIN_SIZE)
114 return HASHTABLE_MIN_SIZE;
115 i = 1;
116 while (i < s)
117 i <<= 1;
118 return i;
119}
120
121_Py_hashtable_t *
122_Py_hashtable_new_full(size_t data_size, size_t init_size,
123 _Py_hashtable_hash_func hash_func,
124 _Py_hashtable_compare_func compare_func,
125 _Py_hashtable_copy_data_func copy_data_func,
126 _Py_hashtable_free_data_func free_data_func,
127 _Py_hashtable_get_data_size_func get_data_size_func,
128 _Py_hashtable_allocator_t *allocator)
129{
130 _Py_hashtable_t *ht;
131 size_t buckets_size;
132 _Py_hashtable_allocator_t alloc;
133
134 if (allocator == NULL) {
135 alloc.malloc = PyMem_RawMalloc;
136 alloc.free = PyMem_RawFree;
137 }
138 else
139 alloc = *allocator;
140
141 ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
142 if (ht == NULL)
143 return ht;
144
145 ht->num_buckets = round_size(init_size);
146 ht->entries = 0;
147 ht->data_size = data_size;
148
149 buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
150 ht->buckets = alloc.malloc(buckets_size);
151 if (ht->buckets == NULL) {
152 alloc.free(ht);
153 return NULL;
154 }
155 memset(ht->buckets, 0, buckets_size);
156
157 ht->hash_func = hash_func;
158 ht->compare_func = compare_func;
159 ht->copy_data_func = copy_data_func;
160 ht->free_data_func = free_data_func;
161 ht->get_data_size_func = get_data_size_func;
162 ht->alloc = alloc;
163 return ht;
164}
165
166_Py_hashtable_t *
167_Py_hashtable_new(size_t data_size,
168 _Py_hashtable_hash_func hash_func,
169 _Py_hashtable_compare_func compare_func)
170{
171 return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
172 hash_func, compare_func,
173 NULL, NULL, NULL, NULL);
174}
175
176size_t
177_Py_hashtable_size(_Py_hashtable_t *ht)
178{
179 size_t size;
180 size_t hv;
181
182 size = sizeof(_Py_hashtable_t);
183
184 /* buckets */
185 size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
186
187 /* entries */
188 size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
189
190 /* data linked from entries */
191 if (ht->get_data_size_func) {
192 for (hv = 0; hv < ht->num_buckets; hv++) {
193 _Py_hashtable_entry_t *entry;
194
195 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
196 void *data;
197
198 data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
199 size += ht->get_data_size_func(data);
200 }
201 }
202 }
203 return size;
204}
205
206#ifdef Py_DEBUG
207void
208_Py_hashtable_print_stats(_Py_hashtable_t *ht)
209{
210 size_t size;
211 size_t chain_len, max_chain_len, total_chain_len, nchains;
212 _Py_hashtable_entry_t *entry;
213 size_t hv;
214 double load;
215
216 size = _Py_hashtable_size(ht);
217
218 load = (double)ht->entries / ht->num_buckets;
219
220 max_chain_len = 0;
221 total_chain_len = 0;
222 nchains = 0;
223 for (hv = 0; hv < ht->num_buckets; hv++) {
224 entry = TABLE_HEAD(ht, hv);
225 if (entry != NULL) {
226 chain_len = 0;
227 for (; entry; entry = ENTRY_NEXT(entry)) {
228 chain_len++;
229 }
230 if (chain_len > max_chain_len)
231 max_chain_len = chain_len;
232 total_chain_len += chain_len;
233 nchains++;
234 }
235 }
236 printf("hash table %p: entries=%zu/%zu (%.0f%%), ",
237 ht, ht->entries, ht->num_buckets, load * 100.0);
238 if (nchains)
239 printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
240 printf("max_chain_len=%zu, %zu kB\n",
241 max_chain_len, size / 1024);
242}
243#endif
244
245/* Get an entry. Return NULL if the key does not exist. */
246_Py_hashtable_entry_t *
247_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
248{
249 Py_uhash_t key_hash;
250 size_t index;
251 _Py_hashtable_entry_t *entry;
252
253 key_hash = ht->hash_func(key);
254 index = key_hash & (ht->num_buckets - 1);
255
256 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
257 if (entry->key_hash == key_hash && ht->compare_func(key, entry))
258 break;
259 }
260
261 return entry;
262}
263
264static int
265_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
266{
267 Py_uhash_t key_hash;
268 size_t index;
269 _Py_hashtable_entry_t *entry, *previous;
270
271 key_hash = ht->hash_func(key);
272 index = key_hash & (ht->num_buckets - 1);
273
274 previous = NULL;
275 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
276 if (entry->key_hash == key_hash && ht->compare_func(key, entry))
277 break;
278 previous = entry;
279 }
280
281 if (entry == NULL)
282 return 0;
283
284 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
285 (_Py_slist_item_t *)entry);
286 ht->entries--;
287
288 if (data != NULL)
289 _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
290 ht->alloc.free(entry);
291
292 if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
293 hashtable_rehash(ht);
294 return 1;
295}
296
297/* Add a new entry to the hash. The key must not be present in the hash table.
298 Return 0 on success, -1 on memory error. */
299int
300_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
301 void *data, size_t data_size)
302{
303 Py_uhash_t key_hash;
304 size_t index;
305 _Py_hashtable_entry_t *entry;
306
307 assert(data != NULL || data_size == 0);
308#ifndef NDEBUG
309 /* Don't write the assertion on a single line because it is interesting
310 to know the duplicated entry if the assertion failed. The entry can
311 be read using a debugger. */
312 entry = _Py_hashtable_get_entry(ht, key);
313 assert(entry == NULL);
314#endif
315
316 key_hash = ht->hash_func(key);
317 index = key_hash & (ht->num_buckets - 1);
318
319 entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
320 if (entry == NULL) {
321 /* memory allocation failed */
322 return -1;
323 }
324
325 entry->key = (void *)key;
326 entry->key_hash = key_hash;
327
328 assert(data_size == ht->data_size);
329 memcpy(_PY_HASHTABLE_ENTRY_DATA(entry), data, data_size);
330
331 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
332 ht->entries++;
333
334 if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
335 hashtable_rehash(ht);
336 return 0;
337}
338
339/* Get data from an entry. Copy entry data into data and return 1 if the entry
340 exists, return 0 if the entry does not exist. */
341int
342_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
343{
344 _Py_hashtable_entry_t *entry;
345
346 assert(data != NULL);
347
348 entry = _Py_hashtable_get_entry(ht, key);
349 if (entry == NULL)
350 return 0;
351 _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
352 return 1;
353}
354
355int
356_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
357{
358 assert(data != NULL);
359 assert(ht->free_data_func == NULL);
360 return _hashtable_pop_entry(ht, key, data, data_size);
361}
362
363/* Delete an entry. The entry must exist. */
364void
365_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
366{
367#ifndef NDEBUG
368 int found = _hashtable_pop_entry(ht, key, NULL, 0);
369 assert(found);
370#else
371 (void)_hashtable_pop_entry(ht, key, NULL, 0);
372#endif
373}
374
375/* Prototype for a pointer to a function to be called foreach
376 key/value pair in the hash by hashtable_foreach(). Iteration
377 stops if a non-zero value is returned. */
378int
379_Py_hashtable_foreach(_Py_hashtable_t *ht,
380 int (*func) (_Py_hashtable_entry_t *entry, void *arg),
381 void *arg)
382{
383 _Py_hashtable_entry_t *entry;
384 size_t hv;
385
386 for (hv = 0; hv < ht->num_buckets; hv++) {
387 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
388 int res = func(entry, arg);
389 if (res)
390 return res;
391 }
392 }
393 return 0;
394}
395
396static void
397hashtable_rehash(_Py_hashtable_t *ht)
398{
399 size_t buckets_size, new_size, bucket;
400 _Py_slist_t *old_buckets = NULL;
401 size_t old_num_buckets;
402
403 new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
404 if (new_size == ht->num_buckets)
405 return;
406
407 old_num_buckets = ht->num_buckets;
408
409 buckets_size = new_size * sizeof(ht->buckets[0]);
410 old_buckets = ht->buckets;
411 ht->buckets = ht->alloc.malloc(buckets_size);
412 if (ht->buckets == NULL) {
413 /* cancel rehash on memory allocation failure */
414 ht->buckets = old_buckets ;
415 /* memory allocation failed */
416 return;
417 }
418 memset(ht->buckets, 0, buckets_size);
419
420 ht->num_buckets = new_size;
421
422 for (bucket = 0; bucket < old_num_buckets; bucket++) {
423 _Py_hashtable_entry_t *entry, *next;
424 for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
425 size_t entry_index;
426
427 assert(ht->hash_func(entry->key) == entry->key_hash);
428 next = ENTRY_NEXT(entry);
429 entry_index = entry->key_hash & (new_size - 1);
430
431 _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
432 }
433 }
434
435 ht->alloc.free(old_buckets);
436}
437
438void
439_Py_hashtable_clear(_Py_hashtable_t *ht)
440{
441 _Py_hashtable_entry_t *entry, *next;
442 size_t i;
443
444 for (i=0; i < ht->num_buckets; i++) {
445 for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
446 next = ENTRY_NEXT(entry);
447 if (ht->free_data_func)
448 ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
449 ht->alloc.free(entry);
450 }
451 _Py_slist_init(&ht->buckets[i]);
452 }
453 ht->entries = 0;
454 hashtable_rehash(ht);
455}
456
457void
458_Py_hashtable_destroy(_Py_hashtable_t *ht)
459{
460 size_t i;
461
462 for (i = 0; i < ht->num_buckets; i++) {
463 _Py_slist_item_t *entry = ht->buckets[i].head;
464 while (entry) {
465 _Py_slist_item_t *entry_next = entry->next;
466 if (ht->free_data_func)
467 ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
468 ht->alloc.free(entry);
469 entry = entry_next;
470 }
471 }
472
473 ht->alloc.free(ht->buckets);
474 ht->alloc.free(ht);
475}
476
477/* Return a copy of the hash table */
478_Py_hashtable_t *
479_Py_hashtable_copy(_Py_hashtable_t *src)
480{
481 _Py_hashtable_t *dst;
482 _Py_hashtable_entry_t *entry;
483 size_t bucket;
484 int err;
485 void *data, *new_data;
486
487 dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
488 src->hash_func, src->compare_func,
489 src->copy_data_func, src->free_data_func,
490 src->get_data_size_func, &src->alloc);
491 if (dst == NULL)
492 return NULL;
493
494 for (bucket=0; bucket < src->num_buckets; bucket++) {
495 entry = TABLE_HEAD(src, bucket);
496 for (; entry; entry = ENTRY_NEXT(entry)) {
497 if (src->copy_data_func) {
498 data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
499 new_data = src->copy_data_func(data);
500 if (new_data != NULL)
501 err = _Py_hashtable_set(dst, entry->key,
502 &new_data, src->data_size);
503 else
504 err = 1;
505 }
506 else {
507 data = _PY_HASHTABLE_ENTRY_DATA(entry);
508 err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
509 }
510 if (err) {
511 _Py_hashtable_destroy(dst);
512 return NULL;
513 }
514 }
515 }
516 return dst;
517}
518