blob: 133f3133ef371ef31e59c70b52f8a8694cf6f480 [file] [log] [blame]
Victor Stinnered3b0bc2013-11-23 12:27:24 +01001/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
2 project:
3 http://sourceforge.net/projects/libcfu/
4
5 Copyright of cfuhash:
6 ----------------------------------
7 Creation date: 2005-06-24 21:22:40
8 Authors: Don
9 Change log:
10
11 Copyright (c) 2005 Don Owens
12 All rights reserved.
13
14 This code is released under the BSD license:
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19
20 * Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above
24 copyright notice, this list of conditions and the following
25 disclaimer in the documentation and/or other materials provided
26 with the distribution.
27
28 * Neither the name of the author nor the names of its
29 contributors may be used to endorse or promote products derived
30 from this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 OF THE POSSIBILITY OF SUCH DAMAGE.
44 ----------------------------------
45*/
46
47#include "Python.h"
48#include "hashtable.h"
49
50#define HASHTABLE_MIN_SIZE 16
51#define HASHTABLE_HIGH 0.50
52#define HASHTABLE_LOW 0.10
53#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55#define BUCKETS_HEAD(SLIST) \
56 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57#define TABLE_HEAD(HT, BUCKET) \
58 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59#define ENTRY_NEXT(ENTRY) \
60 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61#define HASHTABLE_ITEM_SIZE(HT) \
62 (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
63
64/* Forward declaration */
65static void hashtable_rehash(_Py_hashtable_t *ht);
66
67static void
68_Py_slist_init(_Py_slist_t *list)
69{
70 list->head = NULL;
71}
72
73static void
74_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
75{
76 item->next = list->head;
77 list->head = item;
78}
79
80static void
81_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82 _Py_slist_item_t *item)
83{
84 if (previous != NULL)
85 previous->next = item->next;
86 else
87 list->head = item->next;
88}
89
90Py_uhash_t
91_Py_hashtable_hash_int(const void *key)
92{
93 return (Py_uhash_t)key;
94}
95
96Py_uhash_t
97_Py_hashtable_hash_ptr(const void *key)
98{
99 return (Py_uhash_t)_Py_HashPointer((void *)key);
100}
101
102int
103_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
104{
105 return entry->key == key;
106}
107
108/* makes sure the real size of the buckets array is a power of 2 */
109static size_t
110round_size(size_t s)
111{
112 size_t i;
113 if (s < HASHTABLE_MIN_SIZE)
114 return HASHTABLE_MIN_SIZE;
115 i = 1;
116 while (i < s)
117 i <<= 1;
118 return i;
119}
120
121_Py_hashtable_t *
122_Py_hashtable_new_full(size_t data_size, size_t init_size,
123 _Py_hashtable_hash_func hash_func,
124 _Py_hashtable_compare_func compare_func,
125 _Py_hashtable_copy_data_func copy_data_func,
126 _Py_hashtable_free_data_func free_data_func,
127 _Py_hashtable_get_data_size_func get_data_size_func,
128 _Py_hashtable_allocator_t *allocator)
129{
130 _Py_hashtable_t *ht;
131 size_t buckets_size;
132 _Py_hashtable_allocator_t alloc;
133
134 if (allocator == NULL) {
135 alloc.malloc = PyMem_RawMalloc;
136 alloc.free = PyMem_RawFree;
137 }
138 else
139 alloc = *allocator;
140
141 ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
142 if (ht == NULL)
143 return ht;
144
145 ht->num_buckets = round_size(init_size);
146 ht->entries = 0;
147 ht->data_size = data_size;
148
149 buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
150 ht->buckets = alloc.malloc(buckets_size);
151 if (ht->buckets == NULL) {
152 alloc.free(ht);
153 return NULL;
154 }
155 memset(ht->buckets, 0, buckets_size);
156
157 ht->hash_func = hash_func;
158 ht->compare_func = compare_func;
159 ht->copy_data_func = copy_data_func;
160 ht->free_data_func = free_data_func;
161 ht->get_data_size_func = get_data_size_func;
162 ht->alloc = alloc;
163 return ht;
164}
165
166_Py_hashtable_t *
167_Py_hashtable_new(size_t data_size,
168 _Py_hashtable_hash_func hash_func,
169 _Py_hashtable_compare_func compare_func)
170{
171 return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
172 hash_func, compare_func,
173 NULL, NULL, NULL, NULL);
174}
175
176size_t
177_Py_hashtable_size(_Py_hashtable_t *ht)
178{
179 size_t size;
180 size_t hv;
181
182 size = sizeof(_Py_hashtable_t);
183
184 /* buckets */
185 size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
186
187 /* entries */
188 size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
189
190 /* data linked from entries */
191 if (ht->get_data_size_func) {
192 for (hv = 0; hv < ht->num_buckets; hv++) {
193 _Py_hashtable_entry_t *entry;
194
195 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
196 void *data;
197
198 data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
199 size += ht->get_data_size_func(data);
200 }
201 }
202 }
203 return size;
204}
205
206#ifdef Py_DEBUG
207void
208_Py_hashtable_print_stats(_Py_hashtable_t *ht)
209{
210 size_t size;
211 size_t chain_len, max_chain_len, total_chain_len, nchains;
212 _Py_hashtable_entry_t *entry;
213 size_t hv;
214 double load;
215
216 size = _Py_hashtable_size(ht);
217
218 load = (double)ht->entries / ht->num_buckets;
219
220 max_chain_len = 0;
221 total_chain_len = 0;
222 nchains = 0;
223 for (hv = 0; hv < ht->num_buckets; hv++) {
224 entry = TABLE_HEAD(ht, hv);
225 if (entry != NULL) {
226 chain_len = 0;
227 for (; entry; entry = ENTRY_NEXT(entry)) {
228 chain_len++;
229 }
230 if (chain_len > max_chain_len)
231 max_chain_len = chain_len;
232 total_chain_len += chain_len;
233 nchains++;
234 }
235 }
Victor Stinner293f3f52014-07-01 08:57:10 +0200236 printf("hash table %p: entries=%"
237 PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100238 ht, ht->entries, ht->num_buckets, load * 100.0);
239 if (nchains)
240 printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
Victor Stinner293f3f52014-07-01 08:57:10 +0200241 printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100242 max_chain_len, size / 1024);
243}
244#endif
245
246/* Get an entry. Return NULL if the key does not exist. */
247_Py_hashtable_entry_t *
248_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
249{
250 Py_uhash_t key_hash;
251 size_t index;
252 _Py_hashtable_entry_t *entry;
253
254 key_hash = ht->hash_func(key);
255 index = key_hash & (ht->num_buckets - 1);
256
257 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
258 if (entry->key_hash == key_hash && ht->compare_func(key, entry))
259 break;
260 }
261
262 return entry;
263}
264
265static int
266_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
267{
268 Py_uhash_t key_hash;
269 size_t index;
270 _Py_hashtable_entry_t *entry, *previous;
271
272 key_hash = ht->hash_func(key);
273 index = key_hash & (ht->num_buckets - 1);
274
275 previous = NULL;
276 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
277 if (entry->key_hash == key_hash && ht->compare_func(key, entry))
278 break;
279 previous = entry;
280 }
281
282 if (entry == NULL)
283 return 0;
284
285 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
286 (_Py_slist_item_t *)entry);
287 ht->entries--;
288
289 if (data != NULL)
290 _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
291 ht->alloc.free(entry);
292
293 if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
294 hashtable_rehash(ht);
295 return 1;
296}
297
298/* Add a new entry to the hash. The key must not be present in the hash table.
299 Return 0 on success, -1 on memory error. */
300int
301_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
302 void *data, size_t data_size)
303{
304 Py_uhash_t key_hash;
305 size_t index;
306 _Py_hashtable_entry_t *entry;
307
308 assert(data != NULL || data_size == 0);
309#ifndef NDEBUG
310 /* Don't write the assertion on a single line because it is interesting
311 to know the duplicated entry if the assertion failed. The entry can
312 be read using a debugger. */
313 entry = _Py_hashtable_get_entry(ht, key);
314 assert(entry == NULL);
315#endif
316
317 key_hash = ht->hash_func(key);
318 index = key_hash & (ht->num_buckets - 1);
319
320 entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
321 if (entry == NULL) {
322 /* memory allocation failed */
323 return -1;
324 }
325
326 entry->key = (void *)key;
327 entry->key_hash = key_hash;
328
329 assert(data_size == ht->data_size);
Victor Stinnerd9a73522014-03-24 22:34:34 +0100330 memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100331
332 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
333 ht->entries++;
334
335 if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
336 hashtable_rehash(ht);
337 return 0;
338}
339
340/* Get data from an entry. Copy entry data into data and return 1 if the entry
341 exists, return 0 if the entry does not exist. */
342int
343_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
344{
345 _Py_hashtable_entry_t *entry;
346
347 assert(data != NULL);
348
349 entry = _Py_hashtable_get_entry(ht, key);
350 if (entry == NULL)
351 return 0;
352 _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
353 return 1;
354}
355
356int
357_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
358{
359 assert(data != NULL);
360 assert(ht->free_data_func == NULL);
361 return _hashtable_pop_entry(ht, key, data, data_size);
362}
363
364/* Delete an entry. The entry must exist. */
365void
366_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
367{
368#ifndef NDEBUG
369 int found = _hashtable_pop_entry(ht, key, NULL, 0);
370 assert(found);
371#else
372 (void)_hashtable_pop_entry(ht, key, NULL, 0);
373#endif
374}
375
376/* Prototype for a pointer to a function to be called foreach
377 key/value pair in the hash by hashtable_foreach(). Iteration
378 stops if a non-zero value is returned. */
379int
380_Py_hashtable_foreach(_Py_hashtable_t *ht,
381 int (*func) (_Py_hashtable_entry_t *entry, void *arg),
382 void *arg)
383{
384 _Py_hashtable_entry_t *entry;
385 size_t hv;
386
387 for (hv = 0; hv < ht->num_buckets; hv++) {
388 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
389 int res = func(entry, arg);
390 if (res)
391 return res;
392 }
393 }
394 return 0;
395}
396
397static void
398hashtable_rehash(_Py_hashtable_t *ht)
399{
400 size_t buckets_size, new_size, bucket;
401 _Py_slist_t *old_buckets = NULL;
402 size_t old_num_buckets;
403
404 new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
405 if (new_size == ht->num_buckets)
406 return;
407
408 old_num_buckets = ht->num_buckets;
409
410 buckets_size = new_size * sizeof(ht->buckets[0]);
411 old_buckets = ht->buckets;
412 ht->buckets = ht->alloc.malloc(buckets_size);
413 if (ht->buckets == NULL) {
414 /* cancel rehash on memory allocation failure */
415 ht->buckets = old_buckets ;
416 /* memory allocation failed */
417 return;
418 }
419 memset(ht->buckets, 0, buckets_size);
420
421 ht->num_buckets = new_size;
422
423 for (bucket = 0; bucket < old_num_buckets; bucket++) {
424 _Py_hashtable_entry_t *entry, *next;
425 for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
426 size_t entry_index;
427
428 assert(ht->hash_func(entry->key) == entry->key_hash);
429 next = ENTRY_NEXT(entry);
430 entry_index = entry->key_hash & (new_size - 1);
431
432 _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
433 }
434 }
435
436 ht->alloc.free(old_buckets);
437}
438
439void
440_Py_hashtable_clear(_Py_hashtable_t *ht)
441{
442 _Py_hashtable_entry_t *entry, *next;
443 size_t i;
444
445 for (i=0; i < ht->num_buckets; i++) {
446 for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
447 next = ENTRY_NEXT(entry);
448 if (ht->free_data_func)
449 ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
450 ht->alloc.free(entry);
451 }
452 _Py_slist_init(&ht->buckets[i]);
453 }
454 ht->entries = 0;
455 hashtable_rehash(ht);
456}
457
458void
459_Py_hashtable_destroy(_Py_hashtable_t *ht)
460{
461 size_t i;
462
463 for (i = 0; i < ht->num_buckets; i++) {
464 _Py_slist_item_t *entry = ht->buckets[i].head;
465 while (entry) {
466 _Py_slist_item_t *entry_next = entry->next;
467 if (ht->free_data_func)
468 ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
469 ht->alloc.free(entry);
470 entry = entry_next;
471 }
472 }
473
474 ht->alloc.free(ht->buckets);
475 ht->alloc.free(ht);
476}
477
478/* Return a copy of the hash table */
479_Py_hashtable_t *
480_Py_hashtable_copy(_Py_hashtable_t *src)
481{
482 _Py_hashtable_t *dst;
483 _Py_hashtable_entry_t *entry;
484 size_t bucket;
485 int err;
486 void *data, *new_data;
487
488 dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
489 src->hash_func, src->compare_func,
490 src->copy_data_func, src->free_data_func,
491 src->get_data_size_func, &src->alloc);
492 if (dst == NULL)
493 return NULL;
494
495 for (bucket=0; bucket < src->num_buckets; bucket++) {
496 entry = TABLE_HEAD(src, bucket);
497 for (; entry; entry = ENTRY_NEXT(entry)) {
498 if (src->copy_data_func) {
499 data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
500 new_data = src->copy_data_func(data);
501 if (new_data != NULL)
502 err = _Py_hashtable_set(dst, entry->key,
503 &new_data, src->data_size);
504 else
505 err = 1;
506 }
507 else {
Victor Stinnerd9a73522014-03-24 22:34:34 +0100508 data = _Py_HASHTABLE_ENTRY_DATA(entry);
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100509 err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
510 }
511 if (err) {
512 _Py_hashtable_destroy(dst);
513 return NULL;
514 }
515 }
516 }
517 return dst;
518}
519