blob: fdddc19cff1bf9cea7f5e36c1e7991c345622b33 [file] [log] [blame]
Victor Stinnered3b0bc2013-11-23 12:27:24 +01001/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
2 project:
3 http://sourceforge.net/projects/libcfu/
4
5 Copyright of cfuhash:
6 ----------------------------------
7 Creation date: 2005-06-24 21:22:40
8 Authors: Don
9 Change log:
10
11 Copyright (c) 2005 Don Owens
12 All rights reserved.
13
14 This code is released under the BSD license:
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19
20 * Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above
24 copyright notice, this list of conditions and the following
25 disclaimer in the documentation and/or other materials provided
26 with the distribution.
27
28 * Neither the name of the author nor the names of its
29 contributors may be used to endorse or promote products derived
30 from this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 OF THE POSSIBILITY OF SUCH DAMAGE.
44 ----------------------------------
45*/
46
47#include "Python.h"
48#include "hashtable.h"
49
50#define HASHTABLE_MIN_SIZE 16
51#define HASHTABLE_HIGH 0.50
52#define HASHTABLE_LOW 0.10
53#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55#define BUCKETS_HEAD(SLIST) \
56 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57#define TABLE_HEAD(HT, BUCKET) \
58 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59#define ENTRY_NEXT(ENTRY) \
60 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61#define HASHTABLE_ITEM_SIZE(HT) \
62 (sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
63
64/* Forward declaration */
65static void hashtable_rehash(_Py_hashtable_t *ht);
66
67static void
68_Py_slist_init(_Py_slist_t *list)
69{
70 list->head = NULL;
71}
72
73static void
74_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
75{
76 item->next = list->head;
77 list->head = item;
78}
79
80static void
81_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82 _Py_slist_item_t *item)
83{
84 if (previous != NULL)
85 previous->next = item->next;
86 else
87 list->head = item->next;
88}
89
90Py_uhash_t
91_Py_hashtable_hash_int(const void *key)
92{
93 return (Py_uhash_t)key;
94}
95
96Py_uhash_t
97_Py_hashtable_hash_ptr(const void *key)
98{
99 return (Py_uhash_t)_Py_HashPointer((void *)key);
100}
101
102int
103_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
104{
105 return entry->key == key;
106}
107
108/* makes sure the real size of the buckets array is a power of 2 */
109static size_t
110round_size(size_t s)
111{
112 size_t i;
113 if (s < HASHTABLE_MIN_SIZE)
114 return HASHTABLE_MIN_SIZE;
115 i = 1;
116 while (i < s)
117 i <<= 1;
118 return i;
119}
120
121_Py_hashtable_t *
122_Py_hashtable_new_full(size_t data_size, size_t init_size,
123 _Py_hashtable_hash_func hash_func,
124 _Py_hashtable_compare_func compare_func,
125 _Py_hashtable_copy_data_func copy_data_func,
126 _Py_hashtable_free_data_func free_data_func,
127 _Py_hashtable_get_data_size_func get_data_size_func,
128 _Py_hashtable_allocator_t *allocator)
129{
130 _Py_hashtable_t *ht;
131 size_t buckets_size;
132 _Py_hashtable_allocator_t alloc;
133
134 if (allocator == NULL) {
135 alloc.malloc = PyMem_RawMalloc;
136 alloc.free = PyMem_RawFree;
137 }
138 else
139 alloc = *allocator;
140
141 ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
142 if (ht == NULL)
143 return ht;
144
145 ht->num_buckets = round_size(init_size);
146 ht->entries = 0;
147 ht->data_size = data_size;
148
149 buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
150 ht->buckets = alloc.malloc(buckets_size);
151 if (ht->buckets == NULL) {
152 alloc.free(ht);
153 return NULL;
154 }
155 memset(ht->buckets, 0, buckets_size);
156
157 ht->hash_func = hash_func;
158 ht->compare_func = compare_func;
159 ht->copy_data_func = copy_data_func;
160 ht->free_data_func = free_data_func;
161 ht->get_data_size_func = get_data_size_func;
162 ht->alloc = alloc;
163 return ht;
164}
165
166_Py_hashtable_t *
167_Py_hashtable_new(size_t data_size,
168 _Py_hashtable_hash_func hash_func,
169 _Py_hashtable_compare_func compare_func)
170{
171 return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
172 hash_func, compare_func,
173 NULL, NULL, NULL, NULL);
174}
175
176size_t
177_Py_hashtable_size(_Py_hashtable_t *ht)
178{
179 size_t size;
180 size_t hv;
181
182 size = sizeof(_Py_hashtable_t);
183
184 /* buckets */
185 size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
186
187 /* entries */
188 size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
189
190 /* data linked from entries */
191 if (ht->get_data_size_func) {
192 for (hv = 0; hv < ht->num_buckets; hv++) {
193 _Py_hashtable_entry_t *entry;
194
195 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
196 void *data;
197
198 data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
199 size += ht->get_data_size_func(data);
200 }
201 }
202 }
203 return size;
204}
205
206#ifdef Py_DEBUG
207void
208_Py_hashtable_print_stats(_Py_hashtable_t *ht)
209{
210 size_t size;
211 size_t chain_len, max_chain_len, total_chain_len, nchains;
212 _Py_hashtable_entry_t *entry;
213 size_t hv;
214 double load;
215
216 size = _Py_hashtable_size(ht);
217
218 load = (double)ht->entries / ht->num_buckets;
219
220 max_chain_len = 0;
221 total_chain_len = 0;
222 nchains = 0;
223 for (hv = 0; hv < ht->num_buckets; hv++) {
224 entry = TABLE_HEAD(ht, hv);
225 if (entry != NULL) {
226 chain_len = 0;
227 for (; entry; entry = ENTRY_NEXT(entry)) {
228 chain_len++;
229 }
230 if (chain_len > max_chain_len)
231 max_chain_len = chain_len;
232 total_chain_len += chain_len;
233 nchains++;
234 }
235 }
Victor Stinner293f3f52014-07-01 08:57:10 +0200236 printf("hash table %p: entries=%"
237 PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100238 ht, ht->entries, ht->num_buckets, load * 100.0);
239 if (nchains)
240 printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
Victor Stinner293f3f52014-07-01 08:57:10 +0200241 printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100242 max_chain_len, size / 1024);
243}
244#endif
245
246/* Get an entry. Return NULL if the key does not exist. */
247_Py_hashtable_entry_t *
248_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
249{
250 Py_uhash_t key_hash;
251 size_t index;
252 _Py_hashtable_entry_t *entry;
253
254 key_hash = ht->hash_func(key);
255 index = key_hash & (ht->num_buckets - 1);
256
257 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
258 if (entry->key_hash == key_hash && ht->compare_func(key, entry))
259 break;
260 }
261
262 return entry;
263}
264
265static int
266_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
267{
268 Py_uhash_t key_hash;
269 size_t index;
270 _Py_hashtable_entry_t *entry, *previous;
271
272 key_hash = ht->hash_func(key);
273 index = key_hash & (ht->num_buckets - 1);
274
275 previous = NULL;
276 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
277 if (entry->key_hash == key_hash && ht->compare_func(key, entry))
278 break;
279 previous = entry;
280 }
281
282 if (entry == NULL)
283 return 0;
284
285 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
286 (_Py_slist_item_t *)entry);
287 ht->entries--;
288
289 if (data != NULL)
290 _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
291 ht->alloc.free(entry);
292
293 if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
294 hashtable_rehash(ht);
295 return 1;
296}
297
298/* Add a new entry to the hash. The key must not be present in the hash table.
299 Return 0 on success, -1 on memory error. */
300int
301_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
302 void *data, size_t data_size)
303{
304 Py_uhash_t key_hash;
305 size_t index;
306 _Py_hashtable_entry_t *entry;
307
308 assert(data != NULL || data_size == 0);
309#ifndef NDEBUG
310 /* Don't write the assertion on a single line because it is interesting
311 to know the duplicated entry if the assertion failed. The entry can
312 be read using a debugger. */
313 entry = _Py_hashtable_get_entry(ht, key);
314 assert(entry == NULL);
315#endif
316
317 key_hash = ht->hash_func(key);
318 index = key_hash & (ht->num_buckets - 1);
319
320 entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
321 if (entry == NULL) {
322 /* memory allocation failed */
323 return -1;
324 }
325
326 entry->key = (void *)key;
327 entry->key_hash = key_hash;
328
329 assert(data_size == ht->data_size);
Benjamin Peterson4a757602016-09-06 19:03:40 -0700330 if (data)
331 memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100332
333 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
334 ht->entries++;
335
336 if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
337 hashtable_rehash(ht);
338 return 0;
339}
340
341/* Get data from an entry. Copy entry data into data and return 1 if the entry
342 exists, return 0 if the entry does not exist. */
343int
344_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
345{
346 _Py_hashtable_entry_t *entry;
347
348 assert(data != NULL);
349
350 entry = _Py_hashtable_get_entry(ht, key);
351 if (entry == NULL)
352 return 0;
353 _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
354 return 1;
355}
356
357int
358_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
359{
360 assert(data != NULL);
361 assert(ht->free_data_func == NULL);
362 return _hashtable_pop_entry(ht, key, data, data_size);
363}
364
365/* Delete an entry. The entry must exist. */
366void
367_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
368{
369#ifndef NDEBUG
370 int found = _hashtable_pop_entry(ht, key, NULL, 0);
371 assert(found);
372#else
373 (void)_hashtable_pop_entry(ht, key, NULL, 0);
374#endif
375}
376
377/* Prototype for a pointer to a function to be called foreach
378 key/value pair in the hash by hashtable_foreach(). Iteration
379 stops if a non-zero value is returned. */
380int
381_Py_hashtable_foreach(_Py_hashtable_t *ht,
382 int (*func) (_Py_hashtable_entry_t *entry, void *arg),
383 void *arg)
384{
385 _Py_hashtable_entry_t *entry;
386 size_t hv;
387
388 for (hv = 0; hv < ht->num_buckets; hv++) {
389 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
390 int res = func(entry, arg);
391 if (res)
392 return res;
393 }
394 }
395 return 0;
396}
397
398static void
399hashtable_rehash(_Py_hashtable_t *ht)
400{
401 size_t buckets_size, new_size, bucket;
402 _Py_slist_t *old_buckets = NULL;
403 size_t old_num_buckets;
404
405 new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
406 if (new_size == ht->num_buckets)
407 return;
408
409 old_num_buckets = ht->num_buckets;
410
411 buckets_size = new_size * sizeof(ht->buckets[0]);
412 old_buckets = ht->buckets;
413 ht->buckets = ht->alloc.malloc(buckets_size);
414 if (ht->buckets == NULL) {
415 /* cancel rehash on memory allocation failure */
416 ht->buckets = old_buckets ;
417 /* memory allocation failed */
418 return;
419 }
420 memset(ht->buckets, 0, buckets_size);
421
422 ht->num_buckets = new_size;
423
424 for (bucket = 0; bucket < old_num_buckets; bucket++) {
425 _Py_hashtable_entry_t *entry, *next;
426 for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
427 size_t entry_index;
428
429 assert(ht->hash_func(entry->key) == entry->key_hash);
430 next = ENTRY_NEXT(entry);
431 entry_index = entry->key_hash & (new_size - 1);
432
433 _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
434 }
435 }
436
437 ht->alloc.free(old_buckets);
438}
439
440void
441_Py_hashtable_clear(_Py_hashtable_t *ht)
442{
443 _Py_hashtable_entry_t *entry, *next;
444 size_t i;
445
446 for (i=0; i < ht->num_buckets; i++) {
447 for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
448 next = ENTRY_NEXT(entry);
449 if (ht->free_data_func)
450 ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
451 ht->alloc.free(entry);
452 }
453 _Py_slist_init(&ht->buckets[i]);
454 }
455 ht->entries = 0;
456 hashtable_rehash(ht);
457}
458
459void
460_Py_hashtable_destroy(_Py_hashtable_t *ht)
461{
462 size_t i;
463
464 for (i = 0; i < ht->num_buckets; i++) {
465 _Py_slist_item_t *entry = ht->buckets[i].head;
466 while (entry) {
467 _Py_slist_item_t *entry_next = entry->next;
468 if (ht->free_data_func)
469 ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
470 ht->alloc.free(entry);
471 entry = entry_next;
472 }
473 }
474
475 ht->alloc.free(ht->buckets);
476 ht->alloc.free(ht);
477}
478
479/* Return a copy of the hash table */
480_Py_hashtable_t *
481_Py_hashtable_copy(_Py_hashtable_t *src)
482{
483 _Py_hashtable_t *dst;
484 _Py_hashtable_entry_t *entry;
485 size_t bucket;
486 int err;
487 void *data, *new_data;
488
489 dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
490 src->hash_func, src->compare_func,
491 src->copy_data_func, src->free_data_func,
492 src->get_data_size_func, &src->alloc);
493 if (dst == NULL)
494 return NULL;
495
496 for (bucket=0; bucket < src->num_buckets; bucket++) {
497 entry = TABLE_HEAD(src, bucket);
498 for (; entry; entry = ENTRY_NEXT(entry)) {
499 if (src->copy_data_func) {
500 data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
501 new_data = src->copy_data_func(data);
502 if (new_data != NULL)
503 err = _Py_hashtable_set(dst, entry->key,
504 &new_data, src->data_size);
505 else
506 err = 1;
507 }
508 else {
Victor Stinnerd9a73522014-03-24 22:34:34 +0100509 data = _Py_HASHTABLE_ENTRY_DATA(entry);
Victor Stinnered3b0bc2013-11-23 12:27:24 +0100510 err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
511 }
512 if (err) {
513 _Py_hashtable_destroy(dst);
514 return NULL;
515 }
516 }
517 }
518 return dst;
519}
520