blob: af3dfce43c89b642f0320acb95786fa5dd845368 [file] [log] [blame]
Yury Selivanovf23746a2018-01-22 19:11:18 -05001#include "Python.h"
2
3#include "structmember.h"
4#include "internal/pystate.h"
5#include "internal/hamt.h"
6
7/* popcnt support in Visual Studio */
8#ifdef _MSC_VER
9#include <intrin.h>
10#endif
11
12/*
13This file provides an implemention of an immutable mapping using the
14Hash Array Mapped Trie (or HAMT) datastructure.
15
16This design allows to have:
17
181. Efficient copy: immutable mappings can be copied by reference,
19 making it an O(1) operation.
20
212. Efficient mutations: due to structural sharing, only a portion of
22 the trie needs to be copied when the collection is mutated. The
23 cost of set/delete operations is O(log N).
24
253. Efficient lookups: O(log N).
26
27(where N is number of key/value items in the immutable mapping.)
28
29
30HAMT
31====
32
33The core idea of HAMT is that the shape of the trie is encoded into the
34hashes of keys.
35
36Say we want to store a K/V pair in our mapping. First, we calculate the
37hash of K, let's say it's 19830128, or in binary:
38
39 0b1001011101001010101110000 = 19830128
40
41Now let's partition this bit representation of the hash into blocks of
425 bits each:
43
44 0b00_00000_10010_11101_00101_01011_10000 = 19830128
45 (6) (5) (4) (3) (2) (1)
46
47Each block of 5 bits represents a number betwen 0 and 31. So if we have
48a tree that consists of nodes, each of which is an array of 32 pointers,
49those 5-bit blocks will encode a position on a single tree level.
50
51For example, storing the key K with hash 19830128, results in the following
52tree structure:
53
54 (array of 32 pointers)
55 +---+ -- +----+----+----+ -- +----+
56 root node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b10000 = 16 (1)
57 (level 1) +---+ -- +----+----+----+ -- +----+
58 |
59 +---+ -- +----+----+----+ -- +----+
60 a 2nd level node | 0 | .. | 10 | 11 | 12 | .. | 31 | 0b01011 = 11 (2)
61 +---+ -- +----+----+----+ -- +----+
62 |
63 +---+ -- +----+----+----+ -- +----+
64 a 3rd level node | 0 | .. | 04 | 05 | 06 | .. | 31 | 0b01011 = 5 (3)
65 +---+ -- +----+----+----+ -- +----+
66 |
67 +---+ -- +----+----+----+----+
68 a 4th level node | 0 | .. | 04 | 29 | 30 | 31 | 0b11101 = 29 (4)
69 +---+ -- +----+----+----+----+
70 |
71 +---+ -- +----+----+----+ -- +----+
72 a 5th level node | 0 | .. | 17 | 18 | 19 | .. | 31 | 0b10010 = 18 (5)
73 +---+ -- +----+----+----+ -- +----+
74 |
75 +--------------+
76 |
77 +---+ -- +----+----+----+ -- +----+
78 a 6th level node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b00000 = 0 (6)
79 +---+ -- +----+----+----+ -- +----+
80 |
81 V -- our value (or collision)
82
83To rehash: for a K/V pair, the hash of K encodes where in the tree V will
84be stored.
85
86To optimize memory footprint and handle hash collisions, our implementation
87uses three different types of nodes:
88
89 * A Bitmap node;
90 * An Array node;
91 * A Collision node.
92
93Because we implement an immutable dictionary, our nodes are also
94immutable. Therefore, when we need to modify a node, we copy it, and
95do that modification to the copy.
96
97
98Array Nodes
99-----------
100
101These nodes are very simple. Essentially they are arrays of 32 pointers
102we used to illustrate the high-level idea in the previous section.
103
104We use Array nodes only when we need to store more than 16 pointers
105in a single node.
106
107Array nodes do not store key objects or value objects. They are used
108only as an indirection level - their pointers point to other nodes in
109the tree.
110
111
112Bitmap Node
113-----------
114
115Allocating a new 32-pointers array for every node of our tree would be
116very expensive. Unless we store millions of keys, most of tree nodes would
117be very sparse.
118
119When we have less than 16 elements in a node, we don't want to use the
120Array node, that would mean that we waste a lot of memory. Instead,
121we can use bitmap compression and can have just as many pointers
122as we need!
123
124Bitmap nodes consist of two fields:
125
1261. An array of pointers. If a Bitmap node holds N elements, the
127 array will be of N pointers.
128
1292. A 32bit integer -- a bitmap field. If an N-th bit is set in the
130 bitmap, it means that the node has an N-th element.
131
132For example, say we need to store a 3 elements sparse array:
133
134 +---+ -- +---+ -- +----+ -- +----+
135 | 0 | .. | 4 | .. | 11 | .. | 17 |
136 +---+ -- +---+ -- +----+ -- +----+
137 | | |
138 o1 o2 o3
139
140We allocate a three-pointer Bitmap node. Its bitmap field will be
141then set to:
142
143 0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
144
145To check if our Bitmap node has an I-th element we can do:
146
147 bitmap & (1 << I)
148
149
150And here's a formula to calculate a position in our pointer array
151which would correspond to an I-th element:
152
153 popcount(bitmap & ((1 << I) - 1))
154
155
156Let's break it down:
157
158 * `popcount` is a function that returns a number of bits set to 1;
159
160 * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
161 set to the *right* of our bit.
162
163
164So for our 17, 11, and 4 indexes:
165
166 * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
167
168 * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
169
170 * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
171
172
173To conclude: Bitmap nodes are just like Array nodes -- they can store
174a number of pointers, but use bitmap compression to eliminate unused
175pointers.
176
177
178Bitmap nodes have two pointers for each item:
179
180 +----+----+----+----+ -- +----+----+
181 | k1 | v1 | k2 | v2 | .. | kN | vN |
182 +----+----+----+----+ -- +----+----+
183
184When kI == NULL, vI points to another tree level.
185
186When kI != NULL, the actual key object is stored in kI, and its
187value is stored in vI.
188
189
190Collision Nodes
191---------------
192
193Collision nodes are simple arrays of pointers -- two pointers per
194key/value. When there's a hash collision, say for k1/v1 and k2/v2
195we have `hash(k1)==hash(k2)`. Then our collision node will be:
196
197 +----+----+----+----+
198 | k1 | v1 | k2 | v2 |
199 +----+----+----+----+
200
201
202Tree Structure
203--------------
204
205All nodes are PyObjects.
206
207The `PyHamtObject` object has a pointer to the root node (h_root),
208and has a length field (h_count).
209
210High-level functions accept a PyHamtObject object and dispatch to
211lower-level functions depending on what kind of node h_root points to.
212
213
214Operations
215==========
216
217There are three fundamental operations on an immutable dictionary:
218
2191. "o.assoc(k, v)" will return a new immutable dictionary, that will be
220 a copy of "o", but with the "k/v" item set.
221
222 Functions in this file:
223
224 hamt_node_assoc, hamt_node_bitmap_assoc,
225 hamt_node_array_assoc, hamt_node_collision_assoc
226
227 `hamt_node_assoc` function accepts a node object, and calls
228 other functions depending on its actual type.
229
2302. "o.find(k)" will lookup key "k" in "o".
231
232 Functions:
233
234 hamt_node_find, hamt_node_bitmap_find,
235 hamt_node_array_find, hamt_node_collision_find
236
2373. "o.without(k)" will return a new immutable dictionary, that will be
238 a copy of "o", buth without the "k" key.
239
240 Functions:
241
242 hamt_node_without, hamt_node_bitmap_without,
243 hamt_node_array_without, hamt_node_collision_without
244
245
246Further Reading
247===============
248
2491. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
250
2512. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
252
2533. Clojure's PersistentHashMap implementation:
254 https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
255
256
257Debug
258=====
259
260The HAMT datatype is accessible for testing purposes under the
261`_testcapi` module:
262
263 >>> from _testcapi import hamt
264 >>> h = hamt()
265 >>> h2 = h.set('a', 2)
266 >>> h3 = h2.set('b', 3)
267 >>> list(h3)
268 ['a', 'b']
269
270When CPython is built in debug mode, a '__dump__()' method is available
271to introspect the tree:
272
273 >>> print(h3.__dump__())
274 HAMT(len=2):
275 BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
276 'a': 2
277 'b': 3
278*/
279
280
281#define IS_ARRAY_NODE(node) (Py_TYPE(node) == &_PyHamt_ArrayNode_Type)
282#define IS_BITMAP_NODE(node) (Py_TYPE(node) == &_PyHamt_BitmapNode_Type)
283#define IS_COLLISION_NODE(node) (Py_TYPE(node) == &_PyHamt_CollisionNode_Type)
284
285
286/* Return type for 'find' (lookup a key) functions.
287
288 * F_ERROR - an error occurred;
289 * F_NOT_FOUND - the key was not found;
290 * F_FOUND - the key was found.
291*/
292typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
293
294
295/* Return type for 'without' (delete a key) functions.
296
297 * W_ERROR - an error occurred;
298 * W_NOT_FOUND - the key was not found: there's nothing to delete;
299 * W_EMPTY - the key was found: the node/tree would be empty
300 if the key is deleted;
301 * W_NEWNODE - the key was found: a new node/tree is returned
302 without that key.
303*/
304typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
305
306
307/* Low-level iterator protocol type.
308
309 * I_ITEM - a new item has been yielded;
310 * I_END - the whole tree was visited (similar to StopIteration).
311*/
312typedef enum {I_ITEM, I_END} hamt_iter_t;
313
314
315#define HAMT_ARRAY_NODE_SIZE 32
316
317
318typedef struct {
319 PyObject_HEAD
320 PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
321 Py_ssize_t a_count;
322} PyHamtNode_Array;
323
324
325typedef struct {
326 PyObject_VAR_HEAD
327 uint32_t b_bitmap;
328 PyObject *b_array[1];
329} PyHamtNode_Bitmap;
330
331
332typedef struct {
333 PyObject_VAR_HEAD
334 int32_t c_hash;
335 PyObject *c_array[1];
336} PyHamtNode_Collision;
337
338
339static PyHamtNode_Bitmap *_empty_bitmap_node;
340static PyHamtObject *_empty_hamt;
341
342
343static PyHamtObject *
344hamt_alloc(void);
345
346static PyHamtNode *
347hamt_node_assoc(PyHamtNode *node,
348 uint32_t shift, int32_t hash,
349 PyObject *key, PyObject *val, int* added_leaf);
350
351static hamt_without_t
352hamt_node_without(PyHamtNode *node,
353 uint32_t shift, int32_t hash,
354 PyObject *key,
355 PyHamtNode **new_node);
356
357static hamt_find_t
358hamt_node_find(PyHamtNode *node,
359 uint32_t shift, int32_t hash,
360 PyObject *key, PyObject **val);
361
362#ifdef Py_DEBUG
363static int
364hamt_node_dump(PyHamtNode *node,
365 _PyUnicodeWriter *writer, int level);
366#endif
367
368static PyHamtNode *
369hamt_node_array_new(Py_ssize_t);
370
371static PyHamtNode *
372hamt_node_collision_new(int32_t hash, Py_ssize_t size);
373
374static inline Py_ssize_t
375hamt_node_collision_count(PyHamtNode_Collision *node);
376
377
378#ifdef Py_DEBUG
379static void
380_hamt_node_array_validate(void *o)
381{
382 assert(IS_ARRAY_NODE(o));
383 PyHamtNode_Array *node = (PyHamtNode_Array*)(o);
384 Py_ssize_t i = 0, count = 0;
385 for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
386 if (node->a_array[i] != NULL) {
387 count++;
388 }
389 }
390 assert(count == node->a_count);
391}
392
393#define VALIDATE_ARRAY_NODE(NODE) \
394 do { _hamt_node_array_validate(NODE); } while (0);
395#else
396#define VALIDATE_ARRAY_NODE(NODE)
397#endif
398
399
400/* Returns -1 on error */
401static inline int32_t
402hamt_hash(PyObject *o)
403{
404 Py_hash_t hash = PyObject_Hash(o);
405
406#if SIZEOF_PY_HASH_T <= 4
407 return hash;
408#else
409 if (hash == -1) {
410 /* exception */
411 return -1;
412 }
413
414 /* While it's suboptimal to reduce Python's 64 bit hash to
415 32 bits via XOR, it seems that the resulting hash function
416 is good enough (this is also how Long type is hashed in Java.)
417 Storing 10, 100, 1000 Python strings results in a relatively
418 shallow and uniform tree structure.
419
420 Please don't change this hashing algorithm, as there are many
421 tests that test some exact tree shape to cover all code paths.
422 */
423 int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
424 return xored == -1 ? -2 : xored;
425#endif
426}
427
428static inline uint32_t
429hamt_mask(int32_t hash, uint32_t shift)
430{
431 return (((uint32_t)hash >> shift) & 0x01f);
432}
433
434static inline uint32_t
435hamt_bitpos(int32_t hash, uint32_t shift)
436{
437 return (uint32_t)1 << hamt_mask(hash, shift);
438}
439
440static inline uint32_t
441hamt_bitcount(uint32_t i)
442{
443#if defined(__GNUC__) && (__GNUC__ > 4)
444 return (uint32_t)__builtin_popcountl(i);
445#elif defined(__clang__) && (__clang_major__ > 3)
446 return (uint32_t)__builtin_popcountl(i);
447#elif defined(_MSC_VER)
448 return (uint32_t)__popcnt(i);
449#else
450 /* https://graphics.stanford.edu/~seander/bithacks.html */
451 i = i - ((i >> 1) & 0x55555555);
452 i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
453 return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
454#endif
455}
456
457static inline uint32_t
458hamt_bitindex(uint32_t bitmap, uint32_t bit)
459{
460 return hamt_bitcount(bitmap & (bit - 1));
461}
462
463
464/////////////////////////////////// Dump Helpers
465#ifdef Py_DEBUG
466
467static int
468_hamt_dump_ident(_PyUnicodeWriter *writer, int level)
469{
470 /* Write `' ' * level` to the `writer` */
471 PyObject *str = NULL;
472 PyObject *num = NULL;
473 PyObject *res = NULL;
474 int ret = -1;
475
476 str = PyUnicode_FromString(" ");
477 if (str == NULL) {
478 goto error;
479 }
480
481 num = PyLong_FromLong((long)level);
482 if (num == NULL) {
483 goto error;
484 }
485
486 res = PyNumber_Multiply(str, num);
487 if (res == NULL) {
488 goto error;
489 }
490
491 ret = _PyUnicodeWriter_WriteStr(writer, res);
492
493error:
494 Py_XDECREF(res);
495 Py_XDECREF(str);
496 Py_XDECREF(num);
497 return ret;
498}
499
500static int
501_hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
502{
503 /* A convenient helper combining _PyUnicodeWriter_WriteStr and
504 PyUnicode_FromFormatV.
505 */
506 PyObject* msg;
507 int ret;
508
509 va_list vargs;
510#ifdef HAVE_STDARG_PROTOTYPES
511 va_start(vargs, format);
512#else
513 va_start(vargs);
514#endif
515 msg = PyUnicode_FromFormatV(format, vargs);
516 va_end(vargs);
517
518 if (msg == NULL) {
519 return -1;
520 }
521
522 ret = _PyUnicodeWriter_WriteStr(writer, msg);
523 Py_DECREF(msg);
524 return ret;
525}
526
527#endif /* Py_DEBUG */
528/////////////////////////////////// Bitmap Node
529
530
531static PyHamtNode *
532hamt_node_bitmap_new(Py_ssize_t size)
533{
534 /* Create a new bitmap node of size 'size' */
535
536 PyHamtNode_Bitmap *node;
537 Py_ssize_t i;
538
539 assert(size >= 0);
540 assert(size % 2 == 0);
541
542 if (size == 0 && _empty_bitmap_node != NULL) {
543 Py_INCREF(_empty_bitmap_node);
544 return (PyHamtNode *)_empty_bitmap_node;
545 }
546
547 /* No freelist; allocate a new bitmap node */
548 node = PyObject_GC_NewVar(
549 PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
550 if (node == NULL) {
551 return NULL;
552 }
553
554 Py_SIZE(node) = size;
555
556 for (i = 0; i < size; i++) {
557 node->b_array[i] = NULL;
558 }
559
560 node->b_bitmap = 0;
561
562 _PyObject_GC_TRACK(node);
563
564 if (size == 0 && _empty_bitmap_node == NULL) {
565 /* Since bitmap nodes are immutable, we can cache the instance
566 for size=0 and reuse it whenever we need an empty bitmap node.
567 */
568 _empty_bitmap_node = node;
569 Py_INCREF(_empty_bitmap_node);
570 }
571
572 return (PyHamtNode *)node;
573}
574
575static inline Py_ssize_t
576hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
577{
578 return Py_SIZE(node) / 2;
579}
580
581static PyHamtNode_Bitmap *
582hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
583{
584 /* Clone a bitmap node; return a new one with the same child notes. */
585
586 PyHamtNode_Bitmap *clone;
587 Py_ssize_t i;
588
589 clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
590 if (clone == NULL) {
591 return NULL;
592 }
593
594 for (i = 0; i < Py_SIZE(node); i++) {
595 Py_XINCREF(node->b_array[i]);
596 clone->b_array[i] = node->b_array[i];
597 }
598
599 clone->b_bitmap = node->b_bitmap;
600 return clone;
601}
602
603static PyHamtNode_Bitmap *
604hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
605{
606 assert(bit & o->b_bitmap);
607 assert(hamt_node_bitmap_count(o) > 1);
608
609 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
610 Py_SIZE(o) - 2);
611 if (new == NULL) {
612 return NULL;
613 }
614
615 uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
616 uint32_t key_idx = 2 * idx;
617 uint32_t val_idx = key_idx + 1;
618 uint32_t i;
619
620 for (i = 0; i < key_idx; i++) {
621 Py_XINCREF(o->b_array[i]);
622 new->b_array[i] = o->b_array[i];
623 }
624
625 for (i = val_idx + 1; i < Py_SIZE(o); i++) {
626 Py_XINCREF(o->b_array[i]);
627 new->b_array[i - 2] = o->b_array[i];
628 }
629
630 new->b_bitmap = o->b_bitmap & ~bit;
631 return new;
632}
633
634static PyHamtNode *
635hamt_node_new_bitmap_or_collision(uint32_t shift,
636 PyObject *key1, PyObject *val1,
637 int32_t key2_hash,
638 PyObject *key2, PyObject *val2)
639{
640 /* Helper method. Creates a new node for key1/val and key2/val2
641 pairs.
642
643 If key1 hash is equal to the hash of key2, a Collision node
644 will be created. If they are not equal, a Bitmap node is
645 created.
646 */
647
648 int32_t key1_hash = hamt_hash(key1);
649 if (key1_hash == -1) {
650 return NULL;
651 }
652
653 if (key1_hash == key2_hash) {
654 PyHamtNode_Collision *n;
655 n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
656 if (n == NULL) {
657 return NULL;
658 }
659
660 Py_INCREF(key1);
661 n->c_array[0] = key1;
662 Py_INCREF(val1);
663 n->c_array[1] = val1;
664
665 Py_INCREF(key2);
666 n->c_array[2] = key2;
667 Py_INCREF(val2);
668 n->c_array[3] = val2;
669
670 return (PyHamtNode *)n;
671 }
672 else {
673 int added_leaf = 0;
674 PyHamtNode *n = hamt_node_bitmap_new(0);
675 if (n == NULL) {
676 return NULL;
677 }
678
679 PyHamtNode *n2 = hamt_node_assoc(
680 n, shift, key1_hash, key1, val1, &added_leaf);
681 Py_DECREF(n);
682 if (n2 == NULL) {
683 return NULL;
684 }
685
686 n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
687 Py_DECREF(n2);
688 if (n == NULL) {
689 return NULL;
690 }
691
692 return n;
693 }
694}
695
696static PyHamtNode *
697hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
698 uint32_t shift, int32_t hash,
699 PyObject *key, PyObject *val, int* added_leaf)
700{
701 /* assoc operation for bitmap nodes.
702
703 Return: a new node, or self if key/val already is in the
704 collection.
705
706 'added_leaf' is later used in '_PyHamt_Assoc' to determine if
707 `hamt.set(key, val)` increased the size of the collection.
708 */
709
710 uint32_t bit = hamt_bitpos(hash, shift);
711 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
712
713 /* Bitmap node layout:
714
715 +------+------+------+------+ --- +------+------+
716 | key1 | val1 | key2 | val2 | ... | keyN | valN |
717 +------+------+------+------+ --- +------+------+
718 where `N < Py_SIZE(node)`.
719
720 The `node->b_bitmap` field is a bitmap. For a given
721 `(shift, hash)` pair we can determine:
722
723 - If this node has the corresponding key/val slots.
724 - The index of key/val slots.
725 */
726
727 if (self->b_bitmap & bit) {
728 /* The key is set in this node */
729
730 uint32_t key_idx = 2 * idx;
731 uint32_t val_idx = key_idx + 1;
732
733 assert(val_idx < Py_SIZE(self));
734
735 PyObject *key_or_null = self->b_array[key_idx];
736 PyObject *val_or_node = self->b_array[val_idx];
737
738 if (key_or_null == NULL) {
739 /* key is NULL. This means that we have a few keys
740 that have the same (hash, shift) pair. */
741
742 assert(val_or_node != NULL);
743
744 PyHamtNode *sub_node = hamt_node_assoc(
745 (PyHamtNode *)val_or_node,
746 shift + 5, hash, key, val, added_leaf);
747 if (sub_node == NULL) {
748 return NULL;
749 }
750
751 if (val_or_node == (PyObject *)sub_node) {
752 Py_DECREF(sub_node);
753 Py_INCREF(self);
754 return (PyHamtNode *)self;
755 }
756
757 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
758 if (ret == NULL) {
759 return NULL;
760 }
761 Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
762 return (PyHamtNode *)ret;
763 }
764
765 assert(key != NULL);
766 /* key is not NULL. This means that we have only one other
767 key in this collection that matches our hash for this shift. */
768
769 int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
770 if (comp_err < 0) { /* exception in __eq__ */
771 return NULL;
772 }
773 if (comp_err == 1) { /* key == key_or_null */
774 if (val == val_or_node) {
775 /* we already have the same key/val pair; return self. */
776 Py_INCREF(self);
777 return (PyHamtNode *)self;
778 }
779
780 /* We're setting a new value for the key we had before.
781 Make a new bitmap node with a replaced value, and return it. */
782 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
783 if (ret == NULL) {
784 return NULL;
785 }
786 Py_INCREF(val);
787 Py_SETREF(ret->b_array[val_idx], val);
788 return (PyHamtNode *)ret;
789 }
790
791 /* It's a new key, and it has the same index as *one* another key.
792 We have a collision. We need to create a new node which will
793 combine the existing key and the key we're adding.
794
795 `hamt_node_new_bitmap_or_collision` will either create a new
796 Collision node if the keys have identical hashes, or
797 a new Bitmap node.
798 */
799 PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
800 shift + 5,
801 key_or_null, val_or_node, /* existing key/val */
802 hash,
803 key, val /* new key/val */
804 );
805 if (sub_node == NULL) {
806 return NULL;
807 }
808
809 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
810 if (ret == NULL) {
811 Py_DECREF(sub_node);
812 return NULL;
813 }
814 Py_SETREF(ret->b_array[key_idx], NULL);
815 Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
816
817 *added_leaf = 1;
818 return (PyHamtNode *)ret;
819 }
820 else {
821 /* There was no key before with the same (shift,hash). */
822
823 uint32_t n = hamt_bitcount(self->b_bitmap);
824
825 if (n >= 16) {
826 /* When we have a situation where we want to store more
827 than 16 nodes at one level of the tree, we no longer
828 want to use the Bitmap node with bitmap encoding.
829
830 Instead we start using an Array node, which has
831 simpler (faster) implementation at the expense of
832 having prealocated 32 pointers for its keys/values
833 pairs.
834
835 Small hamt objects (<30 keys) usually don't have any
836 Array nodes at all. Betwen ~30 and ~400 keys hamt
837 objects usually have one Array node, and usually it's
838 a root node.
839 */
840
841 uint32_t jdx = hamt_mask(hash, shift);
842 /* 'jdx' is the index of where the new key should be added
843 in the new Array node we're about to create. */
844
845 PyHamtNode *empty = NULL;
846 PyHamtNode_Array *new_node = NULL;
847 PyHamtNode *res = NULL;
848
849 /* Create a new Array node. */
850 new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
851 if (new_node == NULL) {
852 goto fin;
853 }
854
855 /* Create an empty bitmap node for the next
856 hamt_node_assoc call. */
857 empty = hamt_node_bitmap_new(0);
858 if (empty == NULL) {
859 goto fin;
860 }
861
862 /* Make a new bitmap node for the key/val we're adding.
863 Set that bitmap node to new-array-node[jdx]. */
864 new_node->a_array[jdx] = hamt_node_assoc(
865 empty, shift + 5, hash, key, val, added_leaf);
866 if (new_node->a_array[jdx] == NULL) {
867 goto fin;
868 }
869
870 /* Copy existing key/value pairs from the current Bitmap
871 node to the new Array node we've just created. */
872 Py_ssize_t i, j;
873 for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
874 if (((self->b_bitmap >> i) & 1) != 0) {
875 /* Ensure we don't accidentally override `jdx` element
876 we set few lines above.
877 */
878 assert(new_node->a_array[i] == NULL);
879
880 if (self->b_array[j] == NULL) {
881 new_node->a_array[i] =
882 (PyHamtNode *)self->b_array[j + 1];
883 Py_INCREF(new_node->a_array[i]);
884 }
885 else {
886 int32_t rehash = hamt_hash(self->b_array[j]);
887 if (rehash == -1) {
888 goto fin;
889 }
890
891 new_node->a_array[i] = hamt_node_assoc(
892 empty, shift + 5,
893 rehash,
894 self->b_array[j],
895 self->b_array[j + 1],
896 added_leaf);
897
898 if (new_node->a_array[i] == NULL) {
899 goto fin;
900 }
901 }
902 j += 2;
903 }
904 }
905
906 VALIDATE_ARRAY_NODE(new_node)
907
908 /* That's it! */
909 res = (PyHamtNode *)new_node;
910
911 fin:
912 Py_XDECREF(empty);
913 if (res == NULL) {
914 Py_XDECREF(new_node);
915 }
916 return res;
917 }
918 else {
919 /* We have less than 16 keys at this level; let's just
920 create a new bitmap node out of this node with the
921 new key/val pair added. */
922
923 uint32_t key_idx = 2 * idx;
924 uint32_t val_idx = key_idx + 1;
925 Py_ssize_t i;
926
927 *added_leaf = 1;
928
929 /* Allocate new Bitmap node which can have one more key/val
930 pair in addition to what we have already. */
931 PyHamtNode_Bitmap *new_node =
932 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
933 if (new_node == NULL) {
934 return NULL;
935 }
936
937 /* Copy all keys/values that will be before the new key/value
938 we are adding. */
939 for (i = 0; i < key_idx; i++) {
940 Py_XINCREF(self->b_array[i]);
941 new_node->b_array[i] = self->b_array[i];
942 }
943
944 /* Set the new key/value to the new Bitmap node. */
945 Py_INCREF(key);
946 new_node->b_array[key_idx] = key;
947 Py_INCREF(val);
948 new_node->b_array[val_idx] = val;
949
950 /* Copy all keys/values that will be after the new key/value
951 we are adding. */
952 for (i = key_idx; i < Py_SIZE(self); i++) {
953 Py_XINCREF(self->b_array[i]);
954 new_node->b_array[i + 2] = self->b_array[i];
955 }
956
957 new_node->b_bitmap = self->b_bitmap | bit;
958 return (PyHamtNode *)new_node;
959 }
960 }
961}
962
963static hamt_without_t
964hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
965 uint32_t shift, int32_t hash,
966 PyObject *key,
967 PyHamtNode **new_node)
968{
969 uint32_t bit = hamt_bitpos(hash, shift);
970 if ((self->b_bitmap & bit) == 0) {
971 return W_NOT_FOUND;
972 }
973
974 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
975
976 uint32_t key_idx = 2 * idx;
977 uint32_t val_idx = key_idx + 1;
978
979 PyObject *key_or_null = self->b_array[key_idx];
980 PyObject *val_or_node = self->b_array[val_idx];
981
982 if (key_or_null == NULL) {
983 /* key == NULL means that 'value' is another tree node. */
984
985 PyHamtNode *sub_node = NULL;
986
987 hamt_without_t res = hamt_node_without(
988 (PyHamtNode *)val_or_node,
989 shift + 5, hash, key, &sub_node);
990
991 switch (res) {
992 case W_EMPTY:
993 /* It's impossible for us to receive a W_EMPTY here:
994
995 - Array nodes are converted to Bitmap nodes when
996 we delete 16th item from them;
997
998 - Collision nodes are converted to Bitmap when
999 there is one item in them;
1000
1001 - Bitmap node's without() inlines single-item
1002 sub-nodes.
1003
1004 So in no situation we can have a single-item
1005 Bitmap child of another Bitmap node.
1006 */
1007 Py_UNREACHABLE();
1008
1009 case W_NEWNODE: {
1010 assert(sub_node != NULL);
1011
1012 if (IS_BITMAP_NODE(sub_node)) {
1013 PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1014 if (hamt_node_bitmap_count(sub_tree) == 1 &&
1015 sub_tree->b_array[0] != NULL)
1016 {
1017 /* A bitmap node with one key/value pair. Just
1018 merge it into this node.
1019
1020 Note that we don't inline Bitmap nodes that
1021 have a NULL key -- those nodes point to another
1022 tree level, and we cannot simply move tree levels
1023 up or down.
1024 */
1025
1026 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1027 if (clone == NULL) {
1028 Py_DECREF(sub_node);
1029 return W_ERROR;
1030 }
1031
1032 PyObject *key = sub_tree->b_array[0];
1033 PyObject *val = sub_tree->b_array[1];
1034
1035 Py_INCREF(key);
1036 Py_XSETREF(clone->b_array[key_idx], key);
1037 Py_INCREF(val);
1038 Py_SETREF(clone->b_array[val_idx], val);
1039
1040 Py_DECREF(sub_tree);
1041
1042 *new_node = (PyHamtNode *)clone;
1043 return W_NEWNODE;
1044 }
1045 }
1046
1047#ifdef Py_DEBUG
1048 /* Ensure that Collision.without implementation
1049 converts to Bitmap nodes itself.
1050 */
1051 if (IS_COLLISION_NODE(sub_node)) {
1052 assert(hamt_node_collision_count(
1053 (PyHamtNode_Collision*)sub_node) > 1);
1054 }
1055#endif
1056
1057 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
Yury Selivanov0bad4d62018-01-23 16:26:07 -05001058 if (clone == NULL) {
1059 return W_ERROR;
1060 }
1061
Yury Selivanovf23746a2018-01-22 19:11:18 -05001062 Py_SETREF(clone->b_array[val_idx],
1063 (PyObject *)sub_node); /* borrow */
1064
1065 *new_node = (PyHamtNode *)clone;
1066 return W_NEWNODE;
1067 }
1068
1069 case W_ERROR:
1070 case W_NOT_FOUND:
1071 assert(sub_node == NULL);
1072 return res;
1073
1074 default:
1075 Py_UNREACHABLE();
1076 }
1077 }
1078 else {
1079 /* We have a regular key/value pair */
1080
1081 int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1082 if (cmp < 0) {
1083 return W_ERROR;
1084 }
1085 if (cmp == 0) {
1086 return W_NOT_FOUND;
1087 }
1088
1089 if (hamt_node_bitmap_count(self) == 1) {
1090 return W_EMPTY;
1091 }
1092
1093 *new_node = (PyHamtNode *)
1094 hamt_node_bitmap_clone_without(self, bit);
1095 if (*new_node == NULL) {
1096 return W_ERROR;
1097 }
1098
1099 return W_NEWNODE;
1100 }
1101}
1102
1103static hamt_find_t
1104hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1105 uint32_t shift, int32_t hash,
1106 PyObject *key, PyObject **val)
1107{
1108 /* Lookup a key in a Bitmap node. */
1109
1110 uint32_t bit = hamt_bitpos(hash, shift);
1111 uint32_t idx;
1112 uint32_t key_idx;
1113 uint32_t val_idx;
1114 PyObject *key_or_null;
1115 PyObject *val_or_node;
1116 int comp_err;
1117
1118 if ((self->b_bitmap & bit) == 0) {
1119 return F_NOT_FOUND;
1120 }
1121
1122 idx = hamt_bitindex(self->b_bitmap, bit);
Yury Selivanovf23746a2018-01-22 19:11:18 -05001123 key_idx = idx * 2;
1124 val_idx = key_idx + 1;
1125
1126 assert(val_idx < Py_SIZE(self));
1127
1128 key_or_null = self->b_array[key_idx];
1129 val_or_node = self->b_array[val_idx];
1130
1131 if (key_or_null == NULL) {
1132 /* There are a few keys that have the same hash at the current shift
1133 that match our key. Dispatch the lookup further down the tree. */
1134 assert(val_or_node != NULL);
1135 return hamt_node_find((PyHamtNode *)val_or_node,
1136 shift + 5, hash, key, val);
1137 }
1138
1139 /* We have only one key -- a potential match. Let's compare if the
1140 key we are looking at is equal to the key we are looking for. */
1141 assert(key != NULL);
1142 comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1143 if (comp_err < 0) { /* exception in __eq__ */
1144 return F_ERROR;
1145 }
1146 if (comp_err == 1) { /* key == key_or_null */
1147 *val = val_or_node;
1148 return F_FOUND;
1149 }
1150
1151 return F_NOT_FOUND;
1152}
1153
1154static int
1155hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1156{
1157 /* Bitmap's tp_traverse */
1158
1159 Py_ssize_t i;
1160
1161 for (i = Py_SIZE(self); --i >= 0; ) {
1162 Py_VISIT(self->b_array[i]);
1163 }
1164
1165 return 0;
1166}
1167
1168static void
1169hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1170{
1171 /* Bitmap's tp_dealloc */
1172
1173 Py_ssize_t len = Py_SIZE(self);
1174 Py_ssize_t i;
1175
1176 PyObject_GC_UnTrack(self);
1177 Py_TRASHCAN_SAFE_BEGIN(self)
1178
1179 if (len > 0) {
1180 i = len;
1181 while (--i >= 0) {
1182 Py_XDECREF(self->b_array[i]);
1183 }
1184 }
1185
1186 Py_TYPE(self)->tp_free((PyObject *)self);
1187 Py_TRASHCAN_SAFE_END(self)
1188}
1189
1190#ifdef Py_DEBUG
1191static int
1192hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1193 _PyUnicodeWriter *writer, int level)
1194{
1195 /* Debug build: __dump__() method implementation for Bitmap nodes. */
1196
1197 Py_ssize_t i;
1198 PyObject *tmp1;
1199 PyObject *tmp2;
1200
1201 if (_hamt_dump_ident(writer, level + 1)) {
1202 goto error;
1203 }
1204
1205 if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1206 Py_SIZE(node), Py_SIZE(node) / 2))
1207 {
1208 goto error;
1209 }
1210
1211 tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1212 if (tmp1 == NULL) {
1213 goto error;
1214 }
1215 tmp2 = _PyLong_Format(tmp1, 2);
1216 Py_DECREF(tmp1);
1217 if (tmp2 == NULL) {
1218 goto error;
1219 }
1220 if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1221 Py_DECREF(tmp2);
1222 goto error;
1223 }
1224 Py_DECREF(tmp2);
1225
1226 for (i = 0; i < Py_SIZE(node); i += 2) {
1227 PyObject *key_or_null = node->b_array[i];
1228 PyObject *val_or_node = node->b_array[i + 1];
1229
1230 if (_hamt_dump_ident(writer, level + 2)) {
1231 goto error;
1232 }
1233
1234 if (key_or_null == NULL) {
1235 if (_hamt_dump_format(writer, "NULL:\n")) {
1236 goto error;
1237 }
1238
1239 if (hamt_node_dump((PyHamtNode *)val_or_node,
1240 writer, level + 2))
1241 {
1242 goto error;
1243 }
1244 }
1245 else {
1246 if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1247 val_or_node))
1248 {
1249 goto error;
1250 }
1251 }
1252
1253 if (_hamt_dump_format(writer, "\n")) {
1254 goto error;
1255 }
1256 }
1257
1258 return 0;
1259error:
1260 return -1;
1261}
1262#endif /* Py_DEBUG */
1263
1264
1265/////////////////////////////////// Collision Node
1266
1267
1268static PyHamtNode *
1269hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1270{
1271 /* Create a new Collision node. */
1272
1273 PyHamtNode_Collision *node;
1274 Py_ssize_t i;
1275
1276 assert(size >= 4);
1277 assert(size % 2 == 0);
1278
1279 node = PyObject_GC_NewVar(
1280 PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1281 if (node == NULL) {
1282 return NULL;
1283 }
1284
1285 for (i = 0; i < size; i++) {
1286 node->c_array[i] = NULL;
1287 }
1288
1289 Py_SIZE(node) = size;
1290 node->c_hash = hash;
1291
1292 _PyObject_GC_TRACK(node);
1293
1294 return (PyHamtNode *)node;
1295}
1296
1297static hamt_find_t
1298hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1299 Py_ssize_t *idx)
1300{
1301 /* Lookup `key` in the Collision node `self`. Set the index of the
1302 found key to 'idx'. */
1303
1304 Py_ssize_t i;
1305 PyObject *el;
1306
1307 for (i = 0; i < Py_SIZE(self); i += 2) {
1308 el = self->c_array[i];
1309
1310 assert(el != NULL);
1311 int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1312 if (cmp < 0) {
1313 return F_ERROR;
1314 }
1315 if (cmp == 1) {
1316 *idx = i;
1317 return F_FOUND;
1318 }
1319 }
1320
1321 return F_NOT_FOUND;
1322}
1323
1324static PyHamtNode *
1325hamt_node_collision_assoc(PyHamtNode_Collision *self,
1326 uint32_t shift, int32_t hash,
1327 PyObject *key, PyObject *val, int* added_leaf)
1328{
1329 /* Set a new key to this level (currently a Collision node)
1330 of the tree. */
1331
1332 if (hash == self->c_hash) {
1333 /* The hash of the 'key' we are adding matches the hash of
1334 other keys in this Collision node. */
1335
1336 Py_ssize_t key_idx = -1;
1337 hamt_find_t found;
1338 PyHamtNode_Collision *new_node;
1339 Py_ssize_t i;
1340
1341 /* Let's try to lookup the new 'key', maybe we already have it. */
1342 found = hamt_node_collision_find_index(self, key, &key_idx);
1343 switch (found) {
1344 case F_ERROR:
1345 /* Exception. */
1346 return NULL;
1347
1348 case F_NOT_FOUND:
1349 /* This is a totally new key. Clone the current node,
1350 add a new key/value to the cloned node. */
1351
1352 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1353 self->c_hash, Py_SIZE(self) + 2);
1354 if (new_node == NULL) {
1355 return NULL;
1356 }
1357
1358 for (i = 0; i < Py_SIZE(self); i++) {
1359 Py_INCREF(self->c_array[i]);
1360 new_node->c_array[i] = self->c_array[i];
1361 }
1362
1363 Py_INCREF(key);
1364 new_node->c_array[i] = key;
1365 Py_INCREF(val);
1366 new_node->c_array[i + 1] = val;
1367
1368 *added_leaf = 1;
1369 return (PyHamtNode *)new_node;
1370
1371 case F_FOUND:
1372 /* There's a key which is equal to the key we are adding. */
1373
1374 assert(key_idx >= 0);
1375 assert(key_idx < Py_SIZE(self));
1376 Py_ssize_t val_idx = key_idx + 1;
1377
1378 if (self->c_array[val_idx] == val) {
1379 /* We're setting a key/value pair that's already set. */
1380 Py_INCREF(self);
1381 return (PyHamtNode *)self;
1382 }
1383
1384 /* We need to replace old value for the key
1385 with a new value. Create a new Collision node.*/
1386 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1387 self->c_hash, Py_SIZE(self));
1388 if (new_node == NULL) {
1389 return NULL;
1390 }
1391
1392 /* Copy all elements of the old node to the new one. */
1393 for (i = 0; i < Py_SIZE(self); i++) {
1394 Py_INCREF(self->c_array[i]);
1395 new_node->c_array[i] = self->c_array[i];
1396 }
1397
1398 /* Replace the old value with the new value for the our key. */
1399 Py_DECREF(new_node->c_array[val_idx]);
1400 Py_INCREF(val);
1401 new_node->c_array[val_idx] = val;
1402
1403 return (PyHamtNode *)new_node;
1404
1405 default:
1406 Py_UNREACHABLE();
1407 }
1408 }
1409 else {
1410 /* The hash of the new key is different from the hash that
1411 all keys of this Collision node have.
1412
1413 Create a Bitmap node inplace with two children:
1414 key/value pair that we're adding, and the Collision node
1415 we're replacing on this tree level.
1416 */
1417
1418 PyHamtNode_Bitmap *new_node;
1419 PyHamtNode *assoc_res;
1420
1421 new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1422 if (new_node == NULL) {
1423 return NULL;
1424 }
1425 new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1426 Py_INCREF(self);
1427 new_node->b_array[1] = (PyObject*) self;
1428
1429 assoc_res = hamt_node_bitmap_assoc(
1430 new_node, shift, hash, key, val, added_leaf);
1431 Py_DECREF(new_node);
1432 return assoc_res;
1433 }
1434}
1435
1436static inline Py_ssize_t
1437hamt_node_collision_count(PyHamtNode_Collision *node)
1438{
1439 return Py_SIZE(node) / 2;
1440}
1441
1442static hamt_without_t
1443hamt_node_collision_without(PyHamtNode_Collision *self,
1444 uint32_t shift, int32_t hash,
1445 PyObject *key,
1446 PyHamtNode **new_node)
1447{
1448 if (hash != self->c_hash) {
1449 return W_NOT_FOUND;
1450 }
1451
1452 Py_ssize_t key_idx = -1;
1453 hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1454
1455 switch (found) {
1456 case F_ERROR:
1457 return W_ERROR;
1458
1459 case F_NOT_FOUND:
1460 return W_NOT_FOUND;
1461
1462 case F_FOUND:
1463 assert(key_idx >= 0);
1464 assert(key_idx < Py_SIZE(self));
1465
1466 Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1467
1468 if (new_count == 0) {
1469 /* The node has only one key/value pair and it's for the
1470 key we're trying to delete. So a new node will be empty
1471 after the removal.
1472 */
1473 return W_EMPTY;
1474 }
1475
1476 if (new_count == 1) {
1477 /* The node has two keys, and after deletion the
1478 new Collision node would have one. Collision nodes
1479 with one key shouldn't exist, co convert it to a
1480 Bitmap node.
1481 */
1482 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1483 hamt_node_bitmap_new(2);
1484 if (node == NULL) {
1485 return W_ERROR;
1486 }
1487
1488 if (key_idx == 0) {
1489 Py_INCREF(self->c_array[2]);
1490 node->b_array[0] = self->c_array[2];
1491 Py_INCREF(self->c_array[3]);
1492 node->b_array[1] = self->c_array[3];
1493 }
1494 else {
1495 assert(key_idx == 2);
1496 Py_INCREF(self->c_array[0]);
1497 node->b_array[0] = self->c_array[0];
1498 Py_INCREF(self->c_array[1]);
1499 node->b_array[1] = self->c_array[1];
1500 }
1501
1502 node->b_bitmap = hamt_bitpos(hash, shift);
1503
1504 *new_node = (PyHamtNode *)node;
1505 return W_NEWNODE;
1506 }
1507
1508 /* Allocate a new Collision node with capacity for one
1509 less key/value pair */
1510 PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1511 hamt_node_collision_new(
1512 self->c_hash, Py_SIZE(self) - 2);
1513
1514 /* Copy all other keys from `self` to `new` */
1515 Py_ssize_t i;
1516 for (i = 0; i < key_idx; i++) {
1517 Py_INCREF(self->c_array[i]);
1518 new->c_array[i] = self->c_array[i];
1519 }
1520 for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1521 Py_INCREF(self->c_array[i]);
1522 new->c_array[i - 2] = self->c_array[i];
1523 }
1524
1525 *new_node = (PyHamtNode*)new;
1526 return W_NEWNODE;
1527
1528 default:
1529 Py_UNREACHABLE();
1530 }
1531}
1532
1533static hamt_find_t
1534hamt_node_collision_find(PyHamtNode_Collision *self,
1535 uint32_t shift, int32_t hash,
1536 PyObject *key, PyObject **val)
1537{
1538 /* Lookup `key` in the Collision node `self`. Set the value
1539 for the found key to 'val'. */
1540
1541 Py_ssize_t idx = -1;
1542 hamt_find_t res;
1543
1544 res = hamt_node_collision_find_index(self, key, &idx);
1545 if (res == F_ERROR || res == F_NOT_FOUND) {
1546 return res;
1547 }
1548
1549 assert(idx >= 0);
1550 assert(idx + 1 < Py_SIZE(self));
1551
1552 *val = self->c_array[idx + 1];
1553 assert(*val != NULL);
1554
1555 return F_FOUND;
1556}
1557
1558
1559static int
1560hamt_node_collision_traverse(PyHamtNode_Collision *self,
1561 visitproc visit, void *arg)
1562{
1563 /* Collision's tp_traverse */
1564
1565 Py_ssize_t i;
1566
1567 for (i = Py_SIZE(self); --i >= 0; ) {
1568 Py_VISIT(self->c_array[i]);
1569 }
1570
1571 return 0;
1572}
1573
1574static void
1575hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1576{
1577 /* Collision's tp_dealloc */
1578
1579 Py_ssize_t len = Py_SIZE(self);
1580
1581 PyObject_GC_UnTrack(self);
1582 Py_TRASHCAN_SAFE_BEGIN(self)
1583
1584 if (len > 0) {
1585
1586 while (--len >= 0) {
1587 Py_XDECREF(self->c_array[len]);
1588 }
1589 }
1590
1591 Py_TYPE(self)->tp_free((PyObject *)self);
1592 Py_TRASHCAN_SAFE_END(self)
1593}
1594
1595#ifdef Py_DEBUG
1596static int
1597hamt_node_collision_dump(PyHamtNode_Collision *node,
1598 _PyUnicodeWriter *writer, int level)
1599{
1600 /* Debug build: __dump__() method implementation for Collision nodes. */
1601
1602 Py_ssize_t i;
1603
1604 if (_hamt_dump_ident(writer, level + 1)) {
1605 goto error;
1606 }
1607
1608 if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1609 Py_SIZE(node), node))
1610 {
1611 goto error;
1612 }
1613
1614 for (i = 0; i < Py_SIZE(node); i += 2) {
1615 PyObject *key = node->c_array[i];
1616 PyObject *val = node->c_array[i + 1];
1617
1618 if (_hamt_dump_ident(writer, level + 2)) {
1619 goto error;
1620 }
1621
1622 if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1623 goto error;
1624 }
1625 }
1626
1627 return 0;
1628error:
1629 return -1;
1630}
1631#endif /* Py_DEBUG */
1632
1633
1634/////////////////////////////////// Array Node
1635
1636
1637static PyHamtNode *
1638hamt_node_array_new(Py_ssize_t count)
1639{
1640 Py_ssize_t i;
1641
1642 PyHamtNode_Array *node = PyObject_GC_New(
1643 PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1644 if (node == NULL) {
1645 return NULL;
1646 }
1647
1648 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1649 node->a_array[i] = NULL;
1650 }
1651
1652 node->a_count = count;
1653
1654 _PyObject_GC_TRACK(node);
1655 return (PyHamtNode *)node;
1656}
1657
1658static PyHamtNode_Array *
1659hamt_node_array_clone(PyHamtNode_Array *node)
1660{
1661 PyHamtNode_Array *clone;
1662 Py_ssize_t i;
1663
1664 VALIDATE_ARRAY_NODE(node)
1665
1666 /* Create a new Array node. */
1667 clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1668 if (clone == NULL) {
1669 return NULL;
1670 }
1671
1672 /* Copy all elements from the current Array node to the new one. */
1673 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1674 Py_XINCREF(node->a_array[i]);
1675 clone->a_array[i] = node->a_array[i];
1676 }
1677
1678 VALIDATE_ARRAY_NODE(clone)
1679 return clone;
1680}
1681
1682static PyHamtNode *
1683hamt_node_array_assoc(PyHamtNode_Array *self,
1684 uint32_t shift, int32_t hash,
1685 PyObject *key, PyObject *val, int* added_leaf)
1686{
1687 /* Set a new key to this level (currently a Collision node)
1688 of the tree.
1689
1690 Array nodes don't store values, they can only point to
1691 other nodes. They are simple arrays of 32 BaseNode pointers/
1692 */
1693
1694 uint32_t idx = hamt_mask(hash, shift);
1695 PyHamtNode *node = self->a_array[idx];
1696 PyHamtNode *child_node;
1697 PyHamtNode_Array *new_node;
1698 Py_ssize_t i;
1699
1700 if (node == NULL) {
1701 /* There's no child node for the given hash. Create a new
1702 Bitmap node for this key. */
1703
1704 PyHamtNode_Bitmap *empty = NULL;
1705
1706 /* Get an empty Bitmap node to work with. */
1707 empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1708 if (empty == NULL) {
1709 return NULL;
1710 }
1711
1712 /* Set key/val to the newly created empty Bitmap, thus
1713 creating a new Bitmap node with our key/value pair. */
1714 child_node = hamt_node_bitmap_assoc(
1715 empty,
1716 shift + 5, hash, key, val, added_leaf);
1717 Py_DECREF(empty);
1718 if (child_node == NULL) {
1719 return NULL;
1720 }
1721
1722 /* Create a new Array node. */
1723 new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1724 if (new_node == NULL) {
1725 Py_DECREF(child_node);
1726 return NULL;
1727 }
1728
1729 /* Copy all elements from the current Array node to the
1730 new one. */
1731 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1732 Py_XINCREF(self->a_array[i]);
1733 new_node->a_array[i] = self->a_array[i];
1734 }
1735
1736 assert(new_node->a_array[idx] == NULL);
1737 new_node->a_array[idx] = child_node; /* borrow */
1738 VALIDATE_ARRAY_NODE(new_node)
1739 }
1740 else {
1741 /* There's a child node for the given hash.
1742 Set the key to it./ */
1743 child_node = hamt_node_assoc(
1744 node, shift + 5, hash, key, val, added_leaf);
1745 if (child_node == (PyHamtNode *)self) {
1746 Py_DECREF(child_node);
1747 return (PyHamtNode *)self;
1748 }
1749
1750 new_node = hamt_node_array_clone(self);
1751 if (new_node == NULL) {
1752 Py_DECREF(child_node);
1753 return NULL;
1754 }
1755
1756 Py_SETREF(new_node->a_array[idx], child_node); /* borrow */
1757 VALIDATE_ARRAY_NODE(new_node)
1758 }
1759
1760 return (PyHamtNode *)new_node;
1761}
1762
1763static hamt_without_t
1764hamt_node_array_without(PyHamtNode_Array *self,
1765 uint32_t shift, int32_t hash,
1766 PyObject *key,
1767 PyHamtNode **new_node)
1768{
1769 uint32_t idx = hamt_mask(hash, shift);
1770 PyHamtNode *node = self->a_array[idx];
1771
1772 if (node == NULL) {
1773 return W_NOT_FOUND;
1774 }
1775
1776 PyHamtNode *sub_node = NULL;
1777 hamt_without_t res = hamt_node_without(
1778 (PyHamtNode *)node,
1779 shift + 5, hash, key, &sub_node);
1780
1781 switch (res) {
1782 case W_NOT_FOUND:
1783 case W_ERROR:
1784 assert(sub_node == NULL);
1785 return res;
1786
1787 case W_NEWNODE: {
1788 /* We need to replace a node at the `idx` index.
1789 Clone this node and replace.
1790 */
1791 assert(sub_node != NULL);
1792
1793 PyHamtNode_Array *clone = hamt_node_array_clone(self);
1794 if (clone == NULL) {
1795 Py_DECREF(sub_node);
1796 return W_ERROR;
1797 }
1798
1799 Py_SETREF(clone->a_array[idx], sub_node); /* borrow */
1800 *new_node = (PyHamtNode*)clone; /* borrow */
1801 return W_NEWNODE;
1802 }
1803
1804 case W_EMPTY: {
1805 assert(sub_node == NULL);
1806 /* We need to remove a node at the `idx` index.
1807 Calculate the size of the replacement Array node.
1808 */
1809 Py_ssize_t new_count = self->a_count - 1;
1810
1811 if (new_count == 0) {
1812 return W_EMPTY;
1813 }
1814
1815 if (new_count >= 16) {
1816 /* We convert Bitmap nodes to Array nodes, when a
1817 Bitmap node needs to store more than 15 key/value
1818 pairs. So we will create a new Array node if we
1819 the number of key/values after deletion is still
1820 greater than 15.
1821 */
1822
1823 PyHamtNode_Array *new = hamt_node_array_clone(self);
1824 if (new == NULL) {
1825 return W_ERROR;
1826 }
1827 new->a_count = new_count;
1828 Py_CLEAR(new->a_array[idx]);
1829
1830 *new_node = (PyHamtNode*)new; /* borrow */
1831 return W_NEWNODE;
1832 }
1833
1834 /* New Array node would have less than 16 key/value
1835 pairs. We need to create a replacement Bitmap node. */
1836
1837 Py_ssize_t bitmap_size = new_count * 2;
1838 uint32_t bitmap = 0;
1839
1840 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1841 hamt_node_bitmap_new(bitmap_size);
1842 if (new == NULL) {
1843 return W_ERROR;
1844 }
1845
1846 Py_ssize_t new_i = 0;
1847 for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1848 if (i == idx) {
1849 /* Skip the node we are deleting. */
1850 continue;
1851 }
1852
1853 PyHamtNode *node = self->a_array[i];
1854 if (node == NULL) {
1855 /* Skip any missing nodes. */
1856 continue;
1857 }
1858
1859 bitmap |= 1 << i;
1860
1861 if (IS_BITMAP_NODE(node)) {
1862 PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1863
1864 if (hamt_node_bitmap_count(child) == 1 &&
1865 child->b_array[0] != NULL)
1866 {
1867 /* node is a Bitmap with one key/value pair, just
1868 merge it into the new Bitmap node we're building.
1869
1870 Note that we don't inline Bitmap nodes that
1871 have a NULL key -- those nodes point to another
1872 tree level, and we cannot simply move tree levels
1873 up or down.
1874 */
1875 PyObject *key = child->b_array[0];
1876 PyObject *val = child->b_array[1];
1877
1878 Py_INCREF(key);
1879 new->b_array[new_i] = key;
1880 Py_INCREF(val);
1881 new->b_array[new_i + 1] = val;
1882 }
1883 else {
1884 new->b_array[new_i] = NULL;
1885 Py_INCREF(node);
1886 new->b_array[new_i + 1] = (PyObject*)node;
1887 }
1888 }
1889 else {
1890
1891#ifdef Py_DEBUG
1892 if (IS_COLLISION_NODE(node)) {
1893 Py_ssize_t child_count = hamt_node_collision_count(
1894 (PyHamtNode_Collision*)node);
1895 assert(child_count > 1);
1896 }
1897 else if (IS_ARRAY_NODE(node)) {
1898 assert(((PyHamtNode_Array*)node)->a_count >= 16);
1899 }
1900#endif
1901
1902 /* Just copy the node into our new Bitmap */
1903 new->b_array[new_i] = NULL;
1904 Py_INCREF(node);
1905 new->b_array[new_i + 1] = (PyObject*)node;
1906 }
1907
1908 new_i += 2;
1909 }
1910
1911 new->b_bitmap = bitmap;
1912 *new_node = (PyHamtNode*)new; /* borrow */
1913 return W_NEWNODE;
1914 }
1915
1916 default:
1917 Py_UNREACHABLE();
1918 }
1919}
1920
1921static hamt_find_t
1922hamt_node_array_find(PyHamtNode_Array *self,
1923 uint32_t shift, int32_t hash,
1924 PyObject *key, PyObject **val)
1925{
1926 /* Lookup `key` in the Array node `self`. Set the value
1927 for the found key to 'val'. */
1928
1929 uint32_t idx = hamt_mask(hash, shift);
1930 PyHamtNode *node;
1931
1932 node = self->a_array[idx];
1933 if (node == NULL) {
1934 return F_NOT_FOUND;
1935 }
1936
1937 /* Dispatch to the generic hamt_node_find */
1938 return hamt_node_find(node, shift + 5, hash, key, val);
1939}
1940
1941static int
1942hamt_node_array_traverse(PyHamtNode_Array *self,
1943 visitproc visit, void *arg)
1944{
1945 /* Array's tp_traverse */
1946
1947 Py_ssize_t i;
1948
1949 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1950 Py_VISIT(self->a_array[i]);
1951 }
1952
1953 return 0;
1954}
1955
1956static void
1957hamt_node_array_dealloc(PyHamtNode_Array *self)
1958{
1959 /* Array's tp_dealloc */
1960
1961 Py_ssize_t i;
1962
1963 PyObject_GC_UnTrack(self);
1964 Py_TRASHCAN_SAFE_BEGIN(self)
1965
1966 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1967 Py_XDECREF(self->a_array[i]);
1968 }
1969
1970 Py_TYPE(self)->tp_free((PyObject *)self);
1971 Py_TRASHCAN_SAFE_END(self)
1972}
1973
1974#ifdef Py_DEBUG
1975static int
1976hamt_node_array_dump(PyHamtNode_Array *node,
1977 _PyUnicodeWriter *writer, int level)
1978{
1979 /* Debug build: __dump__() method implementation for Array nodes. */
1980
1981 Py_ssize_t i;
1982
1983 if (_hamt_dump_ident(writer, level + 1)) {
1984 goto error;
1985 }
1986
1987 if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1988 goto error;
1989 }
1990
1991 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1992 if (node->a_array[i] == NULL) {
1993 continue;
1994 }
1995
1996 if (_hamt_dump_ident(writer, level + 2)) {
1997 goto error;
1998 }
1999
2000 if (_hamt_dump_format(writer, "%d::\n", i)) {
2001 goto error;
2002 }
2003
2004 if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2005 goto error;
2006 }
2007
2008 if (_hamt_dump_format(writer, "\n")) {
2009 goto error;
2010 }
2011 }
2012
2013 return 0;
2014error:
2015 return -1;
2016}
2017#endif /* Py_DEBUG */
2018
2019
2020/////////////////////////////////// Node Dispatch
2021
2022
2023static PyHamtNode *
2024hamt_node_assoc(PyHamtNode *node,
2025 uint32_t shift, int32_t hash,
2026 PyObject *key, PyObject *val, int* added_leaf)
2027{
2028 /* Set key/value to the 'node' starting with the given shift/hash.
2029 Return a new node, or the same node if key/value already
2030 set.
2031
2032 added_leaf will be set to 1 if key/value wasn't in the
2033 tree before.
2034
2035 This method automatically dispatches to the suitable
2036 hamt_node_{nodetype}_assoc method.
2037 */
2038
2039 if (IS_BITMAP_NODE(node)) {
2040 return hamt_node_bitmap_assoc(
2041 (PyHamtNode_Bitmap *)node,
2042 shift, hash, key, val, added_leaf);
2043 }
2044 else if (IS_ARRAY_NODE(node)) {
2045 return hamt_node_array_assoc(
2046 (PyHamtNode_Array *)node,
2047 shift, hash, key, val, added_leaf);
2048 }
2049 else {
2050 assert(IS_COLLISION_NODE(node));
2051 return hamt_node_collision_assoc(
2052 (PyHamtNode_Collision *)node,
2053 shift, hash, key, val, added_leaf);
2054 }
2055}
2056
2057static hamt_without_t
2058hamt_node_without(PyHamtNode *node,
2059 uint32_t shift, int32_t hash,
2060 PyObject *key,
2061 PyHamtNode **new_node)
2062{
2063 if (IS_BITMAP_NODE(node)) {
2064 return hamt_node_bitmap_without(
2065 (PyHamtNode_Bitmap *)node,
2066 shift, hash, key,
2067 new_node);
2068 }
2069 else if (IS_ARRAY_NODE(node)) {
2070 return hamt_node_array_without(
2071 (PyHamtNode_Array *)node,
2072 shift, hash, key,
2073 new_node);
2074 }
2075 else {
2076 assert(IS_COLLISION_NODE(node));
2077 return hamt_node_collision_without(
2078 (PyHamtNode_Collision *)node,
2079 shift, hash, key,
2080 new_node);
2081 }
2082}
2083
2084static hamt_find_t
2085hamt_node_find(PyHamtNode *node,
2086 uint32_t shift, int32_t hash,
2087 PyObject *key, PyObject **val)
2088{
2089 /* Find the key in the node starting with the given shift/hash.
2090
2091 If a value is found, the result will be set to F_FOUND, and
2092 *val will point to the found value object.
2093
2094 If a value wasn't found, the result will be set to F_NOT_FOUND.
2095
2096 If an exception occurs during the call, the result will be F_ERROR.
2097
2098 This method automatically dispatches to the suitable
2099 hamt_node_{nodetype}_find method.
2100 */
2101
2102 if (IS_BITMAP_NODE(node)) {
2103 return hamt_node_bitmap_find(
2104 (PyHamtNode_Bitmap *)node,
2105 shift, hash, key, val);
2106
2107 }
2108 else if (IS_ARRAY_NODE(node)) {
2109 return hamt_node_array_find(
2110 (PyHamtNode_Array *)node,
2111 shift, hash, key, val);
2112 }
2113 else {
2114 assert(IS_COLLISION_NODE(node));
2115 return hamt_node_collision_find(
2116 (PyHamtNode_Collision *)node,
2117 shift, hash, key, val);
2118 }
2119}
2120
2121#ifdef Py_DEBUG
2122static int
2123hamt_node_dump(PyHamtNode *node,
2124 _PyUnicodeWriter *writer, int level)
2125{
2126 /* Debug build: __dump__() method implementation for a node.
2127
2128 This method automatically dispatches to the suitable
2129 hamt_node_{nodetype})_dump method.
2130 */
2131
2132 if (IS_BITMAP_NODE(node)) {
2133 return hamt_node_bitmap_dump(
2134 (PyHamtNode_Bitmap *)node, writer, level);
2135 }
2136 else if (IS_ARRAY_NODE(node)) {
2137 return hamt_node_array_dump(
2138 (PyHamtNode_Array *)node, writer, level);
2139 }
2140 else {
2141 assert(IS_COLLISION_NODE(node));
2142 return hamt_node_collision_dump(
2143 (PyHamtNode_Collision *)node, writer, level);
2144 }
2145}
2146#endif /* Py_DEBUG */
2147
2148
2149/////////////////////////////////// Iterators: Machinery
2150
2151
2152static hamt_iter_t
2153hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2154
2155
2156static void
2157hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2158{
2159 for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2160 iter->i_nodes[i] = NULL;
2161 iter->i_pos[i] = 0;
2162 }
2163
2164 iter->i_level = 0;
2165
2166 /* Note: we don't incref/decref nodes in i_nodes. */
2167 iter->i_nodes[0] = root;
2168}
2169
2170static hamt_iter_t
2171hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2172 PyObject **key, PyObject **val)
2173{
2174 int8_t level = iter->i_level;
2175
2176 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2177 Py_ssize_t pos = iter->i_pos[level];
2178
2179 if (pos + 1 >= Py_SIZE(node)) {
2180#ifdef Py_DEBUG
2181 assert(iter->i_level >= 0);
2182 iter->i_nodes[iter->i_level] = NULL;
2183#endif
2184 iter->i_level--;
2185 return hamt_iterator_next(iter, key, val);
2186 }
2187
2188 if (node->b_array[pos] == NULL) {
2189 iter->i_pos[level] = pos + 2;
2190
2191 int8_t next_level = level + 1;
2192 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2193 iter->i_level = next_level;
2194 iter->i_pos[next_level] = 0;
2195 iter->i_nodes[next_level] = (PyHamtNode *)
2196 node->b_array[pos + 1];
2197
2198 return hamt_iterator_next(iter, key, val);
2199 }
2200
2201 *key = node->b_array[pos];
2202 *val = node->b_array[pos + 1];
2203 iter->i_pos[level] = pos + 2;
2204 return I_ITEM;
2205}
2206
2207static hamt_iter_t
2208hamt_iterator_collision_next(PyHamtIteratorState *iter,
2209 PyObject **key, PyObject **val)
2210{
2211 int8_t level = iter->i_level;
2212
2213 PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2214 Py_ssize_t pos = iter->i_pos[level];
2215
2216 if (pos + 1 >= Py_SIZE(node)) {
2217#ifdef Py_DEBUG
2218 assert(iter->i_level >= 0);
2219 iter->i_nodes[iter->i_level] = NULL;
2220#endif
2221 iter->i_level--;
2222 return hamt_iterator_next(iter, key, val);
2223 }
2224
2225 *key = node->c_array[pos];
2226 *val = node->c_array[pos + 1];
2227 iter->i_pos[level] = pos + 2;
2228 return I_ITEM;
2229}
2230
2231static hamt_iter_t
2232hamt_iterator_array_next(PyHamtIteratorState *iter,
2233 PyObject **key, PyObject **val)
2234{
2235 int8_t level = iter->i_level;
2236
2237 PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2238 Py_ssize_t pos = iter->i_pos[level];
2239
2240 if (pos >= HAMT_ARRAY_NODE_SIZE) {
2241#ifdef Py_DEBUG
2242 assert(iter->i_level >= 0);
2243 iter->i_nodes[iter->i_level] = NULL;
2244#endif
2245 iter->i_level--;
2246 return hamt_iterator_next(iter, key, val);
2247 }
2248
2249 for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2250 if (node->a_array[i] != NULL) {
2251 iter->i_pos[level] = i + 1;
2252
2253 int8_t next_level = level + 1;
2254 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2255 iter->i_pos[next_level] = 0;
2256 iter->i_nodes[next_level] = node->a_array[i];
2257 iter->i_level = next_level;
2258
2259 return hamt_iterator_next(iter, key, val);
2260 }
2261 }
2262
2263#ifdef Py_DEBUG
2264 assert(iter->i_level >= 0);
2265 iter->i_nodes[iter->i_level] = NULL;
2266#endif
2267
2268 iter->i_level--;
2269 return hamt_iterator_next(iter, key, val);
2270}
2271
2272static hamt_iter_t
2273hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2274{
2275 if (iter->i_level < 0) {
2276 return I_END;
2277 }
2278
2279 assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2280
2281 PyHamtNode *current = iter->i_nodes[iter->i_level];
2282
2283 if (IS_BITMAP_NODE(current)) {
2284 return hamt_iterator_bitmap_next(iter, key, val);
2285 }
2286 else if (IS_ARRAY_NODE(current)) {
2287 return hamt_iterator_array_next(iter, key, val);
2288 }
2289 else {
2290 assert(IS_COLLISION_NODE(current));
2291 return hamt_iterator_collision_next(iter, key, val);
2292 }
2293}
2294
2295
2296/////////////////////////////////// HAMT high-level functions
2297
2298
2299PyHamtObject *
2300_PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2301{
2302 int32_t key_hash;
2303 int added_leaf = 0;
2304 PyHamtNode *new_root;
2305 PyHamtObject *new_o;
2306
2307 key_hash = hamt_hash(key);
2308 if (key_hash == -1) {
2309 return NULL;
2310 }
2311
2312 new_root = hamt_node_assoc(
2313 (PyHamtNode *)(o->h_root),
2314 0, key_hash, key, val, &added_leaf);
2315 if (new_root == NULL) {
2316 return NULL;
2317 }
2318
2319 if (new_root == o->h_root) {
2320 Py_DECREF(new_root);
2321 Py_INCREF(o);
2322 return o;
2323 }
2324
2325 new_o = hamt_alloc();
2326 if (new_o == NULL) {
2327 Py_DECREF(new_root);
2328 return NULL;
2329 }
2330
2331 new_o->h_root = new_root; /* borrow */
2332 new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2333
2334 return new_o;
2335}
2336
2337PyHamtObject *
2338_PyHamt_Without(PyHamtObject *o, PyObject *key)
2339{
2340 int32_t key_hash = hamt_hash(key);
2341 if (key_hash == -1) {
2342 return NULL;
2343 }
2344
2345 PyHamtNode *new_root;
2346
2347 hamt_without_t res = hamt_node_without(
2348 (PyHamtNode *)(o->h_root),
2349 0, key_hash, key,
2350 &new_root);
2351
2352 switch (res) {
2353 case W_ERROR:
2354 return NULL;
2355 case W_EMPTY:
2356 return _PyHamt_New();
2357 case W_NOT_FOUND:
2358 Py_INCREF(o);
2359 return o;
2360 case W_NEWNODE: {
2361 PyHamtObject *new_o = hamt_alloc();
2362 if (new_o == NULL) {
2363 Py_DECREF(new_root);
2364 return NULL;
2365 }
2366
2367 new_o->h_root = new_root; /* borrow */
2368 new_o->h_count = o->h_count - 1;
2369 assert(new_o->h_count >= 0);
2370 return new_o;
2371 }
2372 default:
2373 Py_UNREACHABLE();
2374 }
2375}
2376
2377static hamt_find_t
2378hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2379{
2380 if (o->h_count == 0) {
2381 return F_NOT_FOUND;
2382 }
2383
2384 int32_t key_hash = hamt_hash(key);
2385 if (key_hash == -1) {
2386 return F_ERROR;
2387 }
2388
2389 return hamt_node_find(o->h_root, 0, key_hash, key, val);
2390}
2391
2392
2393int
2394_PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2395{
2396 hamt_find_t res = hamt_find(o, key, val);
2397 switch (res) {
2398 case F_ERROR:
2399 return -1;
2400 case F_NOT_FOUND:
2401 return 0;
2402 case F_FOUND:
2403 return 1;
2404 default:
2405 Py_UNREACHABLE();
2406 }
2407}
2408
2409
2410int
2411_PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2412{
2413 if (v == w) {
2414 return 1;
2415 }
2416
2417 if (v->h_count != w->h_count) {
2418 return 0;
2419 }
2420
2421 PyHamtIteratorState iter;
2422 hamt_iter_t iter_res;
2423 hamt_find_t find_res;
2424 PyObject *v_key;
2425 PyObject *v_val;
2426 PyObject *w_val;
2427
2428 hamt_iterator_init(&iter, v->h_root);
2429
2430 do {
2431 iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2432 if (iter_res == I_ITEM) {
2433 find_res = hamt_find(w, v_key, &w_val);
2434 switch (find_res) {
2435 case F_ERROR:
2436 return -1;
2437
2438 case F_NOT_FOUND:
2439 return 0;
2440
2441 case F_FOUND: {
2442 int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2443 if (cmp < 0) {
2444 return -1;
2445 }
2446 if (cmp == 0) {
2447 return 0;
2448 }
2449 }
2450 }
2451 }
2452 } while (iter_res != I_END);
2453
2454 return 1;
2455}
2456
2457Py_ssize_t
2458_PyHamt_Len(PyHamtObject *o)
2459{
2460 return o->h_count;
2461}
2462
2463static PyHamtObject *
2464hamt_alloc(void)
2465{
2466 PyHamtObject *o;
2467 o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2468 if (o == NULL) {
2469 return NULL;
2470 }
2471 o->h_weakreflist = NULL;
2472 PyObject_GC_Track(o);
2473 return o;
2474}
2475
2476PyHamtObject *
2477_PyHamt_New(void)
2478{
2479 if (_empty_hamt != NULL) {
2480 /* HAMT is an immutable object so we can easily cache an
2481 empty instance. */
2482 Py_INCREF(_empty_hamt);
2483 return _empty_hamt;
2484 }
2485
2486 PyHamtObject *o = hamt_alloc();
2487 if (o == NULL) {
2488 return NULL;
2489 }
2490
2491 o->h_root = hamt_node_bitmap_new(0);
2492 if (o->h_root == NULL) {
2493 Py_DECREF(o);
2494 return NULL;
2495 }
2496
2497 o->h_count = 0;
2498
2499 if (_empty_hamt == NULL) {
2500 Py_INCREF(o);
2501 _empty_hamt = o;
2502 }
2503
2504 return o;
2505}
2506
2507#ifdef Py_DEBUG
2508static PyObject *
2509hamt_dump(PyHamtObject *self)
2510{
2511 _PyUnicodeWriter writer;
2512
2513 _PyUnicodeWriter_Init(&writer);
2514
2515 if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2516 goto error;
2517 }
2518
2519 if (hamt_node_dump(self->h_root, &writer, 0)) {
2520 goto error;
2521 }
2522
2523 return _PyUnicodeWriter_Finish(&writer);
2524
2525error:
2526 _PyUnicodeWriter_Dealloc(&writer);
2527 return NULL;
2528}
2529#endif /* Py_DEBUG */
2530
2531
2532/////////////////////////////////// Iterators: Shared Iterator Implementation
2533
2534
2535static int
2536hamt_baseiter_tp_clear(PyHamtIterator *it)
2537{
2538 Py_CLEAR(it->hi_obj);
2539 return 0;
2540}
2541
2542static void
2543hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2544{
2545 PyObject_GC_UnTrack(it);
2546 (void)hamt_baseiter_tp_clear(it);
2547 PyObject_GC_Del(it);
2548}
2549
2550static int
2551hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2552{
2553 Py_VISIT(it->hi_obj);
2554 return 0;
2555}
2556
2557static PyObject *
2558hamt_baseiter_tp_iternext(PyHamtIterator *it)
2559{
2560 PyObject *key;
2561 PyObject *val;
2562 hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2563
2564 switch (res) {
2565 case I_END:
2566 PyErr_SetNone(PyExc_StopIteration);
2567 return NULL;
2568
2569 case I_ITEM: {
2570 return (*(it->hi_yield))(key, val);
2571 }
2572
2573 default: {
2574 Py_UNREACHABLE();
2575 }
2576 }
2577}
2578
2579static Py_ssize_t
2580hamt_baseiter_tp_len(PyHamtIterator *it)
2581{
2582 return it->hi_obj->h_count;
2583}
2584
2585static PyMappingMethods PyHamtIterator_as_mapping = {
2586 (lenfunc)hamt_baseiter_tp_len,
2587};
2588
2589static PyObject *
2590hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2591{
2592 PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2593 if (it == NULL) {
2594 return NULL;
2595 }
2596
2597 Py_INCREF(o);
2598 it->hi_obj = o;
2599 it->hi_yield = yield;
2600
2601 hamt_iterator_init(&it->hi_iter, o->h_root);
2602
2603 return (PyObject*)it;
2604}
2605
2606#define ITERATOR_TYPE_SHARED_SLOTS \
2607 .tp_basicsize = sizeof(PyHamtIterator), \
2608 .tp_itemsize = 0, \
2609 .tp_as_mapping = &PyHamtIterator_as_mapping, \
2610 .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc, \
2611 .tp_getattro = PyObject_GenericGetAttr, \
2612 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, \
2613 .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse, \
2614 .tp_clear = (inquiry)hamt_baseiter_tp_clear, \
2615 .tp_iter = PyObject_SelfIter, \
2616 .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2617
2618
2619/////////////////////////////////// _PyHamtItems_Type
2620
2621
2622PyTypeObject _PyHamtItems_Type = {
2623 PyVarObject_HEAD_INIT(NULL, 0)
2624 "items",
2625 ITERATOR_TYPE_SHARED_SLOTS
2626};
2627
2628static PyObject *
2629hamt_iter_yield_items(PyObject *key, PyObject *val)
2630{
2631 return PyTuple_Pack(2, key, val);
2632}
2633
2634PyObject *
2635_PyHamt_NewIterItems(PyHamtObject *o)
2636{
2637 return hamt_baseiter_new(
2638 &_PyHamtItems_Type, hamt_iter_yield_items, o);
2639}
2640
2641
2642/////////////////////////////////// _PyHamtKeys_Type
2643
2644
2645PyTypeObject _PyHamtKeys_Type = {
2646 PyVarObject_HEAD_INIT(NULL, 0)
2647 "keys",
2648 ITERATOR_TYPE_SHARED_SLOTS
2649};
2650
2651static PyObject *
2652hamt_iter_yield_keys(PyObject *key, PyObject *val)
2653{
2654 Py_INCREF(key);
2655 return key;
2656}
2657
2658PyObject *
2659_PyHamt_NewIterKeys(PyHamtObject *o)
2660{
2661 return hamt_baseiter_new(
2662 &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2663}
2664
2665
2666/////////////////////////////////// _PyHamtValues_Type
2667
2668
2669PyTypeObject _PyHamtValues_Type = {
2670 PyVarObject_HEAD_INIT(NULL, 0)
2671 "values",
2672 ITERATOR_TYPE_SHARED_SLOTS
2673};
2674
2675static PyObject *
2676hamt_iter_yield_values(PyObject *key, PyObject *val)
2677{
2678 Py_INCREF(val);
2679 return val;
2680}
2681
2682PyObject *
2683_PyHamt_NewIterValues(PyHamtObject *o)
2684{
2685 return hamt_baseiter_new(
2686 &_PyHamtValues_Type, hamt_iter_yield_values, o);
2687}
2688
2689
2690/////////////////////////////////// _PyHamt_Type
2691
2692
2693#ifdef Py_DEBUG
2694static PyObject *
2695hamt_dump(PyHamtObject *self);
2696#endif
2697
2698
2699static PyObject *
2700hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2701{
2702 return (PyObject*)_PyHamt_New();
2703}
2704
2705static int
2706hamt_tp_clear(PyHamtObject *self)
2707{
2708 Py_CLEAR(self->h_root);
2709 return 0;
2710}
2711
2712
2713static int
2714hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2715{
2716 Py_VISIT(self->h_root);
2717 return 0;
2718}
2719
2720static void
2721hamt_tp_dealloc(PyHamtObject *self)
2722{
2723 PyObject_GC_UnTrack(self);
2724 if (self->h_weakreflist != NULL) {
2725 PyObject_ClearWeakRefs((PyObject*)self);
2726 }
2727 (void)hamt_tp_clear(self);
2728 Py_TYPE(self)->tp_free(self);
2729}
2730
2731
2732static PyObject *
2733hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2734{
2735 if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2736 Py_RETURN_NOTIMPLEMENTED;
2737 }
2738
2739 int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2740 if (res < 0) {
2741 return NULL;
2742 }
2743
2744 if (op == Py_NE) {
2745 res = !res;
2746 }
2747
2748 if (res) {
2749 Py_RETURN_TRUE;
2750 }
2751 else {
2752 Py_RETURN_FALSE;
2753 }
2754}
2755
2756static int
2757hamt_tp_contains(PyHamtObject *self, PyObject *key)
2758{
2759 PyObject *val;
2760 return _PyHamt_Find(self, key, &val);
2761}
2762
2763static PyObject *
2764hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2765{
2766 PyObject *val;
2767 hamt_find_t res = hamt_find(self, key, &val);
2768 switch (res) {
2769 case F_ERROR:
2770 return NULL;
2771 case F_FOUND:
2772 Py_INCREF(val);
2773 return val;
2774 case F_NOT_FOUND:
2775 PyErr_SetObject(PyExc_KeyError, key);
2776 return NULL;
2777 default:
2778 Py_UNREACHABLE();
2779 }
2780}
2781
2782static Py_ssize_t
2783hamt_tp_len(PyHamtObject *self)
2784{
2785 return _PyHamt_Len(self);
2786}
2787
2788static PyObject *
2789hamt_tp_iter(PyHamtObject *self)
2790{
2791 return _PyHamt_NewIterKeys(self);
2792}
2793
2794static PyObject *
2795hamt_py_set(PyHamtObject *self, PyObject *args)
2796{
2797 PyObject *key;
2798 PyObject *val;
2799
2800 if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2801 return NULL;
2802 }
2803
2804 return (PyObject *)_PyHamt_Assoc(self, key, val);
2805}
2806
2807static PyObject *
2808hamt_py_get(PyHamtObject *self, PyObject *args)
2809{
2810 PyObject *key;
2811 PyObject *def = NULL;
2812
2813 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2814 return NULL;
2815 }
2816
2817 PyObject *val = NULL;
2818 hamt_find_t res = hamt_find(self, key, &val);
2819 switch (res) {
2820 case F_ERROR:
2821 return NULL;
2822 case F_FOUND:
2823 Py_INCREF(val);
2824 return val;
2825 case F_NOT_FOUND:
2826 if (def == NULL) {
2827 Py_RETURN_NONE;
2828 }
2829 Py_INCREF(def);
2830 return def;
2831 default:
2832 Py_UNREACHABLE();
2833 }
2834}
2835
2836static PyObject *
2837hamt_py_delete(PyHamtObject *self, PyObject *key)
2838{
2839 return (PyObject *)_PyHamt_Without(self, key);
2840}
2841
2842static PyObject *
2843hamt_py_items(PyHamtObject *self, PyObject *args)
2844{
2845 return _PyHamt_NewIterItems(self);
2846}
2847
2848static PyObject *
2849hamt_py_values(PyHamtObject *self, PyObject *args)
2850{
2851 return _PyHamt_NewIterValues(self);
2852}
2853
2854static PyObject *
2855hamt_py_keys(PyHamtObject *self, PyObject *args)
2856{
2857 return _PyHamt_NewIterKeys(self);
2858}
2859
2860#ifdef Py_DEBUG
2861static PyObject *
2862hamt_py_dump(PyHamtObject *self, PyObject *args)
2863{
2864 return hamt_dump(self);
2865}
2866#endif
2867
2868
2869static PyMethodDef PyHamt_methods[] = {
2870 {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2871 {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2872 {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2873 {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2874 {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2875 {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2876#ifdef Py_DEBUG
2877 {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2878#endif
2879 {NULL, NULL}
2880};
2881
2882static PySequenceMethods PyHamt_as_sequence = {
2883 0, /* sq_length */
2884 0, /* sq_concat */
2885 0, /* sq_repeat */
2886 0, /* sq_item */
2887 0, /* sq_slice */
2888 0, /* sq_ass_item */
2889 0, /* sq_ass_slice */
2890 (objobjproc)hamt_tp_contains, /* sq_contains */
2891 0, /* sq_inplace_concat */
2892 0, /* sq_inplace_repeat */
2893};
2894
2895static PyMappingMethods PyHamt_as_mapping = {
2896 (lenfunc)hamt_tp_len, /* mp_length */
2897 (binaryfunc)hamt_tp_subscript, /* mp_subscript */
2898};
2899
2900PyTypeObject _PyHamt_Type = {
2901 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2902 "hamt",
2903 sizeof(PyHamtObject),
2904 .tp_methods = PyHamt_methods,
2905 .tp_as_mapping = &PyHamt_as_mapping,
2906 .tp_as_sequence = &PyHamt_as_sequence,
2907 .tp_iter = (getiterfunc)hamt_tp_iter,
2908 .tp_dealloc = (destructor)hamt_tp_dealloc,
2909 .tp_getattro = PyObject_GenericGetAttr,
2910 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2911 .tp_richcompare = hamt_tp_richcompare,
2912 .tp_traverse = (traverseproc)hamt_tp_traverse,
2913 .tp_clear = (inquiry)hamt_tp_clear,
2914 .tp_new = hamt_tp_new,
2915 .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2916 .tp_hash = PyObject_HashNotImplemented,
2917};
2918
2919
2920/////////////////////////////////// Tree Node Types
2921
2922
2923PyTypeObject _PyHamt_ArrayNode_Type = {
2924 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2925 "hamt_array_node",
2926 sizeof(PyHamtNode_Array),
2927 0,
2928 .tp_dealloc = (destructor)hamt_node_array_dealloc,
2929 .tp_getattro = PyObject_GenericGetAttr,
2930 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2931 .tp_traverse = (traverseproc)hamt_node_array_traverse,
2932 .tp_free = PyObject_GC_Del,
2933 .tp_hash = PyObject_HashNotImplemented,
2934};
2935
2936PyTypeObject _PyHamt_BitmapNode_Type = {
2937 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2938 "hamt_bitmap_node",
2939 sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2940 sizeof(PyObject *),
2941 .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2942 .tp_getattro = PyObject_GenericGetAttr,
2943 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2944 .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2945 .tp_free = PyObject_GC_Del,
2946 .tp_hash = PyObject_HashNotImplemented,
2947};
2948
2949PyTypeObject _PyHamt_CollisionNode_Type = {
2950 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2951 "hamt_collision_node",
2952 sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2953 sizeof(PyObject *),
2954 .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2955 .tp_getattro = PyObject_GenericGetAttr,
2956 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2957 .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2958 .tp_free = PyObject_GC_Del,
2959 .tp_hash = PyObject_HashNotImplemented,
2960};
2961
2962
2963int
2964_PyHamt_Init(void)
2965{
2966 if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2967 (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2968 (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2969 (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2970 (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2971 (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2972 (PyType_Ready(&_PyHamtItems_Type) < 0))
2973 {
2974 return 0;
2975 }
2976
2977 return 1;
2978}
2979
2980void
2981_PyHamt_Fini(void)
2982{
2983 Py_CLEAR(_empty_hamt);
2984 Py_CLEAR(_empty_bitmap_node);
2985}