blob: df3b1092cbe58ce522a0328734b605bc97c9ac6e [file] [log] [blame]
Yury Selivanovf23746a2018-01-22 19:11:18 -05001#include "Python.h"
2
3#include "structmember.h"
4#include "internal/pystate.h"
5#include "internal/hamt.h"
6
Yury Selivanovf23746a2018-01-22 19:11:18 -05007/*
8This file provides an implemention of an immutable mapping using the
9Hash Array Mapped Trie (or HAMT) datastructure.
10
11This design allows to have:
12
131. Efficient copy: immutable mappings can be copied by reference,
14 making it an O(1) operation.
15
162. Efficient mutations: due to structural sharing, only a portion of
17 the trie needs to be copied when the collection is mutated. The
18 cost of set/delete operations is O(log N).
19
203. Efficient lookups: O(log N).
21
22(where N is number of key/value items in the immutable mapping.)
23
24
25HAMT
26====
27
28The core idea of HAMT is that the shape of the trie is encoded into the
29hashes of keys.
30
31Say we want to store a K/V pair in our mapping. First, we calculate the
32hash of K, let's say it's 19830128, or in binary:
33
34 0b1001011101001010101110000 = 19830128
35
36Now let's partition this bit representation of the hash into blocks of
375 bits each:
38
39 0b00_00000_10010_11101_00101_01011_10000 = 19830128
40 (6) (5) (4) (3) (2) (1)
41
42Each block of 5 bits represents a number betwen 0 and 31. So if we have
43a tree that consists of nodes, each of which is an array of 32 pointers,
44those 5-bit blocks will encode a position on a single tree level.
45
46For example, storing the key K with hash 19830128, results in the following
47tree structure:
48
49 (array of 32 pointers)
50 +---+ -- +----+----+----+ -- +----+
51 root node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b10000 = 16 (1)
52 (level 1) +---+ -- +----+----+----+ -- +----+
53 |
54 +---+ -- +----+----+----+ -- +----+
55 a 2nd level node | 0 | .. | 10 | 11 | 12 | .. | 31 | 0b01011 = 11 (2)
56 +---+ -- +----+----+----+ -- +----+
57 |
58 +---+ -- +----+----+----+ -- +----+
Dmitry Alimov93a61192018-01-25 20:54:41 +030059 a 3rd level node | 0 | .. | 04 | 05 | 06 | .. | 31 | 0b00101 = 5 (3)
Yury Selivanovf23746a2018-01-22 19:11:18 -050060 +---+ -- +----+----+----+ -- +----+
61 |
62 +---+ -- +----+----+----+----+
63 a 4th level node | 0 | .. | 04 | 29 | 30 | 31 | 0b11101 = 29 (4)
64 +---+ -- +----+----+----+----+
65 |
66 +---+ -- +----+----+----+ -- +----+
67 a 5th level node | 0 | .. | 17 | 18 | 19 | .. | 31 | 0b10010 = 18 (5)
68 +---+ -- +----+----+----+ -- +----+
69 |
70 +--------------+
71 |
72 +---+ -- +----+----+----+ -- +----+
73 a 6th level node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b00000 = 0 (6)
74 +---+ -- +----+----+----+ -- +----+
75 |
76 V -- our value (or collision)
77
78To rehash: for a K/V pair, the hash of K encodes where in the tree V will
79be stored.
80
81To optimize memory footprint and handle hash collisions, our implementation
82uses three different types of nodes:
83
84 * A Bitmap node;
85 * An Array node;
86 * A Collision node.
87
88Because we implement an immutable dictionary, our nodes are also
89immutable. Therefore, when we need to modify a node, we copy it, and
90do that modification to the copy.
91
92
93Array Nodes
94-----------
95
96These nodes are very simple. Essentially they are arrays of 32 pointers
97we used to illustrate the high-level idea in the previous section.
98
99We use Array nodes only when we need to store more than 16 pointers
100in a single node.
101
102Array nodes do not store key objects or value objects. They are used
103only as an indirection level - their pointers point to other nodes in
104the tree.
105
106
107Bitmap Node
108-----------
109
110Allocating a new 32-pointers array for every node of our tree would be
111very expensive. Unless we store millions of keys, most of tree nodes would
112be very sparse.
113
114When we have less than 16 elements in a node, we don't want to use the
115Array node, that would mean that we waste a lot of memory. Instead,
116we can use bitmap compression and can have just as many pointers
117as we need!
118
119Bitmap nodes consist of two fields:
120
1211. An array of pointers. If a Bitmap node holds N elements, the
122 array will be of N pointers.
123
1242. A 32bit integer -- a bitmap field. If an N-th bit is set in the
125 bitmap, it means that the node has an N-th element.
126
127For example, say we need to store a 3 elements sparse array:
128
129 +---+ -- +---+ -- +----+ -- +----+
130 | 0 | .. | 4 | .. | 11 | .. | 17 |
131 +---+ -- +---+ -- +----+ -- +----+
132 | | |
133 o1 o2 o3
134
135We allocate a three-pointer Bitmap node. Its bitmap field will be
136then set to:
137
138 0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
139
140To check if our Bitmap node has an I-th element we can do:
141
142 bitmap & (1 << I)
143
144
145And here's a formula to calculate a position in our pointer array
146which would correspond to an I-th element:
147
148 popcount(bitmap & ((1 << I) - 1))
149
150
151Let's break it down:
152
153 * `popcount` is a function that returns a number of bits set to 1;
154
155 * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
156 set to the *right* of our bit.
157
158
159So for our 17, 11, and 4 indexes:
160
161 * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
162
163 * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
164
165 * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
166
167
168To conclude: Bitmap nodes are just like Array nodes -- they can store
169a number of pointers, but use bitmap compression to eliminate unused
170pointers.
171
172
173Bitmap nodes have two pointers for each item:
174
175 +----+----+----+----+ -- +----+----+
176 | k1 | v1 | k2 | v2 | .. | kN | vN |
177 +----+----+----+----+ -- +----+----+
178
179When kI == NULL, vI points to another tree level.
180
181When kI != NULL, the actual key object is stored in kI, and its
182value is stored in vI.
183
184
185Collision Nodes
186---------------
187
188Collision nodes are simple arrays of pointers -- two pointers per
189key/value. When there's a hash collision, say for k1/v1 and k2/v2
190we have `hash(k1)==hash(k2)`. Then our collision node will be:
191
192 +----+----+----+----+
193 | k1 | v1 | k2 | v2 |
194 +----+----+----+----+
195
196
197Tree Structure
198--------------
199
200All nodes are PyObjects.
201
202The `PyHamtObject` object has a pointer to the root node (h_root),
203and has a length field (h_count).
204
205High-level functions accept a PyHamtObject object and dispatch to
206lower-level functions depending on what kind of node h_root points to.
207
208
209Operations
210==========
211
212There are three fundamental operations on an immutable dictionary:
213
2141. "o.assoc(k, v)" will return a new immutable dictionary, that will be
215 a copy of "o", but with the "k/v" item set.
216
217 Functions in this file:
218
219 hamt_node_assoc, hamt_node_bitmap_assoc,
220 hamt_node_array_assoc, hamt_node_collision_assoc
221
222 `hamt_node_assoc` function accepts a node object, and calls
223 other functions depending on its actual type.
224
2252. "o.find(k)" will lookup key "k" in "o".
226
227 Functions:
228
229 hamt_node_find, hamt_node_bitmap_find,
230 hamt_node_array_find, hamt_node_collision_find
231
2323. "o.without(k)" will return a new immutable dictionary, that will be
233 a copy of "o", buth without the "k" key.
234
235 Functions:
236
237 hamt_node_without, hamt_node_bitmap_without,
238 hamt_node_array_without, hamt_node_collision_without
239
240
241Further Reading
242===============
243
2441. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
245
2462. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
247
2483. Clojure's PersistentHashMap implementation:
249 https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
250
251
252Debug
253=====
254
255The HAMT datatype is accessible for testing purposes under the
256`_testcapi` module:
257
258 >>> from _testcapi import hamt
259 >>> h = hamt()
260 >>> h2 = h.set('a', 2)
261 >>> h3 = h2.set('b', 3)
262 >>> list(h3)
263 ['a', 'b']
264
265When CPython is built in debug mode, a '__dump__()' method is available
266to introspect the tree:
267
268 >>> print(h3.__dump__())
269 HAMT(len=2):
270 BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
271 'a': 2
272 'b': 3
273*/
274
275
276#define IS_ARRAY_NODE(node) (Py_TYPE(node) == &_PyHamt_ArrayNode_Type)
277#define IS_BITMAP_NODE(node) (Py_TYPE(node) == &_PyHamt_BitmapNode_Type)
278#define IS_COLLISION_NODE(node) (Py_TYPE(node) == &_PyHamt_CollisionNode_Type)
279
280
281/* Return type for 'find' (lookup a key) functions.
282
283 * F_ERROR - an error occurred;
284 * F_NOT_FOUND - the key was not found;
285 * F_FOUND - the key was found.
286*/
287typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
288
289
290/* Return type for 'without' (delete a key) functions.
291
292 * W_ERROR - an error occurred;
293 * W_NOT_FOUND - the key was not found: there's nothing to delete;
294 * W_EMPTY - the key was found: the node/tree would be empty
295 if the key is deleted;
296 * W_NEWNODE - the key was found: a new node/tree is returned
297 without that key.
298*/
299typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
300
301
302/* Low-level iterator protocol type.
303
304 * I_ITEM - a new item has been yielded;
305 * I_END - the whole tree was visited (similar to StopIteration).
306*/
307typedef enum {I_ITEM, I_END} hamt_iter_t;
308
309
310#define HAMT_ARRAY_NODE_SIZE 32
311
312
313typedef struct {
314 PyObject_HEAD
315 PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
316 Py_ssize_t a_count;
317} PyHamtNode_Array;
318
319
320typedef struct {
321 PyObject_VAR_HEAD
322 uint32_t b_bitmap;
323 PyObject *b_array[1];
324} PyHamtNode_Bitmap;
325
326
327typedef struct {
328 PyObject_VAR_HEAD
329 int32_t c_hash;
330 PyObject *c_array[1];
331} PyHamtNode_Collision;
332
333
334static PyHamtNode_Bitmap *_empty_bitmap_node;
335static PyHamtObject *_empty_hamt;
336
337
338static PyHamtObject *
339hamt_alloc(void);
340
341static PyHamtNode *
342hamt_node_assoc(PyHamtNode *node,
343 uint32_t shift, int32_t hash,
344 PyObject *key, PyObject *val, int* added_leaf);
345
346static hamt_without_t
347hamt_node_without(PyHamtNode *node,
348 uint32_t shift, int32_t hash,
349 PyObject *key,
350 PyHamtNode **new_node);
351
352static hamt_find_t
353hamt_node_find(PyHamtNode *node,
354 uint32_t shift, int32_t hash,
355 PyObject *key, PyObject **val);
356
357#ifdef Py_DEBUG
358static int
359hamt_node_dump(PyHamtNode *node,
360 _PyUnicodeWriter *writer, int level);
361#endif
362
363static PyHamtNode *
364hamt_node_array_new(Py_ssize_t);
365
366static PyHamtNode *
367hamt_node_collision_new(int32_t hash, Py_ssize_t size);
368
369static inline Py_ssize_t
370hamt_node_collision_count(PyHamtNode_Collision *node);
371
372
373#ifdef Py_DEBUG
374static void
375_hamt_node_array_validate(void *o)
376{
377 assert(IS_ARRAY_NODE(o));
378 PyHamtNode_Array *node = (PyHamtNode_Array*)(o);
379 Py_ssize_t i = 0, count = 0;
380 for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
381 if (node->a_array[i] != NULL) {
382 count++;
383 }
384 }
385 assert(count == node->a_count);
386}
387
388#define VALIDATE_ARRAY_NODE(NODE) \
389 do { _hamt_node_array_validate(NODE); } while (0);
390#else
391#define VALIDATE_ARRAY_NODE(NODE)
392#endif
393
394
395/* Returns -1 on error */
396static inline int32_t
397hamt_hash(PyObject *o)
398{
399 Py_hash_t hash = PyObject_Hash(o);
400
401#if SIZEOF_PY_HASH_T <= 4
402 return hash;
403#else
404 if (hash == -1) {
405 /* exception */
406 return -1;
407 }
408
409 /* While it's suboptimal to reduce Python's 64 bit hash to
410 32 bits via XOR, it seems that the resulting hash function
411 is good enough (this is also how Long type is hashed in Java.)
412 Storing 10, 100, 1000 Python strings results in a relatively
413 shallow and uniform tree structure.
414
415 Please don't change this hashing algorithm, as there are many
416 tests that test some exact tree shape to cover all code paths.
417 */
418 int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
419 return xored == -1 ? -2 : xored;
420#endif
421}
422
423static inline uint32_t
424hamt_mask(int32_t hash, uint32_t shift)
425{
426 return (((uint32_t)hash >> shift) & 0x01f);
427}
428
429static inline uint32_t
430hamt_bitpos(int32_t hash, uint32_t shift)
431{
432 return (uint32_t)1 << hamt_mask(hash, shift);
433}
434
435static inline uint32_t
436hamt_bitcount(uint32_t i)
437{
Yury Selivanovb7a80d52018-01-23 22:17:04 -0500438 /* We could use native popcount instruction but that would
439 require to either add configure flags to enable SSE4.2
440 support or to detect it dynamically. Otherwise, we have
441 a risk of CPython not working properly on older hardware.
442
443 In practice, there's no observable difference in
444 performance between using a popcount instruction or the
445 following fallback code.
446
447 The algorithm is copied from:
448 https://graphics.stanford.edu/~seander/bithacks.html
449 */
Yury Selivanovf23746a2018-01-22 19:11:18 -0500450 i = i - ((i >> 1) & 0x55555555);
451 i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
452 return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
Yury Selivanovf23746a2018-01-22 19:11:18 -0500453}
454
455static inline uint32_t
456hamt_bitindex(uint32_t bitmap, uint32_t bit)
457{
458 return hamt_bitcount(bitmap & (bit - 1));
459}
460
461
462/////////////////////////////////// Dump Helpers
463#ifdef Py_DEBUG
464
465static int
466_hamt_dump_ident(_PyUnicodeWriter *writer, int level)
467{
468 /* Write `' ' * level` to the `writer` */
469 PyObject *str = NULL;
470 PyObject *num = NULL;
471 PyObject *res = NULL;
472 int ret = -1;
473
474 str = PyUnicode_FromString(" ");
475 if (str == NULL) {
476 goto error;
477 }
478
479 num = PyLong_FromLong((long)level);
480 if (num == NULL) {
481 goto error;
482 }
483
484 res = PyNumber_Multiply(str, num);
485 if (res == NULL) {
486 goto error;
487 }
488
489 ret = _PyUnicodeWriter_WriteStr(writer, res);
490
491error:
492 Py_XDECREF(res);
493 Py_XDECREF(str);
494 Py_XDECREF(num);
495 return ret;
496}
497
498static int
499_hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
500{
501 /* A convenient helper combining _PyUnicodeWriter_WriteStr and
502 PyUnicode_FromFormatV.
503 */
504 PyObject* msg;
505 int ret;
506
507 va_list vargs;
508#ifdef HAVE_STDARG_PROTOTYPES
509 va_start(vargs, format);
510#else
511 va_start(vargs);
512#endif
513 msg = PyUnicode_FromFormatV(format, vargs);
514 va_end(vargs);
515
516 if (msg == NULL) {
517 return -1;
518 }
519
520 ret = _PyUnicodeWriter_WriteStr(writer, msg);
521 Py_DECREF(msg);
522 return ret;
523}
524
525#endif /* Py_DEBUG */
526/////////////////////////////////// Bitmap Node
527
528
529static PyHamtNode *
530hamt_node_bitmap_new(Py_ssize_t size)
531{
532 /* Create a new bitmap node of size 'size' */
533
534 PyHamtNode_Bitmap *node;
535 Py_ssize_t i;
536
537 assert(size >= 0);
538 assert(size % 2 == 0);
539
540 if (size == 0 && _empty_bitmap_node != NULL) {
541 Py_INCREF(_empty_bitmap_node);
542 return (PyHamtNode *)_empty_bitmap_node;
543 }
544
545 /* No freelist; allocate a new bitmap node */
546 node = PyObject_GC_NewVar(
547 PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
548 if (node == NULL) {
549 return NULL;
550 }
551
552 Py_SIZE(node) = size;
553
554 for (i = 0; i < size; i++) {
555 node->b_array[i] = NULL;
556 }
557
558 node->b_bitmap = 0;
559
560 _PyObject_GC_TRACK(node);
561
562 if (size == 0 && _empty_bitmap_node == NULL) {
563 /* Since bitmap nodes are immutable, we can cache the instance
564 for size=0 and reuse it whenever we need an empty bitmap node.
565 */
566 _empty_bitmap_node = node;
567 Py_INCREF(_empty_bitmap_node);
568 }
569
570 return (PyHamtNode *)node;
571}
572
573static inline Py_ssize_t
574hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
575{
576 return Py_SIZE(node) / 2;
577}
578
579static PyHamtNode_Bitmap *
580hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
581{
582 /* Clone a bitmap node; return a new one with the same child notes. */
583
584 PyHamtNode_Bitmap *clone;
585 Py_ssize_t i;
586
587 clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
588 if (clone == NULL) {
589 return NULL;
590 }
591
592 for (i = 0; i < Py_SIZE(node); i++) {
593 Py_XINCREF(node->b_array[i]);
594 clone->b_array[i] = node->b_array[i];
595 }
596
597 clone->b_bitmap = node->b_bitmap;
598 return clone;
599}
600
601static PyHamtNode_Bitmap *
602hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
603{
604 assert(bit & o->b_bitmap);
605 assert(hamt_node_bitmap_count(o) > 1);
606
607 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
608 Py_SIZE(o) - 2);
609 if (new == NULL) {
610 return NULL;
611 }
612
613 uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
614 uint32_t key_idx = 2 * idx;
615 uint32_t val_idx = key_idx + 1;
616 uint32_t i;
617
618 for (i = 0; i < key_idx; i++) {
619 Py_XINCREF(o->b_array[i]);
620 new->b_array[i] = o->b_array[i];
621 }
622
623 for (i = val_idx + 1; i < Py_SIZE(o); i++) {
624 Py_XINCREF(o->b_array[i]);
625 new->b_array[i - 2] = o->b_array[i];
626 }
627
628 new->b_bitmap = o->b_bitmap & ~bit;
629 return new;
630}
631
632static PyHamtNode *
633hamt_node_new_bitmap_or_collision(uint32_t shift,
634 PyObject *key1, PyObject *val1,
635 int32_t key2_hash,
636 PyObject *key2, PyObject *val2)
637{
638 /* Helper method. Creates a new node for key1/val and key2/val2
639 pairs.
640
641 If key1 hash is equal to the hash of key2, a Collision node
642 will be created. If they are not equal, a Bitmap node is
643 created.
644 */
645
646 int32_t key1_hash = hamt_hash(key1);
647 if (key1_hash == -1) {
648 return NULL;
649 }
650
651 if (key1_hash == key2_hash) {
652 PyHamtNode_Collision *n;
653 n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
654 if (n == NULL) {
655 return NULL;
656 }
657
658 Py_INCREF(key1);
659 n->c_array[0] = key1;
660 Py_INCREF(val1);
661 n->c_array[1] = val1;
662
663 Py_INCREF(key2);
664 n->c_array[2] = key2;
665 Py_INCREF(val2);
666 n->c_array[3] = val2;
667
668 return (PyHamtNode *)n;
669 }
670 else {
671 int added_leaf = 0;
672 PyHamtNode *n = hamt_node_bitmap_new(0);
673 if (n == NULL) {
674 return NULL;
675 }
676
677 PyHamtNode *n2 = hamt_node_assoc(
678 n, shift, key1_hash, key1, val1, &added_leaf);
679 Py_DECREF(n);
680 if (n2 == NULL) {
681 return NULL;
682 }
683
684 n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
685 Py_DECREF(n2);
686 if (n == NULL) {
687 return NULL;
688 }
689
690 return n;
691 }
692}
693
694static PyHamtNode *
695hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
696 uint32_t shift, int32_t hash,
697 PyObject *key, PyObject *val, int* added_leaf)
698{
699 /* assoc operation for bitmap nodes.
700
701 Return: a new node, or self if key/val already is in the
702 collection.
703
704 'added_leaf' is later used in '_PyHamt_Assoc' to determine if
705 `hamt.set(key, val)` increased the size of the collection.
706 */
707
708 uint32_t bit = hamt_bitpos(hash, shift);
709 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
710
711 /* Bitmap node layout:
712
713 +------+------+------+------+ --- +------+------+
714 | key1 | val1 | key2 | val2 | ... | keyN | valN |
715 +------+------+------+------+ --- +------+------+
716 where `N < Py_SIZE(node)`.
717
718 The `node->b_bitmap` field is a bitmap. For a given
719 `(shift, hash)` pair we can determine:
720
721 - If this node has the corresponding key/val slots.
722 - The index of key/val slots.
723 */
724
725 if (self->b_bitmap & bit) {
726 /* The key is set in this node */
727
728 uint32_t key_idx = 2 * idx;
729 uint32_t val_idx = key_idx + 1;
730
731 assert(val_idx < Py_SIZE(self));
732
733 PyObject *key_or_null = self->b_array[key_idx];
734 PyObject *val_or_node = self->b_array[val_idx];
735
736 if (key_or_null == NULL) {
737 /* key is NULL. This means that we have a few keys
738 that have the same (hash, shift) pair. */
739
740 assert(val_or_node != NULL);
741
742 PyHamtNode *sub_node = hamt_node_assoc(
743 (PyHamtNode *)val_or_node,
744 shift + 5, hash, key, val, added_leaf);
745 if (sub_node == NULL) {
746 return NULL;
747 }
748
749 if (val_or_node == (PyObject *)sub_node) {
750 Py_DECREF(sub_node);
751 Py_INCREF(self);
752 return (PyHamtNode *)self;
753 }
754
755 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
756 if (ret == NULL) {
757 return NULL;
758 }
759 Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
760 return (PyHamtNode *)ret;
761 }
762
763 assert(key != NULL);
764 /* key is not NULL. This means that we have only one other
765 key in this collection that matches our hash for this shift. */
766
767 int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
768 if (comp_err < 0) { /* exception in __eq__ */
769 return NULL;
770 }
771 if (comp_err == 1) { /* key == key_or_null */
772 if (val == val_or_node) {
773 /* we already have the same key/val pair; return self. */
774 Py_INCREF(self);
775 return (PyHamtNode *)self;
776 }
777
778 /* We're setting a new value for the key we had before.
779 Make a new bitmap node with a replaced value, and return it. */
780 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
781 if (ret == NULL) {
782 return NULL;
783 }
784 Py_INCREF(val);
785 Py_SETREF(ret->b_array[val_idx], val);
786 return (PyHamtNode *)ret;
787 }
788
789 /* It's a new key, and it has the same index as *one* another key.
790 We have a collision. We need to create a new node which will
791 combine the existing key and the key we're adding.
792
793 `hamt_node_new_bitmap_or_collision` will either create a new
794 Collision node if the keys have identical hashes, or
795 a new Bitmap node.
796 */
797 PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
798 shift + 5,
799 key_or_null, val_or_node, /* existing key/val */
800 hash,
801 key, val /* new key/val */
802 );
803 if (sub_node == NULL) {
804 return NULL;
805 }
806
807 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
808 if (ret == NULL) {
809 Py_DECREF(sub_node);
810 return NULL;
811 }
812 Py_SETREF(ret->b_array[key_idx], NULL);
813 Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
814
815 *added_leaf = 1;
816 return (PyHamtNode *)ret;
817 }
818 else {
819 /* There was no key before with the same (shift,hash). */
820
821 uint32_t n = hamt_bitcount(self->b_bitmap);
822
823 if (n >= 16) {
824 /* When we have a situation where we want to store more
825 than 16 nodes at one level of the tree, we no longer
826 want to use the Bitmap node with bitmap encoding.
827
828 Instead we start using an Array node, which has
829 simpler (faster) implementation at the expense of
830 having prealocated 32 pointers for its keys/values
831 pairs.
832
833 Small hamt objects (<30 keys) usually don't have any
834 Array nodes at all. Betwen ~30 and ~400 keys hamt
835 objects usually have one Array node, and usually it's
836 a root node.
837 */
838
839 uint32_t jdx = hamt_mask(hash, shift);
840 /* 'jdx' is the index of where the new key should be added
841 in the new Array node we're about to create. */
842
843 PyHamtNode *empty = NULL;
844 PyHamtNode_Array *new_node = NULL;
845 PyHamtNode *res = NULL;
846
847 /* Create a new Array node. */
848 new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
849 if (new_node == NULL) {
850 goto fin;
851 }
852
853 /* Create an empty bitmap node for the next
854 hamt_node_assoc call. */
855 empty = hamt_node_bitmap_new(0);
856 if (empty == NULL) {
857 goto fin;
858 }
859
860 /* Make a new bitmap node for the key/val we're adding.
861 Set that bitmap node to new-array-node[jdx]. */
862 new_node->a_array[jdx] = hamt_node_assoc(
863 empty, shift + 5, hash, key, val, added_leaf);
864 if (new_node->a_array[jdx] == NULL) {
865 goto fin;
866 }
867
868 /* Copy existing key/value pairs from the current Bitmap
869 node to the new Array node we've just created. */
870 Py_ssize_t i, j;
871 for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
872 if (((self->b_bitmap >> i) & 1) != 0) {
873 /* Ensure we don't accidentally override `jdx` element
874 we set few lines above.
875 */
876 assert(new_node->a_array[i] == NULL);
877
878 if (self->b_array[j] == NULL) {
879 new_node->a_array[i] =
880 (PyHamtNode *)self->b_array[j + 1];
881 Py_INCREF(new_node->a_array[i]);
882 }
883 else {
884 int32_t rehash = hamt_hash(self->b_array[j]);
885 if (rehash == -1) {
886 goto fin;
887 }
888
889 new_node->a_array[i] = hamt_node_assoc(
890 empty, shift + 5,
891 rehash,
892 self->b_array[j],
893 self->b_array[j + 1],
894 added_leaf);
895
896 if (new_node->a_array[i] == NULL) {
897 goto fin;
898 }
899 }
900 j += 2;
901 }
902 }
903
904 VALIDATE_ARRAY_NODE(new_node)
905
906 /* That's it! */
907 res = (PyHamtNode *)new_node;
908
909 fin:
910 Py_XDECREF(empty);
911 if (res == NULL) {
912 Py_XDECREF(new_node);
913 }
914 return res;
915 }
916 else {
917 /* We have less than 16 keys at this level; let's just
918 create a new bitmap node out of this node with the
919 new key/val pair added. */
920
921 uint32_t key_idx = 2 * idx;
922 uint32_t val_idx = key_idx + 1;
923 Py_ssize_t i;
924
925 *added_leaf = 1;
926
927 /* Allocate new Bitmap node which can have one more key/val
928 pair in addition to what we have already. */
929 PyHamtNode_Bitmap *new_node =
930 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
931 if (new_node == NULL) {
932 return NULL;
933 }
934
935 /* Copy all keys/values that will be before the new key/value
936 we are adding. */
937 for (i = 0; i < key_idx; i++) {
938 Py_XINCREF(self->b_array[i]);
939 new_node->b_array[i] = self->b_array[i];
940 }
941
942 /* Set the new key/value to the new Bitmap node. */
943 Py_INCREF(key);
944 new_node->b_array[key_idx] = key;
945 Py_INCREF(val);
946 new_node->b_array[val_idx] = val;
947
948 /* Copy all keys/values that will be after the new key/value
949 we are adding. */
950 for (i = key_idx; i < Py_SIZE(self); i++) {
951 Py_XINCREF(self->b_array[i]);
952 new_node->b_array[i + 2] = self->b_array[i];
953 }
954
955 new_node->b_bitmap = self->b_bitmap | bit;
956 return (PyHamtNode *)new_node;
957 }
958 }
959}
960
961static hamt_without_t
962hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
963 uint32_t shift, int32_t hash,
964 PyObject *key,
965 PyHamtNode **new_node)
966{
967 uint32_t bit = hamt_bitpos(hash, shift);
968 if ((self->b_bitmap & bit) == 0) {
969 return W_NOT_FOUND;
970 }
971
972 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
973
974 uint32_t key_idx = 2 * idx;
975 uint32_t val_idx = key_idx + 1;
976
977 PyObject *key_or_null = self->b_array[key_idx];
978 PyObject *val_or_node = self->b_array[val_idx];
979
980 if (key_or_null == NULL) {
981 /* key == NULL means that 'value' is another tree node. */
982
983 PyHamtNode *sub_node = NULL;
984
985 hamt_without_t res = hamt_node_without(
986 (PyHamtNode *)val_or_node,
987 shift + 5, hash, key, &sub_node);
988
989 switch (res) {
990 case W_EMPTY:
991 /* It's impossible for us to receive a W_EMPTY here:
992
993 - Array nodes are converted to Bitmap nodes when
994 we delete 16th item from them;
995
996 - Collision nodes are converted to Bitmap when
997 there is one item in them;
998
999 - Bitmap node's without() inlines single-item
1000 sub-nodes.
1001
1002 So in no situation we can have a single-item
1003 Bitmap child of another Bitmap node.
1004 */
1005 Py_UNREACHABLE();
1006
1007 case W_NEWNODE: {
1008 assert(sub_node != NULL);
1009
1010 if (IS_BITMAP_NODE(sub_node)) {
1011 PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1012 if (hamt_node_bitmap_count(sub_tree) == 1 &&
1013 sub_tree->b_array[0] != NULL)
1014 {
1015 /* A bitmap node with one key/value pair. Just
1016 merge it into this node.
1017
1018 Note that we don't inline Bitmap nodes that
1019 have a NULL key -- those nodes point to another
1020 tree level, and we cannot simply move tree levels
1021 up or down.
1022 */
1023
1024 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1025 if (clone == NULL) {
1026 Py_DECREF(sub_node);
1027 return W_ERROR;
1028 }
1029
1030 PyObject *key = sub_tree->b_array[0];
1031 PyObject *val = sub_tree->b_array[1];
1032
1033 Py_INCREF(key);
1034 Py_XSETREF(clone->b_array[key_idx], key);
1035 Py_INCREF(val);
1036 Py_SETREF(clone->b_array[val_idx], val);
1037
1038 Py_DECREF(sub_tree);
1039
1040 *new_node = (PyHamtNode *)clone;
1041 return W_NEWNODE;
1042 }
1043 }
1044
1045#ifdef Py_DEBUG
1046 /* Ensure that Collision.without implementation
1047 converts to Bitmap nodes itself.
1048 */
1049 if (IS_COLLISION_NODE(sub_node)) {
1050 assert(hamt_node_collision_count(
1051 (PyHamtNode_Collision*)sub_node) > 1);
1052 }
1053#endif
1054
1055 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
Yury Selivanov0bad4d62018-01-23 16:26:07 -05001056 if (clone == NULL) {
1057 return W_ERROR;
1058 }
1059
Yury Selivanovf23746a2018-01-22 19:11:18 -05001060 Py_SETREF(clone->b_array[val_idx],
1061 (PyObject *)sub_node); /* borrow */
1062
1063 *new_node = (PyHamtNode *)clone;
1064 return W_NEWNODE;
1065 }
1066
1067 case W_ERROR:
1068 case W_NOT_FOUND:
1069 assert(sub_node == NULL);
1070 return res;
1071
1072 default:
1073 Py_UNREACHABLE();
1074 }
1075 }
1076 else {
1077 /* We have a regular key/value pair */
1078
1079 int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1080 if (cmp < 0) {
1081 return W_ERROR;
1082 }
1083 if (cmp == 0) {
1084 return W_NOT_FOUND;
1085 }
1086
1087 if (hamt_node_bitmap_count(self) == 1) {
1088 return W_EMPTY;
1089 }
1090
1091 *new_node = (PyHamtNode *)
1092 hamt_node_bitmap_clone_without(self, bit);
1093 if (*new_node == NULL) {
1094 return W_ERROR;
1095 }
1096
1097 return W_NEWNODE;
1098 }
1099}
1100
1101static hamt_find_t
1102hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1103 uint32_t shift, int32_t hash,
1104 PyObject *key, PyObject **val)
1105{
1106 /* Lookup a key in a Bitmap node. */
1107
1108 uint32_t bit = hamt_bitpos(hash, shift);
1109 uint32_t idx;
1110 uint32_t key_idx;
1111 uint32_t val_idx;
1112 PyObject *key_or_null;
1113 PyObject *val_or_node;
1114 int comp_err;
1115
1116 if ((self->b_bitmap & bit) == 0) {
1117 return F_NOT_FOUND;
1118 }
1119
1120 idx = hamt_bitindex(self->b_bitmap, bit);
Yury Selivanovf23746a2018-01-22 19:11:18 -05001121 key_idx = idx * 2;
1122 val_idx = key_idx + 1;
1123
1124 assert(val_idx < Py_SIZE(self));
1125
1126 key_or_null = self->b_array[key_idx];
1127 val_or_node = self->b_array[val_idx];
1128
1129 if (key_or_null == NULL) {
1130 /* There are a few keys that have the same hash at the current shift
1131 that match our key. Dispatch the lookup further down the tree. */
1132 assert(val_or_node != NULL);
1133 return hamt_node_find((PyHamtNode *)val_or_node,
1134 shift + 5, hash, key, val);
1135 }
1136
1137 /* We have only one key -- a potential match. Let's compare if the
1138 key we are looking at is equal to the key we are looking for. */
1139 assert(key != NULL);
1140 comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1141 if (comp_err < 0) { /* exception in __eq__ */
1142 return F_ERROR;
1143 }
1144 if (comp_err == 1) { /* key == key_or_null */
1145 *val = val_or_node;
1146 return F_FOUND;
1147 }
1148
1149 return F_NOT_FOUND;
1150}
1151
1152static int
1153hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1154{
1155 /* Bitmap's tp_traverse */
1156
1157 Py_ssize_t i;
1158
1159 for (i = Py_SIZE(self); --i >= 0; ) {
1160 Py_VISIT(self->b_array[i]);
1161 }
1162
1163 return 0;
1164}
1165
1166static void
1167hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1168{
1169 /* Bitmap's tp_dealloc */
1170
1171 Py_ssize_t len = Py_SIZE(self);
1172 Py_ssize_t i;
1173
1174 PyObject_GC_UnTrack(self);
1175 Py_TRASHCAN_SAFE_BEGIN(self)
1176
1177 if (len > 0) {
1178 i = len;
1179 while (--i >= 0) {
1180 Py_XDECREF(self->b_array[i]);
1181 }
1182 }
1183
1184 Py_TYPE(self)->tp_free((PyObject *)self);
1185 Py_TRASHCAN_SAFE_END(self)
1186}
1187
1188#ifdef Py_DEBUG
1189static int
1190hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1191 _PyUnicodeWriter *writer, int level)
1192{
1193 /* Debug build: __dump__() method implementation for Bitmap nodes. */
1194
1195 Py_ssize_t i;
1196 PyObject *tmp1;
1197 PyObject *tmp2;
1198
1199 if (_hamt_dump_ident(writer, level + 1)) {
1200 goto error;
1201 }
1202
1203 if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1204 Py_SIZE(node), Py_SIZE(node) / 2))
1205 {
1206 goto error;
1207 }
1208
1209 tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1210 if (tmp1 == NULL) {
1211 goto error;
1212 }
1213 tmp2 = _PyLong_Format(tmp1, 2);
1214 Py_DECREF(tmp1);
1215 if (tmp2 == NULL) {
1216 goto error;
1217 }
1218 if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1219 Py_DECREF(tmp2);
1220 goto error;
1221 }
1222 Py_DECREF(tmp2);
1223
1224 for (i = 0; i < Py_SIZE(node); i += 2) {
1225 PyObject *key_or_null = node->b_array[i];
1226 PyObject *val_or_node = node->b_array[i + 1];
1227
1228 if (_hamt_dump_ident(writer, level + 2)) {
1229 goto error;
1230 }
1231
1232 if (key_or_null == NULL) {
1233 if (_hamt_dump_format(writer, "NULL:\n")) {
1234 goto error;
1235 }
1236
1237 if (hamt_node_dump((PyHamtNode *)val_or_node,
1238 writer, level + 2))
1239 {
1240 goto error;
1241 }
1242 }
1243 else {
1244 if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1245 val_or_node))
1246 {
1247 goto error;
1248 }
1249 }
1250
1251 if (_hamt_dump_format(writer, "\n")) {
1252 goto error;
1253 }
1254 }
1255
1256 return 0;
1257error:
1258 return -1;
1259}
1260#endif /* Py_DEBUG */
1261
1262
1263/////////////////////////////////// Collision Node
1264
1265
1266static PyHamtNode *
1267hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1268{
1269 /* Create a new Collision node. */
1270
1271 PyHamtNode_Collision *node;
1272 Py_ssize_t i;
1273
1274 assert(size >= 4);
1275 assert(size % 2 == 0);
1276
1277 node = PyObject_GC_NewVar(
1278 PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1279 if (node == NULL) {
1280 return NULL;
1281 }
1282
1283 for (i = 0; i < size; i++) {
1284 node->c_array[i] = NULL;
1285 }
1286
1287 Py_SIZE(node) = size;
1288 node->c_hash = hash;
1289
1290 _PyObject_GC_TRACK(node);
1291
1292 return (PyHamtNode *)node;
1293}
1294
1295static hamt_find_t
1296hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1297 Py_ssize_t *idx)
1298{
1299 /* Lookup `key` in the Collision node `self`. Set the index of the
1300 found key to 'idx'. */
1301
1302 Py_ssize_t i;
1303 PyObject *el;
1304
1305 for (i = 0; i < Py_SIZE(self); i += 2) {
1306 el = self->c_array[i];
1307
1308 assert(el != NULL);
1309 int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1310 if (cmp < 0) {
1311 return F_ERROR;
1312 }
1313 if (cmp == 1) {
1314 *idx = i;
1315 return F_FOUND;
1316 }
1317 }
1318
1319 return F_NOT_FOUND;
1320}
1321
1322static PyHamtNode *
1323hamt_node_collision_assoc(PyHamtNode_Collision *self,
1324 uint32_t shift, int32_t hash,
1325 PyObject *key, PyObject *val, int* added_leaf)
1326{
1327 /* Set a new key to this level (currently a Collision node)
1328 of the tree. */
1329
1330 if (hash == self->c_hash) {
1331 /* The hash of the 'key' we are adding matches the hash of
1332 other keys in this Collision node. */
1333
1334 Py_ssize_t key_idx = -1;
1335 hamt_find_t found;
1336 PyHamtNode_Collision *new_node;
1337 Py_ssize_t i;
1338
1339 /* Let's try to lookup the new 'key', maybe we already have it. */
1340 found = hamt_node_collision_find_index(self, key, &key_idx);
1341 switch (found) {
1342 case F_ERROR:
1343 /* Exception. */
1344 return NULL;
1345
1346 case F_NOT_FOUND:
1347 /* This is a totally new key. Clone the current node,
1348 add a new key/value to the cloned node. */
1349
1350 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1351 self->c_hash, Py_SIZE(self) + 2);
1352 if (new_node == NULL) {
1353 return NULL;
1354 }
1355
1356 for (i = 0; i < Py_SIZE(self); i++) {
1357 Py_INCREF(self->c_array[i]);
1358 new_node->c_array[i] = self->c_array[i];
1359 }
1360
1361 Py_INCREF(key);
1362 new_node->c_array[i] = key;
1363 Py_INCREF(val);
1364 new_node->c_array[i + 1] = val;
1365
1366 *added_leaf = 1;
1367 return (PyHamtNode *)new_node;
1368
1369 case F_FOUND:
1370 /* There's a key which is equal to the key we are adding. */
1371
1372 assert(key_idx >= 0);
1373 assert(key_idx < Py_SIZE(self));
1374 Py_ssize_t val_idx = key_idx + 1;
1375
1376 if (self->c_array[val_idx] == val) {
1377 /* We're setting a key/value pair that's already set. */
1378 Py_INCREF(self);
1379 return (PyHamtNode *)self;
1380 }
1381
1382 /* We need to replace old value for the key
1383 with a new value. Create a new Collision node.*/
1384 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1385 self->c_hash, Py_SIZE(self));
1386 if (new_node == NULL) {
1387 return NULL;
1388 }
1389
1390 /* Copy all elements of the old node to the new one. */
1391 for (i = 0; i < Py_SIZE(self); i++) {
1392 Py_INCREF(self->c_array[i]);
1393 new_node->c_array[i] = self->c_array[i];
1394 }
1395
1396 /* Replace the old value with the new value for the our key. */
1397 Py_DECREF(new_node->c_array[val_idx]);
1398 Py_INCREF(val);
1399 new_node->c_array[val_idx] = val;
1400
1401 return (PyHamtNode *)new_node;
1402
1403 default:
1404 Py_UNREACHABLE();
1405 }
1406 }
1407 else {
1408 /* The hash of the new key is different from the hash that
1409 all keys of this Collision node have.
1410
1411 Create a Bitmap node inplace with two children:
1412 key/value pair that we're adding, and the Collision node
1413 we're replacing on this tree level.
1414 */
1415
1416 PyHamtNode_Bitmap *new_node;
1417 PyHamtNode *assoc_res;
1418
1419 new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1420 if (new_node == NULL) {
1421 return NULL;
1422 }
1423 new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1424 Py_INCREF(self);
1425 new_node->b_array[1] = (PyObject*) self;
1426
1427 assoc_res = hamt_node_bitmap_assoc(
1428 new_node, shift, hash, key, val, added_leaf);
1429 Py_DECREF(new_node);
1430 return assoc_res;
1431 }
1432}
1433
1434static inline Py_ssize_t
1435hamt_node_collision_count(PyHamtNode_Collision *node)
1436{
1437 return Py_SIZE(node) / 2;
1438}
1439
1440static hamt_without_t
1441hamt_node_collision_without(PyHamtNode_Collision *self,
1442 uint32_t shift, int32_t hash,
1443 PyObject *key,
1444 PyHamtNode **new_node)
1445{
1446 if (hash != self->c_hash) {
1447 return W_NOT_FOUND;
1448 }
1449
1450 Py_ssize_t key_idx = -1;
1451 hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1452
1453 switch (found) {
1454 case F_ERROR:
1455 return W_ERROR;
1456
1457 case F_NOT_FOUND:
1458 return W_NOT_FOUND;
1459
1460 case F_FOUND:
1461 assert(key_idx >= 0);
1462 assert(key_idx < Py_SIZE(self));
1463
1464 Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1465
1466 if (new_count == 0) {
1467 /* The node has only one key/value pair and it's for the
1468 key we're trying to delete. So a new node will be empty
1469 after the removal.
1470 */
1471 return W_EMPTY;
1472 }
1473
1474 if (new_count == 1) {
1475 /* The node has two keys, and after deletion the
1476 new Collision node would have one. Collision nodes
1477 with one key shouldn't exist, co convert it to a
1478 Bitmap node.
1479 */
1480 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1481 hamt_node_bitmap_new(2);
1482 if (node == NULL) {
1483 return W_ERROR;
1484 }
1485
1486 if (key_idx == 0) {
1487 Py_INCREF(self->c_array[2]);
1488 node->b_array[0] = self->c_array[2];
1489 Py_INCREF(self->c_array[3]);
1490 node->b_array[1] = self->c_array[3];
1491 }
1492 else {
1493 assert(key_idx == 2);
1494 Py_INCREF(self->c_array[0]);
1495 node->b_array[0] = self->c_array[0];
1496 Py_INCREF(self->c_array[1]);
1497 node->b_array[1] = self->c_array[1];
1498 }
1499
1500 node->b_bitmap = hamt_bitpos(hash, shift);
1501
1502 *new_node = (PyHamtNode *)node;
1503 return W_NEWNODE;
1504 }
1505
1506 /* Allocate a new Collision node with capacity for one
1507 less key/value pair */
1508 PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1509 hamt_node_collision_new(
1510 self->c_hash, Py_SIZE(self) - 2);
1511
1512 /* Copy all other keys from `self` to `new` */
1513 Py_ssize_t i;
1514 for (i = 0; i < key_idx; i++) {
1515 Py_INCREF(self->c_array[i]);
1516 new->c_array[i] = self->c_array[i];
1517 }
1518 for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1519 Py_INCREF(self->c_array[i]);
1520 new->c_array[i - 2] = self->c_array[i];
1521 }
1522
1523 *new_node = (PyHamtNode*)new;
1524 return W_NEWNODE;
1525
1526 default:
1527 Py_UNREACHABLE();
1528 }
1529}
1530
1531static hamt_find_t
1532hamt_node_collision_find(PyHamtNode_Collision *self,
1533 uint32_t shift, int32_t hash,
1534 PyObject *key, PyObject **val)
1535{
1536 /* Lookup `key` in the Collision node `self`. Set the value
1537 for the found key to 'val'. */
1538
1539 Py_ssize_t idx = -1;
1540 hamt_find_t res;
1541
1542 res = hamt_node_collision_find_index(self, key, &idx);
1543 if (res == F_ERROR || res == F_NOT_FOUND) {
1544 return res;
1545 }
1546
1547 assert(idx >= 0);
1548 assert(idx + 1 < Py_SIZE(self));
1549
1550 *val = self->c_array[idx + 1];
1551 assert(*val != NULL);
1552
1553 return F_FOUND;
1554}
1555
1556
1557static int
1558hamt_node_collision_traverse(PyHamtNode_Collision *self,
1559 visitproc visit, void *arg)
1560{
1561 /* Collision's tp_traverse */
1562
1563 Py_ssize_t i;
1564
1565 for (i = Py_SIZE(self); --i >= 0; ) {
1566 Py_VISIT(self->c_array[i]);
1567 }
1568
1569 return 0;
1570}
1571
1572static void
1573hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1574{
1575 /* Collision's tp_dealloc */
1576
1577 Py_ssize_t len = Py_SIZE(self);
1578
1579 PyObject_GC_UnTrack(self);
1580 Py_TRASHCAN_SAFE_BEGIN(self)
1581
1582 if (len > 0) {
1583
1584 while (--len >= 0) {
1585 Py_XDECREF(self->c_array[len]);
1586 }
1587 }
1588
1589 Py_TYPE(self)->tp_free((PyObject *)self);
1590 Py_TRASHCAN_SAFE_END(self)
1591}
1592
1593#ifdef Py_DEBUG
1594static int
1595hamt_node_collision_dump(PyHamtNode_Collision *node,
1596 _PyUnicodeWriter *writer, int level)
1597{
1598 /* Debug build: __dump__() method implementation for Collision nodes. */
1599
1600 Py_ssize_t i;
1601
1602 if (_hamt_dump_ident(writer, level + 1)) {
1603 goto error;
1604 }
1605
1606 if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1607 Py_SIZE(node), node))
1608 {
1609 goto error;
1610 }
1611
1612 for (i = 0; i < Py_SIZE(node); i += 2) {
1613 PyObject *key = node->c_array[i];
1614 PyObject *val = node->c_array[i + 1];
1615
1616 if (_hamt_dump_ident(writer, level + 2)) {
1617 goto error;
1618 }
1619
1620 if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1621 goto error;
1622 }
1623 }
1624
1625 return 0;
1626error:
1627 return -1;
1628}
1629#endif /* Py_DEBUG */
1630
1631
1632/////////////////////////////////// Array Node
1633
1634
1635static PyHamtNode *
1636hamt_node_array_new(Py_ssize_t count)
1637{
1638 Py_ssize_t i;
1639
1640 PyHamtNode_Array *node = PyObject_GC_New(
1641 PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1642 if (node == NULL) {
1643 return NULL;
1644 }
1645
1646 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1647 node->a_array[i] = NULL;
1648 }
1649
1650 node->a_count = count;
1651
1652 _PyObject_GC_TRACK(node);
1653 return (PyHamtNode *)node;
1654}
1655
1656static PyHamtNode_Array *
1657hamt_node_array_clone(PyHamtNode_Array *node)
1658{
1659 PyHamtNode_Array *clone;
1660 Py_ssize_t i;
1661
1662 VALIDATE_ARRAY_NODE(node)
1663
1664 /* Create a new Array node. */
1665 clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1666 if (clone == NULL) {
1667 return NULL;
1668 }
1669
1670 /* Copy all elements from the current Array node to the new one. */
1671 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1672 Py_XINCREF(node->a_array[i]);
1673 clone->a_array[i] = node->a_array[i];
1674 }
1675
1676 VALIDATE_ARRAY_NODE(clone)
1677 return clone;
1678}
1679
1680static PyHamtNode *
1681hamt_node_array_assoc(PyHamtNode_Array *self,
1682 uint32_t shift, int32_t hash,
1683 PyObject *key, PyObject *val, int* added_leaf)
1684{
1685 /* Set a new key to this level (currently a Collision node)
1686 of the tree.
1687
1688 Array nodes don't store values, they can only point to
1689 other nodes. They are simple arrays of 32 BaseNode pointers/
1690 */
1691
1692 uint32_t idx = hamt_mask(hash, shift);
1693 PyHamtNode *node = self->a_array[idx];
1694 PyHamtNode *child_node;
1695 PyHamtNode_Array *new_node;
1696 Py_ssize_t i;
1697
1698 if (node == NULL) {
1699 /* There's no child node for the given hash. Create a new
1700 Bitmap node for this key. */
1701
1702 PyHamtNode_Bitmap *empty = NULL;
1703
1704 /* Get an empty Bitmap node to work with. */
1705 empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1706 if (empty == NULL) {
1707 return NULL;
1708 }
1709
1710 /* Set key/val to the newly created empty Bitmap, thus
1711 creating a new Bitmap node with our key/value pair. */
1712 child_node = hamt_node_bitmap_assoc(
1713 empty,
1714 shift + 5, hash, key, val, added_leaf);
1715 Py_DECREF(empty);
1716 if (child_node == NULL) {
1717 return NULL;
1718 }
1719
1720 /* Create a new Array node. */
1721 new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1722 if (new_node == NULL) {
1723 Py_DECREF(child_node);
1724 return NULL;
1725 }
1726
1727 /* Copy all elements from the current Array node to the
1728 new one. */
1729 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1730 Py_XINCREF(self->a_array[i]);
1731 new_node->a_array[i] = self->a_array[i];
1732 }
1733
1734 assert(new_node->a_array[idx] == NULL);
1735 new_node->a_array[idx] = child_node; /* borrow */
1736 VALIDATE_ARRAY_NODE(new_node)
1737 }
1738 else {
1739 /* There's a child node for the given hash.
1740 Set the key to it./ */
1741 child_node = hamt_node_assoc(
1742 node, shift + 5, hash, key, val, added_leaf);
1743 if (child_node == (PyHamtNode *)self) {
1744 Py_DECREF(child_node);
1745 return (PyHamtNode *)self;
1746 }
1747
1748 new_node = hamt_node_array_clone(self);
1749 if (new_node == NULL) {
1750 Py_DECREF(child_node);
1751 return NULL;
1752 }
1753
1754 Py_SETREF(new_node->a_array[idx], child_node); /* borrow */
1755 VALIDATE_ARRAY_NODE(new_node)
1756 }
1757
1758 return (PyHamtNode *)new_node;
1759}
1760
1761static hamt_without_t
1762hamt_node_array_without(PyHamtNode_Array *self,
1763 uint32_t shift, int32_t hash,
1764 PyObject *key,
1765 PyHamtNode **new_node)
1766{
1767 uint32_t idx = hamt_mask(hash, shift);
1768 PyHamtNode *node = self->a_array[idx];
1769
1770 if (node == NULL) {
1771 return W_NOT_FOUND;
1772 }
1773
1774 PyHamtNode *sub_node = NULL;
1775 hamt_without_t res = hamt_node_without(
1776 (PyHamtNode *)node,
1777 shift + 5, hash, key, &sub_node);
1778
1779 switch (res) {
1780 case W_NOT_FOUND:
1781 case W_ERROR:
1782 assert(sub_node == NULL);
1783 return res;
1784
1785 case W_NEWNODE: {
1786 /* We need to replace a node at the `idx` index.
1787 Clone this node and replace.
1788 */
1789 assert(sub_node != NULL);
1790
1791 PyHamtNode_Array *clone = hamt_node_array_clone(self);
1792 if (clone == NULL) {
1793 Py_DECREF(sub_node);
1794 return W_ERROR;
1795 }
1796
1797 Py_SETREF(clone->a_array[idx], sub_node); /* borrow */
1798 *new_node = (PyHamtNode*)clone; /* borrow */
1799 return W_NEWNODE;
1800 }
1801
1802 case W_EMPTY: {
1803 assert(sub_node == NULL);
1804 /* We need to remove a node at the `idx` index.
1805 Calculate the size of the replacement Array node.
1806 */
1807 Py_ssize_t new_count = self->a_count - 1;
1808
1809 if (new_count == 0) {
1810 return W_EMPTY;
1811 }
1812
1813 if (new_count >= 16) {
1814 /* We convert Bitmap nodes to Array nodes, when a
1815 Bitmap node needs to store more than 15 key/value
1816 pairs. So we will create a new Array node if we
1817 the number of key/values after deletion is still
1818 greater than 15.
1819 */
1820
1821 PyHamtNode_Array *new = hamt_node_array_clone(self);
1822 if (new == NULL) {
1823 return W_ERROR;
1824 }
1825 new->a_count = new_count;
1826 Py_CLEAR(new->a_array[idx]);
1827
1828 *new_node = (PyHamtNode*)new; /* borrow */
1829 return W_NEWNODE;
1830 }
1831
1832 /* New Array node would have less than 16 key/value
1833 pairs. We need to create a replacement Bitmap node. */
1834
1835 Py_ssize_t bitmap_size = new_count * 2;
1836 uint32_t bitmap = 0;
1837
1838 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1839 hamt_node_bitmap_new(bitmap_size);
1840 if (new == NULL) {
1841 return W_ERROR;
1842 }
1843
1844 Py_ssize_t new_i = 0;
1845 for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1846 if (i == idx) {
1847 /* Skip the node we are deleting. */
1848 continue;
1849 }
1850
1851 PyHamtNode *node = self->a_array[i];
1852 if (node == NULL) {
1853 /* Skip any missing nodes. */
1854 continue;
1855 }
1856
1857 bitmap |= 1 << i;
1858
1859 if (IS_BITMAP_NODE(node)) {
1860 PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1861
1862 if (hamt_node_bitmap_count(child) == 1 &&
1863 child->b_array[0] != NULL)
1864 {
1865 /* node is a Bitmap with one key/value pair, just
1866 merge it into the new Bitmap node we're building.
1867
1868 Note that we don't inline Bitmap nodes that
1869 have a NULL key -- those nodes point to another
1870 tree level, and we cannot simply move tree levels
1871 up or down.
1872 */
1873 PyObject *key = child->b_array[0];
1874 PyObject *val = child->b_array[1];
1875
1876 Py_INCREF(key);
1877 new->b_array[new_i] = key;
1878 Py_INCREF(val);
1879 new->b_array[new_i + 1] = val;
1880 }
1881 else {
1882 new->b_array[new_i] = NULL;
1883 Py_INCREF(node);
1884 new->b_array[new_i + 1] = (PyObject*)node;
1885 }
1886 }
1887 else {
1888
1889#ifdef Py_DEBUG
1890 if (IS_COLLISION_NODE(node)) {
1891 Py_ssize_t child_count = hamt_node_collision_count(
1892 (PyHamtNode_Collision*)node);
1893 assert(child_count > 1);
1894 }
1895 else if (IS_ARRAY_NODE(node)) {
1896 assert(((PyHamtNode_Array*)node)->a_count >= 16);
1897 }
1898#endif
1899
1900 /* Just copy the node into our new Bitmap */
1901 new->b_array[new_i] = NULL;
1902 Py_INCREF(node);
1903 new->b_array[new_i + 1] = (PyObject*)node;
1904 }
1905
1906 new_i += 2;
1907 }
1908
1909 new->b_bitmap = bitmap;
1910 *new_node = (PyHamtNode*)new; /* borrow */
1911 return W_NEWNODE;
1912 }
1913
1914 default:
1915 Py_UNREACHABLE();
1916 }
1917}
1918
1919static hamt_find_t
1920hamt_node_array_find(PyHamtNode_Array *self,
1921 uint32_t shift, int32_t hash,
1922 PyObject *key, PyObject **val)
1923{
1924 /* Lookup `key` in the Array node `self`. Set the value
1925 for the found key to 'val'. */
1926
1927 uint32_t idx = hamt_mask(hash, shift);
1928 PyHamtNode *node;
1929
1930 node = self->a_array[idx];
1931 if (node == NULL) {
1932 return F_NOT_FOUND;
1933 }
1934
1935 /* Dispatch to the generic hamt_node_find */
1936 return hamt_node_find(node, shift + 5, hash, key, val);
1937}
1938
1939static int
1940hamt_node_array_traverse(PyHamtNode_Array *self,
1941 visitproc visit, void *arg)
1942{
1943 /* Array's tp_traverse */
1944
1945 Py_ssize_t i;
1946
1947 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1948 Py_VISIT(self->a_array[i]);
1949 }
1950
1951 return 0;
1952}
1953
1954static void
1955hamt_node_array_dealloc(PyHamtNode_Array *self)
1956{
1957 /* Array's tp_dealloc */
1958
1959 Py_ssize_t i;
1960
1961 PyObject_GC_UnTrack(self);
1962 Py_TRASHCAN_SAFE_BEGIN(self)
1963
1964 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1965 Py_XDECREF(self->a_array[i]);
1966 }
1967
1968 Py_TYPE(self)->tp_free((PyObject *)self);
1969 Py_TRASHCAN_SAFE_END(self)
1970}
1971
1972#ifdef Py_DEBUG
1973static int
1974hamt_node_array_dump(PyHamtNode_Array *node,
1975 _PyUnicodeWriter *writer, int level)
1976{
1977 /* Debug build: __dump__() method implementation for Array nodes. */
1978
1979 Py_ssize_t i;
1980
1981 if (_hamt_dump_ident(writer, level + 1)) {
1982 goto error;
1983 }
1984
1985 if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1986 goto error;
1987 }
1988
1989 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1990 if (node->a_array[i] == NULL) {
1991 continue;
1992 }
1993
1994 if (_hamt_dump_ident(writer, level + 2)) {
1995 goto error;
1996 }
1997
1998 if (_hamt_dump_format(writer, "%d::\n", i)) {
1999 goto error;
2000 }
2001
2002 if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2003 goto error;
2004 }
2005
2006 if (_hamt_dump_format(writer, "\n")) {
2007 goto error;
2008 }
2009 }
2010
2011 return 0;
2012error:
2013 return -1;
2014}
2015#endif /* Py_DEBUG */
2016
2017
2018/////////////////////////////////// Node Dispatch
2019
2020
2021static PyHamtNode *
2022hamt_node_assoc(PyHamtNode *node,
2023 uint32_t shift, int32_t hash,
2024 PyObject *key, PyObject *val, int* added_leaf)
2025{
2026 /* Set key/value to the 'node' starting with the given shift/hash.
2027 Return a new node, or the same node if key/value already
2028 set.
2029
2030 added_leaf will be set to 1 if key/value wasn't in the
2031 tree before.
2032
2033 This method automatically dispatches to the suitable
2034 hamt_node_{nodetype}_assoc method.
2035 */
2036
2037 if (IS_BITMAP_NODE(node)) {
2038 return hamt_node_bitmap_assoc(
2039 (PyHamtNode_Bitmap *)node,
2040 shift, hash, key, val, added_leaf);
2041 }
2042 else if (IS_ARRAY_NODE(node)) {
2043 return hamt_node_array_assoc(
2044 (PyHamtNode_Array *)node,
2045 shift, hash, key, val, added_leaf);
2046 }
2047 else {
2048 assert(IS_COLLISION_NODE(node));
2049 return hamt_node_collision_assoc(
2050 (PyHamtNode_Collision *)node,
2051 shift, hash, key, val, added_leaf);
2052 }
2053}
2054
2055static hamt_without_t
2056hamt_node_without(PyHamtNode *node,
2057 uint32_t shift, int32_t hash,
2058 PyObject *key,
2059 PyHamtNode **new_node)
2060{
2061 if (IS_BITMAP_NODE(node)) {
2062 return hamt_node_bitmap_without(
2063 (PyHamtNode_Bitmap *)node,
2064 shift, hash, key,
2065 new_node);
2066 }
2067 else if (IS_ARRAY_NODE(node)) {
2068 return hamt_node_array_without(
2069 (PyHamtNode_Array *)node,
2070 shift, hash, key,
2071 new_node);
2072 }
2073 else {
2074 assert(IS_COLLISION_NODE(node));
2075 return hamt_node_collision_without(
2076 (PyHamtNode_Collision *)node,
2077 shift, hash, key,
2078 new_node);
2079 }
2080}
2081
2082static hamt_find_t
2083hamt_node_find(PyHamtNode *node,
2084 uint32_t shift, int32_t hash,
2085 PyObject *key, PyObject **val)
2086{
2087 /* Find the key in the node starting with the given shift/hash.
2088
2089 If a value is found, the result will be set to F_FOUND, and
2090 *val will point to the found value object.
2091
2092 If a value wasn't found, the result will be set to F_NOT_FOUND.
2093
2094 If an exception occurs during the call, the result will be F_ERROR.
2095
2096 This method automatically dispatches to the suitable
2097 hamt_node_{nodetype}_find method.
2098 */
2099
2100 if (IS_BITMAP_NODE(node)) {
2101 return hamt_node_bitmap_find(
2102 (PyHamtNode_Bitmap *)node,
2103 shift, hash, key, val);
2104
2105 }
2106 else if (IS_ARRAY_NODE(node)) {
2107 return hamt_node_array_find(
2108 (PyHamtNode_Array *)node,
2109 shift, hash, key, val);
2110 }
2111 else {
2112 assert(IS_COLLISION_NODE(node));
2113 return hamt_node_collision_find(
2114 (PyHamtNode_Collision *)node,
2115 shift, hash, key, val);
2116 }
2117}
2118
2119#ifdef Py_DEBUG
2120static int
2121hamt_node_dump(PyHamtNode *node,
2122 _PyUnicodeWriter *writer, int level)
2123{
2124 /* Debug build: __dump__() method implementation for a node.
2125
2126 This method automatically dispatches to the suitable
2127 hamt_node_{nodetype})_dump method.
2128 */
2129
2130 if (IS_BITMAP_NODE(node)) {
2131 return hamt_node_bitmap_dump(
2132 (PyHamtNode_Bitmap *)node, writer, level);
2133 }
2134 else if (IS_ARRAY_NODE(node)) {
2135 return hamt_node_array_dump(
2136 (PyHamtNode_Array *)node, writer, level);
2137 }
2138 else {
2139 assert(IS_COLLISION_NODE(node));
2140 return hamt_node_collision_dump(
2141 (PyHamtNode_Collision *)node, writer, level);
2142 }
2143}
2144#endif /* Py_DEBUG */
2145
2146
2147/////////////////////////////////// Iterators: Machinery
2148
2149
2150static hamt_iter_t
2151hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2152
2153
2154static void
2155hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2156{
2157 for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2158 iter->i_nodes[i] = NULL;
2159 iter->i_pos[i] = 0;
2160 }
2161
2162 iter->i_level = 0;
2163
2164 /* Note: we don't incref/decref nodes in i_nodes. */
2165 iter->i_nodes[0] = root;
2166}
2167
2168static hamt_iter_t
2169hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2170 PyObject **key, PyObject **val)
2171{
2172 int8_t level = iter->i_level;
2173
2174 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2175 Py_ssize_t pos = iter->i_pos[level];
2176
2177 if (pos + 1 >= Py_SIZE(node)) {
2178#ifdef Py_DEBUG
2179 assert(iter->i_level >= 0);
2180 iter->i_nodes[iter->i_level] = NULL;
2181#endif
2182 iter->i_level--;
2183 return hamt_iterator_next(iter, key, val);
2184 }
2185
2186 if (node->b_array[pos] == NULL) {
2187 iter->i_pos[level] = pos + 2;
2188
2189 int8_t next_level = level + 1;
2190 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2191 iter->i_level = next_level;
2192 iter->i_pos[next_level] = 0;
2193 iter->i_nodes[next_level] = (PyHamtNode *)
2194 node->b_array[pos + 1];
2195
2196 return hamt_iterator_next(iter, key, val);
2197 }
2198
2199 *key = node->b_array[pos];
2200 *val = node->b_array[pos + 1];
2201 iter->i_pos[level] = pos + 2;
2202 return I_ITEM;
2203}
2204
2205static hamt_iter_t
2206hamt_iterator_collision_next(PyHamtIteratorState *iter,
2207 PyObject **key, PyObject **val)
2208{
2209 int8_t level = iter->i_level;
2210
2211 PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2212 Py_ssize_t pos = iter->i_pos[level];
2213
2214 if (pos + 1 >= Py_SIZE(node)) {
2215#ifdef Py_DEBUG
2216 assert(iter->i_level >= 0);
2217 iter->i_nodes[iter->i_level] = NULL;
2218#endif
2219 iter->i_level--;
2220 return hamt_iterator_next(iter, key, val);
2221 }
2222
2223 *key = node->c_array[pos];
2224 *val = node->c_array[pos + 1];
2225 iter->i_pos[level] = pos + 2;
2226 return I_ITEM;
2227}
2228
2229static hamt_iter_t
2230hamt_iterator_array_next(PyHamtIteratorState *iter,
2231 PyObject **key, PyObject **val)
2232{
2233 int8_t level = iter->i_level;
2234
2235 PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2236 Py_ssize_t pos = iter->i_pos[level];
2237
2238 if (pos >= HAMT_ARRAY_NODE_SIZE) {
2239#ifdef Py_DEBUG
2240 assert(iter->i_level >= 0);
2241 iter->i_nodes[iter->i_level] = NULL;
2242#endif
2243 iter->i_level--;
2244 return hamt_iterator_next(iter, key, val);
2245 }
2246
2247 for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2248 if (node->a_array[i] != NULL) {
2249 iter->i_pos[level] = i + 1;
2250
2251 int8_t next_level = level + 1;
2252 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2253 iter->i_pos[next_level] = 0;
2254 iter->i_nodes[next_level] = node->a_array[i];
2255 iter->i_level = next_level;
2256
2257 return hamt_iterator_next(iter, key, val);
2258 }
2259 }
2260
2261#ifdef Py_DEBUG
2262 assert(iter->i_level >= 0);
2263 iter->i_nodes[iter->i_level] = NULL;
2264#endif
2265
2266 iter->i_level--;
2267 return hamt_iterator_next(iter, key, val);
2268}
2269
2270static hamt_iter_t
2271hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2272{
2273 if (iter->i_level < 0) {
2274 return I_END;
2275 }
2276
2277 assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2278
2279 PyHamtNode *current = iter->i_nodes[iter->i_level];
2280
2281 if (IS_BITMAP_NODE(current)) {
2282 return hamt_iterator_bitmap_next(iter, key, val);
2283 }
2284 else if (IS_ARRAY_NODE(current)) {
2285 return hamt_iterator_array_next(iter, key, val);
2286 }
2287 else {
2288 assert(IS_COLLISION_NODE(current));
2289 return hamt_iterator_collision_next(iter, key, val);
2290 }
2291}
2292
2293
2294/////////////////////////////////// HAMT high-level functions
2295
2296
2297PyHamtObject *
2298_PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2299{
2300 int32_t key_hash;
2301 int added_leaf = 0;
2302 PyHamtNode *new_root;
2303 PyHamtObject *new_o;
2304
2305 key_hash = hamt_hash(key);
2306 if (key_hash == -1) {
2307 return NULL;
2308 }
2309
2310 new_root = hamt_node_assoc(
2311 (PyHamtNode *)(o->h_root),
2312 0, key_hash, key, val, &added_leaf);
2313 if (new_root == NULL) {
2314 return NULL;
2315 }
2316
2317 if (new_root == o->h_root) {
2318 Py_DECREF(new_root);
2319 Py_INCREF(o);
2320 return o;
2321 }
2322
2323 new_o = hamt_alloc();
2324 if (new_o == NULL) {
2325 Py_DECREF(new_root);
2326 return NULL;
2327 }
2328
2329 new_o->h_root = new_root; /* borrow */
2330 new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2331
2332 return new_o;
2333}
2334
2335PyHamtObject *
2336_PyHamt_Without(PyHamtObject *o, PyObject *key)
2337{
2338 int32_t key_hash = hamt_hash(key);
2339 if (key_hash == -1) {
2340 return NULL;
2341 }
2342
2343 PyHamtNode *new_root;
2344
2345 hamt_without_t res = hamt_node_without(
2346 (PyHamtNode *)(o->h_root),
2347 0, key_hash, key,
2348 &new_root);
2349
2350 switch (res) {
2351 case W_ERROR:
2352 return NULL;
2353 case W_EMPTY:
2354 return _PyHamt_New();
2355 case W_NOT_FOUND:
2356 Py_INCREF(o);
2357 return o;
2358 case W_NEWNODE: {
2359 PyHamtObject *new_o = hamt_alloc();
2360 if (new_o == NULL) {
2361 Py_DECREF(new_root);
2362 return NULL;
2363 }
2364
2365 new_o->h_root = new_root; /* borrow */
2366 new_o->h_count = o->h_count - 1;
2367 assert(new_o->h_count >= 0);
2368 return new_o;
2369 }
2370 default:
2371 Py_UNREACHABLE();
2372 }
2373}
2374
2375static hamt_find_t
2376hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2377{
2378 if (o->h_count == 0) {
2379 return F_NOT_FOUND;
2380 }
2381
2382 int32_t key_hash = hamt_hash(key);
2383 if (key_hash == -1) {
2384 return F_ERROR;
2385 }
2386
2387 return hamt_node_find(o->h_root, 0, key_hash, key, val);
2388}
2389
2390
2391int
2392_PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2393{
2394 hamt_find_t res = hamt_find(o, key, val);
2395 switch (res) {
2396 case F_ERROR:
2397 return -1;
2398 case F_NOT_FOUND:
2399 return 0;
2400 case F_FOUND:
2401 return 1;
2402 default:
2403 Py_UNREACHABLE();
2404 }
2405}
2406
2407
2408int
2409_PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2410{
2411 if (v == w) {
2412 return 1;
2413 }
2414
2415 if (v->h_count != w->h_count) {
2416 return 0;
2417 }
2418
2419 PyHamtIteratorState iter;
2420 hamt_iter_t iter_res;
2421 hamt_find_t find_res;
2422 PyObject *v_key;
2423 PyObject *v_val;
2424 PyObject *w_val;
2425
2426 hamt_iterator_init(&iter, v->h_root);
2427
2428 do {
2429 iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2430 if (iter_res == I_ITEM) {
2431 find_res = hamt_find(w, v_key, &w_val);
2432 switch (find_res) {
2433 case F_ERROR:
2434 return -1;
2435
2436 case F_NOT_FOUND:
2437 return 0;
2438
2439 case F_FOUND: {
2440 int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2441 if (cmp < 0) {
2442 return -1;
2443 }
2444 if (cmp == 0) {
2445 return 0;
2446 }
2447 }
2448 }
2449 }
2450 } while (iter_res != I_END);
2451
2452 return 1;
2453}
2454
2455Py_ssize_t
2456_PyHamt_Len(PyHamtObject *o)
2457{
2458 return o->h_count;
2459}
2460
2461static PyHamtObject *
2462hamt_alloc(void)
2463{
2464 PyHamtObject *o;
2465 o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2466 if (o == NULL) {
2467 return NULL;
2468 }
2469 o->h_weakreflist = NULL;
2470 PyObject_GC_Track(o);
2471 return o;
2472}
2473
2474PyHamtObject *
2475_PyHamt_New(void)
2476{
2477 if (_empty_hamt != NULL) {
2478 /* HAMT is an immutable object so we can easily cache an
2479 empty instance. */
2480 Py_INCREF(_empty_hamt);
2481 return _empty_hamt;
2482 }
2483
2484 PyHamtObject *o = hamt_alloc();
2485 if (o == NULL) {
2486 return NULL;
2487 }
2488
2489 o->h_root = hamt_node_bitmap_new(0);
2490 if (o->h_root == NULL) {
2491 Py_DECREF(o);
2492 return NULL;
2493 }
2494
2495 o->h_count = 0;
2496
2497 if (_empty_hamt == NULL) {
2498 Py_INCREF(o);
2499 _empty_hamt = o;
2500 }
2501
2502 return o;
2503}
2504
2505#ifdef Py_DEBUG
2506static PyObject *
2507hamt_dump(PyHamtObject *self)
2508{
2509 _PyUnicodeWriter writer;
2510
2511 _PyUnicodeWriter_Init(&writer);
2512
2513 if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2514 goto error;
2515 }
2516
2517 if (hamt_node_dump(self->h_root, &writer, 0)) {
2518 goto error;
2519 }
2520
2521 return _PyUnicodeWriter_Finish(&writer);
2522
2523error:
2524 _PyUnicodeWriter_Dealloc(&writer);
2525 return NULL;
2526}
2527#endif /* Py_DEBUG */
2528
2529
2530/////////////////////////////////// Iterators: Shared Iterator Implementation
2531
2532
2533static int
2534hamt_baseiter_tp_clear(PyHamtIterator *it)
2535{
2536 Py_CLEAR(it->hi_obj);
2537 return 0;
2538}
2539
2540static void
2541hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2542{
2543 PyObject_GC_UnTrack(it);
2544 (void)hamt_baseiter_tp_clear(it);
2545 PyObject_GC_Del(it);
2546}
2547
2548static int
2549hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2550{
2551 Py_VISIT(it->hi_obj);
2552 return 0;
2553}
2554
2555static PyObject *
2556hamt_baseiter_tp_iternext(PyHamtIterator *it)
2557{
2558 PyObject *key;
2559 PyObject *val;
2560 hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2561
2562 switch (res) {
2563 case I_END:
2564 PyErr_SetNone(PyExc_StopIteration);
2565 return NULL;
2566
2567 case I_ITEM: {
2568 return (*(it->hi_yield))(key, val);
2569 }
2570
2571 default: {
2572 Py_UNREACHABLE();
2573 }
2574 }
2575}
2576
2577static Py_ssize_t
2578hamt_baseiter_tp_len(PyHamtIterator *it)
2579{
2580 return it->hi_obj->h_count;
2581}
2582
2583static PyMappingMethods PyHamtIterator_as_mapping = {
2584 (lenfunc)hamt_baseiter_tp_len,
2585};
2586
2587static PyObject *
2588hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2589{
2590 PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2591 if (it == NULL) {
2592 return NULL;
2593 }
2594
2595 Py_INCREF(o);
2596 it->hi_obj = o;
2597 it->hi_yield = yield;
2598
2599 hamt_iterator_init(&it->hi_iter, o->h_root);
2600
2601 return (PyObject*)it;
2602}
2603
2604#define ITERATOR_TYPE_SHARED_SLOTS \
2605 .tp_basicsize = sizeof(PyHamtIterator), \
2606 .tp_itemsize = 0, \
2607 .tp_as_mapping = &PyHamtIterator_as_mapping, \
2608 .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc, \
2609 .tp_getattro = PyObject_GenericGetAttr, \
2610 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, \
2611 .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse, \
2612 .tp_clear = (inquiry)hamt_baseiter_tp_clear, \
2613 .tp_iter = PyObject_SelfIter, \
2614 .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2615
2616
2617/////////////////////////////////// _PyHamtItems_Type
2618
2619
2620PyTypeObject _PyHamtItems_Type = {
2621 PyVarObject_HEAD_INIT(NULL, 0)
2622 "items",
2623 ITERATOR_TYPE_SHARED_SLOTS
2624};
2625
2626static PyObject *
2627hamt_iter_yield_items(PyObject *key, PyObject *val)
2628{
2629 return PyTuple_Pack(2, key, val);
2630}
2631
2632PyObject *
2633_PyHamt_NewIterItems(PyHamtObject *o)
2634{
2635 return hamt_baseiter_new(
2636 &_PyHamtItems_Type, hamt_iter_yield_items, o);
2637}
2638
2639
2640/////////////////////////////////// _PyHamtKeys_Type
2641
2642
2643PyTypeObject _PyHamtKeys_Type = {
2644 PyVarObject_HEAD_INIT(NULL, 0)
2645 "keys",
2646 ITERATOR_TYPE_SHARED_SLOTS
2647};
2648
2649static PyObject *
2650hamt_iter_yield_keys(PyObject *key, PyObject *val)
2651{
2652 Py_INCREF(key);
2653 return key;
2654}
2655
2656PyObject *
2657_PyHamt_NewIterKeys(PyHamtObject *o)
2658{
2659 return hamt_baseiter_new(
2660 &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2661}
2662
2663
2664/////////////////////////////////// _PyHamtValues_Type
2665
2666
2667PyTypeObject _PyHamtValues_Type = {
2668 PyVarObject_HEAD_INIT(NULL, 0)
2669 "values",
2670 ITERATOR_TYPE_SHARED_SLOTS
2671};
2672
2673static PyObject *
2674hamt_iter_yield_values(PyObject *key, PyObject *val)
2675{
2676 Py_INCREF(val);
2677 return val;
2678}
2679
2680PyObject *
2681_PyHamt_NewIterValues(PyHamtObject *o)
2682{
2683 return hamt_baseiter_new(
2684 &_PyHamtValues_Type, hamt_iter_yield_values, o);
2685}
2686
2687
2688/////////////////////////////////// _PyHamt_Type
2689
2690
2691#ifdef Py_DEBUG
2692static PyObject *
2693hamt_dump(PyHamtObject *self);
2694#endif
2695
2696
2697static PyObject *
2698hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2699{
2700 return (PyObject*)_PyHamt_New();
2701}
2702
2703static int
2704hamt_tp_clear(PyHamtObject *self)
2705{
2706 Py_CLEAR(self->h_root);
2707 return 0;
2708}
2709
2710
2711static int
2712hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2713{
2714 Py_VISIT(self->h_root);
2715 return 0;
2716}
2717
2718static void
2719hamt_tp_dealloc(PyHamtObject *self)
2720{
2721 PyObject_GC_UnTrack(self);
2722 if (self->h_weakreflist != NULL) {
2723 PyObject_ClearWeakRefs((PyObject*)self);
2724 }
2725 (void)hamt_tp_clear(self);
2726 Py_TYPE(self)->tp_free(self);
2727}
2728
2729
2730static PyObject *
2731hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2732{
2733 if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2734 Py_RETURN_NOTIMPLEMENTED;
2735 }
2736
2737 int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2738 if (res < 0) {
2739 return NULL;
2740 }
2741
2742 if (op == Py_NE) {
2743 res = !res;
2744 }
2745
2746 if (res) {
2747 Py_RETURN_TRUE;
2748 }
2749 else {
2750 Py_RETURN_FALSE;
2751 }
2752}
2753
2754static int
2755hamt_tp_contains(PyHamtObject *self, PyObject *key)
2756{
2757 PyObject *val;
2758 return _PyHamt_Find(self, key, &val);
2759}
2760
2761static PyObject *
2762hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2763{
2764 PyObject *val;
2765 hamt_find_t res = hamt_find(self, key, &val);
2766 switch (res) {
2767 case F_ERROR:
2768 return NULL;
2769 case F_FOUND:
2770 Py_INCREF(val);
2771 return val;
2772 case F_NOT_FOUND:
2773 PyErr_SetObject(PyExc_KeyError, key);
2774 return NULL;
2775 default:
2776 Py_UNREACHABLE();
2777 }
2778}
2779
2780static Py_ssize_t
2781hamt_tp_len(PyHamtObject *self)
2782{
2783 return _PyHamt_Len(self);
2784}
2785
2786static PyObject *
2787hamt_tp_iter(PyHamtObject *self)
2788{
2789 return _PyHamt_NewIterKeys(self);
2790}
2791
2792static PyObject *
2793hamt_py_set(PyHamtObject *self, PyObject *args)
2794{
2795 PyObject *key;
2796 PyObject *val;
2797
2798 if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2799 return NULL;
2800 }
2801
2802 return (PyObject *)_PyHamt_Assoc(self, key, val);
2803}
2804
2805static PyObject *
2806hamt_py_get(PyHamtObject *self, PyObject *args)
2807{
2808 PyObject *key;
2809 PyObject *def = NULL;
2810
2811 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2812 return NULL;
2813 }
2814
2815 PyObject *val = NULL;
2816 hamt_find_t res = hamt_find(self, key, &val);
2817 switch (res) {
2818 case F_ERROR:
2819 return NULL;
2820 case F_FOUND:
2821 Py_INCREF(val);
2822 return val;
2823 case F_NOT_FOUND:
2824 if (def == NULL) {
2825 Py_RETURN_NONE;
2826 }
2827 Py_INCREF(def);
2828 return def;
2829 default:
2830 Py_UNREACHABLE();
2831 }
2832}
2833
2834static PyObject *
2835hamt_py_delete(PyHamtObject *self, PyObject *key)
2836{
2837 return (PyObject *)_PyHamt_Without(self, key);
2838}
2839
2840static PyObject *
2841hamt_py_items(PyHamtObject *self, PyObject *args)
2842{
2843 return _PyHamt_NewIterItems(self);
2844}
2845
2846static PyObject *
2847hamt_py_values(PyHamtObject *self, PyObject *args)
2848{
2849 return _PyHamt_NewIterValues(self);
2850}
2851
2852static PyObject *
2853hamt_py_keys(PyHamtObject *self, PyObject *args)
2854{
2855 return _PyHamt_NewIterKeys(self);
2856}
2857
2858#ifdef Py_DEBUG
2859static PyObject *
2860hamt_py_dump(PyHamtObject *self, PyObject *args)
2861{
2862 return hamt_dump(self);
2863}
2864#endif
2865
2866
2867static PyMethodDef PyHamt_methods[] = {
2868 {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2869 {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2870 {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2871 {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2872 {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2873 {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2874#ifdef Py_DEBUG
2875 {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2876#endif
2877 {NULL, NULL}
2878};
2879
2880static PySequenceMethods PyHamt_as_sequence = {
2881 0, /* sq_length */
2882 0, /* sq_concat */
2883 0, /* sq_repeat */
2884 0, /* sq_item */
2885 0, /* sq_slice */
2886 0, /* sq_ass_item */
2887 0, /* sq_ass_slice */
2888 (objobjproc)hamt_tp_contains, /* sq_contains */
2889 0, /* sq_inplace_concat */
2890 0, /* sq_inplace_repeat */
2891};
2892
2893static PyMappingMethods PyHamt_as_mapping = {
2894 (lenfunc)hamt_tp_len, /* mp_length */
2895 (binaryfunc)hamt_tp_subscript, /* mp_subscript */
2896};
2897
2898PyTypeObject _PyHamt_Type = {
2899 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2900 "hamt",
2901 sizeof(PyHamtObject),
2902 .tp_methods = PyHamt_methods,
2903 .tp_as_mapping = &PyHamt_as_mapping,
2904 .tp_as_sequence = &PyHamt_as_sequence,
2905 .tp_iter = (getiterfunc)hamt_tp_iter,
2906 .tp_dealloc = (destructor)hamt_tp_dealloc,
2907 .tp_getattro = PyObject_GenericGetAttr,
2908 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2909 .tp_richcompare = hamt_tp_richcompare,
2910 .tp_traverse = (traverseproc)hamt_tp_traverse,
2911 .tp_clear = (inquiry)hamt_tp_clear,
2912 .tp_new = hamt_tp_new,
2913 .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2914 .tp_hash = PyObject_HashNotImplemented,
2915};
2916
2917
2918/////////////////////////////////// Tree Node Types
2919
2920
2921PyTypeObject _PyHamt_ArrayNode_Type = {
2922 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2923 "hamt_array_node",
2924 sizeof(PyHamtNode_Array),
2925 0,
2926 .tp_dealloc = (destructor)hamt_node_array_dealloc,
2927 .tp_getattro = PyObject_GenericGetAttr,
2928 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2929 .tp_traverse = (traverseproc)hamt_node_array_traverse,
2930 .tp_free = PyObject_GC_Del,
2931 .tp_hash = PyObject_HashNotImplemented,
2932};
2933
2934PyTypeObject _PyHamt_BitmapNode_Type = {
2935 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2936 "hamt_bitmap_node",
2937 sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2938 sizeof(PyObject *),
2939 .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2940 .tp_getattro = PyObject_GenericGetAttr,
2941 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2942 .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2943 .tp_free = PyObject_GC_Del,
2944 .tp_hash = PyObject_HashNotImplemented,
2945};
2946
2947PyTypeObject _PyHamt_CollisionNode_Type = {
2948 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2949 "hamt_collision_node",
2950 sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2951 sizeof(PyObject *),
2952 .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2953 .tp_getattro = PyObject_GenericGetAttr,
2954 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2955 .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2956 .tp_free = PyObject_GC_Del,
2957 .tp_hash = PyObject_HashNotImplemented,
2958};
2959
2960
2961int
2962_PyHamt_Init(void)
2963{
2964 if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2965 (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2966 (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2967 (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2968 (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2969 (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2970 (PyType_Ready(&_PyHamtItems_Type) < 0))
2971 {
2972 return 0;
2973 }
2974
2975 return 1;
2976}
2977
2978void
2979_PyHamt_Fini(void)
2980{
2981 Py_CLEAR(_empty_hamt);
2982 Py_CLEAR(_empty_bitmap_node);
2983}