blob: e54d3a4c55f912d04593231d7322de57bedd5c7a [file] [log] [blame]
Yury Selivanovf23746a2018-01-22 19:11:18 -05001#include "Python.h"
2
3#include "structmember.h"
4#include "internal/pystate.h"
5#include "internal/hamt.h"
6
Yury Selivanovf23746a2018-01-22 19:11:18 -05007/*
8This file provides an implemention of an immutable mapping using the
9Hash Array Mapped Trie (or HAMT) datastructure.
10
11This design allows to have:
12
131. Efficient copy: immutable mappings can be copied by reference,
14 making it an O(1) operation.
15
162. Efficient mutations: due to structural sharing, only a portion of
17 the trie needs to be copied when the collection is mutated. The
18 cost of set/delete operations is O(log N).
19
203. Efficient lookups: O(log N).
21
22(where N is number of key/value items in the immutable mapping.)
23
24
25HAMT
26====
27
28The core idea of HAMT is that the shape of the trie is encoded into the
29hashes of keys.
30
31Say we want to store a K/V pair in our mapping. First, we calculate the
32hash of K, let's say it's 19830128, or in binary:
33
34 0b1001011101001010101110000 = 19830128
35
36Now let's partition this bit representation of the hash into blocks of
375 bits each:
38
39 0b00_00000_10010_11101_00101_01011_10000 = 19830128
40 (6) (5) (4) (3) (2) (1)
41
42Each block of 5 bits represents a number betwen 0 and 31. So if we have
43a tree that consists of nodes, each of which is an array of 32 pointers,
44those 5-bit blocks will encode a position on a single tree level.
45
46For example, storing the key K with hash 19830128, results in the following
47tree structure:
48
49 (array of 32 pointers)
50 +---+ -- +----+----+----+ -- +----+
51 root node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b10000 = 16 (1)
52 (level 1) +---+ -- +----+----+----+ -- +----+
53 |
54 +---+ -- +----+----+----+ -- +----+
55 a 2nd level node | 0 | .. | 10 | 11 | 12 | .. | 31 | 0b01011 = 11 (2)
56 +---+ -- +----+----+----+ -- +----+
57 |
58 +---+ -- +----+----+----+ -- +----+
Dmitry Alimov93a61192018-01-25 20:54:41 +030059 a 3rd level node | 0 | .. | 04 | 05 | 06 | .. | 31 | 0b00101 = 5 (3)
Yury Selivanovf23746a2018-01-22 19:11:18 -050060 +---+ -- +----+----+----+ -- +----+
61 |
62 +---+ -- +----+----+----+----+
63 a 4th level node | 0 | .. | 04 | 29 | 30 | 31 | 0b11101 = 29 (4)
64 +---+ -- +----+----+----+----+
65 |
66 +---+ -- +----+----+----+ -- +----+
67 a 5th level node | 0 | .. | 17 | 18 | 19 | .. | 31 | 0b10010 = 18 (5)
68 +---+ -- +----+----+----+ -- +----+
69 |
70 +--------------+
71 |
72 +---+ -- +----+----+----+ -- +----+
73 a 6th level node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b00000 = 0 (6)
74 +---+ -- +----+----+----+ -- +----+
75 |
76 V -- our value (or collision)
77
78To rehash: for a K/V pair, the hash of K encodes where in the tree V will
79be stored.
80
81To optimize memory footprint and handle hash collisions, our implementation
82uses three different types of nodes:
83
84 * A Bitmap node;
85 * An Array node;
86 * A Collision node.
87
88Because we implement an immutable dictionary, our nodes are also
89immutable. Therefore, when we need to modify a node, we copy it, and
90do that modification to the copy.
91
92
93Array Nodes
94-----------
95
96These nodes are very simple. Essentially they are arrays of 32 pointers
97we used to illustrate the high-level idea in the previous section.
98
99We use Array nodes only when we need to store more than 16 pointers
100in a single node.
101
102Array nodes do not store key objects or value objects. They are used
103only as an indirection level - their pointers point to other nodes in
104the tree.
105
106
107Bitmap Node
108-----------
109
110Allocating a new 32-pointers array for every node of our tree would be
111very expensive. Unless we store millions of keys, most of tree nodes would
112be very sparse.
113
114When we have less than 16 elements in a node, we don't want to use the
115Array node, that would mean that we waste a lot of memory. Instead,
116we can use bitmap compression and can have just as many pointers
117as we need!
118
119Bitmap nodes consist of two fields:
120
1211. An array of pointers. If a Bitmap node holds N elements, the
122 array will be of N pointers.
123
1242. A 32bit integer -- a bitmap field. If an N-th bit is set in the
125 bitmap, it means that the node has an N-th element.
126
127For example, say we need to store a 3 elements sparse array:
128
129 +---+ -- +---+ -- +----+ -- +----+
130 | 0 | .. | 4 | .. | 11 | .. | 17 |
131 +---+ -- +---+ -- +----+ -- +----+
132 | | |
133 o1 o2 o3
134
135We allocate a three-pointer Bitmap node. Its bitmap field will be
136then set to:
137
138 0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
139
140To check if our Bitmap node has an I-th element we can do:
141
142 bitmap & (1 << I)
143
144
145And here's a formula to calculate a position in our pointer array
146which would correspond to an I-th element:
147
148 popcount(bitmap & ((1 << I) - 1))
149
150
151Let's break it down:
152
153 * `popcount` is a function that returns a number of bits set to 1;
154
155 * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
156 set to the *right* of our bit.
157
158
159So for our 17, 11, and 4 indexes:
160
161 * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
162
163 * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
164
165 * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
166
167
168To conclude: Bitmap nodes are just like Array nodes -- they can store
169a number of pointers, but use bitmap compression to eliminate unused
170pointers.
171
172
173Bitmap nodes have two pointers for each item:
174
175 +----+----+----+----+ -- +----+----+
176 | k1 | v1 | k2 | v2 | .. | kN | vN |
177 +----+----+----+----+ -- +----+----+
178
179When kI == NULL, vI points to another tree level.
180
181When kI != NULL, the actual key object is stored in kI, and its
182value is stored in vI.
183
184
185Collision Nodes
186---------------
187
188Collision nodes are simple arrays of pointers -- two pointers per
189key/value. When there's a hash collision, say for k1/v1 and k2/v2
190we have `hash(k1)==hash(k2)`. Then our collision node will be:
191
192 +----+----+----+----+
193 | k1 | v1 | k2 | v2 |
194 +----+----+----+----+
195
196
197Tree Structure
198--------------
199
200All nodes are PyObjects.
201
202The `PyHamtObject` object has a pointer to the root node (h_root),
203and has a length field (h_count).
204
205High-level functions accept a PyHamtObject object and dispatch to
206lower-level functions depending on what kind of node h_root points to.
207
208
209Operations
210==========
211
212There are three fundamental operations on an immutable dictionary:
213
2141. "o.assoc(k, v)" will return a new immutable dictionary, that will be
215 a copy of "o", but with the "k/v" item set.
216
217 Functions in this file:
218
219 hamt_node_assoc, hamt_node_bitmap_assoc,
220 hamt_node_array_assoc, hamt_node_collision_assoc
221
222 `hamt_node_assoc` function accepts a node object, and calls
223 other functions depending on its actual type.
224
2252. "o.find(k)" will lookup key "k" in "o".
226
227 Functions:
228
229 hamt_node_find, hamt_node_bitmap_find,
230 hamt_node_array_find, hamt_node_collision_find
231
2323. "o.without(k)" will return a new immutable dictionary, that will be
233 a copy of "o", buth without the "k" key.
234
235 Functions:
236
237 hamt_node_without, hamt_node_bitmap_without,
238 hamt_node_array_without, hamt_node_collision_without
239
240
241Further Reading
242===============
243
2441. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
245
2462. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
247
2483. Clojure's PersistentHashMap implementation:
249 https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
250
251
252Debug
253=====
254
255The HAMT datatype is accessible for testing purposes under the
256`_testcapi` module:
257
258 >>> from _testcapi import hamt
259 >>> h = hamt()
260 >>> h2 = h.set('a', 2)
261 >>> h3 = h2.set('b', 3)
262 >>> list(h3)
263 ['a', 'b']
264
265When CPython is built in debug mode, a '__dump__()' method is available
266to introspect the tree:
267
268 >>> print(h3.__dump__())
269 HAMT(len=2):
270 BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
271 'a': 2
272 'b': 3
273*/
274
275
276#define IS_ARRAY_NODE(node) (Py_TYPE(node) == &_PyHamt_ArrayNode_Type)
277#define IS_BITMAP_NODE(node) (Py_TYPE(node) == &_PyHamt_BitmapNode_Type)
278#define IS_COLLISION_NODE(node) (Py_TYPE(node) == &_PyHamt_CollisionNode_Type)
279
280
281/* Return type for 'find' (lookup a key) functions.
282
283 * F_ERROR - an error occurred;
284 * F_NOT_FOUND - the key was not found;
285 * F_FOUND - the key was found.
286*/
287typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
288
289
290/* Return type for 'without' (delete a key) functions.
291
292 * W_ERROR - an error occurred;
293 * W_NOT_FOUND - the key was not found: there's nothing to delete;
294 * W_EMPTY - the key was found: the node/tree would be empty
295 if the key is deleted;
296 * W_NEWNODE - the key was found: a new node/tree is returned
297 without that key.
298*/
299typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
300
301
302/* Low-level iterator protocol type.
303
304 * I_ITEM - a new item has been yielded;
305 * I_END - the whole tree was visited (similar to StopIteration).
306*/
307typedef enum {I_ITEM, I_END} hamt_iter_t;
308
309
310#define HAMT_ARRAY_NODE_SIZE 32
311
312
313typedef struct {
314 PyObject_HEAD
315 PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
316 Py_ssize_t a_count;
317} PyHamtNode_Array;
318
319
320typedef struct {
321 PyObject_VAR_HEAD
322 uint32_t b_bitmap;
323 PyObject *b_array[1];
324} PyHamtNode_Bitmap;
325
326
327typedef struct {
328 PyObject_VAR_HEAD
329 int32_t c_hash;
330 PyObject *c_array[1];
331} PyHamtNode_Collision;
332
333
334static PyHamtNode_Bitmap *_empty_bitmap_node;
335static PyHamtObject *_empty_hamt;
336
337
338static PyHamtObject *
339hamt_alloc(void);
340
341static PyHamtNode *
342hamt_node_assoc(PyHamtNode *node,
343 uint32_t shift, int32_t hash,
344 PyObject *key, PyObject *val, int* added_leaf);
345
346static hamt_without_t
347hamt_node_without(PyHamtNode *node,
348 uint32_t shift, int32_t hash,
349 PyObject *key,
350 PyHamtNode **new_node);
351
352static hamt_find_t
353hamt_node_find(PyHamtNode *node,
354 uint32_t shift, int32_t hash,
355 PyObject *key, PyObject **val);
356
357#ifdef Py_DEBUG
358static int
359hamt_node_dump(PyHamtNode *node,
360 _PyUnicodeWriter *writer, int level);
361#endif
362
363static PyHamtNode *
364hamt_node_array_new(Py_ssize_t);
365
366static PyHamtNode *
367hamt_node_collision_new(int32_t hash, Py_ssize_t size);
368
369static inline Py_ssize_t
370hamt_node_collision_count(PyHamtNode_Collision *node);
371
372
373#ifdef Py_DEBUG
374static void
375_hamt_node_array_validate(void *o)
376{
377 assert(IS_ARRAY_NODE(o));
378 PyHamtNode_Array *node = (PyHamtNode_Array*)(o);
379 Py_ssize_t i = 0, count = 0;
380 for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
381 if (node->a_array[i] != NULL) {
382 count++;
383 }
384 }
385 assert(count == node->a_count);
386}
387
388#define VALIDATE_ARRAY_NODE(NODE) \
389 do { _hamt_node_array_validate(NODE); } while (0);
390#else
391#define VALIDATE_ARRAY_NODE(NODE)
392#endif
393
394
395/* Returns -1 on error */
396static inline int32_t
397hamt_hash(PyObject *o)
398{
399 Py_hash_t hash = PyObject_Hash(o);
400
401#if SIZEOF_PY_HASH_T <= 4
402 return hash;
403#else
404 if (hash == -1) {
405 /* exception */
406 return -1;
407 }
408
409 /* While it's suboptimal to reduce Python's 64 bit hash to
410 32 bits via XOR, it seems that the resulting hash function
411 is good enough (this is also how Long type is hashed in Java.)
412 Storing 10, 100, 1000 Python strings results in a relatively
413 shallow and uniform tree structure.
414
415 Please don't change this hashing algorithm, as there are many
416 tests that test some exact tree shape to cover all code paths.
417 */
418 int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
419 return xored == -1 ? -2 : xored;
420#endif
421}
422
423static inline uint32_t
424hamt_mask(int32_t hash, uint32_t shift)
425{
426 return (((uint32_t)hash >> shift) & 0x01f);
427}
428
429static inline uint32_t
430hamt_bitpos(int32_t hash, uint32_t shift)
431{
432 return (uint32_t)1 << hamt_mask(hash, shift);
433}
434
435static inline uint32_t
436hamt_bitcount(uint32_t i)
437{
Yury Selivanovb7a80d52018-01-23 22:17:04 -0500438 /* We could use native popcount instruction but that would
439 require to either add configure flags to enable SSE4.2
440 support or to detect it dynamically. Otherwise, we have
441 a risk of CPython not working properly on older hardware.
442
443 In practice, there's no observable difference in
444 performance between using a popcount instruction or the
445 following fallback code.
446
447 The algorithm is copied from:
448 https://graphics.stanford.edu/~seander/bithacks.html
449 */
Yury Selivanovf23746a2018-01-22 19:11:18 -0500450 i = i - ((i >> 1) & 0x55555555);
451 i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
Yury Selivanov6ab62922018-01-25 14:18:55 -0500452 return (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
Yury Selivanovf23746a2018-01-22 19:11:18 -0500453}
454
455static inline uint32_t
456hamt_bitindex(uint32_t bitmap, uint32_t bit)
457{
458 return hamt_bitcount(bitmap & (bit - 1));
459}
460
461
462/////////////////////////////////// Dump Helpers
463#ifdef Py_DEBUG
464
465static int
466_hamt_dump_ident(_PyUnicodeWriter *writer, int level)
467{
468 /* Write `' ' * level` to the `writer` */
469 PyObject *str = NULL;
470 PyObject *num = NULL;
471 PyObject *res = NULL;
472 int ret = -1;
473
474 str = PyUnicode_FromString(" ");
475 if (str == NULL) {
476 goto error;
477 }
478
479 num = PyLong_FromLong((long)level);
480 if (num == NULL) {
481 goto error;
482 }
483
484 res = PyNumber_Multiply(str, num);
485 if (res == NULL) {
486 goto error;
487 }
488
489 ret = _PyUnicodeWriter_WriteStr(writer, res);
490
491error:
492 Py_XDECREF(res);
493 Py_XDECREF(str);
494 Py_XDECREF(num);
495 return ret;
496}
497
498static int
499_hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
500{
501 /* A convenient helper combining _PyUnicodeWriter_WriteStr and
502 PyUnicode_FromFormatV.
503 */
504 PyObject* msg;
505 int ret;
506
507 va_list vargs;
508#ifdef HAVE_STDARG_PROTOTYPES
509 va_start(vargs, format);
510#else
511 va_start(vargs);
512#endif
513 msg = PyUnicode_FromFormatV(format, vargs);
514 va_end(vargs);
515
516 if (msg == NULL) {
517 return -1;
518 }
519
520 ret = _PyUnicodeWriter_WriteStr(writer, msg);
521 Py_DECREF(msg);
522 return ret;
523}
524
525#endif /* Py_DEBUG */
526/////////////////////////////////// Bitmap Node
527
528
529static PyHamtNode *
530hamt_node_bitmap_new(Py_ssize_t size)
531{
532 /* Create a new bitmap node of size 'size' */
533
534 PyHamtNode_Bitmap *node;
535 Py_ssize_t i;
536
537 assert(size >= 0);
538 assert(size % 2 == 0);
539
540 if (size == 0 && _empty_bitmap_node != NULL) {
541 Py_INCREF(_empty_bitmap_node);
542 return (PyHamtNode *)_empty_bitmap_node;
543 }
544
545 /* No freelist; allocate a new bitmap node */
546 node = PyObject_GC_NewVar(
547 PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
548 if (node == NULL) {
549 return NULL;
550 }
551
552 Py_SIZE(node) = size;
553
554 for (i = 0; i < size; i++) {
555 node->b_array[i] = NULL;
556 }
557
558 node->b_bitmap = 0;
559
560 _PyObject_GC_TRACK(node);
561
562 if (size == 0 && _empty_bitmap_node == NULL) {
563 /* Since bitmap nodes are immutable, we can cache the instance
564 for size=0 and reuse it whenever we need an empty bitmap node.
565 */
566 _empty_bitmap_node = node;
567 Py_INCREF(_empty_bitmap_node);
568 }
569
570 return (PyHamtNode *)node;
571}
572
573static inline Py_ssize_t
574hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
575{
576 return Py_SIZE(node) / 2;
577}
578
579static PyHamtNode_Bitmap *
580hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
581{
582 /* Clone a bitmap node; return a new one with the same child notes. */
583
584 PyHamtNode_Bitmap *clone;
585 Py_ssize_t i;
586
587 clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
588 if (clone == NULL) {
589 return NULL;
590 }
591
592 for (i = 0; i < Py_SIZE(node); i++) {
593 Py_XINCREF(node->b_array[i]);
594 clone->b_array[i] = node->b_array[i];
595 }
596
597 clone->b_bitmap = node->b_bitmap;
598 return clone;
599}
600
601static PyHamtNode_Bitmap *
602hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
603{
604 assert(bit & o->b_bitmap);
605 assert(hamt_node_bitmap_count(o) > 1);
606
607 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
608 Py_SIZE(o) - 2);
609 if (new == NULL) {
610 return NULL;
611 }
612
613 uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
614 uint32_t key_idx = 2 * idx;
615 uint32_t val_idx = key_idx + 1;
616 uint32_t i;
617
618 for (i = 0; i < key_idx; i++) {
619 Py_XINCREF(o->b_array[i]);
620 new->b_array[i] = o->b_array[i];
621 }
622
Yury Selivanovb647d702018-01-29 13:31:37 -0500623 assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
624 for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
Yury Selivanovf23746a2018-01-22 19:11:18 -0500625 Py_XINCREF(o->b_array[i]);
626 new->b_array[i - 2] = o->b_array[i];
627 }
628
629 new->b_bitmap = o->b_bitmap & ~bit;
630 return new;
631}
632
633static PyHamtNode *
634hamt_node_new_bitmap_or_collision(uint32_t shift,
635 PyObject *key1, PyObject *val1,
636 int32_t key2_hash,
637 PyObject *key2, PyObject *val2)
638{
639 /* Helper method. Creates a new node for key1/val and key2/val2
640 pairs.
641
642 If key1 hash is equal to the hash of key2, a Collision node
643 will be created. If they are not equal, a Bitmap node is
644 created.
645 */
646
647 int32_t key1_hash = hamt_hash(key1);
648 if (key1_hash == -1) {
649 return NULL;
650 }
651
652 if (key1_hash == key2_hash) {
653 PyHamtNode_Collision *n;
654 n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
655 if (n == NULL) {
656 return NULL;
657 }
658
659 Py_INCREF(key1);
660 n->c_array[0] = key1;
661 Py_INCREF(val1);
662 n->c_array[1] = val1;
663
664 Py_INCREF(key2);
665 n->c_array[2] = key2;
666 Py_INCREF(val2);
667 n->c_array[3] = val2;
668
669 return (PyHamtNode *)n;
670 }
671 else {
672 int added_leaf = 0;
673 PyHamtNode *n = hamt_node_bitmap_new(0);
674 if (n == NULL) {
675 return NULL;
676 }
677
678 PyHamtNode *n2 = hamt_node_assoc(
679 n, shift, key1_hash, key1, val1, &added_leaf);
680 Py_DECREF(n);
681 if (n2 == NULL) {
682 return NULL;
683 }
684
685 n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
686 Py_DECREF(n2);
687 if (n == NULL) {
688 return NULL;
689 }
690
691 return n;
692 }
693}
694
695static PyHamtNode *
696hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
697 uint32_t shift, int32_t hash,
698 PyObject *key, PyObject *val, int* added_leaf)
699{
700 /* assoc operation for bitmap nodes.
701
702 Return: a new node, or self if key/val already is in the
703 collection.
704
705 'added_leaf' is later used in '_PyHamt_Assoc' to determine if
706 `hamt.set(key, val)` increased the size of the collection.
707 */
708
709 uint32_t bit = hamt_bitpos(hash, shift);
710 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
711
712 /* Bitmap node layout:
713
714 +------+------+------+------+ --- +------+------+
715 | key1 | val1 | key2 | val2 | ... | keyN | valN |
716 +------+------+------+------+ --- +------+------+
717 where `N < Py_SIZE(node)`.
718
719 The `node->b_bitmap` field is a bitmap. For a given
720 `(shift, hash)` pair we can determine:
721
722 - If this node has the corresponding key/val slots.
723 - The index of key/val slots.
724 */
725
726 if (self->b_bitmap & bit) {
727 /* The key is set in this node */
728
729 uint32_t key_idx = 2 * idx;
730 uint32_t val_idx = key_idx + 1;
731
Serhiy Storchakabfe4fd52018-02-09 17:31:26 +0200732 assert(val_idx < (size_t)Py_SIZE(self));
Yury Selivanovf23746a2018-01-22 19:11:18 -0500733
734 PyObject *key_or_null = self->b_array[key_idx];
735 PyObject *val_or_node = self->b_array[val_idx];
736
737 if (key_or_null == NULL) {
738 /* key is NULL. This means that we have a few keys
739 that have the same (hash, shift) pair. */
740
741 assert(val_or_node != NULL);
742
743 PyHamtNode *sub_node = hamt_node_assoc(
744 (PyHamtNode *)val_or_node,
745 shift + 5, hash, key, val, added_leaf);
746 if (sub_node == NULL) {
747 return NULL;
748 }
749
750 if (val_or_node == (PyObject *)sub_node) {
751 Py_DECREF(sub_node);
752 Py_INCREF(self);
753 return (PyHamtNode *)self;
754 }
755
756 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
757 if (ret == NULL) {
758 return NULL;
759 }
760 Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
761 return (PyHamtNode *)ret;
762 }
763
764 assert(key != NULL);
765 /* key is not NULL. This means that we have only one other
766 key in this collection that matches our hash for this shift. */
767
768 int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
769 if (comp_err < 0) { /* exception in __eq__ */
770 return NULL;
771 }
772 if (comp_err == 1) { /* key == key_or_null */
773 if (val == val_or_node) {
774 /* we already have the same key/val pair; return self. */
775 Py_INCREF(self);
776 return (PyHamtNode *)self;
777 }
778
779 /* We're setting a new value for the key we had before.
780 Make a new bitmap node with a replaced value, and return it. */
781 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
782 if (ret == NULL) {
783 return NULL;
784 }
785 Py_INCREF(val);
786 Py_SETREF(ret->b_array[val_idx], val);
787 return (PyHamtNode *)ret;
788 }
789
790 /* It's a new key, and it has the same index as *one* another key.
791 We have a collision. We need to create a new node which will
792 combine the existing key and the key we're adding.
793
794 `hamt_node_new_bitmap_or_collision` will either create a new
795 Collision node if the keys have identical hashes, or
796 a new Bitmap node.
797 */
798 PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
799 shift + 5,
800 key_or_null, val_or_node, /* existing key/val */
801 hash,
802 key, val /* new key/val */
803 );
804 if (sub_node == NULL) {
805 return NULL;
806 }
807
808 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
809 if (ret == NULL) {
810 Py_DECREF(sub_node);
811 return NULL;
812 }
813 Py_SETREF(ret->b_array[key_idx], NULL);
814 Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
815
816 *added_leaf = 1;
817 return (PyHamtNode *)ret;
818 }
819 else {
820 /* There was no key before with the same (shift,hash). */
821
822 uint32_t n = hamt_bitcount(self->b_bitmap);
823
824 if (n >= 16) {
825 /* When we have a situation where we want to store more
826 than 16 nodes at one level of the tree, we no longer
827 want to use the Bitmap node with bitmap encoding.
828
829 Instead we start using an Array node, which has
830 simpler (faster) implementation at the expense of
831 having prealocated 32 pointers for its keys/values
832 pairs.
833
834 Small hamt objects (<30 keys) usually don't have any
835 Array nodes at all. Betwen ~30 and ~400 keys hamt
836 objects usually have one Array node, and usually it's
837 a root node.
838 */
839
840 uint32_t jdx = hamt_mask(hash, shift);
841 /* 'jdx' is the index of where the new key should be added
842 in the new Array node we're about to create. */
843
844 PyHamtNode *empty = NULL;
845 PyHamtNode_Array *new_node = NULL;
846 PyHamtNode *res = NULL;
847
848 /* Create a new Array node. */
849 new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
850 if (new_node == NULL) {
851 goto fin;
852 }
853
854 /* Create an empty bitmap node for the next
855 hamt_node_assoc call. */
856 empty = hamt_node_bitmap_new(0);
857 if (empty == NULL) {
858 goto fin;
859 }
860
861 /* Make a new bitmap node for the key/val we're adding.
862 Set that bitmap node to new-array-node[jdx]. */
863 new_node->a_array[jdx] = hamt_node_assoc(
864 empty, shift + 5, hash, key, val, added_leaf);
865 if (new_node->a_array[jdx] == NULL) {
866 goto fin;
867 }
868
869 /* Copy existing key/value pairs from the current Bitmap
870 node to the new Array node we've just created. */
871 Py_ssize_t i, j;
872 for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
873 if (((self->b_bitmap >> i) & 1) != 0) {
874 /* Ensure we don't accidentally override `jdx` element
875 we set few lines above.
876 */
877 assert(new_node->a_array[i] == NULL);
878
879 if (self->b_array[j] == NULL) {
880 new_node->a_array[i] =
881 (PyHamtNode *)self->b_array[j + 1];
882 Py_INCREF(new_node->a_array[i]);
883 }
884 else {
885 int32_t rehash = hamt_hash(self->b_array[j]);
886 if (rehash == -1) {
887 goto fin;
888 }
889
890 new_node->a_array[i] = hamt_node_assoc(
891 empty, shift + 5,
892 rehash,
893 self->b_array[j],
894 self->b_array[j + 1],
895 added_leaf);
896
897 if (new_node->a_array[i] == NULL) {
898 goto fin;
899 }
900 }
901 j += 2;
902 }
903 }
904
905 VALIDATE_ARRAY_NODE(new_node)
906
907 /* That's it! */
908 res = (PyHamtNode *)new_node;
909
910 fin:
911 Py_XDECREF(empty);
912 if (res == NULL) {
913 Py_XDECREF(new_node);
914 }
915 return res;
916 }
917 else {
918 /* We have less than 16 keys at this level; let's just
919 create a new bitmap node out of this node with the
920 new key/val pair added. */
921
922 uint32_t key_idx = 2 * idx;
923 uint32_t val_idx = key_idx + 1;
Yury Selivanovb647d702018-01-29 13:31:37 -0500924 uint32_t i;
Yury Selivanovf23746a2018-01-22 19:11:18 -0500925
926 *added_leaf = 1;
927
928 /* Allocate new Bitmap node which can have one more key/val
929 pair in addition to what we have already. */
930 PyHamtNode_Bitmap *new_node =
931 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
932 if (new_node == NULL) {
933 return NULL;
934 }
935
936 /* Copy all keys/values that will be before the new key/value
937 we are adding. */
938 for (i = 0; i < key_idx; i++) {
939 Py_XINCREF(self->b_array[i]);
940 new_node->b_array[i] = self->b_array[i];
941 }
942
943 /* Set the new key/value to the new Bitmap node. */
944 Py_INCREF(key);
945 new_node->b_array[key_idx] = key;
946 Py_INCREF(val);
947 new_node->b_array[val_idx] = val;
948
949 /* Copy all keys/values that will be after the new key/value
950 we are adding. */
Yury Selivanovb647d702018-01-29 13:31:37 -0500951 assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
952 for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
Yury Selivanovf23746a2018-01-22 19:11:18 -0500953 Py_XINCREF(self->b_array[i]);
954 new_node->b_array[i + 2] = self->b_array[i];
955 }
956
957 new_node->b_bitmap = self->b_bitmap | bit;
958 return (PyHamtNode *)new_node;
959 }
960 }
961}
962
963static hamt_without_t
964hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
965 uint32_t shift, int32_t hash,
966 PyObject *key,
967 PyHamtNode **new_node)
968{
969 uint32_t bit = hamt_bitpos(hash, shift);
970 if ((self->b_bitmap & bit) == 0) {
971 return W_NOT_FOUND;
972 }
973
974 uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
975
976 uint32_t key_idx = 2 * idx;
977 uint32_t val_idx = key_idx + 1;
978
979 PyObject *key_or_null = self->b_array[key_idx];
980 PyObject *val_or_node = self->b_array[val_idx];
981
982 if (key_or_null == NULL) {
983 /* key == NULL means that 'value' is another tree node. */
984
985 PyHamtNode *sub_node = NULL;
986
987 hamt_without_t res = hamt_node_without(
988 (PyHamtNode *)val_or_node,
989 shift + 5, hash, key, &sub_node);
990
991 switch (res) {
992 case W_EMPTY:
993 /* It's impossible for us to receive a W_EMPTY here:
994
995 - Array nodes are converted to Bitmap nodes when
996 we delete 16th item from them;
997
998 - Collision nodes are converted to Bitmap when
999 there is one item in them;
1000
1001 - Bitmap node's without() inlines single-item
1002 sub-nodes.
1003
1004 So in no situation we can have a single-item
1005 Bitmap child of another Bitmap node.
1006 */
1007 Py_UNREACHABLE();
1008
1009 case W_NEWNODE: {
1010 assert(sub_node != NULL);
1011
1012 if (IS_BITMAP_NODE(sub_node)) {
1013 PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1014 if (hamt_node_bitmap_count(sub_tree) == 1 &&
1015 sub_tree->b_array[0] != NULL)
1016 {
1017 /* A bitmap node with one key/value pair. Just
1018 merge it into this node.
1019
1020 Note that we don't inline Bitmap nodes that
1021 have a NULL key -- those nodes point to another
1022 tree level, and we cannot simply move tree levels
1023 up or down.
1024 */
1025
1026 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1027 if (clone == NULL) {
1028 Py_DECREF(sub_node);
1029 return W_ERROR;
1030 }
1031
1032 PyObject *key = sub_tree->b_array[0];
1033 PyObject *val = sub_tree->b_array[1];
1034
1035 Py_INCREF(key);
1036 Py_XSETREF(clone->b_array[key_idx], key);
1037 Py_INCREF(val);
1038 Py_SETREF(clone->b_array[val_idx], val);
1039
1040 Py_DECREF(sub_tree);
1041
1042 *new_node = (PyHamtNode *)clone;
1043 return W_NEWNODE;
1044 }
1045 }
1046
1047#ifdef Py_DEBUG
1048 /* Ensure that Collision.without implementation
1049 converts to Bitmap nodes itself.
1050 */
1051 if (IS_COLLISION_NODE(sub_node)) {
1052 assert(hamt_node_collision_count(
1053 (PyHamtNode_Collision*)sub_node) > 1);
1054 }
1055#endif
1056
1057 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
Yury Selivanov0bad4d62018-01-23 16:26:07 -05001058 if (clone == NULL) {
1059 return W_ERROR;
1060 }
1061
Yury Selivanovf23746a2018-01-22 19:11:18 -05001062 Py_SETREF(clone->b_array[val_idx],
1063 (PyObject *)sub_node); /* borrow */
1064
1065 *new_node = (PyHamtNode *)clone;
1066 return W_NEWNODE;
1067 }
1068
1069 case W_ERROR:
1070 case W_NOT_FOUND:
1071 assert(sub_node == NULL);
1072 return res;
1073
1074 default:
1075 Py_UNREACHABLE();
1076 }
1077 }
1078 else {
1079 /* We have a regular key/value pair */
1080
1081 int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1082 if (cmp < 0) {
1083 return W_ERROR;
1084 }
1085 if (cmp == 0) {
1086 return W_NOT_FOUND;
1087 }
1088
1089 if (hamt_node_bitmap_count(self) == 1) {
1090 return W_EMPTY;
1091 }
1092
1093 *new_node = (PyHamtNode *)
1094 hamt_node_bitmap_clone_without(self, bit);
1095 if (*new_node == NULL) {
1096 return W_ERROR;
1097 }
1098
1099 return W_NEWNODE;
1100 }
1101}
1102
1103static hamt_find_t
1104hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1105 uint32_t shift, int32_t hash,
1106 PyObject *key, PyObject **val)
1107{
1108 /* Lookup a key in a Bitmap node. */
1109
1110 uint32_t bit = hamt_bitpos(hash, shift);
1111 uint32_t idx;
1112 uint32_t key_idx;
1113 uint32_t val_idx;
1114 PyObject *key_or_null;
1115 PyObject *val_or_node;
1116 int comp_err;
1117
1118 if ((self->b_bitmap & bit) == 0) {
1119 return F_NOT_FOUND;
1120 }
1121
1122 idx = hamt_bitindex(self->b_bitmap, bit);
Yury Selivanovf23746a2018-01-22 19:11:18 -05001123 key_idx = idx * 2;
1124 val_idx = key_idx + 1;
1125
Serhiy Storchakabfe4fd52018-02-09 17:31:26 +02001126 assert(val_idx < (size_t)Py_SIZE(self));
Yury Selivanovf23746a2018-01-22 19:11:18 -05001127
1128 key_or_null = self->b_array[key_idx];
1129 val_or_node = self->b_array[val_idx];
1130
1131 if (key_or_null == NULL) {
1132 /* There are a few keys that have the same hash at the current shift
1133 that match our key. Dispatch the lookup further down the tree. */
1134 assert(val_or_node != NULL);
1135 return hamt_node_find((PyHamtNode *)val_or_node,
1136 shift + 5, hash, key, val);
1137 }
1138
1139 /* We have only one key -- a potential match. Let's compare if the
1140 key we are looking at is equal to the key we are looking for. */
1141 assert(key != NULL);
1142 comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1143 if (comp_err < 0) { /* exception in __eq__ */
1144 return F_ERROR;
1145 }
1146 if (comp_err == 1) { /* key == key_or_null */
1147 *val = val_or_node;
1148 return F_FOUND;
1149 }
1150
1151 return F_NOT_FOUND;
1152}
1153
1154static int
1155hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1156{
1157 /* Bitmap's tp_traverse */
1158
1159 Py_ssize_t i;
1160
1161 for (i = Py_SIZE(self); --i >= 0; ) {
1162 Py_VISIT(self->b_array[i]);
1163 }
1164
1165 return 0;
1166}
1167
1168static void
1169hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1170{
1171 /* Bitmap's tp_dealloc */
1172
1173 Py_ssize_t len = Py_SIZE(self);
1174 Py_ssize_t i;
1175
1176 PyObject_GC_UnTrack(self);
1177 Py_TRASHCAN_SAFE_BEGIN(self)
1178
1179 if (len > 0) {
1180 i = len;
1181 while (--i >= 0) {
1182 Py_XDECREF(self->b_array[i]);
1183 }
1184 }
1185
1186 Py_TYPE(self)->tp_free((PyObject *)self);
1187 Py_TRASHCAN_SAFE_END(self)
1188}
1189
1190#ifdef Py_DEBUG
1191static int
1192hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1193 _PyUnicodeWriter *writer, int level)
1194{
1195 /* Debug build: __dump__() method implementation for Bitmap nodes. */
1196
1197 Py_ssize_t i;
1198 PyObject *tmp1;
1199 PyObject *tmp2;
1200
1201 if (_hamt_dump_ident(writer, level + 1)) {
1202 goto error;
1203 }
1204
1205 if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1206 Py_SIZE(node), Py_SIZE(node) / 2))
1207 {
1208 goto error;
1209 }
1210
1211 tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1212 if (tmp1 == NULL) {
1213 goto error;
1214 }
1215 tmp2 = _PyLong_Format(tmp1, 2);
1216 Py_DECREF(tmp1);
1217 if (tmp2 == NULL) {
1218 goto error;
1219 }
1220 if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1221 Py_DECREF(tmp2);
1222 goto error;
1223 }
1224 Py_DECREF(tmp2);
1225
1226 for (i = 0; i < Py_SIZE(node); i += 2) {
1227 PyObject *key_or_null = node->b_array[i];
1228 PyObject *val_or_node = node->b_array[i + 1];
1229
1230 if (_hamt_dump_ident(writer, level + 2)) {
1231 goto error;
1232 }
1233
1234 if (key_or_null == NULL) {
1235 if (_hamt_dump_format(writer, "NULL:\n")) {
1236 goto error;
1237 }
1238
1239 if (hamt_node_dump((PyHamtNode *)val_or_node,
1240 writer, level + 2))
1241 {
1242 goto error;
1243 }
1244 }
1245 else {
1246 if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1247 val_or_node))
1248 {
1249 goto error;
1250 }
1251 }
1252
1253 if (_hamt_dump_format(writer, "\n")) {
1254 goto error;
1255 }
1256 }
1257
1258 return 0;
1259error:
1260 return -1;
1261}
1262#endif /* Py_DEBUG */
1263
1264
1265/////////////////////////////////// Collision Node
1266
1267
1268static PyHamtNode *
1269hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1270{
1271 /* Create a new Collision node. */
1272
1273 PyHamtNode_Collision *node;
1274 Py_ssize_t i;
1275
1276 assert(size >= 4);
1277 assert(size % 2 == 0);
1278
1279 node = PyObject_GC_NewVar(
1280 PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1281 if (node == NULL) {
1282 return NULL;
1283 }
1284
1285 for (i = 0; i < size; i++) {
1286 node->c_array[i] = NULL;
1287 }
1288
1289 Py_SIZE(node) = size;
1290 node->c_hash = hash;
1291
1292 _PyObject_GC_TRACK(node);
1293
1294 return (PyHamtNode *)node;
1295}
1296
1297static hamt_find_t
1298hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1299 Py_ssize_t *idx)
1300{
1301 /* Lookup `key` in the Collision node `self`. Set the index of the
1302 found key to 'idx'. */
1303
1304 Py_ssize_t i;
1305 PyObject *el;
1306
1307 for (i = 0; i < Py_SIZE(self); i += 2) {
1308 el = self->c_array[i];
1309
1310 assert(el != NULL);
1311 int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1312 if (cmp < 0) {
1313 return F_ERROR;
1314 }
1315 if (cmp == 1) {
1316 *idx = i;
1317 return F_FOUND;
1318 }
1319 }
1320
1321 return F_NOT_FOUND;
1322}
1323
1324static PyHamtNode *
1325hamt_node_collision_assoc(PyHamtNode_Collision *self,
1326 uint32_t shift, int32_t hash,
1327 PyObject *key, PyObject *val, int* added_leaf)
1328{
1329 /* Set a new key to this level (currently a Collision node)
1330 of the tree. */
1331
1332 if (hash == self->c_hash) {
1333 /* The hash of the 'key' we are adding matches the hash of
1334 other keys in this Collision node. */
1335
1336 Py_ssize_t key_idx = -1;
1337 hamt_find_t found;
1338 PyHamtNode_Collision *new_node;
1339 Py_ssize_t i;
1340
1341 /* Let's try to lookup the new 'key', maybe we already have it. */
1342 found = hamt_node_collision_find_index(self, key, &key_idx);
1343 switch (found) {
1344 case F_ERROR:
1345 /* Exception. */
1346 return NULL;
1347
1348 case F_NOT_FOUND:
1349 /* This is a totally new key. Clone the current node,
1350 add a new key/value to the cloned node. */
1351
1352 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1353 self->c_hash, Py_SIZE(self) + 2);
1354 if (new_node == NULL) {
1355 return NULL;
1356 }
1357
1358 for (i = 0; i < Py_SIZE(self); i++) {
1359 Py_INCREF(self->c_array[i]);
1360 new_node->c_array[i] = self->c_array[i];
1361 }
1362
1363 Py_INCREF(key);
1364 new_node->c_array[i] = key;
1365 Py_INCREF(val);
1366 new_node->c_array[i + 1] = val;
1367
1368 *added_leaf = 1;
1369 return (PyHamtNode *)new_node;
1370
1371 case F_FOUND:
1372 /* There's a key which is equal to the key we are adding. */
1373
1374 assert(key_idx >= 0);
1375 assert(key_idx < Py_SIZE(self));
1376 Py_ssize_t val_idx = key_idx + 1;
1377
1378 if (self->c_array[val_idx] == val) {
1379 /* We're setting a key/value pair that's already set. */
1380 Py_INCREF(self);
1381 return (PyHamtNode *)self;
1382 }
1383
1384 /* We need to replace old value for the key
1385 with a new value. Create a new Collision node.*/
1386 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1387 self->c_hash, Py_SIZE(self));
1388 if (new_node == NULL) {
1389 return NULL;
1390 }
1391
1392 /* Copy all elements of the old node to the new one. */
1393 for (i = 0; i < Py_SIZE(self); i++) {
1394 Py_INCREF(self->c_array[i]);
1395 new_node->c_array[i] = self->c_array[i];
1396 }
1397
1398 /* Replace the old value with the new value for the our key. */
1399 Py_DECREF(new_node->c_array[val_idx]);
1400 Py_INCREF(val);
1401 new_node->c_array[val_idx] = val;
1402
1403 return (PyHamtNode *)new_node;
1404
1405 default:
1406 Py_UNREACHABLE();
1407 }
1408 }
1409 else {
1410 /* The hash of the new key is different from the hash that
1411 all keys of this Collision node have.
1412
1413 Create a Bitmap node inplace with two children:
1414 key/value pair that we're adding, and the Collision node
1415 we're replacing on this tree level.
1416 */
1417
1418 PyHamtNode_Bitmap *new_node;
1419 PyHamtNode *assoc_res;
1420
1421 new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1422 if (new_node == NULL) {
1423 return NULL;
1424 }
1425 new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1426 Py_INCREF(self);
1427 new_node->b_array[1] = (PyObject*) self;
1428
1429 assoc_res = hamt_node_bitmap_assoc(
1430 new_node, shift, hash, key, val, added_leaf);
1431 Py_DECREF(new_node);
1432 return assoc_res;
1433 }
1434}
1435
1436static inline Py_ssize_t
1437hamt_node_collision_count(PyHamtNode_Collision *node)
1438{
1439 return Py_SIZE(node) / 2;
1440}
1441
1442static hamt_without_t
1443hamt_node_collision_without(PyHamtNode_Collision *self,
1444 uint32_t shift, int32_t hash,
1445 PyObject *key,
1446 PyHamtNode **new_node)
1447{
1448 if (hash != self->c_hash) {
1449 return W_NOT_FOUND;
1450 }
1451
1452 Py_ssize_t key_idx = -1;
1453 hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1454
1455 switch (found) {
1456 case F_ERROR:
1457 return W_ERROR;
1458
1459 case F_NOT_FOUND:
1460 return W_NOT_FOUND;
1461
1462 case F_FOUND:
1463 assert(key_idx >= 0);
1464 assert(key_idx < Py_SIZE(self));
1465
1466 Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1467
1468 if (new_count == 0) {
1469 /* The node has only one key/value pair and it's for the
1470 key we're trying to delete. So a new node will be empty
1471 after the removal.
1472 */
1473 return W_EMPTY;
1474 }
1475
1476 if (new_count == 1) {
1477 /* The node has two keys, and after deletion the
1478 new Collision node would have one. Collision nodes
Dmitry Alimov01a0cb82018-02-02 05:59:48 +03001479 with one key shouldn't exist, so convert it to a
Yury Selivanovf23746a2018-01-22 19:11:18 -05001480 Bitmap node.
1481 */
1482 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1483 hamt_node_bitmap_new(2);
1484 if (node == NULL) {
1485 return W_ERROR;
1486 }
1487
1488 if (key_idx == 0) {
1489 Py_INCREF(self->c_array[2]);
1490 node->b_array[0] = self->c_array[2];
1491 Py_INCREF(self->c_array[3]);
1492 node->b_array[1] = self->c_array[3];
1493 }
1494 else {
1495 assert(key_idx == 2);
1496 Py_INCREF(self->c_array[0]);
1497 node->b_array[0] = self->c_array[0];
1498 Py_INCREF(self->c_array[1]);
1499 node->b_array[1] = self->c_array[1];
1500 }
1501
1502 node->b_bitmap = hamt_bitpos(hash, shift);
1503
1504 *new_node = (PyHamtNode *)node;
1505 return W_NEWNODE;
1506 }
1507
1508 /* Allocate a new Collision node with capacity for one
1509 less key/value pair */
1510 PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1511 hamt_node_collision_new(
1512 self->c_hash, Py_SIZE(self) - 2);
1513
1514 /* Copy all other keys from `self` to `new` */
1515 Py_ssize_t i;
1516 for (i = 0; i < key_idx; i++) {
1517 Py_INCREF(self->c_array[i]);
1518 new->c_array[i] = self->c_array[i];
1519 }
1520 for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1521 Py_INCREF(self->c_array[i]);
1522 new->c_array[i - 2] = self->c_array[i];
1523 }
1524
1525 *new_node = (PyHamtNode*)new;
1526 return W_NEWNODE;
1527
1528 default:
1529 Py_UNREACHABLE();
1530 }
1531}
1532
1533static hamt_find_t
1534hamt_node_collision_find(PyHamtNode_Collision *self,
1535 uint32_t shift, int32_t hash,
1536 PyObject *key, PyObject **val)
1537{
1538 /* Lookup `key` in the Collision node `self`. Set the value
1539 for the found key to 'val'. */
1540
1541 Py_ssize_t idx = -1;
1542 hamt_find_t res;
1543
1544 res = hamt_node_collision_find_index(self, key, &idx);
1545 if (res == F_ERROR || res == F_NOT_FOUND) {
1546 return res;
1547 }
1548
1549 assert(idx >= 0);
1550 assert(idx + 1 < Py_SIZE(self));
1551
1552 *val = self->c_array[idx + 1];
1553 assert(*val != NULL);
1554
1555 return F_FOUND;
1556}
1557
1558
1559static int
1560hamt_node_collision_traverse(PyHamtNode_Collision *self,
1561 visitproc visit, void *arg)
1562{
1563 /* Collision's tp_traverse */
1564
1565 Py_ssize_t i;
1566
1567 for (i = Py_SIZE(self); --i >= 0; ) {
1568 Py_VISIT(self->c_array[i]);
1569 }
1570
1571 return 0;
1572}
1573
1574static void
1575hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1576{
1577 /* Collision's tp_dealloc */
1578
1579 Py_ssize_t len = Py_SIZE(self);
1580
1581 PyObject_GC_UnTrack(self);
1582 Py_TRASHCAN_SAFE_BEGIN(self)
1583
1584 if (len > 0) {
1585
1586 while (--len >= 0) {
1587 Py_XDECREF(self->c_array[len]);
1588 }
1589 }
1590
1591 Py_TYPE(self)->tp_free((PyObject *)self);
1592 Py_TRASHCAN_SAFE_END(self)
1593}
1594
1595#ifdef Py_DEBUG
1596static int
1597hamt_node_collision_dump(PyHamtNode_Collision *node,
1598 _PyUnicodeWriter *writer, int level)
1599{
1600 /* Debug build: __dump__() method implementation for Collision nodes. */
1601
1602 Py_ssize_t i;
1603
1604 if (_hamt_dump_ident(writer, level + 1)) {
1605 goto error;
1606 }
1607
1608 if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1609 Py_SIZE(node), node))
1610 {
1611 goto error;
1612 }
1613
1614 for (i = 0; i < Py_SIZE(node); i += 2) {
1615 PyObject *key = node->c_array[i];
1616 PyObject *val = node->c_array[i + 1];
1617
1618 if (_hamt_dump_ident(writer, level + 2)) {
1619 goto error;
1620 }
1621
1622 if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1623 goto error;
1624 }
1625 }
1626
1627 return 0;
1628error:
1629 return -1;
1630}
1631#endif /* Py_DEBUG */
1632
1633
1634/////////////////////////////////// Array Node
1635
1636
1637static PyHamtNode *
1638hamt_node_array_new(Py_ssize_t count)
1639{
1640 Py_ssize_t i;
1641
1642 PyHamtNode_Array *node = PyObject_GC_New(
1643 PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1644 if (node == NULL) {
1645 return NULL;
1646 }
1647
1648 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1649 node->a_array[i] = NULL;
1650 }
1651
1652 node->a_count = count;
1653
1654 _PyObject_GC_TRACK(node);
1655 return (PyHamtNode *)node;
1656}
1657
1658static PyHamtNode_Array *
1659hamt_node_array_clone(PyHamtNode_Array *node)
1660{
1661 PyHamtNode_Array *clone;
1662 Py_ssize_t i;
1663
1664 VALIDATE_ARRAY_NODE(node)
1665
1666 /* Create a new Array node. */
1667 clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1668 if (clone == NULL) {
1669 return NULL;
1670 }
1671
1672 /* Copy all elements from the current Array node to the new one. */
1673 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1674 Py_XINCREF(node->a_array[i]);
1675 clone->a_array[i] = node->a_array[i];
1676 }
1677
1678 VALIDATE_ARRAY_NODE(clone)
1679 return clone;
1680}
1681
1682static PyHamtNode *
1683hamt_node_array_assoc(PyHamtNode_Array *self,
1684 uint32_t shift, int32_t hash,
1685 PyObject *key, PyObject *val, int* added_leaf)
1686{
1687 /* Set a new key to this level (currently a Collision node)
1688 of the tree.
1689
1690 Array nodes don't store values, they can only point to
1691 other nodes. They are simple arrays of 32 BaseNode pointers/
1692 */
1693
1694 uint32_t idx = hamt_mask(hash, shift);
1695 PyHamtNode *node = self->a_array[idx];
1696 PyHamtNode *child_node;
1697 PyHamtNode_Array *new_node;
1698 Py_ssize_t i;
1699
1700 if (node == NULL) {
1701 /* There's no child node for the given hash. Create a new
1702 Bitmap node for this key. */
1703
1704 PyHamtNode_Bitmap *empty = NULL;
1705
1706 /* Get an empty Bitmap node to work with. */
1707 empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1708 if (empty == NULL) {
1709 return NULL;
1710 }
1711
1712 /* Set key/val to the newly created empty Bitmap, thus
1713 creating a new Bitmap node with our key/value pair. */
1714 child_node = hamt_node_bitmap_assoc(
1715 empty,
1716 shift + 5, hash, key, val, added_leaf);
1717 Py_DECREF(empty);
1718 if (child_node == NULL) {
1719 return NULL;
1720 }
1721
1722 /* Create a new Array node. */
1723 new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1724 if (new_node == NULL) {
1725 Py_DECREF(child_node);
1726 return NULL;
1727 }
1728
1729 /* Copy all elements from the current Array node to the
1730 new one. */
1731 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1732 Py_XINCREF(self->a_array[i]);
1733 new_node->a_array[i] = self->a_array[i];
1734 }
1735
1736 assert(new_node->a_array[idx] == NULL);
1737 new_node->a_array[idx] = child_node; /* borrow */
1738 VALIDATE_ARRAY_NODE(new_node)
1739 }
1740 else {
1741 /* There's a child node for the given hash.
1742 Set the key to it./ */
1743 child_node = hamt_node_assoc(
1744 node, shift + 5, hash, key, val, added_leaf);
1745 if (child_node == (PyHamtNode *)self) {
1746 Py_DECREF(child_node);
1747 return (PyHamtNode *)self;
1748 }
1749
1750 new_node = hamt_node_array_clone(self);
1751 if (new_node == NULL) {
1752 Py_DECREF(child_node);
1753 return NULL;
1754 }
1755
1756 Py_SETREF(new_node->a_array[idx], child_node); /* borrow */
1757 VALIDATE_ARRAY_NODE(new_node)
1758 }
1759
1760 return (PyHamtNode *)new_node;
1761}
1762
1763static hamt_without_t
1764hamt_node_array_without(PyHamtNode_Array *self,
1765 uint32_t shift, int32_t hash,
1766 PyObject *key,
1767 PyHamtNode **new_node)
1768{
1769 uint32_t idx = hamt_mask(hash, shift);
1770 PyHamtNode *node = self->a_array[idx];
1771
1772 if (node == NULL) {
1773 return W_NOT_FOUND;
1774 }
1775
1776 PyHamtNode *sub_node = NULL;
1777 hamt_without_t res = hamt_node_without(
1778 (PyHamtNode *)node,
1779 shift + 5, hash, key, &sub_node);
1780
1781 switch (res) {
1782 case W_NOT_FOUND:
1783 case W_ERROR:
1784 assert(sub_node == NULL);
1785 return res;
1786
1787 case W_NEWNODE: {
1788 /* We need to replace a node at the `idx` index.
1789 Clone this node and replace.
1790 */
1791 assert(sub_node != NULL);
1792
1793 PyHamtNode_Array *clone = hamt_node_array_clone(self);
1794 if (clone == NULL) {
1795 Py_DECREF(sub_node);
1796 return W_ERROR;
1797 }
1798
1799 Py_SETREF(clone->a_array[idx], sub_node); /* borrow */
1800 *new_node = (PyHamtNode*)clone; /* borrow */
1801 return W_NEWNODE;
1802 }
1803
1804 case W_EMPTY: {
1805 assert(sub_node == NULL);
1806 /* We need to remove a node at the `idx` index.
1807 Calculate the size of the replacement Array node.
1808 */
1809 Py_ssize_t new_count = self->a_count - 1;
1810
1811 if (new_count == 0) {
1812 return W_EMPTY;
1813 }
1814
1815 if (new_count >= 16) {
1816 /* We convert Bitmap nodes to Array nodes, when a
1817 Bitmap node needs to store more than 15 key/value
1818 pairs. So we will create a new Array node if we
1819 the number of key/values after deletion is still
1820 greater than 15.
1821 */
1822
1823 PyHamtNode_Array *new = hamt_node_array_clone(self);
1824 if (new == NULL) {
1825 return W_ERROR;
1826 }
1827 new->a_count = new_count;
1828 Py_CLEAR(new->a_array[idx]);
1829
1830 *new_node = (PyHamtNode*)new; /* borrow */
1831 return W_NEWNODE;
1832 }
1833
1834 /* New Array node would have less than 16 key/value
1835 pairs. We need to create a replacement Bitmap node. */
1836
1837 Py_ssize_t bitmap_size = new_count * 2;
1838 uint32_t bitmap = 0;
1839
1840 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1841 hamt_node_bitmap_new(bitmap_size);
1842 if (new == NULL) {
1843 return W_ERROR;
1844 }
1845
1846 Py_ssize_t new_i = 0;
1847 for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1848 if (i == idx) {
1849 /* Skip the node we are deleting. */
1850 continue;
1851 }
1852
1853 PyHamtNode *node = self->a_array[i];
1854 if (node == NULL) {
1855 /* Skip any missing nodes. */
1856 continue;
1857 }
1858
1859 bitmap |= 1 << i;
1860
1861 if (IS_BITMAP_NODE(node)) {
1862 PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1863
1864 if (hamt_node_bitmap_count(child) == 1 &&
1865 child->b_array[0] != NULL)
1866 {
1867 /* node is a Bitmap with one key/value pair, just
1868 merge it into the new Bitmap node we're building.
1869
1870 Note that we don't inline Bitmap nodes that
1871 have a NULL key -- those nodes point to another
1872 tree level, and we cannot simply move tree levels
1873 up or down.
1874 */
1875 PyObject *key = child->b_array[0];
1876 PyObject *val = child->b_array[1];
1877
1878 Py_INCREF(key);
1879 new->b_array[new_i] = key;
1880 Py_INCREF(val);
1881 new->b_array[new_i + 1] = val;
1882 }
1883 else {
1884 new->b_array[new_i] = NULL;
1885 Py_INCREF(node);
1886 new->b_array[new_i + 1] = (PyObject*)node;
1887 }
1888 }
1889 else {
1890
1891#ifdef Py_DEBUG
1892 if (IS_COLLISION_NODE(node)) {
1893 Py_ssize_t child_count = hamt_node_collision_count(
1894 (PyHamtNode_Collision*)node);
1895 assert(child_count > 1);
1896 }
1897 else if (IS_ARRAY_NODE(node)) {
1898 assert(((PyHamtNode_Array*)node)->a_count >= 16);
1899 }
1900#endif
1901
1902 /* Just copy the node into our new Bitmap */
1903 new->b_array[new_i] = NULL;
1904 Py_INCREF(node);
1905 new->b_array[new_i + 1] = (PyObject*)node;
1906 }
1907
1908 new_i += 2;
1909 }
1910
1911 new->b_bitmap = bitmap;
1912 *new_node = (PyHamtNode*)new; /* borrow */
1913 return W_NEWNODE;
1914 }
1915
1916 default:
1917 Py_UNREACHABLE();
1918 }
1919}
1920
1921static hamt_find_t
1922hamt_node_array_find(PyHamtNode_Array *self,
1923 uint32_t shift, int32_t hash,
1924 PyObject *key, PyObject **val)
1925{
1926 /* Lookup `key` in the Array node `self`. Set the value
1927 for the found key to 'val'. */
1928
1929 uint32_t idx = hamt_mask(hash, shift);
1930 PyHamtNode *node;
1931
1932 node = self->a_array[idx];
1933 if (node == NULL) {
1934 return F_NOT_FOUND;
1935 }
1936
1937 /* Dispatch to the generic hamt_node_find */
1938 return hamt_node_find(node, shift + 5, hash, key, val);
1939}
1940
1941static int
1942hamt_node_array_traverse(PyHamtNode_Array *self,
1943 visitproc visit, void *arg)
1944{
1945 /* Array's tp_traverse */
1946
1947 Py_ssize_t i;
1948
1949 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1950 Py_VISIT(self->a_array[i]);
1951 }
1952
1953 return 0;
1954}
1955
1956static void
1957hamt_node_array_dealloc(PyHamtNode_Array *self)
1958{
1959 /* Array's tp_dealloc */
1960
1961 Py_ssize_t i;
1962
1963 PyObject_GC_UnTrack(self);
1964 Py_TRASHCAN_SAFE_BEGIN(self)
1965
1966 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1967 Py_XDECREF(self->a_array[i]);
1968 }
1969
1970 Py_TYPE(self)->tp_free((PyObject *)self);
1971 Py_TRASHCAN_SAFE_END(self)
1972}
1973
1974#ifdef Py_DEBUG
1975static int
1976hamt_node_array_dump(PyHamtNode_Array *node,
1977 _PyUnicodeWriter *writer, int level)
1978{
1979 /* Debug build: __dump__() method implementation for Array nodes. */
1980
1981 Py_ssize_t i;
1982
1983 if (_hamt_dump_ident(writer, level + 1)) {
1984 goto error;
1985 }
1986
1987 if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1988 goto error;
1989 }
1990
1991 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1992 if (node->a_array[i] == NULL) {
1993 continue;
1994 }
1995
1996 if (_hamt_dump_ident(writer, level + 2)) {
1997 goto error;
1998 }
1999
2000 if (_hamt_dump_format(writer, "%d::\n", i)) {
2001 goto error;
2002 }
2003
2004 if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2005 goto error;
2006 }
2007
2008 if (_hamt_dump_format(writer, "\n")) {
2009 goto error;
2010 }
2011 }
2012
2013 return 0;
2014error:
2015 return -1;
2016}
2017#endif /* Py_DEBUG */
2018
2019
2020/////////////////////////////////// Node Dispatch
2021
2022
2023static PyHamtNode *
2024hamt_node_assoc(PyHamtNode *node,
2025 uint32_t shift, int32_t hash,
2026 PyObject *key, PyObject *val, int* added_leaf)
2027{
2028 /* Set key/value to the 'node' starting with the given shift/hash.
2029 Return a new node, or the same node if key/value already
2030 set.
2031
2032 added_leaf will be set to 1 if key/value wasn't in the
2033 tree before.
2034
2035 This method automatically dispatches to the suitable
2036 hamt_node_{nodetype}_assoc method.
2037 */
2038
2039 if (IS_BITMAP_NODE(node)) {
2040 return hamt_node_bitmap_assoc(
2041 (PyHamtNode_Bitmap *)node,
2042 shift, hash, key, val, added_leaf);
2043 }
2044 else if (IS_ARRAY_NODE(node)) {
2045 return hamt_node_array_assoc(
2046 (PyHamtNode_Array *)node,
2047 shift, hash, key, val, added_leaf);
2048 }
2049 else {
2050 assert(IS_COLLISION_NODE(node));
2051 return hamt_node_collision_assoc(
2052 (PyHamtNode_Collision *)node,
2053 shift, hash, key, val, added_leaf);
2054 }
2055}
2056
2057static hamt_without_t
2058hamt_node_without(PyHamtNode *node,
2059 uint32_t shift, int32_t hash,
2060 PyObject *key,
2061 PyHamtNode **new_node)
2062{
2063 if (IS_BITMAP_NODE(node)) {
2064 return hamt_node_bitmap_without(
2065 (PyHamtNode_Bitmap *)node,
2066 shift, hash, key,
2067 new_node);
2068 }
2069 else if (IS_ARRAY_NODE(node)) {
2070 return hamt_node_array_without(
2071 (PyHamtNode_Array *)node,
2072 shift, hash, key,
2073 new_node);
2074 }
2075 else {
2076 assert(IS_COLLISION_NODE(node));
2077 return hamt_node_collision_without(
2078 (PyHamtNode_Collision *)node,
2079 shift, hash, key,
2080 new_node);
2081 }
2082}
2083
2084static hamt_find_t
2085hamt_node_find(PyHamtNode *node,
2086 uint32_t shift, int32_t hash,
2087 PyObject *key, PyObject **val)
2088{
2089 /* Find the key in the node starting with the given shift/hash.
2090
2091 If a value is found, the result will be set to F_FOUND, and
2092 *val will point to the found value object.
2093
2094 If a value wasn't found, the result will be set to F_NOT_FOUND.
2095
2096 If an exception occurs during the call, the result will be F_ERROR.
2097
2098 This method automatically dispatches to the suitable
2099 hamt_node_{nodetype}_find method.
2100 */
2101
2102 if (IS_BITMAP_NODE(node)) {
2103 return hamt_node_bitmap_find(
2104 (PyHamtNode_Bitmap *)node,
2105 shift, hash, key, val);
2106
2107 }
2108 else if (IS_ARRAY_NODE(node)) {
2109 return hamt_node_array_find(
2110 (PyHamtNode_Array *)node,
2111 shift, hash, key, val);
2112 }
2113 else {
2114 assert(IS_COLLISION_NODE(node));
2115 return hamt_node_collision_find(
2116 (PyHamtNode_Collision *)node,
2117 shift, hash, key, val);
2118 }
2119}
2120
2121#ifdef Py_DEBUG
2122static int
2123hamt_node_dump(PyHamtNode *node,
2124 _PyUnicodeWriter *writer, int level)
2125{
2126 /* Debug build: __dump__() method implementation for a node.
2127
2128 This method automatically dispatches to the suitable
2129 hamt_node_{nodetype})_dump method.
2130 */
2131
2132 if (IS_BITMAP_NODE(node)) {
2133 return hamt_node_bitmap_dump(
2134 (PyHamtNode_Bitmap *)node, writer, level);
2135 }
2136 else if (IS_ARRAY_NODE(node)) {
2137 return hamt_node_array_dump(
2138 (PyHamtNode_Array *)node, writer, level);
2139 }
2140 else {
2141 assert(IS_COLLISION_NODE(node));
2142 return hamt_node_collision_dump(
2143 (PyHamtNode_Collision *)node, writer, level);
2144 }
2145}
2146#endif /* Py_DEBUG */
2147
2148
2149/////////////////////////////////// Iterators: Machinery
2150
2151
2152static hamt_iter_t
2153hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2154
2155
2156static void
2157hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2158{
2159 for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2160 iter->i_nodes[i] = NULL;
2161 iter->i_pos[i] = 0;
2162 }
2163
2164 iter->i_level = 0;
2165
2166 /* Note: we don't incref/decref nodes in i_nodes. */
2167 iter->i_nodes[0] = root;
2168}
2169
2170static hamt_iter_t
2171hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2172 PyObject **key, PyObject **val)
2173{
2174 int8_t level = iter->i_level;
2175
2176 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2177 Py_ssize_t pos = iter->i_pos[level];
2178
2179 if (pos + 1 >= Py_SIZE(node)) {
2180#ifdef Py_DEBUG
2181 assert(iter->i_level >= 0);
2182 iter->i_nodes[iter->i_level] = NULL;
2183#endif
2184 iter->i_level--;
2185 return hamt_iterator_next(iter, key, val);
2186 }
2187
2188 if (node->b_array[pos] == NULL) {
2189 iter->i_pos[level] = pos + 2;
2190
2191 int8_t next_level = level + 1;
2192 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2193 iter->i_level = next_level;
2194 iter->i_pos[next_level] = 0;
2195 iter->i_nodes[next_level] = (PyHamtNode *)
2196 node->b_array[pos + 1];
2197
2198 return hamt_iterator_next(iter, key, val);
2199 }
2200
2201 *key = node->b_array[pos];
2202 *val = node->b_array[pos + 1];
2203 iter->i_pos[level] = pos + 2;
2204 return I_ITEM;
2205}
2206
2207static hamt_iter_t
2208hamt_iterator_collision_next(PyHamtIteratorState *iter,
2209 PyObject **key, PyObject **val)
2210{
2211 int8_t level = iter->i_level;
2212
2213 PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2214 Py_ssize_t pos = iter->i_pos[level];
2215
2216 if (pos + 1 >= Py_SIZE(node)) {
2217#ifdef Py_DEBUG
2218 assert(iter->i_level >= 0);
2219 iter->i_nodes[iter->i_level] = NULL;
2220#endif
2221 iter->i_level--;
2222 return hamt_iterator_next(iter, key, val);
2223 }
2224
2225 *key = node->c_array[pos];
2226 *val = node->c_array[pos + 1];
2227 iter->i_pos[level] = pos + 2;
2228 return I_ITEM;
2229}
2230
2231static hamt_iter_t
2232hamt_iterator_array_next(PyHamtIteratorState *iter,
2233 PyObject **key, PyObject **val)
2234{
2235 int8_t level = iter->i_level;
2236
2237 PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2238 Py_ssize_t pos = iter->i_pos[level];
2239
2240 if (pos >= HAMT_ARRAY_NODE_SIZE) {
2241#ifdef Py_DEBUG
2242 assert(iter->i_level >= 0);
2243 iter->i_nodes[iter->i_level] = NULL;
2244#endif
2245 iter->i_level--;
2246 return hamt_iterator_next(iter, key, val);
2247 }
2248
2249 for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2250 if (node->a_array[i] != NULL) {
2251 iter->i_pos[level] = i + 1;
2252
2253 int8_t next_level = level + 1;
2254 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2255 iter->i_pos[next_level] = 0;
2256 iter->i_nodes[next_level] = node->a_array[i];
2257 iter->i_level = next_level;
2258
2259 return hamt_iterator_next(iter, key, val);
2260 }
2261 }
2262
2263#ifdef Py_DEBUG
2264 assert(iter->i_level >= 0);
2265 iter->i_nodes[iter->i_level] = NULL;
2266#endif
2267
2268 iter->i_level--;
2269 return hamt_iterator_next(iter, key, val);
2270}
2271
2272static hamt_iter_t
2273hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2274{
2275 if (iter->i_level < 0) {
2276 return I_END;
2277 }
2278
2279 assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2280
2281 PyHamtNode *current = iter->i_nodes[iter->i_level];
2282
2283 if (IS_BITMAP_NODE(current)) {
2284 return hamt_iterator_bitmap_next(iter, key, val);
2285 }
2286 else if (IS_ARRAY_NODE(current)) {
2287 return hamt_iterator_array_next(iter, key, val);
2288 }
2289 else {
2290 assert(IS_COLLISION_NODE(current));
2291 return hamt_iterator_collision_next(iter, key, val);
2292 }
2293}
2294
2295
2296/////////////////////////////////// HAMT high-level functions
2297
2298
2299PyHamtObject *
2300_PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2301{
2302 int32_t key_hash;
2303 int added_leaf = 0;
2304 PyHamtNode *new_root;
2305 PyHamtObject *new_o;
2306
2307 key_hash = hamt_hash(key);
2308 if (key_hash == -1) {
2309 return NULL;
2310 }
2311
2312 new_root = hamt_node_assoc(
2313 (PyHamtNode *)(o->h_root),
2314 0, key_hash, key, val, &added_leaf);
2315 if (new_root == NULL) {
2316 return NULL;
2317 }
2318
2319 if (new_root == o->h_root) {
2320 Py_DECREF(new_root);
2321 Py_INCREF(o);
2322 return o;
2323 }
2324
2325 new_o = hamt_alloc();
2326 if (new_o == NULL) {
2327 Py_DECREF(new_root);
2328 return NULL;
2329 }
2330
2331 new_o->h_root = new_root; /* borrow */
2332 new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2333
2334 return new_o;
2335}
2336
2337PyHamtObject *
2338_PyHamt_Without(PyHamtObject *o, PyObject *key)
2339{
2340 int32_t key_hash = hamt_hash(key);
2341 if (key_hash == -1) {
2342 return NULL;
2343 }
2344
2345 PyHamtNode *new_root;
2346
2347 hamt_without_t res = hamt_node_without(
2348 (PyHamtNode *)(o->h_root),
2349 0, key_hash, key,
2350 &new_root);
2351
2352 switch (res) {
2353 case W_ERROR:
2354 return NULL;
2355 case W_EMPTY:
2356 return _PyHamt_New();
2357 case W_NOT_FOUND:
2358 Py_INCREF(o);
2359 return o;
2360 case W_NEWNODE: {
Yury Selivanov55e08392018-02-01 22:24:56 -05002361 assert(new_root != NULL);
2362
Yury Selivanovf23746a2018-01-22 19:11:18 -05002363 PyHamtObject *new_o = hamt_alloc();
2364 if (new_o == NULL) {
2365 Py_DECREF(new_root);
2366 return NULL;
2367 }
2368
2369 new_o->h_root = new_root; /* borrow */
2370 new_o->h_count = o->h_count - 1;
2371 assert(new_o->h_count >= 0);
2372 return new_o;
2373 }
2374 default:
2375 Py_UNREACHABLE();
2376 }
2377}
2378
2379static hamt_find_t
2380hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2381{
2382 if (o->h_count == 0) {
2383 return F_NOT_FOUND;
2384 }
2385
2386 int32_t key_hash = hamt_hash(key);
2387 if (key_hash == -1) {
2388 return F_ERROR;
2389 }
2390
2391 return hamt_node_find(o->h_root, 0, key_hash, key, val);
2392}
2393
2394
2395int
2396_PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2397{
2398 hamt_find_t res = hamt_find(o, key, val);
2399 switch (res) {
2400 case F_ERROR:
2401 return -1;
2402 case F_NOT_FOUND:
2403 return 0;
2404 case F_FOUND:
2405 return 1;
2406 default:
2407 Py_UNREACHABLE();
2408 }
2409}
2410
2411
2412int
2413_PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2414{
2415 if (v == w) {
2416 return 1;
2417 }
2418
2419 if (v->h_count != w->h_count) {
2420 return 0;
2421 }
2422
2423 PyHamtIteratorState iter;
2424 hamt_iter_t iter_res;
2425 hamt_find_t find_res;
2426 PyObject *v_key;
2427 PyObject *v_val;
2428 PyObject *w_val;
2429
2430 hamt_iterator_init(&iter, v->h_root);
2431
2432 do {
2433 iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2434 if (iter_res == I_ITEM) {
2435 find_res = hamt_find(w, v_key, &w_val);
2436 switch (find_res) {
2437 case F_ERROR:
2438 return -1;
2439
2440 case F_NOT_FOUND:
2441 return 0;
2442
2443 case F_FOUND: {
2444 int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2445 if (cmp < 0) {
2446 return -1;
2447 }
2448 if (cmp == 0) {
2449 return 0;
2450 }
2451 }
2452 }
2453 }
2454 } while (iter_res != I_END);
2455
2456 return 1;
2457}
2458
2459Py_ssize_t
2460_PyHamt_Len(PyHamtObject *o)
2461{
2462 return o->h_count;
2463}
2464
2465static PyHamtObject *
2466hamt_alloc(void)
2467{
2468 PyHamtObject *o;
2469 o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2470 if (o == NULL) {
2471 return NULL;
2472 }
2473 o->h_weakreflist = NULL;
2474 PyObject_GC_Track(o);
2475 return o;
2476}
2477
2478PyHamtObject *
2479_PyHamt_New(void)
2480{
2481 if (_empty_hamt != NULL) {
2482 /* HAMT is an immutable object so we can easily cache an
2483 empty instance. */
2484 Py_INCREF(_empty_hamt);
2485 return _empty_hamt;
2486 }
2487
2488 PyHamtObject *o = hamt_alloc();
2489 if (o == NULL) {
2490 return NULL;
2491 }
2492
2493 o->h_root = hamt_node_bitmap_new(0);
2494 if (o->h_root == NULL) {
2495 Py_DECREF(o);
2496 return NULL;
2497 }
2498
2499 o->h_count = 0;
2500
2501 if (_empty_hamt == NULL) {
2502 Py_INCREF(o);
2503 _empty_hamt = o;
2504 }
2505
2506 return o;
2507}
2508
2509#ifdef Py_DEBUG
2510static PyObject *
2511hamt_dump(PyHamtObject *self)
2512{
2513 _PyUnicodeWriter writer;
2514
2515 _PyUnicodeWriter_Init(&writer);
2516
2517 if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2518 goto error;
2519 }
2520
2521 if (hamt_node_dump(self->h_root, &writer, 0)) {
2522 goto error;
2523 }
2524
2525 return _PyUnicodeWriter_Finish(&writer);
2526
2527error:
2528 _PyUnicodeWriter_Dealloc(&writer);
2529 return NULL;
2530}
2531#endif /* Py_DEBUG */
2532
2533
2534/////////////////////////////////// Iterators: Shared Iterator Implementation
2535
2536
2537static int
2538hamt_baseiter_tp_clear(PyHamtIterator *it)
2539{
2540 Py_CLEAR(it->hi_obj);
2541 return 0;
2542}
2543
2544static void
2545hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2546{
2547 PyObject_GC_UnTrack(it);
2548 (void)hamt_baseiter_tp_clear(it);
2549 PyObject_GC_Del(it);
2550}
2551
2552static int
2553hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2554{
2555 Py_VISIT(it->hi_obj);
2556 return 0;
2557}
2558
2559static PyObject *
2560hamt_baseiter_tp_iternext(PyHamtIterator *it)
2561{
2562 PyObject *key;
2563 PyObject *val;
2564 hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2565
2566 switch (res) {
2567 case I_END:
2568 PyErr_SetNone(PyExc_StopIteration);
2569 return NULL;
2570
2571 case I_ITEM: {
2572 return (*(it->hi_yield))(key, val);
2573 }
2574
2575 default: {
2576 Py_UNREACHABLE();
2577 }
2578 }
2579}
2580
2581static Py_ssize_t
2582hamt_baseiter_tp_len(PyHamtIterator *it)
2583{
2584 return it->hi_obj->h_count;
2585}
2586
2587static PyMappingMethods PyHamtIterator_as_mapping = {
2588 (lenfunc)hamt_baseiter_tp_len,
2589};
2590
2591static PyObject *
2592hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2593{
2594 PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2595 if (it == NULL) {
2596 return NULL;
2597 }
2598
2599 Py_INCREF(o);
2600 it->hi_obj = o;
2601 it->hi_yield = yield;
2602
2603 hamt_iterator_init(&it->hi_iter, o->h_root);
2604
2605 return (PyObject*)it;
2606}
2607
2608#define ITERATOR_TYPE_SHARED_SLOTS \
2609 .tp_basicsize = sizeof(PyHamtIterator), \
2610 .tp_itemsize = 0, \
2611 .tp_as_mapping = &PyHamtIterator_as_mapping, \
2612 .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc, \
2613 .tp_getattro = PyObject_GenericGetAttr, \
2614 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, \
2615 .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse, \
2616 .tp_clear = (inquiry)hamt_baseiter_tp_clear, \
2617 .tp_iter = PyObject_SelfIter, \
2618 .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2619
2620
2621/////////////////////////////////// _PyHamtItems_Type
2622
2623
2624PyTypeObject _PyHamtItems_Type = {
2625 PyVarObject_HEAD_INIT(NULL, 0)
2626 "items",
2627 ITERATOR_TYPE_SHARED_SLOTS
2628};
2629
2630static PyObject *
2631hamt_iter_yield_items(PyObject *key, PyObject *val)
2632{
2633 return PyTuple_Pack(2, key, val);
2634}
2635
2636PyObject *
2637_PyHamt_NewIterItems(PyHamtObject *o)
2638{
2639 return hamt_baseiter_new(
2640 &_PyHamtItems_Type, hamt_iter_yield_items, o);
2641}
2642
2643
2644/////////////////////////////////// _PyHamtKeys_Type
2645
2646
2647PyTypeObject _PyHamtKeys_Type = {
2648 PyVarObject_HEAD_INIT(NULL, 0)
2649 "keys",
2650 ITERATOR_TYPE_SHARED_SLOTS
2651};
2652
2653static PyObject *
2654hamt_iter_yield_keys(PyObject *key, PyObject *val)
2655{
2656 Py_INCREF(key);
2657 return key;
2658}
2659
2660PyObject *
2661_PyHamt_NewIterKeys(PyHamtObject *o)
2662{
2663 return hamt_baseiter_new(
2664 &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2665}
2666
2667
2668/////////////////////////////////// _PyHamtValues_Type
2669
2670
2671PyTypeObject _PyHamtValues_Type = {
2672 PyVarObject_HEAD_INIT(NULL, 0)
2673 "values",
2674 ITERATOR_TYPE_SHARED_SLOTS
2675};
2676
2677static PyObject *
2678hamt_iter_yield_values(PyObject *key, PyObject *val)
2679{
2680 Py_INCREF(val);
2681 return val;
2682}
2683
2684PyObject *
2685_PyHamt_NewIterValues(PyHamtObject *o)
2686{
2687 return hamt_baseiter_new(
2688 &_PyHamtValues_Type, hamt_iter_yield_values, o);
2689}
2690
2691
2692/////////////////////////////////// _PyHamt_Type
2693
2694
2695#ifdef Py_DEBUG
2696static PyObject *
2697hamt_dump(PyHamtObject *self);
2698#endif
2699
2700
2701static PyObject *
2702hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2703{
2704 return (PyObject*)_PyHamt_New();
2705}
2706
2707static int
2708hamt_tp_clear(PyHamtObject *self)
2709{
2710 Py_CLEAR(self->h_root);
2711 return 0;
2712}
2713
2714
2715static int
2716hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2717{
2718 Py_VISIT(self->h_root);
2719 return 0;
2720}
2721
2722static void
2723hamt_tp_dealloc(PyHamtObject *self)
2724{
2725 PyObject_GC_UnTrack(self);
2726 if (self->h_weakreflist != NULL) {
2727 PyObject_ClearWeakRefs((PyObject*)self);
2728 }
2729 (void)hamt_tp_clear(self);
2730 Py_TYPE(self)->tp_free(self);
2731}
2732
2733
2734static PyObject *
2735hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2736{
2737 if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2738 Py_RETURN_NOTIMPLEMENTED;
2739 }
2740
2741 int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2742 if (res < 0) {
2743 return NULL;
2744 }
2745
2746 if (op == Py_NE) {
2747 res = !res;
2748 }
2749
2750 if (res) {
2751 Py_RETURN_TRUE;
2752 }
2753 else {
2754 Py_RETURN_FALSE;
2755 }
2756}
2757
2758static int
2759hamt_tp_contains(PyHamtObject *self, PyObject *key)
2760{
2761 PyObject *val;
2762 return _PyHamt_Find(self, key, &val);
2763}
2764
2765static PyObject *
2766hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2767{
2768 PyObject *val;
2769 hamt_find_t res = hamt_find(self, key, &val);
2770 switch (res) {
2771 case F_ERROR:
2772 return NULL;
2773 case F_FOUND:
2774 Py_INCREF(val);
2775 return val;
2776 case F_NOT_FOUND:
2777 PyErr_SetObject(PyExc_KeyError, key);
2778 return NULL;
2779 default:
2780 Py_UNREACHABLE();
2781 }
2782}
2783
2784static Py_ssize_t
2785hamt_tp_len(PyHamtObject *self)
2786{
2787 return _PyHamt_Len(self);
2788}
2789
2790static PyObject *
2791hamt_tp_iter(PyHamtObject *self)
2792{
2793 return _PyHamt_NewIterKeys(self);
2794}
2795
2796static PyObject *
2797hamt_py_set(PyHamtObject *self, PyObject *args)
2798{
2799 PyObject *key;
2800 PyObject *val;
2801
2802 if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2803 return NULL;
2804 }
2805
2806 return (PyObject *)_PyHamt_Assoc(self, key, val);
2807}
2808
2809static PyObject *
2810hamt_py_get(PyHamtObject *self, PyObject *args)
2811{
2812 PyObject *key;
2813 PyObject *def = NULL;
2814
2815 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2816 return NULL;
2817 }
2818
2819 PyObject *val = NULL;
2820 hamt_find_t res = hamt_find(self, key, &val);
2821 switch (res) {
2822 case F_ERROR:
2823 return NULL;
2824 case F_FOUND:
2825 Py_INCREF(val);
2826 return val;
2827 case F_NOT_FOUND:
2828 if (def == NULL) {
2829 Py_RETURN_NONE;
2830 }
2831 Py_INCREF(def);
2832 return def;
2833 default:
2834 Py_UNREACHABLE();
2835 }
2836}
2837
2838static PyObject *
2839hamt_py_delete(PyHamtObject *self, PyObject *key)
2840{
2841 return (PyObject *)_PyHamt_Without(self, key);
2842}
2843
2844static PyObject *
2845hamt_py_items(PyHamtObject *self, PyObject *args)
2846{
2847 return _PyHamt_NewIterItems(self);
2848}
2849
2850static PyObject *
2851hamt_py_values(PyHamtObject *self, PyObject *args)
2852{
2853 return _PyHamt_NewIterValues(self);
2854}
2855
2856static PyObject *
2857hamt_py_keys(PyHamtObject *self, PyObject *args)
2858{
2859 return _PyHamt_NewIterKeys(self);
2860}
2861
2862#ifdef Py_DEBUG
2863static PyObject *
2864hamt_py_dump(PyHamtObject *self, PyObject *args)
2865{
2866 return hamt_dump(self);
2867}
2868#endif
2869
2870
2871static PyMethodDef PyHamt_methods[] = {
2872 {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2873 {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2874 {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2875 {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2876 {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2877 {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2878#ifdef Py_DEBUG
2879 {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2880#endif
2881 {NULL, NULL}
2882};
2883
2884static PySequenceMethods PyHamt_as_sequence = {
2885 0, /* sq_length */
2886 0, /* sq_concat */
2887 0, /* sq_repeat */
2888 0, /* sq_item */
2889 0, /* sq_slice */
2890 0, /* sq_ass_item */
2891 0, /* sq_ass_slice */
2892 (objobjproc)hamt_tp_contains, /* sq_contains */
2893 0, /* sq_inplace_concat */
2894 0, /* sq_inplace_repeat */
2895};
2896
2897static PyMappingMethods PyHamt_as_mapping = {
2898 (lenfunc)hamt_tp_len, /* mp_length */
2899 (binaryfunc)hamt_tp_subscript, /* mp_subscript */
2900};
2901
2902PyTypeObject _PyHamt_Type = {
2903 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2904 "hamt",
2905 sizeof(PyHamtObject),
2906 .tp_methods = PyHamt_methods,
2907 .tp_as_mapping = &PyHamt_as_mapping,
2908 .tp_as_sequence = &PyHamt_as_sequence,
2909 .tp_iter = (getiterfunc)hamt_tp_iter,
2910 .tp_dealloc = (destructor)hamt_tp_dealloc,
2911 .tp_getattro = PyObject_GenericGetAttr,
2912 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2913 .tp_richcompare = hamt_tp_richcompare,
2914 .tp_traverse = (traverseproc)hamt_tp_traverse,
2915 .tp_clear = (inquiry)hamt_tp_clear,
2916 .tp_new = hamt_tp_new,
2917 .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2918 .tp_hash = PyObject_HashNotImplemented,
2919};
2920
2921
2922/////////////////////////////////// Tree Node Types
2923
2924
2925PyTypeObject _PyHamt_ArrayNode_Type = {
2926 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2927 "hamt_array_node",
2928 sizeof(PyHamtNode_Array),
2929 0,
2930 .tp_dealloc = (destructor)hamt_node_array_dealloc,
2931 .tp_getattro = PyObject_GenericGetAttr,
2932 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2933 .tp_traverse = (traverseproc)hamt_node_array_traverse,
2934 .tp_free = PyObject_GC_Del,
2935 .tp_hash = PyObject_HashNotImplemented,
2936};
2937
2938PyTypeObject _PyHamt_BitmapNode_Type = {
2939 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2940 "hamt_bitmap_node",
2941 sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2942 sizeof(PyObject *),
2943 .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2944 .tp_getattro = PyObject_GenericGetAttr,
2945 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2946 .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2947 .tp_free = PyObject_GC_Del,
2948 .tp_hash = PyObject_HashNotImplemented,
2949};
2950
2951PyTypeObject _PyHamt_CollisionNode_Type = {
2952 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2953 "hamt_collision_node",
2954 sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2955 sizeof(PyObject *),
2956 .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2957 .tp_getattro = PyObject_GenericGetAttr,
2958 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2959 .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2960 .tp_free = PyObject_GC_Del,
2961 .tp_hash = PyObject_HashNotImplemented,
2962};
2963
2964
2965int
2966_PyHamt_Init(void)
2967{
2968 if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2969 (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2970 (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2971 (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2972 (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2973 (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2974 (PyType_Ready(&_PyHamtItems_Type) < 0))
2975 {
2976 return 0;
2977 }
2978
2979 return 1;
2980}
2981
2982void
2983_PyHamt_Fini(void)
2984{
2985 Py_CLEAR(_empty_hamt);
2986 Py_CLEAR(_empty_bitmap_node);
2987}