Fix long spinning in rtree_node_init
rtree_node_init spinlocks the node, allocates, and then sets the node.
This is under heavy contention at the top of the tree if many threads
start to allocate at the same time.
Instead, take a per-rtree sleeping mutex to reduce spinning. Tested
both pthreads and OS X OSSpinLock, and both reduce spinning adequately.
Previous benchmark time:
./ttest1 500 100
~15s
New benchmark time:
./ttest1 500 100
~0.57s
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index fc88dfe..9c6cc22 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -23,9 +23,6 @@
#define RTREE_HEIGHT_MAX \
((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
-/* Used for two-stage lock-free node initialization. */
-#define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1)
-
#define RTREE_CTX_INITIALIZER { \
false, \
0, \
@@ -139,6 +136,7 @@
*/
unsigned start_level[RTREE_HEIGHT_MAX + 1];
rtree_level_t levels[RTREE_HEIGHT_MAX];
+ malloc_mutex_t init_lock;
};
#endif /* JEMALLOC_H_STRUCTS */
@@ -251,7 +249,7 @@
rtree_node_valid(rtree_elm_t *node)
{
- return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+ return ((uintptr_t)node != (uintptr_t)0);
}
JEMALLOC_ALWAYS_INLINE rtree_elm_t *
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
index 26024ac..86ddb64 100644
--- a/include/jemalloc/internal/witness.h
+++ b/include/jemalloc/internal/witness.h
@@ -28,7 +28,8 @@
#define WITNESS_RANK_ARENA_EXTENT_CACHE 10
#define WITNESS_RANK_RTREE_ELM 11U
-#define WITNESS_RANK_BASE 12U
+#define WITNESS_RANK_RTREE 12U
+#define WITNESS_RANK_BASE 13U
#define WITNESS_RANK_LEAF 0xffffffffU
#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF
diff --git a/src/rtree.c b/src/rtree.c
index 0a42a98..b6b9ed7 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -59,6 +59,8 @@
}
rtree->start_level[RTREE_HEIGHT_MAX] = 0;
+ malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE);
+
return (false);
}
@@ -135,25 +137,18 @@
{
rtree_elm_t *node;
- if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
- spin_t spinner;
-
- /*
- * Another thread is already in the process of initializing.
- * Spin-wait until initialization is complete.
- */
- spin_init(&spinner);
- do {
- spin_adaptive(&spinner);
- node = atomic_read_p((void **)elmp);
- } while (node == RTREE_NODE_INITIALIZING);
- } else {
+ malloc_mutex_lock(tsdn, &rtree->init_lock);
+ node = atomic_read_p((void**)elmp);
+ if (node == NULL) {
node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
rtree->levels[level].bits);
- if (node == NULL)
+ if (node == NULL) {
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
return (NULL);
+ }
atomic_write_p((void **)elmp, node);
}
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
return (node);
}
diff --git a/test/unit/rtree.c b/test/unit/rtree.c
index a05834f..03f4e26 100644
--- a/test/unit/rtree.c
+++ b/test/unit/rtree.c
@@ -13,8 +13,10 @@
if (rtree != test_rtree)
return rtree_node_alloc_orig(tsdn, rtree, nelms);
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t));
assert_ptr_not_null(node, "Unexpected calloc() failure");
+ malloc_mutex_lock(tsdn, &rtree->init_lock);
return (node);
}