Separate arena_avail trees
Separate run trees by index, replacing the previous quantize logic.
Quantization by index is now performed only on insertion / removal from
the tree, and not on node comparison, saving some cpu. This also means
we don't have to dereference the miscelm* pointers, saving half of the
memory loads from miscelms/mapbits that have fallen out of cache. A
linear scan of the indicies appears to be fast enough.
The only cost of this is an extra tree array in each arena.
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 8dc6852..2548082 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -352,12 +352,6 @@
size_t ndirty;
/*
- * Size/address-ordered tree of this arena's available runs. The tree
- * is used for first-best-fit run allocation.
- */
- arena_avail_tree_t runs_avail;
-
- /*
* Unused dirty memory this arena manages. Dirty memory is conceptually
* tracked as an arbitrarily interleaved LRU of dirty runs and cached
* chunks, but the list linkage is actually semi-duplicated in order to
@@ -462,6 +456,12 @@
/* bins is used to store trees of free regions. */
arena_bin_t bins[NBINS];
+
+ /*
+ * Quantized address-ordered trees of this arena's available runs. The
+ * trees are used for first-best-fit run allocation.
+ */
+ arena_avail_tree_t runs_avail[1]; /* Dynamically sized. */
};
/* Used in conjunction with tsd for fast arena-related context lookup. */
diff --git a/src/arena.c b/src/arena.c
index c414946..0642272 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -28,6 +28,8 @@
static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
unsigned nlclasses; /* Number of large size classes. */
unsigned nhclasses; /* Number of huge size classes. */
+static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */
+static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */
/******************************************************************************/
/*
@@ -45,42 +47,12 @@
/******************************************************************************/
-#define CHUNK_MAP_KEY ((uintptr_t)0x1U)
-
-JEMALLOC_INLINE_C arena_chunk_map_misc_t *
-arena_miscelm_key_create(size_t size)
-{
-
- return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) |
- CHUNK_MAP_KEY));
-}
-
-JEMALLOC_INLINE_C bool
-arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm)
-{
-
- return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0);
-}
-
-#undef CHUNK_MAP_KEY
-
-JEMALLOC_INLINE_C size_t
-arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm)
-{
-
- assert(arena_miscelm_is_key(miscelm));
-
- return (arena_mapbits_size_decode((uintptr_t)miscelm));
-}
-
JEMALLOC_INLINE_C size_t
arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
- assert(!arena_miscelm_is_key(miscelm));
-
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
pageind = arena_miscelm_to_pageind(miscelm);
mapbits = arena_mapbits_get(chunk, pageind);
@@ -88,7 +60,8 @@
}
JEMALLOC_INLINE_C int
-arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b)
+arena_run_addr_comp(const arena_chunk_map_misc_t *a,
+ const arena_chunk_map_misc_t *b)
{
uintptr_t a_miscelm = (uintptr_t)a;
uintptr_t b_miscelm = (uintptr_t)b;
@@ -101,7 +74,7 @@
/* Generate red-black tree functions. */
rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
- rb_link, arena_run_comp)
+ rb_link, arena_run_addr_comp)
static size_t
run_quantize_floor_compute(size_t size)
@@ -226,61 +199,42 @@
run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
#endif
-JEMALLOC_INLINE_C int
-arena_avail_comp(const arena_chunk_map_misc_t *a,
- const arena_chunk_map_misc_t *b)
-{
- int ret;
- uintptr_t a_miscelm = (uintptr_t)a;
- size_t a_qsize = run_quantize_floor(arena_miscelm_is_key(a) ?
- arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a));
- size_t b_qsize = run_quantize_floor(arena_miscelm_size_get(b));
-
- /*
- * Compare based on quantized size rather than size, in order to sort
- * equally useful runs only by address.
- */
- ret = (a_qsize > b_qsize) - (a_qsize < b_qsize);
- if (ret == 0) {
- if (!arena_miscelm_is_key(a)) {
- uintptr_t b_miscelm = (uintptr_t)b;
-
- ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm);
- } else {
- /*
- * Treat keys as if they are lower than anything else.
- */
- ret = -1;
- }
- }
-
- return (ret);
-}
-
/* Generate red-black tree functions. */
rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t,
- arena_chunk_map_misc_t, rb_link, arena_avail_comp)
+ arena_chunk_map_misc_t, rb_link, arena_run_addr_comp)
+
+static arena_avail_tree_t *
+arena_runs_avail_get(arena_t *arena, szind_t ind)
+{
+
+ assert(ind >= runs_avail_bias);
+ assert(ind - runs_avail_bias < runs_avail_nclasses);
+
+ return (&arena->runs_avail[ind - runs_avail_bias]);
+}
static void
arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
size_t npages)
{
-
+ szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+ arena_miscelm_get(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
- arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk,
- pageind));
+ arena_avail_tree_insert(arena_runs_avail_get(arena, ind),
+ arena_miscelm_get(chunk, pageind));
}
static void
arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
size_t npages)
{
-
+ szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+ arena_miscelm_get(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
- arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk,
- pageind));
+ arena_avail_tree_remove(arena_runs_avail_get(arena, ind),
+ arena_miscelm_get(chunk, pageind));
}
static void
@@ -770,7 +724,6 @@
return (NULL);
}
- /* Insert the run into the runs_avail tree. */
arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
return (chunk);
@@ -791,10 +744,7 @@
assert(arena_mapbits_decommitted_get(chunk, map_bias) ==
arena_mapbits_decommitted_get(chunk, chunk_npages-1));
- /*
- * Remove run from the runs_avail tree, so that the arena does not use
- * it.
- */
+ /* Remove run from runs_avail, so that the arena does not use it. */
arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
if (arena->spare != NULL) {
@@ -1124,19 +1074,23 @@
/*
* Do first-best-fit run selection, i.e. select the lowest run that best fits.
- * Run sizes are quantized, so not all candidate runs are necessarily exactly
- * the same size.
+ * Run sizes are indexed, so not all candidate runs are necessarily exactly the
+ * same size.
*/
static arena_run_t *
arena_run_first_best_fit(arena_t *arena, size_t size)
{
- size_t search_size = run_quantize_ceil(size);
- arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size);
- arena_chunk_map_misc_t *miscelm =
- arena_avail_tree_nsearch(&arena->runs_avail, key);
- if (miscelm == NULL)
- return (NULL);
- return (&miscelm->run);
+ szind_t ind, i;
+
+ ind = size2index(run_quantize_ceil(size));
+ for (i = ind; i < runs_avail_nclasses; i++) {
+ arena_chunk_map_misc_t *miscelm = arena_avail_tree_first(
+ arena_runs_avail_get(arena, i));
+ if (miscelm != NULL)
+ return (&miscelm->run);
+ }
+
+ return (NULL);
}
static arena_run_t *
@@ -3315,19 +3269,23 @@
arena_new(unsigned ind)
{
arena_t *arena;
+ size_t arena_size;
unsigned i;
arena_bin_t *bin;
+ /* Compute arena size to incorporate sufficient runs_avail elements. */
+ arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_avail_tree_t)
+ * (runs_avail_nclasses - 1));
/*
* Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
* because there is no way to clean up if base_alloc() OOMs.
*/
if (config_stats) {
- arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t))
- + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
+ arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) +
+ QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
nhclasses) * sizeof(malloc_huge_stats_t));
} else
- arena = (arena_t *)base_alloc(sizeof(arena_t));
+ arena = (arena_t *)base_alloc(arena_size);
if (arena == NULL)
return (NULL);
@@ -3339,11 +3297,11 @@
if (config_stats) {
memset(&arena->stats, 0, sizeof(arena_stats_t));
arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
- + CACHELINE_CEILING(sizeof(arena_t)));
+ + CACHELINE_CEILING(arena_size));
memset(arena->stats.lstats, 0, nlclasses *
sizeof(malloc_large_stats_t));
arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
- + CACHELINE_CEILING(sizeof(arena_t)) +
+ + CACHELINE_CEILING(arena_size) +
QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
memset(arena->stats.hstats, 0, nhclasses *
sizeof(malloc_huge_stats_t));
@@ -3375,7 +3333,8 @@
arena->nactive = 0;
arena->ndirty = 0;
- arena_avail_tree_new(&arena->runs_avail);
+ for(i = 0; i < runs_avail_nclasses; i++)
+ arena_avail_tree_new(&arena->runs_avail[i]);
qr_new(&arena->runs_dirty, rd_link);
qr_new(&arena->chunks_cache, cc_link);
@@ -3635,6 +3594,9 @@
if (run_quantize_init())
return (true);
+ runs_avail_bias = size2index(PAGE);
+ runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
+
return (false);
}