Use table lookup for run_quantize_{floor,ceil}().
Reduce run quantization overhead by generating lookup tables during
bootstrapping, and using the tables for all subsequent run quantization.
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index f98aeb8..8dc6852 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -494,7 +494,7 @@
extern size_t map_misc_offset;
extern size_t arena_maxrun; /* Max run size for arenas. */
extern size_t large_maxclass; /* Max large size class. */
-extern size_t small_maxrun; /* Max run size for small size classes. */
+extern size_t run_quantize_max; /* Max run_quantize_*() input. */
extern unsigned nlclasses; /* Number of large size classes. */
extern unsigned nhclasses; /* Number of huge size classes. */
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 3e37a61..761aa75 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -447,6 +447,7 @@
rtree_val_write
run_quantize_ceil
run_quantize_floor
+run_quantize_max
s2u
s2u_compute
s2u_lookup
diff --git a/src/arena.c b/src/arena.c
index 97dea84..c414946 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -21,8 +21,11 @@
size_t map_misc_offset;
size_t arena_maxrun; /* Max run size for arenas. */
size_t large_maxclass; /* Max large size class. */
-size_t small_maxrun; /* Max run size for small size classes. */
+size_t run_quantize_max; /* Max run_quantize_*() input. */
+static size_t small_maxrun; /* Max run size for small size classes. */
static bool *small_run_tab; /* Valid small run page multiples. */
+static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */
+static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
unsigned nlclasses; /* Number of large size classes. */
unsigned nhclasses; /* Number of huge size classes. */
@@ -100,12 +103,8 @@
rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
rb_link, arena_run_comp)
-#ifdef JEMALLOC_JET
-#undef run_quantize_floor
-#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl)
-#endif
static size_t
-run_quantize_floor(size_t size)
+run_quantize_floor_compute(size_t size)
{
size_t qsize;
@@ -123,18 +122,13 @@
*/
qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad;
if (qsize <= SMALL_MAXCLASS + large_pad)
- return (run_quantize_floor(size - large_pad));
+ return (run_quantize_floor_compute(size - large_pad));
assert(qsize <= size);
return (qsize);
}
-#ifdef JEMALLOC_JET
-#undef run_quantize_floor
-#define run_quantize_floor JEMALLOC_N(run_quantize_floor)
-run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl);
-#endif
static size_t
-run_quantize_ceil_hard(size_t size)
+run_quantize_ceil_compute_hard(size_t size)
{
size_t large_run_size_next;
@@ -167,14 +161,10 @@
}
}
-#ifdef JEMALLOC_JET
-#undef run_quantize_ceil
-#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl)
-#endif
static size_t
-run_quantize_ceil(size_t size)
+run_quantize_ceil_compute(size_t size)
{
- size_t qsize = run_quantize_floor(size);
+ size_t qsize = run_quantize_floor_compute(size);
if (qsize < size) {
/*
@@ -185,10 +175,51 @@
* search would potentially find sufficiently aligned available
* memory somewhere lower.
*/
- qsize = run_quantize_ceil_hard(qsize);
+ qsize = run_quantize_ceil_compute_hard(qsize);
}
return (qsize);
}
+
+#ifdef JEMALLOC_JET
+#undef run_quantize_floor
+#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl)
+#endif
+/*
+ * Table-driven floor quantization of a run size.
+ *
+ * size must be a nonzero multiple of the page size no larger than
+ * run_quantize_max (all three conditions are asserted).  The result is read
+ * from run_quantize_floor_tab and asserted equal to what
+ * run_quantize_floor_compute() yields for the same input.
+ */
+static size_t
+run_quantize_floor(size_t size)
+{
+	size_t pind, ret;
+
+	assert(size > 0);
+	assert(size <= run_quantize_max);
+	assert((size & PAGE_MASK) == 0);
+
+	/* Table slot 0 holds the entry for a one-page run. */
+	pind = (size >> LG_PAGE) - 1;
+	ret = run_quantize_floor_tab[pind];
+	assert(ret == run_quantize_floor_compute(size));
+	return (ret);
+}
+#ifdef JEMALLOC_JET
+#undef run_quantize_floor
+#define run_quantize_floor JEMALLOC_N(run_quantize_floor)
+run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl);
+#endif
+
+#ifdef JEMALLOC_JET
+#undef run_quantize_ceil
+#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl)
+#endif
+/*
+ * Table-driven ceiling quantization of a run size.
+ *
+ * size must be a nonzero multiple of the page size no larger than
+ * run_quantize_max (all three conditions are asserted).  The result is read
+ * from run_quantize_ceil_tab and asserted equal to what
+ * run_quantize_ceil_compute() yields for the same input.
+ */
+static size_t
+run_quantize_ceil(size_t size)
+{
+	size_t pind, ret;
+
+	assert(size > 0);
+	assert(size <= run_quantize_max);
+	assert((size & PAGE_MASK) == 0);
+
+	/* Table slot 0 holds the entry for a one-page run. */
+	pind = (size >> LG_PAGE) - 1;
+	ret = run_quantize_ceil_tab[pind];
+	assert(ret == run_quantize_ceil_compute(size));
+	return (ret);
+}
#ifdef JEMALLOC_JET
#undef run_quantize_ceil
#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil)
@@ -3522,6 +3553,35 @@
return (false);
}
+/*
+ * Build the lookup tables used by run_quantize_{floor,ceil}().
+ *
+ * Sets run_quantize_max to chunksize + large_pad, obtains two tables from
+ * base_alloc() with one size_t entry per page multiple from 1 through
+ * (run_quantize_max >> LG_PAGE) pages, and stores in entry (i - 1) the
+ * result of run_quantize_{floor,ceil}_compute() for an i-page run.
+ *
+ * Returns true if either table allocation fails, false otherwise.
+ */
+static bool
+run_quantize_init(void)
+{
+	size_t ntabs, i;
+
+	run_quantize_max = chunksize + large_pad;
+	ntabs = run_quantize_max >> LG_PAGE;
+
+	run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) * ntabs);
+	if (run_quantize_floor_tab == NULL)
+		return (true);
+
+	run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) * ntabs);
+	if (run_quantize_ceil_tab == NULL)
+		return (true);
+
+	/*
+	 * The counter is size_t rather than unsigned so that (i << LG_PAGE)
+	 * is evaluated at full size_t width and cannot truncate when
+	 * run_quantize_max does not fit in an unsigned int.
+	 */
+	for (i = 1; i <= ntabs; i++) {
+		size_t run_size = i << LG_PAGE;
+
+		run_quantize_floor_tab[i-1] =
+		    run_quantize_floor_compute(run_size);
+		run_quantize_ceil_tab[i-1] =
+		    run_quantize_ceil_compute(run_size);
+	}
+
+	return (false);
+}
+
+
bool
arena_boot(void)
{
@@ -3570,7 +3630,12 @@
nhclasses = NSIZES - nlclasses - NBINS;
bin_info_init();
- return (small_run_size_init());
+ if (small_run_size_init())
+ return (true);
+ if (run_quantize_init())
+ return (true);
+
+ return (false);
}
void
diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c
index aff4056..f6a2f74 100644
--- a/test/unit/run_quantize.c
+++ b/test/unit/run_quantize.c
@@ -93,19 +93,14 @@
TEST_BEGIN(test_monotonic)
{
- bool cache_oblivious;
unsigned nbins, nlruns, i;
- size_t sz, max_run_size, floor_prev, ceil_prev;
+ size_t sz, floor_prev, ceil_prev;
/*
* Iterate over all run sizes and verify that
* run_quantize_{floor,ceil}() are monotonic.
*/
- sz = sizeof(bool);
- assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz,
- NULL, 0), 0, "Unexpected mallctl failure");
-
sz = sizeof(unsigned);
assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0,
"Unexpected mallctl failure");
@@ -114,12 +109,9 @@
assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0,
"Unexpected mallctl failure");
- max_run_size = (large_maxclass > small_maxrun) ? large_maxclass :
- small_maxrun;
-
floor_prev = 0;
ceil_prev = 0;
- for (i = 1; i < max_run_size >> LG_PAGE; i++) {
+ for (i = 1; i < run_quantize_max >> LG_PAGE; i++) {
size_t run_size, floor, ceil;
run_size = i << LG_PAGE;