Refactor runs_avail.

Use pszind_t size classes rather than szind_t size classes, and always
reserve space for NPSIZES elements. This removes the unused heaps for
size classes that are not multiples of the page size, and adds
(currently) unused heaps for all huge size classes, with the immediate
benefit that the size of arena_t allocations is constant (no longer
dependent on chunk size).
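
To make the constant-size benefit concrete, the standalone sketch below
(not part of this change) contrasts a trailing runtime-sized array with
a fixed NPSIZES-element array; the struct names, the NPSIZES value, and
the element counts are hypothetical stand-ins for the real jemalloc
definitions:

    #include <stddef.h>
    #include <stdio.h>

    /*
     * Hypothetical stand-ins, only to illustrate the sizing difference;
     * the real NPSIZES and arena_run_heap_t differ.
     */
    #define NPSIZES 40
    typedef struct { void *root; } arena_run_heap_t;

    /* Before: trailing array sized at run time from runs_avail_nclasses. */
    typedef struct {
        arena_run_heap_t runs_avail[1]; /* Dynamically sized. */
    } arena_old_t;

    /* After: fixed NPSIZES elements; sizeof() is a compile-time constant. */
    typedef struct {
        arena_run_heap_t runs_avail[NPSIZES];
    } arena_new_t;

    int
    main(void)
    {
        size_t runs_avail_nclasses = 28; /* chunk-size dependent before */
        size_t old_size = offsetof(arena_old_t, runs_avail) +
            sizeof(arena_run_heap_t) * runs_avail_nclasses;

        printf("old (runtime-sized): %zu bytes\n", old_size);
        printf("new (constant):      %zu bytes\n", sizeof(arena_new_t));
        return (0);
    }

With a constant sizeof(arena_t), arena_new() can size its base_alloc()
request as CACHELINE_CEILING(sizeof(arena_t)) instead of computing
arena_size at run time, as the src/arena.c hunks below show.
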
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 866b12f..bb65c7a 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -441,10 +441,12 @@
arena_bin_t bins[NBINS];
/*
- * Quantized address-ordered heaps of this arena's available runs. The
- * heaps are used for first-best-fit run allocation.
+ * Size-segregated address-ordered heaps of this arena's available runs,
+ * used for first-best-fit run allocation. Runs are quantized, i.e. each
+ * resides in the heap for the largest size class that is less than or
+ * equal to the run size.
*/
- arena_run_heap_t runs_avail[1]; /* Dynamically sized. */
+ arena_run_heap_t runs_avail[NPSIZES];
};
/* Used in conjunction with tsd for fast arena-related context lookup. */
@@ -476,7 +478,6 @@
extern size_t map_misc_offset;
extern size_t arena_maxrun; /* Max run size for arenas. */
extern size_t large_maxclass; /* Max large size class. */
-extern size_t run_quantize_max; /* Max run_quantize_*() input. */
extern unsigned nlclasses; /* Number of large size classes. */
extern unsigned nhclasses; /* Number of huge size classes. */
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 224cedd..eabb9ce 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -444,10 +444,15 @@
extern arena_t **arenas;
/*
+ * pind2sz_tab encodes the same information as could be computed by
+ * pind2sz_compute().
+ */
+extern size_t const pind2sz_tab[NPSIZES];
+/*
* index2size_tab encodes the same information as could be computed (at
* unacceptable cost in some code paths) by index2size_compute().
*/
-extern size_t const index2size_tab[NSIZES+1];
+extern size_t const index2size_tab[NSIZES];
/*
* size2index_tab is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
@@ -529,6 +534,8 @@
#ifndef JEMALLOC_ENABLE_INLINE
pszind_t psz2ind(size_t psz);
+size_t pind2sz_compute(pszind_t pind);
+size_t pind2sz_lookup(pszind_t pind);
size_t pind2sz(pszind_t pind);
size_t psz2u(size_t psz);
szind_t size2index_compute(size_t size);
@@ -576,7 +583,7 @@
}
JEMALLOC_INLINE size_t
-pind2sz(pszind_t pind)
+pind2sz_compute(pszind_t pind)
{

{
@@ -597,6 +604,22 @@
}
JEMALLOC_INLINE size_t
+pind2sz_lookup(pszind_t pind)
+{
+ size_t ret = (size_t)pind2sz_tab[pind];
+ assert(ret == pind2sz_compute(pind));
+ return (ret);
+}
+
+JEMALLOC_INLINE size_t
+pind2sz(pszind_t pind)
+{
+
+ assert(pind < NPSIZES);
+ return (pind2sz_lookup(pind));
+}
+
+JEMALLOC_INLINE size_t
psz2u(size_t psz)
{
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index cbafc2b..e046c3b 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -394,6 +394,8 @@
pages_trim
pages_unmap
pind2sz
+pind2sz_compute
+pind2sz_lookup
pow2_ceil_u32
pow2_ceil_u64
pow2_ceil_zu
@@ -468,7 +470,6 @@
rtree_val_write
run_quantize_ceil
run_quantize_floor
-run_quantize_max
s2u
s2u_compute
s2u_lookup
diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh
index ecee1a0..d1b1db1 100755
--- a/include/jemalloc/internal/size_classes.sh
+++ b/include/jemalloc/internal/size_classes.sh
@@ -40,6 +40,16 @@
done
}
+reg_size_compute() {
+ lg_grp=$1
+ lg_delta=$2
+ ndelta=$3
+
+ pow2 ${lg_grp}; grp=${pow2_result}
+ pow2 ${lg_delta}; delta=${pow2_result}
+ reg_size=$((${grp} + ${delta}*${ndelta}))
+}
+
run_size() {
lg_p=$1
lg_grp=$2
@@ -47,10 +57,7 @@
ndelta=$4
pow2 ${lg_p}; p=${pow2_result}
-
- pow2 ${lg_grp}; grp=${pow2_result}
- pow2 ${lg_delta}; delta=${pow2_result}
- reg_size=$((${grp} + ${delta}*${ndelta}))
+ reg_size_compute ${lg_grp} ${lg_delta} ${ndelta}
# Compute smallest run size that is an integer multiple of reg_size.
try_run_size=${p}
diff --git a/src/arena.c b/src/arena.c
index ff119ba..a0fd2ce 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -34,14 +34,11 @@
size_t map_misc_offset;
size_t arena_maxrun; /* Max run size for arenas. */
size_t large_maxclass; /* Max large size class. */
-size_t run_quantize_max; /* Max run_quantize_*() input. */
static bool *small_run_tab; /* Valid small run page multiples. */
static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */
static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
unsigned nlclasses; /* Number of large size classes. */
unsigned nhclasses; /* Number of huge size classes. */
-static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */
-static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */
/******************************************************************************/
/*
@@ -177,7 +174,7 @@
size_t ret;
assert(size > 0);
- assert(size <= run_quantize_max);
+ assert(size <= HUGE_MAXCLASS);
assert((size & PAGE_MASK) == 0);
ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1];
@@ -200,7 +197,7 @@
size_t ret;
assert(size > 0);
- assert(size <= run_quantize_max);
+ assert(size <= HUGE_MAXCLASS);
assert((size & PAGE_MASK) == 0);
ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1];
@@ -213,25 +210,15 @@
run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil);
#endif
-static arena_run_heap_t *
-arena_runs_avail_get(arena_t *arena, szind_t ind)
-{
-
- assert(ind >= runs_avail_bias);
- assert(ind - runs_avail_bias < runs_avail_nclasses);
-
- return (&arena->runs_avail[ind - runs_avail_bias]);
-}
-
static void
arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
size_t npages)
{
- szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+ pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
arena_miscelm_get_const(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
- arena_run_heap_insert(arena_runs_avail_get(arena, ind),
+ arena_run_heap_insert(&arena->runs_avail[pind],
arena_miscelm_get_mutable(chunk, pageind));
}
@@ -239,11 +226,11 @@
arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
size_t npages)
{
- szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+ pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
arena_miscelm_get_const(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
- arena_run_heap_remove(arena_runs_avail_get(arena, ind),
+ arena_run_heap_remove(&arena->runs_avail[pind],
arena_miscelm_get_mutable(chunk, pageind));
}
@@ -1088,12 +1075,13 @@
static arena_run_t *
arena_run_first_best_fit(arena_t *arena, size_t size)
{
- szind_t ind, i;
+ pszind_t pind, i;
- ind = size2index(run_quantize_ceil(size));
- for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
+ pind = psz2ind(run_quantize_ceil(size));
+
+ for (i = pind; pind2sz(i) <= large_maxclass; i++) {
arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
- arena_runs_avail_get(arena, i));
+ &arena->runs_avail[i]);
if (miscelm != NULL)
return (&miscelm->run);
}
@@ -1946,7 +1934,8 @@
assert(!arena->purging);
arena->nactive = 0;
- for(i = 0; i < runs_avail_nclasses; i++)
+ for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
+ i++)
arena_run_heap_new(&arena->runs_avail[i]);
malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
@@ -3388,23 +3377,19 @@
arena_new(tsdn_t *tsdn, unsigned ind)
{
arena_t *arena;
- size_t arena_size;
unsigned i;
- /* Compute arena size to incorporate sufficient runs_avail elements. */
- arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) *
- runs_avail_nclasses);
/*
* Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
* because there is no way to clean up if base_alloc() OOMs.
*/
if (config_stats) {
arena = (arena_t *)base_alloc(tsdn,
- CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses *
- sizeof(malloc_large_stats_t) + nhclasses) *
- sizeof(malloc_huge_stats_t));
+ CACHELINE_CEILING(sizeof(arena_t)) +
+ QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) +
+ (nhclasses * sizeof(malloc_huge_stats_t))));
} else
- arena = (arena_t *)base_alloc(tsdn, arena_size);
+ arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t));
if (arena == NULL)
return (NULL);
@@ -3416,11 +3401,11 @@
if (config_stats) {
memset(&arena->stats, 0, sizeof(arena_stats_t));
arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
- + CACHELINE_CEILING(arena_size));
+ + CACHELINE_CEILING(sizeof(arena_t)));
memset(arena->stats.lstats, 0, nlclasses *
sizeof(malloc_large_stats_t));
arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
- + CACHELINE_CEILING(arena_size) +
+ + CACHELINE_CEILING(sizeof(arena_t)) +
QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
memset(arena->stats.hstats, 0, nhclasses *
sizeof(malloc_huge_stats_t));
@@ -3454,8 +3439,10 @@
arena->nactive = 0;
arena->ndirty = 0;
- for(i = 0; i < runs_avail_nclasses; i++)
+ for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
+ i++)
arena_run_heap_new(&arena->runs_avail[i]);
+
qr_new(&arena->runs_dirty, rd_link);
qr_new(&arena->chunks_cache, cc_link);
@@ -3526,6 +3513,7 @@
static bool
run_quantize_init(void)
{
+ size_t run_quantize_max;
unsigned i;
run_quantize_max = chunksize + large_pad;
@@ -3604,9 +3592,6 @@
if (run_quantize_init())
return (true);
- runs_avail_bias = size2index(PAGE);
- runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
-
return (false);
}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index b907d9e..849e941 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -78,14 +78,25 @@
};
static uint8_t malloc_slow_flags;
-/* Last entry for overflow detection only. */
JEMALLOC_ALIGNED(CACHELINE)
-const size_t index2size_tab[NSIZES+1] = {
+const size_t pind2sz_tab[NPSIZES] = {
+#define PSZ_yes(lg_grp, ndelta, lg_delta) \
+ (((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
+#define PSZ_no(lg_grp, ndelta, lg_delta)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+ PSZ_##psz(lg_grp, ndelta, lg_delta)
+ SIZE_CLASSES
+#undef PSZ_yes
+#undef PSZ_no
+#undef SC
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t index2size_tab[NSIZES] = {
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
SIZE_CLASSES
#undef SC
- ZU(0)
};
JEMALLOC_ALIGNED(CACHELINE)
diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c
index f6a2f74..45f3201 100644
--- a/test/unit/run_quantize.c
+++ b/test/unit/run_quantize.c
@@ -111,7 +111,7 @@
floor_prev = 0;
ceil_prev = 0;
- for (i = 1; i < run_quantize_max >> LG_PAGE; i++) {
+ for (i = 1; i <= large_maxclass >> LG_PAGE; i++) {
size_t run_size, floor, ceil;
run_size = i << LG_PAGE;