Move centralized chunk management into arenas.

Migrate all centralized data structures related to huge allocations and
recyclable chunks into arena_t, so that each arena can manage huge
allocations and recyclable virtual memory completely independently of
other arenas.
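In outline, the state that moves into arena_t is the following (a
condensed sketch; the authoritative definitions are in the
include/jemalloc/internal/arena.h hunk below):

    /* Extant huge allocations, protected by huge_mtx. */
    ql_head(extent_node_t)  huge;
    malloc_mutex_t          huge_mtx;

    /* Trees of recyclable chunks (size/address- and address-ordered). */
    extent_tree_t           chunks_szad_mmap;
    extent_tree_t           chunks_ad_mmap;
    extent_tree_t           chunks_szad_dss;
    extent_tree_t           chunks_ad_dss;
    malloc_mutex_t          chunks_mtx;

    /* Cache of extent nodes originally allocated via base_alloc(). */
    ql_head(extent_node_t)  node_cache;
    malloc_mutex_t          node_cache_mtx;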

Add chunk node caching to arenas, in order to avoid contention on the
base allocator.
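A cache hit avoids taking base_mtx entirely; only a miss falls through
to base_alloc().  The allocation side is roughly (mirroring
arena_node_alloc() in the src/arena.c hunk below):

    extent_node_t *
    arena_node_alloc(arena_t *arena)
    {
        extent_node_t *node;

        malloc_mutex_lock(&arena->node_cache_mtx);
        node = ql_last(&arena->node_cache, link_ql);
        if (node == NULL) {
            /* Cache miss; fall back to the base allocator. */
            malloc_mutex_unlock(&arena->node_cache_mtx);
            return (base_alloc(sizeof(extent_node_t)));
        }
        ql_tail_remove(&arena->node_cache, extent_node_t, link_ql);
        malloc_mutex_unlock(&arena->node_cache_mtx);
        return (node);
    }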

Use chunks_rtree to look up huge allocations rather than a red-black
tree.  Maintain a per-arena unsorted list of huge allocations (which
will be needed to enumerate huge allocations during arena reset).
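With this in place, a huge size query is a radix tree lookup followed by
a brief critical section under the owning arena's huge_mtx, e.g.
(matching huge_salloc() in the src/huge.c hunk below):

    size_t
    huge_salloc(const void *ptr)
    {
        size_t size;
        extent_node_t *node;
        arena_t *arena;

        node = huge_node_get(ptr);  /* chunk_lookup() via chunks_rtree. */
        arena = node->arena;
        malloc_mutex_lock(&arena->huge_mtx);
        size = node->size;
        malloc_mutex_unlock(&arena->huge_mtx);

        return (size);
    }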

Remove the --enable-ivsalloc option, make ivsalloc() always available,
and use it for size queries if --enable-debug is enabled.  The only
practical implications of this removal are that 1) ivsalloc() is now
always available during live debugging (and the underlying radix tree is
available during core-based debugging), and 2) size query validation can
no longer be enabled independently of --enable-debug.
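After this change ivsalloc() reduces to a chunks_rtree lookup plus the
usual size computation, roughly (matching the
include/jemalloc/internal/jemalloc_internal.h.in hunk below):

    JEMALLOC_ALWAYS_INLINE size_t
    ivsalloc(const void *ptr, bool demote)
    {
        extent_node_t *node;

        /* Return 0 if ptr is not within a chunk managed by jemalloc. */
        node = chunk_lookup(CHUNK_ADDR2BASE(ptr));
        if (node == NULL)
            return (0);
        /* Only arena chunks should be looked up via interior pointers. */
        assert(node->addr == ptr || node->size == 0);

        return (isalloc(ptr, demote));
    }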

Remove the stats.chunks.{current,total,high} mallctls, and replace their
underlying statistics with simpler atomically updated counters used
exclusively for gdump triggering.  These statistics are no longer very
useful because each arena manages chunks independently, and per-arena
statistics provide similar information.
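The replacement counters are maintained by chunk_register() and
chunk_deregister() using atomic operations, with gdump triggered on a
new high-water mark, approximately (see the src/chunk.c hunk below):

    if (config_prof && opt_prof) {
        size_t nadd = (node->size == 0) ? 1 : node->size / chunksize;
        size_t cur = atomic_add_z(&curchunks, nadd);
        size_t high = atomic_read_z(&highchunks);
        while (cur > high && atomic_cas_z(&highchunks, high, cur)) {
            /*
             * Don't refresh cur, because it may have decreased
             * since this thread lost the highchunks update race.
             */
            high = atomic_read_z(&highchunks);
        }
        if (cur > high && prof_gdump_get_unlocked())
            prof_gdump();
    }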

Simplify chunk synchronization code, now that base chunk allocation
cannot cause recursive lock acquisition.
diff --git a/INSTALL b/INSTALL
index b8459a8..517fe02 100644
--- a/INSTALL
+++ b/INSTALL
@@ -92,7 +92,6 @@
 --enable-debug
     Enable assertions and validation code.  This incurs a substantial
     performance hit, but is very useful during application development.
-    Implies --enable-ivsalloc.
 
 --enable-code-coverage
     Enable code coverage support, for use during jemalloc test development.
@@ -107,11 +106,6 @@
     there are interactions between the various coverage targets, so it is
     usually advisable to run 'make clean' between repeated code coverage runs.
 
---enable-ivsalloc
-    Enable validation code, which verifies that pointers reside within
-    jemalloc-owned chunks before dereferencing them.  This incurs a substantial
-    performance hit.
-
 --disable-stats
     Disable statistics gathering functionality.  See the "opt.stats_print"
     option documentation for usage details.
diff --git a/configure.ac b/configure.ac
index dc8aa02..2922880 100644
--- a/configure.ac
+++ b/configure.ac
@@ -625,7 +625,7 @@
 
 dnl Do not compile with debugging by default.
 AC_ARG_ENABLE([debug],
-  [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])],
+  [AS_HELP_STRING([--enable-debug], [Build debugging code])],
 [if test "x$enable_debug" = "xno" ; then
   enable_debug="0"
 else
@@ -634,27 +634,8 @@
 ],
 [enable_debug="0"]
 )
-if test "x$enable_debug" = "x1" ; then
-  AC_DEFINE([JEMALLOC_DEBUG], [ ])
-  enable_ivsalloc="1"
-fi
 AC_SUBST([enable_debug])
 
-dnl Do not validate pointers by default.
-AC_ARG_ENABLE([ivsalloc],
-  [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])],
-[if test "x$enable_ivsalloc" = "xno" ; then
-  enable_ivsalloc="0"
-else
-  enable_ivsalloc="1"
-fi
-],
-[enable_ivsalloc="0"]
-)
-if test "x$enable_ivsalloc" = "x1" ; then
-  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
-fi
-
 dnl Only optimize if not debugging.
 if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
   dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS.
@@ -1401,7 +1382,6 @@
   if test "x${abi}" != "xmacho"; then
     AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin])
   fi
-  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
   AC_DEFINE([JEMALLOC_ZONE], [ ])
 
   dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index da800de..b392fa9 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1847,7 +1847,7 @@
         equal to <link
         linkend="stats.allocated"><mallctl>stats.allocated</mallctl></link>.
         This does not include <link linkend="stats.arenas.i.pdirty">
-        <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link> and pages
+        <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link>, nor pages
         entirely devoted to allocator metadata.</para></listitem>
       </varlistentry>
 
@@ -1880,39 +1880,6 @@
         does not include inactive chunks.</para></listitem>
       </varlistentry>
 
-      <varlistentry id="stats.chunks.current">
-        <term>
-          <mallctl>stats.chunks.current</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-stats</option>]
-        </term>
-        <listitem><para>Total number of chunks actively mapped on behalf of the
-        application.  This does not include inactive chunks.
-        </para></listitem>
-      </varlistentry>
-
-      <varlistentry id="stats.chunks.total">
-        <term>
-          <mallctl>stats.chunks.total</mallctl>
-          (<type>uint64_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-stats</option>]
-        </term>
-        <listitem><para>Cumulative number of chunks allocated.</para></listitem>
-      </varlistentry>
-
-      <varlistentry id="stats.chunks.high">
-        <term>
-          <mallctl>stats.chunks.high</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-stats</option>]
-        </term>
-        <listitem><para>Maximum number of active chunks at any time thus far.
-        </para></listitem>
-      </varlistentry>
-
       <varlistentry id="stats.arenas.i.dss">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.dss</mallctl>
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 5476899..2ae4609 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -151,8 +151,12 @@
 
 /* Arena chunk header. */
 struct arena_chunk_s {
-	/* Arena that owns the chunk. */
-	arena_t			*arena;
+	/*
+	 * The arena that owns the chunk is node.arena.  This field as a whole
+	 * is used by chunks_rtree to support both ivsalloc() and core-based
+	 * debugging.
+	 */
+	extent_node_t		node;
 
 	/*
 	 * Map of pages within chunk that keeps track of free/large/small.  The
@@ -313,6 +317,27 @@
 	/* List of dirty runs this arena manages. */
 	arena_chunk_miscelms_t	runs_dirty;
 
+	/* Extant huge allocations. */
+	ql_head(extent_node_t)	huge;
+	/* Synchronizes all huge allocation/update/deallocation. */
+	malloc_mutex_t		huge_mtx;
+
+	/*
+	 * Trees of chunks that were previously allocated (trees differ only in
+	 * node ordering).  These are used when allocating chunks, in an attempt
+	 * to re-use address space.  Depending on function, different tree
+	 * orderings are needed, which is why there are two trees with the same
+	 * contents.
+	 */
+	extent_tree_t		chunks_szad_mmap;
+	extent_tree_t		chunks_ad_mmap;
+	extent_tree_t		chunks_szad_dss;
+	extent_tree_t		chunks_ad_dss;
+	malloc_mutex_t		chunks_mtx;
+	/* Cache of nodes that were allocated via base_alloc(). */
+	ql_head(extent_node_t)	node_cache;
+	malloc_mutex_t		node_cache_mtx;
+
 	/*
 	 * User-configurable chunk allocation and deallocation functions.
 	 */
@@ -338,6 +363,8 @@
 extern unsigned		nlclasses; /* Number of large size classes. */
 extern unsigned		nhclasses; /* Number of huge size classes. */
 
+extent_node_t	*arena_node_alloc(arena_t *arena);
+void	arena_node_dalloc(arena_t *arena, extent_node_t *node);
 void	*arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
     bool *zero);
 void	arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize);
@@ -453,8 +480,7 @@
     tcache_t *tcache);
 arena_t	*arena_aalloc(const void *ptr);
 size_t	arena_salloc(const void *ptr, bool demote);
-void	arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr,
-    tcache_t *tcache);
+void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
 void	arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size,
     tcache_t *tcache);
 #endif
@@ -792,7 +818,7 @@
 		assert(binind != BININD_INVALID);
 		assert(binind < NBINS);
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-		arena = chunk->arena;
+		arena = chunk->node.arena;
 		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 		actual_mapbits = arena_mapbits_get(chunk, pageind);
 		assert(mapbits == actual_mapbits);
@@ -980,7 +1006,7 @@
 	arena_chunk_t *chunk;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	return (chunk->arena);
+	return (chunk->node.arena);
 }
 
 /* Return the size of the allocation pointed to by ptr. */
@@ -1024,11 +1050,18 @@
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, tcache_t *tcache)
+arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
 {
+	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
 	assert(ptr != NULL);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (unlikely(chunk == ptr)) {
+		huge_dalloc(tsd, ptr, tcache);
+		return;
+	}
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
 
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
@@ -1040,8 +1073,10 @@
 			index_t binind = arena_ptr_small_binind_get(ptr,
 			    mapbits);
 			tcache_dalloc_small(tsd, tcache, ptr, binind);
-		} else
-			arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
+		} else {
+			arena_dalloc_small(chunk->node.arena, chunk, ptr,
+			    pageind);
+		}
 	} else {
 		size_t size = arena_mapbits_large_size_get(chunk, pageind);
 
@@ -1050,7 +1085,7 @@
 		if (likely(tcache != NULL) && size <= tcache_maxclass)
 			tcache_dalloc_large(tsd, tcache, ptr, size);
 		else
-			arena_dalloc_large(chunk->arena, chunk, ptr);
+			arena_dalloc_large(chunk->node.arena, chunk, ptr);
 	}
 }
 
@@ -1081,7 +1116,8 @@
 		} else {
 			size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
 			    LG_PAGE;
-			arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
+			arena_dalloc_small(chunk->node.arena, chunk, ptr,
+			    pageind);
 		}
 	} else {
 		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
@@ -1089,7 +1125,7 @@
 		if (likely(tcache != NULL) && size <= tcache_maxclass)
 			tcache_dalloc_large(tsd, tcache, ptr, size);
 		else
-			arena_dalloc_large(chunk->arena, chunk, ptr);
+			arena_dalloc_large(chunk->node.arena, chunk, ptr);
 	}
 }
 #  endif /* JEMALLOC_ARENA_INLINE_B */
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
index af2c687..0d33065 100644
--- a/include/jemalloc/internal/atomic.h
+++ b/include/jemalloc/internal/atomic.h
@@ -52,7 +52,7 @@
 void	*atomic_add_p(void **p, void *x);
 void	*atomic_sub_p(void **p, void *x);
 bool	atomic_cas_p(void **p, void *c, void *s);
-void	atomic_write_p(void **p, void *x);
+void	atomic_write_p(void **p, const void *x);
 size_t	atomic_add_z(size_t *p, size_t x);
 size_t	atomic_sub_z(size_t *p, size_t x);
 bool	atomic_cas_z(size_t *p, size_t c, size_t s);
@@ -538,7 +538,7 @@
 }
 
 JEMALLOC_INLINE void
-atomic_write_p(void **p, void *x)
+atomic_write_p(void **p, const void *x)
 {
 
 #if (LG_SIZEOF_PTR == 3)
diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h
index a0798ee..bec76b3 100644
--- a/include/jemalloc/internal/base.h
+++ b/include/jemalloc/internal/base.h
@@ -10,8 +10,6 @@
 #ifdef JEMALLOC_H_EXTERNS
 
 void	*base_alloc(size_t size);
-extent_node_t *base_node_alloc(void);
-void	base_node_dalloc(extent_node_t *node);
 size_t	base_allocated_get(void);
 bool	base_boot(void);
 void	base_prefork(void);
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
index 62ac3e7..5e0fb14 100644
--- a/include/jemalloc/internal/chunk.h
+++ b/include/jemalloc/internal/chunk.h
@@ -30,24 +30,21 @@
 extern size_t		opt_lg_chunk;
 extern const char	*opt_dss;
 
-/* Protects stats_chunks; currently not used for any other purpose. */
-extern malloc_mutex_t	chunks_mtx;
-/* Chunk statistics. */
-extern chunk_stats_t	stats_chunks;
-
 extern rtree_t		chunks_rtree;
 
 extern size_t		chunksize;
 extern size_t		chunksize_mask; /* (chunksize - 1). */
 extern size_t		chunk_npages;
 
+bool	chunk_register(const void *chunk, const extent_node_t *node);
+void	chunk_deregister(const void *chunk, const extent_node_t *node);
 void	*chunk_alloc_base(size_t size);
 void	*chunk_alloc_arena(chunk_alloc_t *chunk_alloc,
     chunk_dalloc_t *chunk_dalloc, unsigned arena_ind, void *new_addr,
     size_t size, size_t alignment, bool *zero);
 void	*chunk_alloc_default(void *new_addr, size_t size, size_t alignment,
     bool *zero, unsigned arena_ind);
-void	chunk_unmap(void *chunk, size_t size);
+void	chunk_unmap(arena_t *arena, void *chunk, size_t size);
 bool	chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind);
 bool	chunk_boot(void);
 void	chunk_prefork(void);
@@ -58,6 +55,19 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
+#ifndef JEMALLOC_ENABLE_INLINE
+extent_node_t	*chunk_lookup(const void *chunk);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_))
+JEMALLOC_INLINE extent_node_t *
+chunk_lookup(const void *chunk)
+{
+
+	return (rtree_get(&chunks_rtree, (uintptr_t)chunk));
+}
+#endif
+
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
 
diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h
index 0989647..87366a2 100644
--- a/include/jemalloc/internal/chunk_dss.h
+++ b/include/jemalloc/internal/chunk_dss.h
@@ -23,8 +23,8 @@
 
 dss_prec_t	chunk_dss_prec_get(void);
 bool	chunk_dss_prec_set(dss_prec_t dss_prec);
-void	*chunk_alloc_dss(void *new_addr, size_t size, size_t alignment,
-    bool *zero);
+void	*chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero);
 bool	chunk_in_dss(void *chunk);
 bool	chunk_dss_boot(void);
 void	chunk_dss_prefork(void);
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index 65617bc..ab9c986 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -54,11 +54,6 @@
 	size_t			active;
 	size_t			metadata;
 	size_t			mapped;
-	struct {
-		size_t		current;	/* stats_chunks.curchunks */
-		uint64_t	total;		/* stats_chunks.nchunks */
-		size_t		high;		/* stats_chunks.highchunks */
-	} chunks;
 	unsigned		narenas;
 	ctl_arena_stats_t	*arenas;	/* (narenas + 1) elements. */
 };
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h
index f45940c..fbcdcf9 100644
--- a/include/jemalloc/internal/extent.h
+++ b/include/jemalloc/internal/extent.h
@@ -9,21 +9,17 @@
 
 /* Tree of extents. */
 struct extent_node_s {
-	/* Linkage for the size/address-ordered tree. */
-	rb_node(extent_node_t)	link_szad;
-
-	/* Linkage for the address-ordered tree. */
-	rb_node(extent_node_t)	link_ad;
+	/* Arena from which this extent came, if any. */
+	arena_t			*arena;
 
 	/* Pointer to the extent that this tree node is responsible for. */
 	void			*addr;
 
-	/* Total region size. */
+	/*
+	 * Total region size, or 0 if this node corresponds to an arena chunk.
+	 */
 	size_t			size;
 
-	/* Arena from which this extent came, if any. */
-	arena_t			*arena;
-
 	/*
 	 * 'prof_tctx' and 'zeroed' are never needed at the same time, so
 	 * overlay them in order to fit extent_node_t in one cache line.
@@ -35,6 +31,17 @@
 		/* True if zero-filled; used by chunk recycling code. */
 		bool		zeroed;
 	};
+
+	union {
+		/* Linkage for the size/address-ordered tree. */
+		rb_node(extent_node_t)	link_szad;
+
+		/* Linkage for huge allocations and cached chunks nodes. */
+		ql_elm(extent_node_t)	link_ql;
+	};
+
+	/* Linkage for the address-ordered tree. */
+	rb_node(extent_node_t)	link_ad;
 };
 typedef rb_tree(extent_node_t) extent_tree_t;
 
diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h
index 231cc36..c478d16 100644
--- a/include/jemalloc/internal/huge.h
+++ b/include/jemalloc/internal/huge.h
@@ -27,10 +27,6 @@
 size_t	huge_salloc(const void *ptr);
 prof_tctx_t	*huge_prof_tctx_get(const void *ptr);
 void	huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
-bool	huge_boot(void);
-void	huge_prefork(void);
-void	huge_postfork_parent(void);
-void	huge_postfork_child(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index b8c994c..ab93aa5 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -119,13 +119,6 @@
     false
 #endif
     ;
-static const bool config_ivsalloc =
-#ifdef JEMALLOC_IVSALLOC
-    true
-#else
-    false
-#endif
-    ;
 
 #ifdef JEMALLOC_C11ATOMICS
 #include <stdatomic.h>
@@ -352,9 +345,9 @@
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/quarantine.h"
@@ -378,9 +371,9 @@
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/quarantine.h"
@@ -457,9 +450,9 @@
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/quarantine.h"
@@ -483,6 +476,7 @@
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
 
@@ -777,7 +771,6 @@
 #endif
 
 #include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/rtree.h"
 /*
  * Include portions of arena.h interleaved with tcache.h in order to resolve
  * circular dependencies.
@@ -966,10 +959,14 @@
 JEMALLOC_ALWAYS_INLINE size_t
 ivsalloc(const void *ptr, bool demote)
 {
+	extent_node_t *node;
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
-	if (rtree_get(&chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
+	node = chunk_lookup(CHUNK_ADDR2BASE(ptr));
+	if (node == NULL)
 		return (0);
+	/* Only arena chunks should be looked up via interior pointers. */
+	assert(node->addr == ptr || node->size == 0);
 
 	return (isalloc(ptr, demote));
 }
@@ -999,7 +996,6 @@
 JEMALLOC_ALWAYS_INLINE void
 idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
 {
-	arena_chunk_t *chunk;
 
 	assert(ptr != NULL);
 	if (config_stats && is_metadata) {
@@ -1007,11 +1003,7 @@
 		    config_prof));
 	}
 
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	if (likely(chunk != ptr))
-		arena_dalloc(tsd, chunk, ptr, tcache);
-	else
-		huge_dalloc(tsd, ptr, tcache);
+	arena_dalloc(tsd, ptr, tcache);
 }
 
 JEMALLOC_ALWAYS_INLINE void
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c8d7daf..0f0db8a 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -187,12 +187,6 @@
 #undef JEMALLOC_INTERNAL_FFS
 
 /*
- * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
- * within jemalloc-owned chunks before dereferencing them.
- */
-#undef JEMALLOC_IVSALLOC
-
-/*
  * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
  */
 #undef JEMALLOC_ZONE
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index cf42bea..d5601a6 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -60,6 +60,8 @@
 arena_miscelm_to_rpages
 arena_nbound
 arena_new
+arena_node_alloc
+arena_node_dalloc
 arena_palloc
 arena_postfork_child
 arena_postfork_parent
@@ -103,8 +105,6 @@
 base_alloc
 base_allocated_get
 base_boot
-base_node_alloc
-base_node_dalloc
 base_postfork_child
 base_postfork_parent
 base_prefork
@@ -130,6 +130,7 @@
 chunk_boot
 chunk_dalloc_default
 chunk_dalloc_mmap
+chunk_deregister
 chunk_dss_boot
 chunk_dss_postfork_child
 chunk_dss_postfork_parent
@@ -137,12 +138,13 @@
 chunk_dss_prec_set
 chunk_dss_prefork
 chunk_in_dss
+chunk_lookup
 chunk_npages
 chunk_postfork_child
 chunk_postfork_parent
 chunk_prefork
+chunk_register
 chunk_unmap
-chunks_mtx
 chunks_rtree
 chunksize
 chunksize_mask
@@ -218,16 +220,12 @@
 hash_x86_32
 huge_aalloc
 huge_allocated
-huge_boot
 huge_dalloc
 huge_dalloc_junk
 huge_malloc
 huge_ndalloc
 huge_nmalloc
 huge_palloc
-huge_postfork_child
-huge_postfork_parent
-huge_prefork
 huge_prof_tctx_get
 huge_prof_tctx_set
 huge_ralloc
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index e86e17c..2eb726d 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -37,7 +37,7 @@
 struct rtree_node_elm_s {
 	union {
 		rtree_node_elm_t	*child;
-		void			*val;
+		extent_node_t		*val;
 	};
 };
 
@@ -110,13 +110,14 @@
 rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm);
 rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
     unsigned level);
-void	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm);
-void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val);
+extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm);
+void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
+    const extent_node_t *val);
 rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level);
 rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level);
 
-void	*rtree_get(rtree_t *rtree, uintptr_t key);
-bool	rtree_set(rtree_t *rtree, uintptr_t key, void *val);
+extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key);
+bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
@@ -173,18 +174,18 @@
 	return (child);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_INLINE extent_node_t *
 rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm)
 {
 
-	return (atomic_read_p(&elm->val));
+	return (atomic_read_p((void **)&elm->val));
 }
 
 JEMALLOC_INLINE void
-rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void *val)
+rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
 {
 
-	atomic_write_p(&elm->val, val);
+	atomic_write_p((void **)&elm->val, val);
 }
 
 JEMALLOC_INLINE rtree_node_elm_t *
@@ -210,7 +211,7 @@
 	return (subtree);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_INLINE extent_node_t *
 rtree_get(rtree_t *rtree, uintptr_t key)
 {
 	uintptr_t subkey;
@@ -238,7 +239,7 @@
 }
 
 JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, void *val)
+rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
 {
 	uintptr_t subkey;
 	unsigned i, start_level;
diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h
index 7cba77b..c91dba9 100644
--- a/include/jemalloc/internal/stats.h
+++ b/include/jemalloc/internal/stats.h
@@ -135,21 +135,6 @@
 	malloc_huge_stats_t	*hstats;
 };
 
-struct chunk_stats_s {
-	/* Number of chunks that were allocated. */
-	uint64_t	nchunks;
-
-	/* High-water mark for number of chunks allocated. */
-	size_t		highchunks;
-
-	/*
-	 * Current number of chunks allocated.  This value isn't maintained for
-	 * any other purpose, so keep track of it in order to be able to set
-	 * highchunks.
-	 */
-	size_t		curchunks;
-};
-
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
diff --git a/src/arena.c b/src/arena.c
index 907fbd7..2bd1a2c 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -20,6 +20,7 @@
  * definition.
  */
 
+static void	arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk);
 static void	arena_purge(arena_t *arena, bool all);
 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
     bool cleaned);
@@ -392,8 +393,7 @@
 }
 
 static arena_chunk_t *
-arena_chunk_alloc_internal(arena_t *arena, size_t size, size_t alignment,
-    bool *zero)
+arena_chunk_alloc_internal(arena_t *arena, bool *zero)
 {
 	arena_chunk_t *chunk;
 	chunk_alloc_t *chunk_alloc;
@@ -403,7 +403,16 @@
 	chunk_dalloc = arena->chunk_dalloc;
 	malloc_mutex_unlock(&arena->lock);
 	chunk = (arena_chunk_t *)chunk_alloc_arena(chunk_alloc, chunk_dalloc,
-	    arena->ind, NULL, size, alignment, zero);
+	    arena->ind, NULL, chunksize, chunksize, zero);
+	if (chunk != NULL) {
+		chunk->node.arena = arena;
+		chunk->node.addr = chunk;
+		chunk->node.size = 0; /* Indicates this is an arena chunk. */
+		if (chunk_register(chunk, &chunk->node)) {
+			chunk_dalloc((void *)chunk, chunksize, arena->ind);
+			chunk = NULL;
+		}
+	}
 	malloc_mutex_lock(&arena->lock);
 	if (config_stats && chunk != NULL) {
 		arena->stats.mapped += chunksize;
@@ -423,12 +432,10 @@
 	assert(arena->spare == NULL);
 
 	zero = false;
-	chunk = arena_chunk_alloc_internal(arena, chunksize, chunksize, &zero);
+	chunk = arena_chunk_alloc_internal(arena, &zero);
 	if (chunk == NULL)
 		return (NULL);
 
-	chunk->arena = arena;
-
 	/*
 	 * Initialize the map to contain one maximal free untouched run.  Mark
 	 * the pages as zeroed iff chunk_alloc() returned a zeroed chunk.
@@ -514,6 +521,7 @@
 		}
 		chunk_dalloc = arena->chunk_dalloc;
 		malloc_mutex_unlock(&arena->lock);
+		chunk_deregister(spare, &spare->node);
 		chunk_dalloc((void *)spare, chunksize, arena->ind);
 		malloc_mutex_lock(&arena->lock);
 		if (config_stats) {
@@ -593,6 +601,32 @@
 	arena_huge_malloc_stats_update_undo(arena, usize);
 }
 
+extent_node_t *
+arena_node_alloc(arena_t *arena)
+{
+	extent_node_t *node;
+
+	malloc_mutex_lock(&arena->node_cache_mtx);
+	node = ql_last(&arena->node_cache, link_ql);
+	if (node == NULL) {
+		malloc_mutex_unlock(&arena->node_cache_mtx);
+		return (base_alloc(sizeof(extent_node_t)));
+	}
+	ql_tail_remove(&arena->node_cache, extent_node_t, link_ql);
+	malloc_mutex_unlock(&arena->node_cache_mtx);
+	return (node);
+}
+
+void
+arena_node_dalloc(arena_t *arena, extent_node_t *node)
+{
+
+	malloc_mutex_lock(&arena->node_cache_mtx);
+	ql_elm_new(node, link_ql);
+	ql_tail_insert(&arena->node_cache, node, link_ql);
+	malloc_mutex_unlock(&arena->node_cache_mtx);
+}
+
 void *
 arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
     bool *zero)
@@ -1782,7 +1816,7 @@
 	if (run == bin->runcur)
 		bin->runcur = NULL;
 	else {
-		index_t binind = arena_bin_index(chunk->arena, bin);
+		index_t binind = arena_bin_index(chunk->node.arena, bin);
 		arena_bin_info_t *bin_info = &arena_bin_info[binind];
 
 		if (bin_info->nregs != 1) {
@@ -2123,7 +2157,7 @@
 		arena_t *arena;
 
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-		arena = chunk->arena;
+		arena = chunk->node.arena;
 
 		if (usize < oldsize) {
 			/* Fill before shrinking in order avoid a race. */
@@ -2338,10 +2372,21 @@
 
 	arena->ind = ind;
 	arena->nthreads = 0;
+	if (malloc_mutex_init(&arena->lock))
+		return (NULL);
 	arena->chunk_alloc = chunk_alloc_default;
 	arena->chunk_dalloc = chunk_dalloc_default;
-
-	if (malloc_mutex_init(&arena->lock))
+	ql_new(&arena->huge);
+	if (malloc_mutex_init(&arena->huge_mtx))
+		return (NULL);
+	extent_tree_szad_new(&arena->chunks_szad_mmap);
+	extent_tree_ad_new(&arena->chunks_ad_mmap);
+	extent_tree_szad_new(&arena->chunks_szad_dss);
+	extent_tree_ad_new(&arena->chunks_ad_dss);
+	ql_new(&arena->node_cache);
+	if (malloc_mutex_init(&arena->chunks_mtx))
+		return (NULL);
+	if (malloc_mutex_init(&arena->node_cache_mtx))
 		return (NULL);
 
 	if (config_stats) {
@@ -2551,6 +2596,9 @@
 	unsigned i;
 
 	malloc_mutex_prefork(&arena->lock);
+	malloc_mutex_prefork(&arena->huge_mtx);
+	malloc_mutex_prefork(&arena->chunks_mtx);
+	malloc_mutex_prefork(&arena->node_cache_mtx);
 	for (i = 0; i < NBINS; i++)
 		malloc_mutex_prefork(&arena->bins[i].lock);
 }
@@ -2562,6 +2610,9 @@
 
 	for (i = 0; i < NBINS; i++)
 		malloc_mutex_postfork_parent(&arena->bins[i].lock);
+	malloc_mutex_postfork_parent(&arena->node_cache_mtx);
+	malloc_mutex_postfork_parent(&arena->chunks_mtx);
+	malloc_mutex_postfork_parent(&arena->huge_mtx);
 	malloc_mutex_postfork_parent(&arena->lock);
 }
 
@@ -2572,5 +2623,8 @@
 
 	for (i = 0; i < NBINS; i++)
 		malloc_mutex_postfork_child(&arena->bins[i].lock);
+	malloc_mutex_postfork_child(&arena->node_cache_mtx);
+	malloc_mutex_postfork_child(&arena->chunks_mtx);
+	malloc_mutex_postfork_child(&arena->huge_mtx);
 	malloc_mutex_postfork_child(&arena->lock);
 }
diff --git a/src/base.c b/src/base.c
index 0d1de7f..7b5804e 100644
--- a/src/base.c
+++ b/src/base.c
@@ -11,8 +11,9 @@
 
 /******************************************************************************/
 
+/* base_mtx must be held. */
 static extent_node_t *
-base_node_try_alloc_locked(void)
+base_node_try_alloc(void)
 {
 	extent_node_t *node;
 
@@ -24,8 +25,9 @@
 	return (node);
 }
 
+/* base_mtx must be held. */
 static void
-base_node_dalloc_locked(extent_node_t *node)
+base_node_dalloc(extent_node_t *node)
 {
 
 	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
@@ -42,14 +44,14 @@
 	void *addr;
 
 	assert(minsize != 0);
-	node = base_node_try_alloc_locked();
+	node = base_node_try_alloc();
 	/* Allocate enough space to also carve a node out if necessary. */
 	nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0;
 	csize = CHUNK_CEILING(minsize + nsize);
 	addr = chunk_alloc_base(csize);
 	if (addr == NULL) {
 		if (node != NULL)
-			base_node_dalloc_locked(node);
+			base_node_dalloc(node);
 		return (NULL);
 	}
 	if (node == NULL) {
@@ -63,8 +65,13 @@
 	return (node);
 }
 
-static void *
-base_alloc_locked(size_t size)
+/*
+ * base_alloc() guarantees demand-zeroed memory, in order to make multi-page
+ * sparse data structures such as radix tree nodes efficient with respect to
+ * physical memory usage.
+ */
+void *
+base_alloc(size_t size)
 {
 	void *ret;
 	size_t csize;
@@ -79,6 +86,7 @@
 
 	key.addr = NULL;
 	key.size = csize;
+	malloc_mutex_lock(&base_mtx);
 	node = extent_tree_szad_nsearch(&base_avail_szad, &key);
 	if (node != NULL) {
 		/* Use existing space. */
@@ -87,8 +95,10 @@
 		/* Try to allocate more space. */
 		node = base_chunk_alloc(csize);
 	}
-	if (node == NULL)
-		return (NULL);
+	if (node == NULL) {
+		ret = NULL;
+		goto label_return;
+	}
 
 	ret = node->addr;
 	if (node->size > csize) {
@@ -96,50 +106,15 @@
 		node->size -= csize;
 		extent_tree_szad_insert(&base_avail_szad, node);
 	} else
-		base_node_dalloc_locked(node);
+		base_node_dalloc(node);
 	if (config_stats)
 		base_allocated += csize;
 	JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
-	return (ret);
-}
-
-/*
- * base_alloc() guarantees demand-zeroed memory, in order to make multi-page
- * sparse data structures such as radix tree nodes efficient with respect to
- * physical memory usage.
- */
-void *
-base_alloc(size_t size)
-{
-	void *ret;
-
-	malloc_mutex_lock(&base_mtx);
-	ret = base_alloc_locked(size);
+label_return:
 	malloc_mutex_unlock(&base_mtx);
 	return (ret);
 }
 
-extent_node_t *
-base_node_alloc(void)
-{
-	extent_node_t *ret;
-
-	malloc_mutex_lock(&base_mtx);
-	if ((ret = base_node_try_alloc_locked()) == NULL)
-		ret = (extent_node_t *)base_alloc_locked(sizeof(extent_node_t));
-	malloc_mutex_unlock(&base_mtx);
-	return (ret);
-}
-
-void
-base_node_dalloc(extent_node_t *node)
-{
-
-	malloc_mutex_lock(&base_mtx);
-	base_node_dalloc_locked(node);
-	malloc_mutex_unlock(&base_mtx);
-}
-
 size_t
 base_allocated_get(void)
 {
diff --git a/src/chunk.c b/src/chunk.c
index 9ba0b0c..6f705de 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -7,19 +7,9 @@
 const char	*opt_dss = DSS_DEFAULT;
 size_t		opt_lg_chunk = LG_CHUNK_DEFAULT;
 
-malloc_mutex_t	chunks_mtx;
-chunk_stats_t	stats_chunks;
-
-/*
- * Trees of chunks that were previously allocated (trees differ only in node
- * ordering).  These are used when allocating chunks, in an attempt to re-use
- * address space.  Depending on function, different tree orderings are needed,
- * which is why there are two trees with the same contents.
- */
-static extent_tree_t	chunks_szad_mmap;
-static extent_tree_t	chunks_ad_mmap;
-static extent_tree_t	chunks_szad_dss;
-static extent_tree_t	chunks_ad_dss;
+/* Used exclusively for gdump triggering. */
+static size_t	curchunks;
+static size_t	highchunks;
 
 rtree_t		chunks_rtree;
 
@@ -29,18 +19,51 @@
 size_t		chunk_npages;
 
 /******************************************************************************/
-/*
- * Function prototypes for static functions that are referenced prior to
- * definition.
- */
 
-static void	chunk_dalloc_core(void *chunk, size_t size);
+bool
+chunk_register(const void *chunk, const extent_node_t *node)
+{
 
-/******************************************************************************/
+	assert(node->addr == chunk);
+
+	if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node))
+		return (true);
+	if (config_prof && opt_prof) {
+		size_t nadd = (node->size == 0) ? 1 : node->size / chunksize;
+		size_t cur = atomic_add_z(&curchunks, nadd);
+		size_t high = atomic_read_z(&highchunks);
+		while (cur > high && atomic_cas_z(&highchunks, high, cur)) {
+			/*
+			 * Don't refresh cur, because it may have decreased
+			 * since this thread lost the highchunks update race.
+			 */
+			high = atomic_read_z(&highchunks);
+		}
+		if (cur > high && prof_gdump_get_unlocked())
+			prof_gdump();
+	}
+
+	return (false);
+}
+
+void
+chunk_deregister(const void *chunk, const extent_node_t *node)
+{
+	bool err;
+
+	err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL);
+	assert(!err);
+	if (config_prof && opt_prof) {
+		size_t nsub = (node->size == 0) ? 1 : node->size / chunksize;
+		assert(atomic_read_z(&curchunks) >= nsub);
+		atomic_sub_z(&curchunks, nsub);
+	}
+}
 
 static void *
-chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad,
-    void *new_addr, size_t size, size_t alignment, bool base, bool *zero)
+chunk_recycle(arena_t *arena, extent_tree_t *chunks_szad,
+    extent_tree_t *chunks_ad, void *new_addr, size_t size, size_t alignment,
+    bool *zero)
 {
 	void *ret;
 	extent_node_t *node;
@@ -50,27 +73,17 @@
 
 	assert(new_addr == NULL || alignment == chunksize);
 
-	if (base) {
-		/*
-		 * This function may need to call base_node_{,de}alloc(), but
-		 * the current chunk allocation request is on behalf of the
-		 * base allocator.  Avoid deadlock (and if that weren't an
-		 * issue, potential for infinite recursion) by returning NULL.
-		 */
-		return (NULL);
-	}
-
 	alloc_size = size + alignment - chunksize;
 	/* Beware size_t wrap-around. */
 	if (alloc_size < size)
 		return (NULL);
 	key.addr = new_addr;
 	key.size = alloc_size;
-	malloc_mutex_lock(&chunks_mtx);
+	malloc_mutex_lock(&arena->chunks_mtx);
 	node = (new_addr != NULL) ? extent_tree_ad_search(chunks_ad, &key) :
 	    extent_tree_szad_nsearch(chunks_szad, &key);
 	if (node == NULL) {
-		malloc_mutex_unlock(&chunks_mtx);
+		malloc_mutex_unlock(&arena->chunks_mtx);
 		return (NULL);
 	}
 	leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
@@ -95,20 +108,12 @@
 	if (trailsize != 0) {
 		/* Insert the trailing space as a smaller chunk. */
 		if (node == NULL) {
-			/*
-			 * An additional node is required, but
-			 * base_node_alloc() can cause a new base chunk to be
-			 * allocated.  Drop chunks_mtx in order to avoid
-			 * deadlock, and if node allocation fails, deallocate
-			 * the result before returning an error.
-			 */
-			malloc_mutex_unlock(&chunks_mtx);
-			node = base_node_alloc();
+			node = arena_node_alloc(arena);
 			if (node == NULL) {
-				chunk_dalloc_core(ret, size);
+				malloc_mutex_unlock(&arena->chunks_mtx);
+				chunk_unmap(arena, ret, size);
 				return (NULL);
 			}
-			malloc_mutex_lock(&chunks_mtx);
 		}
 		node->addr = (void *)((uintptr_t)(ret) + size);
 		node->size = trailsize;
@@ -117,10 +122,10 @@
 		extent_tree_ad_insert(chunks_ad, node);
 		node = NULL;
 	}
-	malloc_mutex_unlock(&chunks_mtx);
+	malloc_mutex_unlock(&arena->chunks_mtx);
 
 	if (node != NULL)
-		base_node_dalloc(node);
+		arena_node_dalloc(arena, node);
 	if (*zero) {
 		if (!zeroed)
 			memset(ret, 0, size);
@@ -137,15 +142,15 @@
 }
 
 static void *
-chunk_alloc_core_dss(void *new_addr, size_t size, size_t alignment, bool base,
-    bool *zero)
+chunk_alloc_core_dss(arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero)
 {
 	void *ret;
 
-	if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss,
-	    new_addr, size, alignment, base, zero)) != NULL)
+	if ((ret = chunk_recycle(arena, &arena->chunks_szad_dss,
+	    &arena->chunks_ad_dss, new_addr, size, alignment, zero)) != NULL)
 		return (ret);
-	ret = chunk_alloc_dss(new_addr, size, alignment, zero);
+	ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero);
 	return (ret);
 }
 
@@ -156,7 +161,7 @@
  * them if they are returned.
  */
 static void *
-chunk_alloc_core(void *new_addr, size_t size, size_t alignment, bool base,
+chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment,
     bool *zero, dss_prec_t dss_prec)
 {
 	void *ret;
@@ -168,12 +173,13 @@
 
 	/* "primary" dss. */
 	if (have_dss && dss_prec == dss_prec_primary && (ret =
-	    chunk_alloc_core_dss(new_addr, size, alignment, base, zero)) !=
+	    chunk_alloc_core_dss(arena, new_addr, size, alignment, zero)) !=
 	    NULL)
 		return (ret);
 	/* mmap. */
-	if (!config_munmap && (ret = chunk_recycle(&chunks_szad_mmap,
-	    &chunks_ad_mmap, new_addr, size, alignment, base, zero)) != NULL)
+	if (!config_munmap && (ret = chunk_recycle(arena,
+	    &arena->chunks_szad_mmap, &arena->chunks_ad_mmap, new_addr, size,
+	    alignment, zero)) != NULL)
 		return (ret);
 	/*
 	 * Requesting an address is not implemented for chunk_alloc_mmap(), so
@@ -184,7 +190,7 @@
 		return (ret);
 	/* "secondary" dss. */
 	if (have_dss && dss_prec == dss_prec_secondary && (ret =
-	    chunk_alloc_core_dss(new_addr, size, alignment, base, zero)) !=
+	    chunk_alloc_core_dss(arena, new_addr, size, alignment, zero)) !=
 	    NULL)
 		return (ret);
 
@@ -192,40 +198,6 @@
 	return (NULL);
 }
 
-static bool
-chunk_register(void *chunk, size_t size, bool base)
-{
-
-	assert(chunk != NULL);
-	assert(CHUNK_ADDR2BASE(chunk) == chunk);
-
-	if (config_ivsalloc && !base) {
-		if (rtree_set(&chunks_rtree, (uintptr_t)chunk, chunk))
-			return (true);
-	}
-	if (config_stats || config_prof) {
-		bool gdump;
-		malloc_mutex_lock(&chunks_mtx);
-		if (config_stats)
-			stats_chunks.nchunks += (size / chunksize);
-		stats_chunks.curchunks += (size / chunksize);
-		if (stats_chunks.curchunks > stats_chunks.highchunks) {
-			stats_chunks.highchunks =
-			    stats_chunks.curchunks;
-			if (config_prof)
-				gdump = true;
-		} else if (config_prof)
-			gdump = false;
-		malloc_mutex_unlock(&chunks_mtx);
-		if (config_prof && opt_prof && prof_gdump_get_unlocked() &&
-		    gdump)
-			prof_gdump();
-	}
-	if (config_valgrind)
-		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(chunk, size);
-	return (false);
-}
-
 void *
 chunk_alloc_base(size_t size)
 {
@@ -239,10 +211,10 @@
 	 */
 	zero = true;
 	ret = chunk_alloc_mmap(size, chunksize, &zero);
-	if (ret != NULL && chunk_register(ret, size, true)) {
-		chunk_dalloc_core(ret, size);
-		ret = NULL;
-	}
+	if (ret == NULL)
+		return (NULL);
+	if (config_valgrind)
+		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 
 	return (ret);
 }
@@ -255,18 +227,16 @@
 	void *ret;
 
 	ret = chunk_alloc(new_addr, size, alignment, zero, arena_ind);
-	if (ret != NULL && chunk_register(ret, size, false)) {
-		chunk_dalloc(ret, size, arena_ind);
-		ret = NULL;
-	}
+	if (ret == NULL)
+		return (NULL);
+	if (config_valgrind)
+		JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 
 	return (ret);
 }
 
-/* Default arena chunk allocation routine in the absence of user override. */
-void *
-chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
-    unsigned arena_ind)
+static arena_t *
+chunk_arena_get(unsigned arena_ind)
 {
 	arena_t *arena;
 
@@ -278,32 +248,32 @@
 	 * already.
 	 */
 	assert(arena != NULL);
+	return (arena);
+}
 
-	return (chunk_alloc_core(new_addr, size, alignment, false, zero,
+/* Default arena chunk allocation routine in the absence of user override. */
+void *
+chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero,
+    unsigned arena_ind)
+{
+	arena_t *arena;
+
+	arena = chunk_arena_get(arena_ind);
+	return (chunk_alloc_core(arena, new_addr, size, alignment, zero,
 	    arena->dss_prec));
 }
 
 static void
-chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
-    size_t size)
+chunk_record(arena_t *arena, extent_tree_t *chunks_szad,
+    extent_tree_t *chunks_ad, void *chunk, size_t size)
 {
 	bool unzeroed;
-	extent_node_t *xnode, *node, *prev, *xprev, key;
+	extent_node_t *node, *prev, key;
 
 	unzeroed = pages_purge(chunk, size);
 	JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
 
-	/*
-	 * Allocate a node before acquiring chunks_mtx even though it might not
-	 * be needed, because base_node_alloc() may cause a new base chunk to
-	 * be allocated, which could cause deadlock if chunks_mtx were already
-	 * held.
-	 */
-	xnode = base_node_alloc();
-	/* Use xprev to implement conditional deferred deallocation of prev. */
-	xprev = NULL;
-
-	malloc_mutex_lock(&chunks_mtx);
+	malloc_mutex_lock(&arena->chunks_mtx);
 	key.addr = (void *)((uintptr_t)chunk + size);
 	node = extent_tree_ad_nsearch(chunks_ad, &key);
 	/* Try to coalesce forward. */
@@ -320,17 +290,16 @@
 		extent_tree_szad_insert(chunks_szad, node);
 	} else {
 		/* Coalescing forward failed, so insert a new node. */
-		if (xnode == NULL) {
+		node = arena_node_alloc(arena);
+		if (node == NULL) {
 			/*
-			 * base_node_alloc() failed, which is an exceedingly
+			 * Node allocation failed, which is an exceedingly
 			 * unlikely failure.  Leak chunk; its pages have
 			 * already been purged, so this is only a virtual
 			 * memory leak.
 			 */
 			goto label_return;
 		}
-		node = xnode;
-		xnode = NULL; /* Prevent deallocation below. */
 		node->addr = chunk;
 		node->size = size;
 		node->zeroed = !unzeroed;
@@ -356,37 +325,15 @@
 		node->zeroed = (node->zeroed && prev->zeroed);
 		extent_tree_szad_insert(chunks_szad, node);
 
-		xprev = prev;
+		arena_node_dalloc(arena, prev);
 	}
 
 label_return:
-	malloc_mutex_unlock(&chunks_mtx);
-	/*
-	 * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to
-	 * avoid potential deadlock.
-	 */
-	if (xnode != NULL)
-		base_node_dalloc(xnode);
-	if (xprev != NULL)
-		base_node_dalloc(xprev);
+	malloc_mutex_unlock(&arena->chunks_mtx);
 }
 
 void
-chunk_unmap(void *chunk, size_t size)
-{
-	assert(chunk != NULL);
-	assert(CHUNK_ADDR2BASE(chunk) == chunk);
-	assert(size != 0);
-	assert((size & chunksize_mask) == 0);
-
-	if (have_dss && chunk_in_dss(chunk))
-		chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size);
-	else if (chunk_dalloc_mmap(chunk, size))
-		chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size);
-}
-
-static void
-chunk_dalloc_core(void *chunk, size_t size)
+chunk_unmap(arena_t *arena, void *chunk, size_t size)
 {
 
 	assert(chunk != NULL);
@@ -394,16 +341,13 @@
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
-	if (config_ivsalloc)
-		rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL);
-	if (config_stats || config_prof) {
-		malloc_mutex_lock(&chunks_mtx);
-		assert(stats_chunks.curchunks >= (size / chunksize));
-		stats_chunks.curchunks -= (size / chunksize);
-		malloc_mutex_unlock(&chunks_mtx);
+	if (have_dss && chunk_in_dss(chunk)) {
+		chunk_record(arena, &arena->chunks_szad_dss,
+		    &arena->chunks_ad_dss, chunk, size);
+	} else if (chunk_dalloc_mmap(chunk, size)) {
+		chunk_record(arena, &arena->chunks_szad_mmap,
+		    &arena->chunks_ad_mmap, chunk, size);
 	}
-
-	chunk_unmap(chunk, size);
 }
 
 /* Default arena chunk deallocation routine in the absence of user override. */
@@ -411,7 +355,7 @@
 chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind)
 {
 
-	chunk_dalloc_core(chunk, size);
+	chunk_unmap(chunk_arena_get(arena_ind), chunk, size);
 	return (false);
 }
 
@@ -433,21 +377,11 @@
 	chunksize_mask = chunksize - 1;
 	chunk_npages = (chunksize >> LG_PAGE);
 
-	if (malloc_mutex_init(&chunks_mtx))
-		return (true);
-	if (config_stats || config_prof)
-		memset(&stats_chunks, 0, sizeof(chunk_stats_t));
 	if (have_dss && chunk_dss_boot())
 		return (true);
-	extent_tree_szad_new(&chunks_szad_mmap);
-	extent_tree_ad_new(&chunks_ad_mmap);
-	extent_tree_szad_new(&chunks_szad_dss);
-	extent_tree_ad_new(&chunks_ad_dss);
-	if (config_ivsalloc) {
-		if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) -
-		    opt_lg_chunk, chunks_rtree_node_alloc, NULL))
-			return (true);
-	}
+	if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) -
+	    opt_lg_chunk, chunks_rtree_node_alloc, NULL))
+		return (true);
 
 	return (false);
 }
@@ -456,7 +390,6 @@
 chunk_prefork(void)
 {
 
-	malloc_mutex_prefork(&chunks_mtx);
 	chunk_dss_prefork();
 }
 
@@ -465,7 +398,6 @@
 {
 
 	chunk_dss_postfork_parent();
-	malloc_mutex_postfork_parent(&chunks_mtx);
 }
 
 void
@@ -473,5 +405,4 @@
 {
 
 	chunk_dss_postfork_child();
-	malloc_mutex_postfork_child(&chunks_mtx);
 }
diff --git a/src/chunk_dss.c b/src/chunk_dss.c
index edba3b2..9c3eea8 100644
--- a/src/chunk_dss.c
+++ b/src/chunk_dss.c
@@ -66,7 +66,8 @@
 }
 
 void *
-chunk_alloc_dss(void *new_addr, size_t size, size_t alignment, bool *zero)
+chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment,
+    bool *zero)
 {
 	void *ret;
 
@@ -133,7 +134,7 @@
 				dss_max = dss_next;
 				malloc_mutex_unlock(&dss_mtx);
 				if (cpad_size != 0)
-					chunk_unmap(cpad, cpad_size);
+					chunk_unmap(arena, cpad, cpad_size);
 				if (*zero) {
 					JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(
 					    ret, size);
diff --git a/src/ctl.c b/src/ctl.c
index a283803..cd7927f 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -144,9 +144,6 @@
 CTL_PROTO(prof_reset)
 CTL_PROTO(prof_interval)
 CTL_PROTO(lg_prof_sample)
-CTL_PROTO(stats_chunks_current)
-CTL_PROTO(stats_chunks_total)
-CTL_PROTO(stats_chunks_high)
 CTL_PROTO(stats_arenas_i_small_allocated)
 CTL_PROTO(stats_arenas_i_small_nmalloc)
 CTL_PROTO(stats_arenas_i_small_ndalloc)
@@ -363,12 +360,6 @@
 	{NAME("lg_sample"),	CTL(lg_prof_sample)}
 };
 
-static const ctl_named_node_t stats_chunks_node[] = {
-	{NAME("current"),	CTL(stats_chunks_current)},
-	{NAME("total"),		CTL(stats_chunks_total)},
-	{NAME("high"),		CTL(stats_chunks_high)}
-};
-
 static const ctl_named_node_t stats_arenas_i_metadata_node[] = {
 	{NAME("mapped"),	CTL(stats_arenas_i_metadata_mapped)},
 	{NAME("allocated"),	CTL(stats_arenas_i_metadata_allocated)}
@@ -473,7 +464,6 @@
 	{NAME("active"),	CTL(stats_active)},
 	{NAME("metadata"),	CTL(stats_metadata)},
 	{NAME("mapped"),	CTL(stats_mapped)},
-	{NAME("chunks"),	CHILD(named, stats_chunks)},
 	{NAME("arenas"),	CHILD(indexed, stats_arenas)}
 };
 
@@ -688,14 +678,6 @@
 	unsigned i;
 	VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
 
-	if (config_stats) {
-		malloc_mutex_lock(&chunks_mtx);
-		ctl_stats.chunks.current = stats_chunks.curchunks;
-		ctl_stats.chunks.total = stats_chunks.nchunks;
-		ctl_stats.chunks.high = stats_chunks.highchunks;
-		malloc_mutex_unlock(&chunks_mtx);
-	}
-
 	/*
 	 * Clear sum stats, since they will be merged into by
 	 * ctl_arena_refresh().
@@ -733,7 +715,8 @@
 		    + ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped
 		    + ctl_stats.arenas[ctl_stats.narenas].astats
 		    .metadata_allocated;
-		ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
+		ctl_stats.mapped =
+		    ctl_stats.arenas[ctl_stats.narenas].astats.mapped;
 	}
 
 	ctl_epoch++;
@@ -1950,11 +1933,6 @@
 CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t)
 CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t)
 
-CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current,
-    size_t)
-CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t)
-CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t)
-
 CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *)
 CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
diff --git a/src/huge.c b/src/huge.c
index db0ecd5..0032727 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -2,15 +2,33 @@
 #include "jemalloc/internal/jemalloc_internal.h"
 
 /******************************************************************************/
-/* Data. */
 
-/* Protects chunk-related data structures. */
-static malloc_mutex_t	huge_mtx;
+static extent_node_t *
+huge_node_get(const void *ptr)
+{
+	extent_node_t *node;
 
-/******************************************************************************/
+	node = chunk_lookup(ptr);
+	assert(node->size != 0);
 
-/* Tree of chunks that are stand-alone huge allocations. */
-static extent_tree_t	huge;
+	return (node);
+}
+
+static bool
+huge_node_set(const void *ptr, extent_node_t *node)
+{
+
+	assert(node->addr == ptr);
+	assert(node->size != 0);
+	return (chunk_register(ptr, node));
+}
+
+static void
+huge_node_unset(const void *ptr, const extent_node_t *node)
+{
+
+	chunk_deregister(ptr, node);
+}
 
 void *
 huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
@@ -55,15 +73,22 @@
 		return (NULL);
 	}
 
-	/* Insert node into huge. */
 	node->addr = ret;
 	node->size = usize;
 	node->zeroed = is_zeroed;
 	node->arena = arena;
 
-	malloc_mutex_lock(&huge_mtx);
-	extent_tree_ad_insert(&huge, node);
-	malloc_mutex_unlock(&huge_mtx);
+	if (huge_node_set(ret, node)) {
+		arena_chunk_dalloc_huge(arena, ret, usize);
+		idalloctm(tsd, node, tcache, true);
+		return (NULL);
+	}
+
+	/* Insert node into huge. */
+	malloc_mutex_lock(&arena->huge_mtx);
+	ql_elm_new(node, link_ql);
+	ql_tail_insert(&arena->huge, node, link_ql);
+	malloc_mutex_unlock(&arena->huge_mtx);
 
 	if (zero || (config_fill && unlikely(opt_zero))) {
 		if (!is_zeroed)
@@ -74,32 +99,6 @@
 	return (ret);
 }
 
-static extent_node_t *
-huge_node_locked(const void *ptr)
-{
-	extent_node_t *node, key;
-
-	/* Extract from tree of huge allocations. */
-	key.addr = __DECONST(void *, ptr);
-	node = extent_tree_ad_search(&huge, &key);
-	assert(node != NULL);
-	assert(node->addr == ptr);
-
-	return (node);
-}
-
-static extent_node_t *
-huge_node(const void *ptr)
-{
-	extent_node_t *node;
-
-	malloc_mutex_lock(&huge_mtx);
-	node = huge_node_locked(ptr);
-	malloc_mutex_unlock(&huge_mtx);
-
-	return (node);
-}
-
 #ifdef JEMALLOC_JET
 #undef huge_dalloc_junk
 #define	huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl)
@@ -152,15 +151,15 @@
 	} else
 		zeroed = true;
 
-	malloc_mutex_lock(&huge_mtx);
-	node = huge_node_locked(ptr);
+	node = huge_node_get(ptr);
 	arena = node->arena;
+	malloc_mutex_lock(&arena->huge_mtx);
 	/* Update the size of the huge allocation. */
 	assert(node->size != usize);
 	node->size = usize;
 	/* Clear node->zeroed if zeroing failed above. */
 	node->zeroed = (node->zeroed && zeroed);
-	malloc_mutex_unlock(&huge_mtx);
+	malloc_mutex_unlock(&arena->huge_mtx);
 
 	arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize);
 
@@ -195,14 +194,14 @@
 		zeroed = false;
 	}
 
-	malloc_mutex_lock(&huge_mtx);
-	node = huge_node_locked(ptr);
+	node = huge_node_get(ptr);
 	arena = node->arena;
+	malloc_mutex_lock(&arena->huge_mtx);
 	/* Update the size of the huge allocation. */
 	node->size = usize;
 	/* Clear node->zeroed if zeroing failed above. */
 	node->zeroed = (node->zeroed && zeroed);
-	malloc_mutex_unlock(&huge_mtx);
+	malloc_mutex_unlock(&arena->huge_mtx);
 
 	/* Zap the excess chunks. */
 	arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize);
@@ -221,11 +220,11 @@
 		return (true);
 	}
 
-	malloc_mutex_lock(&huge_mtx);
-	node = huge_node_locked(ptr);
+	node = huge_node_get(ptr);
 	arena = node->arena;
+	malloc_mutex_lock(&arena->huge_mtx);
 	is_zeroed_subchunk = node->zeroed;
-	malloc_mutex_unlock(&huge_mtx);
+	malloc_mutex_unlock(&arena->huge_mtx);
 
 	/*
 	 * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so
@@ -237,10 +236,10 @@
 	     &is_zeroed_chunk))
 		return (true);
 
-	malloc_mutex_lock(&huge_mtx);
+	malloc_mutex_lock(&arena->huge_mtx);
 	/* Update the size of the huge allocation. */
 	node->size = usize;
-	malloc_mutex_unlock(&huge_mtx);
+	malloc_mutex_unlock(&arena->huge_mtx);
 
 	if (zero || (config_fill && unlikely(opt_zero))) {
 		if (!is_zeroed_subchunk) {
@@ -356,11 +355,14 @@
 huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
 {
 	extent_node_t *node;
+	arena_t *arena;
 
-	malloc_mutex_lock(&huge_mtx);
-	node = huge_node_locked(ptr);
-	extent_tree_ad_remove(&huge, node);
-	malloc_mutex_unlock(&huge_mtx);
+	node = huge_node_get(ptr);
+	arena = node->arena;
+	huge_node_unset(ptr, node);
+	malloc_mutex_lock(&arena->huge_mtx);
+	ql_remove(&arena->huge, node, link_ql);
+	malloc_mutex_unlock(&arena->huge_mtx);
 
 	huge_dalloc_junk(node->addr, node->size);
 	arena_chunk_dalloc_huge(node->arena, node->addr, node->size);
@@ -371,59 +373,50 @@
 huge_aalloc(const void *ptr)
 {
 
-	return (huge_node(ptr)->arena);
+	return (huge_node_get(ptr)->arena);
 }
 
 size_t
 huge_salloc(const void *ptr)
 {
+	size_t size;
+	extent_node_t *node;
+	arena_t *arena;
 
-	return (huge_node(ptr)->size);
+	node = huge_node_get(ptr);
+	arena = node->arena;
+	malloc_mutex_lock(&arena->huge_mtx);
+	size = node->size;
+	malloc_mutex_unlock(&arena->huge_mtx);
+
+	return (size);
 }
 
 prof_tctx_t *
 huge_prof_tctx_get(const void *ptr)
 {
+	prof_tctx_t *tctx;
+	extent_node_t *node;
+	arena_t *arena;
 
-	return (huge_node(ptr)->prof_tctx);
+	node = huge_node_get(ptr);
+	arena = node->arena;
+	malloc_mutex_lock(&arena->huge_mtx);
+	tctx = node->prof_tctx;
+	malloc_mutex_unlock(&arena->huge_mtx);
+
+	return (tctx);
 }
 
 void
 huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
 {
+	extent_node_t *node;
+	arena_t *arena;
 
-	huge_node(ptr)->prof_tctx = tctx;
-}
-
-bool
-huge_boot(void)
-{
-
-	/* Initialize chunks data. */
-	if (malloc_mutex_init(&huge_mtx))
-		return (true);
-	extent_tree_ad_new(&huge);
-
-	return (false);
-}
-
-void
-huge_prefork(void)
-{
-
-	malloc_mutex_prefork(&huge_mtx);
-}
-
-void
-huge_postfork_parent(void)
-{
-
-	malloc_mutex_postfork_parent(&huge_mtx);
-}
-
-void
-huge_postfork_child(void)
-{
-
-	malloc_mutex_postfork_child(&huge_mtx);
+	node = huge_node_get(ptr);
+	arena = node->arena;
+	malloc_mutex_lock(&arena->huge_mtx);
+	node->prof_tctx = tctx;
+	malloc_mutex_unlock(&arena->huge_mtx);
 }
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 9447791..3903209 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1195,8 +1195,6 @@
 		return (true);
 	if (config_tcache && tcache_boot())
 		malloc_mutex_unlock(&init_lock);
-	if (huge_boot())
-		return (true);
 	if (malloc_mutex_init(&arenas_lock))
 		return (true);
 	/*
@@ -2310,12 +2308,10 @@
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 
-	if (config_ivsalloc)
+	if (config_debug)
 		usize = ivsalloc(ptr, config_prof);
-	else {
-		assert(ptr != NULL);
+	else
 		usize = isalloc(ptr, config_prof);
-	}
 
 	return (usize);
 }
@@ -2440,10 +2436,10 @@
 	assert(malloc_initialized() || IS_INITIALIZER);
 	malloc_thread_init();
 
-	if (config_ivsalloc)
+	if (config_debug)
 		ret = ivsalloc(ptr, config_prof);
 	else
-		ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0;
+		ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof);
 
 	return (ret);
 }
@@ -2504,7 +2500,6 @@
 	}
 	chunk_prefork();
 	base_prefork();
-	huge_prefork();
 }
 
 #ifndef JEMALLOC_MUTEX_INIT_CB
@@ -2524,7 +2519,6 @@
 	assert(malloc_initialized());
 
 	/* Release all mutexes, now that fork() has completed. */
-	huge_postfork_parent();
 	base_postfork_parent();
 	chunk_postfork_parent();
 	for (i = 0; i < narenas_total; i++) {
@@ -2544,7 +2538,6 @@
 	assert(malloc_initialized());
 
 	/* Release all mutexes, now that fork() has completed. */
-	huge_postfork_child();
 	base_postfork_child();
 	chunk_postfork_child();
 	for (i = 0; i < narenas_total; i++) {
diff --git a/src/stats.c b/src/stats.c
index 865f775..e0f7165 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -547,8 +547,6 @@
 	if (config_stats) {
 		size_t *cactive;
 		size_t allocated, active, metadata, mapped;
-		size_t chunks_current, chunks_high;
-		uint64_t chunks_total;
 
 		CTL_GET("stats.cactive", &cactive, size_t *);
 		CTL_GET("stats.allocated", &allocated, size_t);
@@ -561,16 +559,6 @@
 		malloc_cprintf(write_cb, cbopaque,
 		    "Current active ceiling: %zu\n", atomic_read_z(cactive));
 
-		/* Print chunk stats. */
-		CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
-		CTL_GET("stats.chunks.high", &chunks_high, size_t);
-		CTL_GET("stats.chunks.current", &chunks_current, size_t);
-		malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
-		    "highchunks    curchunks\n");
-		malloc_cprintf(write_cb, cbopaque,
-		    "  %13"PRIu64" %12zu %12zu\n",
-		    chunks_total, chunks_high, chunks_current);
-
 		if (merged) {
 			unsigned narenas;
 
diff --git a/src/tcache.c b/src/tcache.c
index 1166d60..10c85dd 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -102,7 +102,7 @@
 		/* Lock the arena bin associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
 		    tbin->avail[0]);
-		arena_t *bin_arena = chunk->arena;
+		arena_t *bin_arena = chunk->node.arena;
 		arena_bin_t *bin = &bin_arena->bins[binind];
 
 		if (config_prof && bin_arena == arena) {
@@ -124,7 +124,7 @@
 			ptr = tbin->avail[i];
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-			if (chunk->arena == bin_arena) {
+			if (chunk->node.arena == bin_arena) {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
 				arena_chunk_map_bits_t *bitselm =
@@ -182,7 +182,7 @@
 		/* Lock the arena associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
 		    tbin->avail[0]);
-		arena_t *locked_arena = chunk->arena;
+		arena_t *locked_arena = chunk->node.arena;
 		UNUSED bool idump;
 
 		if (config_prof)
@@ -208,7 +208,7 @@
 			ptr = tbin->avail[i];
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-			if (chunk->arena == locked_arena) {
+			if (chunk->node.arena == locked_arena) {
 				arena_dalloc_large_junked_locked(locked_arena,
 				    chunk, ptr);
 			} else {
diff --git a/test/unit/stats.c b/test/unit/stats.c
index 946e737..1099967 100644
--- a/test/unit/stats.c
+++ b/test/unit/stats.c
@@ -29,32 +29,6 @@
 }
 TEST_END
 
-TEST_BEGIN(test_stats_chunks)
-{
-	size_t current, high;
-	uint64_t total;
-	size_t sz;
-	int expected = config_stats ? 0 : ENOENT;
-
-	sz = sizeof(size_t);
-	assert_d_eq(mallctl("stats.chunks.current", &current, &sz, NULL, 0),
-	    expected, "Unexpected mallctl() result");
-	sz = sizeof(uint64_t);
-	assert_d_eq(mallctl("stats.chunks.total", &total, &sz, NULL, 0),
-	    expected, "Unexpected mallctl() result");
-	sz = sizeof(size_t);
-	assert_d_eq(mallctl("stats.chunks.high", &high, &sz, NULL, 0), expected,
-	    "Unexpected mallctl() result");
-
-	if (config_stats) {
-		assert_zu_le(current, high,
-		    "current should be no larger than high");
-		assert_u64_le((uint64_t)high, total,
-		    "high should be no larger than total");
-	}
-}
-TEST_END
-
 TEST_BEGIN(test_stats_huge)
 {
 	void *p;
@@ -458,7 +432,6 @@
 
 	return (test(
 	    test_stats_summary,
-	    test_stats_chunks,
 	    test_stats_huge,
 	    test_stats_arenas_summary,
 	    test_stats_arenas_small,