Use mremap(2) for huge realloc().

If mremap(2) is available and supports MREMAP_FIXED, use it for huge
realloc().

Initialize rtree later during bootstrapping, so that --enable-debug
--enable-dss works.

Fix a minor swap_avail stats bug.
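
For reference, a minimal standalone sketch (not part of this patch) of the
relocation pattern huge_ralloc() relies on below: map a destination region,
then ask the kernel to move the old pages onto it with
MREMAP_MAYMOVE|MREMAP_FIXED instead of copying them with memcpy().  Assumes
Linux with _GNU_SOURCE; the identifiers src/dst and the 4 MiB size are
illustrative only, not jemalloc code.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int
main(void)
{
	size_t size = 4 << 20;	/* Stand-in for a huge (chunk-sized) region. */
	char *src, *dst;

	/* Source region, filled with data that must survive the move. */
	src = mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	/* Destination region, as huge_ralloc() would have just allocated. */
	dst = mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED || dst == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	memset(src, 'a', size);

	/*
	 * Move the source pages onto the destination.  MREMAP_FIXED requires
	 * MREMAP_MAYMOVE; on success the old mapping is gone and its pages
	 * are visible at dst, so no byte-by-byte copy happens.
	 */
	if (mremap(src, size, size, MREMAP_MAYMOVE|MREMAP_FIXED,
	    dst) == MAP_FAILED) {
		perror("mremap");
		return (1);
	}

	printf("dst[0] after remap: %c\n", dst[0]);	/* Prints 'a'. */
	munmap(dst, size);
	return (0);
}
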
diff --git a/.gitignore b/.gitignore
index f3ee5ae..d6fa8fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,6 @@
 /jemalloc/test/*.[od]
 /jemalloc/test/*.out
 /jemalloc/test/[a-z]*
+!/jemalloc/test/*.c
+!/jemalloc/test/*.exp
 /jemalloc/VERSION
diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in
index 335f6f6..ee674b3 100644
--- a/jemalloc/Makefile.in
+++ b/jemalloc/Makefile.in
@@ -64,7 +64,7 @@
 DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
 DOCS := $(DOCS_HTML) $(DOCS_MAN3)
 CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
-	@srcroot@test/posix_memalign.c \
+	@srcroot@test/mremap.c @srcroot@test/posix_memalign.c \
 	@srcroot@test/rallocm.c @srcroot@test/thread_arena.c
 
 .PHONY: all dist doc_html doc_man doc
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index 7aea6a8..46a2bd4 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -227,6 +227,16 @@
 AC_SUBST([abi])
 AC_SUBST([RPATH])
 
+JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
+#define _GNU_SOURCE
+#include <sys/mman.h>
+], [
+void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+], [mremap_fixed])
+if test "x${mremap_fixed}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_MREMAP_FIXED])
+fi
+
 dnl Support optional additions to rpath.
 AC_ARG_WITH([rpath],
   [AS_HELP_STRING([--with-rpath=<rpath>], [Colon-separated rpath (ELF systems only)])],
diff --git a/jemalloc/include/jemalloc/internal/chunk_dss.h b/jemalloc/include/jemalloc/internal/chunk_dss.h
index 6be4ad1..6f00522 100644
--- a/jemalloc/include/jemalloc/internal/chunk_dss.h
+++ b/jemalloc/include/jemalloc/internal/chunk_dss.h
@@ -17,6 +17,7 @@
 extern malloc_mutex_t	dss_mtx;
 
 void	*chunk_alloc_dss(size_t size, bool *zero);
+bool	chunk_in_dss(void *chunk);
 bool	chunk_dealloc_dss(void *chunk, size_t size);
 bool	chunk_dss_boot(void);
 
diff --git a/jemalloc/include/jemalloc/internal/chunk_swap.h b/jemalloc/include/jemalloc/internal/chunk_swap.h
index d50cb19..9faa739 100644
--- a/jemalloc/include/jemalloc/internal/chunk_swap.h
+++ b/jemalloc/include/jemalloc/internal/chunk_swap.h
@@ -20,6 +20,7 @@
 #endif
 
 void	*chunk_alloc_swap(size_t size, bool *zero);
+bool	chunk_in_swap(void *chunk);
 bool	chunk_dealloc_swap(void *chunk, size_t size);
 bool	chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed);
 bool	chunk_swap_boot(void);
diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h
index bf23127..66544cf 100644
--- a/jemalloc/include/jemalloc/internal/huge.h
+++ b/jemalloc/include/jemalloc/internal/huge.h
@@ -25,7 +25,7 @@
     size_t extra);
 void	*huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
     size_t alignment, bool zero);
-void	huge_dalloc(void *ptr);
+void	huge_dalloc(void *ptr, bool unmap);
 size_t	huge_salloc(const void *ptr);
 #ifdef JEMALLOC_PROF
 prof_ctx_t	*huge_prof_ctx_get(const void *ptr);
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 3d25300..0680b43 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -666,7 +666,7 @@
 	if (chunk != ptr)
 		arena_dalloc(chunk->arena, chunk, ptr);
 	else
-		huge_dalloc(ptr);
+		huge_dalloc(ptr, true);
 }
 
 JEMALLOC_INLINE void *
diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in
index b8f3f36..5f46c5c 100644
--- a/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -115,6 +115,9 @@
 #undef JEMALLOC_ZONE
 #undef JEMALLOC_ZONE_VERSION
 
+/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */
+#undef JEMALLOC_MREMAP_FIXED
+
 /*
  * Methods for purging unused pages differ between operating systems.
  *
diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c
index 00bf50a..301519e 100644
--- a/jemalloc/src/chunk.c
+++ b/jemalloc/src/chunk.c
@@ -146,11 +146,6 @@
 	chunksize_mask = chunksize - 1;
 	chunk_npages = (chunksize >> PAGE_SHIFT);
 
-#ifdef JEMALLOC_IVSALLOC
-	chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
-	if (chunks_rtree == NULL)
-		return (true);
-#endif
 #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 	if (malloc_mutex_init(&chunks_mtx))
 		return (true);
@@ -166,6 +161,11 @@
 	if (chunk_dss_boot())
 		return (true);
 #endif
+#ifdef JEMALLOC_IVSALLOC
+	chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
+	if (chunks_rtree == NULL)
+		return (true);
+#endif
 
 	return (false);
 }
diff --git a/jemalloc/src/chunk_dss.c b/jemalloc/src/chunk_dss.c
index d9bd63c..5c0e290 100644
--- a/jemalloc/src/chunk_dss.c
+++ b/jemalloc/src/chunk_dss.c
@@ -200,6 +200,22 @@
 }
 
 bool
+chunk_in_dss(void *chunk)
+{
+	bool ret;
+
+	malloc_mutex_lock(&dss_mtx);
+	if ((uintptr_t)chunk >= (uintptr_t)dss_base
+	    && (uintptr_t)chunk < (uintptr_t)dss_max)
+		ret = true;
+	else
+		ret = false;
+	malloc_mutex_unlock(&dss_mtx);
+
+	return (ret);
+}
+
+bool
 chunk_dealloc_dss(void *chunk, size_t size)
 {
 	bool ret;
diff --git a/jemalloc/src/chunk_swap.c b/jemalloc/src/chunk_swap.c
index ee038ba..cb25ae0 100644
--- a/jemalloc/src/chunk_swap.c
+++ b/jemalloc/src/chunk_swap.c
@@ -185,6 +185,24 @@
 }
 
 bool
+chunk_in_swap(void *chunk)
+{
+	bool ret;
+
+	assert(swap_enabled);
+
+	malloc_mutex_lock(&swap_mtx);
+	if ((uintptr_t)chunk >= (uintptr_t)swap_base
+	    && (uintptr_t)chunk < (uintptr_t)swap_max)
+		ret = true;
+	else
+		ret = false;
+	malloc_mutex_unlock(&swap_mtx);
+
+	return (ret);
+}
+
+bool
 chunk_dealloc_swap(void *chunk, size_t size)
 {
 	bool ret;
@@ -219,15 +237,15 @@
 		} else
 			madvise(chunk, size, MADV_DONTNEED);
 
+#ifdef JEMALLOC_STATS
+		swap_avail += size;
+#endif
 		ret = false;
 		goto RETURN;
 	}
 
 	ret = true;
 RETURN:
-#ifdef JEMALLOC_STATS
-	swap_avail += size;
-#endif
 	malloc_mutex_unlock(&swap_mtx);
 	return (ret);
 }
diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c
index a035197..0aadc43 100644
--- a/jemalloc/src/huge.c
+++ b/jemalloc/src/huge.c
@@ -215,13 +215,56 @@
 	 * expectation that the extra bytes will be reliably preserved.
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
-	memcpy(ret, ptr, copysize);
-	idalloc(ptr);
+
+	/*
+	 * Use mremap(2) if this is a huge-->huge reallocation, and neither the
+	 * source nor the destination is in swap or dss.
+	 */
+#ifdef JEMALLOC_MREMAP_FIXED
+	if (oldsize >= chunksize
+#  ifdef JEMALLOC_SWAP
+	    && (swap_enabled == false || (chunk_in_swap(ptr) == false &&
+	    chunk_in_swap(ret) == false))
+#  endif
+#  ifdef JEMALLOC_DSS
+	    && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false
+#  endif
+	    ) {
+		size_t newsize = huge_salloc(ret);
+
+		if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
+		    ret) == MAP_FAILED) {
+			/*
+			 * Assuming no chunk management bugs in the allocator,
+			 * the only documented way an error can occur here is
+			 * if the application changed the map type for a
+			 * portion of the old allocation.  This is firmly in
+			 * undefined behavior territory, so write a diagnostic
+			 * message, and optionally abort.
+			 */
+			char buf[BUFERROR_BUF];
+
+			buferror(errno, buf, sizeof(buf));
+			malloc_write("<jemalloc>: Error in mremap(): ");
+			malloc_write(buf);
+			malloc_write("\n");
+			if (opt_abort)
+				abort();
+			memcpy(ret, ptr, copysize);
+			idalloc(ptr);
+		} else
+			huge_dalloc(ptr, false);
+	} else
+#endif
+	{
+		memcpy(ret, ptr, copysize);
+		idalloc(ptr);
+	}
 	return (ret);
 }
 
 void
-huge_dalloc(void *ptr)
+huge_dalloc(void *ptr, bool unmap)
 {
 	extent_node_t *node, key;
 
@@ -241,14 +284,16 @@
 
 	malloc_mutex_unlock(&huge_mtx);
 
+	if (unmap) {
 	/* Unmap chunk. */
 #ifdef JEMALLOC_FILL
 #if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
-	if (opt_junk)
-		memset(node->addr, 0x5a, node->size);
+		if (opt_junk)
+			memset(node->addr, 0x5a, node->size);
 #endif
 #endif
-	chunk_dealloc(node->addr, node->size);
+		chunk_dealloc(node->addr, node->size);
+	}
 
 	base_node_dealloc(node);
 }
diff --git a/jemalloc/test/mremap.c b/jemalloc/test/mremap.c
new file mode 100644
index 0000000..146c66f
--- /dev/null
+++ b/jemalloc/test/mremap.c
@@ -0,0 +1,67 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+int
+main(void)
+{
+	int ret, err;
+	size_t sz, lg_chunk, chunksize, i;
+	char *p, *q;
+
+	fprintf(stderr, "Test begin\n");
+
+	sz = sizeof(lg_chunk);
+	if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL,
+	    0))) {
+		assert(err != ENOENT);
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		ret = 1;
+		goto RETURN;
+	}
+	chunksize = ((size_t)1U) << lg_chunk;
+
+	p = (char *)malloc(chunksize);
+	if (p == NULL) {
+		fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p);
+		ret = 1;
+		goto RETURN;
+	}
+	memset(p, 'a', chunksize);
+
+	q = (char *)realloc(p, chunksize * 2);
+	if (q == NULL) {
+		fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2,
+		    q);
+		ret = 1;
+		goto RETURN;
+	}
+	for (i = 0; i < chunksize; i++) {
+		assert(q[i] == 'a');
+	}
+
+	p = q;
+
+	q = (char *)realloc(p, chunksize);
+	if (q == NULL) {
+		fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q);
+		ret = 1;
+		goto RETURN;
+	}
+	for (i = 0; i < chunksize; i++) {
+		assert(q[i] == 'a');
+	}
+
+	free(q);
+
+	ret = 0;
+RETURN:
+	fprintf(stderr, "Test end\n");
+	return (ret);
+}
diff --git a/jemalloc/test/mremap.exp b/jemalloc/test/mremap.exp
new file mode 100644
index 0000000..369a88d
--- /dev/null
+++ b/jemalloc/test/mremap.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end