Port to Mac OS X.

Add Mac OS X support, based in large part on the OS X support in
Mozilla's version of jemalloc.
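
On OS X, rather than trying to interpose on the malloc/free symbols, jemalloc
registers its own malloc zone and converts the default szone into an "overlay
zone" (see src/zone.c and the JEMALLOC_ZONE block in src/jemalloc.c below).
For orientation, a minimal sketch of that bootstrap path, using only functions
that appear in this patch plus the standard <malloc/malloc.h> zone API
(illustrative only, not a drop-in):

    #include <malloc/malloc.h>	/* malloc_zone_register(), malloc_default_zone() */

    /* Sketch; the real code runs from malloc_init_hard() and the
     * jemalloc_darwin_init() constructor added in src/jemalloc.c. */
    static void
    darwin_bootstrap_sketch(void)
    {
        /* Make jemalloc's zone visible to the zone APIs. */
        malloc_zone_register(create_zone());

        /*
         * Turn the default szone into an overlay zone: it can still free
         * objects it allocated earlier, but new allocations go to jemalloc.
         */
        szone2ozone(malloc_default_zone());
    }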
diff --git a/jemalloc/COPYING b/jemalloc/COPYING
index 1baaf50..10ade12 100644
--- a/jemalloc/COPYING
+++ b/jemalloc/COPYING
@@ -3,6 +3,7 @@
 --------------------------------------------------------------------------------
 Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>.
 All rights reserved.
+Copyright (C) 2007-2010 Mozilla Foundation.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
index eec3b37..e9a8798 100644
--- a/jemalloc/INSTALL
+++ b/jemalloc/INSTALL
@@ -31,6 +31,10 @@
     becomes <prefix>malloc().  This makes it possible to use jemalloc at the
     same time as the system allocator.
 
+    By default, the prefix is "", except on OS X, where it is "je_".  On OS X,
+    jemalloc overlays the default malloc zone, but makes no attempt to actually
+    replace the "malloc", "calloc", etc. symbols.
+
 --with-install-suffix=<suffix>
     Append <suffix> to the base name of all installed files, such that multiple
     versions of jemalloc can coexist in the same installation directory.  For
diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in
index a7acc96..aa3bf6b 100644
--- a/jemalloc/Makefile.in
+++ b/jemalloc/Makefile.in
@@ -28,10 +28,17 @@
 RPATH_EXTRA := @RPATH_EXTRA@
 ifeq (macho, @abi@)
 SO := dylib
+WL_SONAME := dylib_install_name
 else
 SO := so
+WL_SONAME := soname
 endif
 REV := 0
+ifeq (macho, @abi@)
+TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib
+else
+TEST_LIBRARY_PATH :=
+endif
 
 # Lists of files.
 BINS := @srcroot@bin/pprof
@@ -42,15 +49,16 @@
 	@srcroot@src/chunk_mmap.c @srcroot@src/chunk_swap.c @srcroot@src/ckh.c \
 	@srcroot@src/ctl.c @srcroot@src/extent.c @srcroot@src/hash.c \
 	@srcroot@src/huge.c @srcroot@src/mb.c @srcroot@src/mutex.c \
-	@srcroot@src/prof.c @srcroot@src/stats.c @srcroot@src/tcache.c
-DSOS := @objroot@lib/libjemalloc@install_suffix@.so.$(REV) \
-	@objroot@lib/libjemalloc@install_suffix@.so \
+	@srcroot@src/prof.c @srcroot@src/rtree.c \
+	@srcroot@src/stats.c @srcroot@src/tcache.c
+ifeq (macho, @abi@)
+CSRCS += @srcroot@src/zone.c
+endif
+DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
+	@objroot@lib/libjemalloc@install_suffix@.$(SO) \
 	@objroot@lib/libjemalloc@install_suffix@_pic.a
 MAN3 := @objroot@doc/jemalloc@install_suffix@.3
-CTESTS :=
-ifeq (1, @enable_tls@)
-CTESTS += @srcroot@test/thread_arena.c
-endif
+CTESTS := @srcroot@test/thread_arena.c
 
 .PHONY: all dist install check clean distclean relclean
 
@@ -67,13 +75,13 @@
 	$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
 	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
 
-%.so : %.so.$(REV)
+%.$(SO) : %.$(SO).$(REV)
 	@mkdir -p $(@D)
 	ln -sf $(<F) $@
 
-@objroot@lib/libjemalloc@install_suffix@.so.$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.o)
+@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.o)
 	@mkdir -p $(@D)
-	$(CC) -shared -Wl,-soname,$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
+	$(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
 
 @objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
 	@mkdir -p $(@D)
@@ -85,9 +93,13 @@
 	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
 
 @objroot@test/%: @objroot@test/%.o \
-		 @objroot@lib/libjemalloc@install_suffix@.so
+		 @objroot@lib/libjemalloc@install_suffix@.$(SO)
 	@mkdir -p $(@D)
-	$(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc
+ifneq (@RPATH@, )
+	$(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@
+else
+	$(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@
+endif
 
 install_bin:
 	install -d $(BINDIR)
@@ -105,8 +117,8 @@
 
 install_lib: $(DSOS)
 	install -d $(LIBDIR)
-	install -m 755 @objroot@lib/libjemalloc@install_suffix@.so.$(REV) $(LIBDIR)
-	ln -sf libjemalloc@install_suffix@.so.$(REV) $(LIBDIR)/libjemalloc@install_suffix@.so
+	install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
+	ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
 	install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
 
 install_man:
@@ -128,7 +140,7 @@
 		for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \
 			total=`expr $$total + 1`; \
 			/bin/echo -n "$${t} ... "; \
-			$${t} @abs_srcroot@ @abs_objroot@ \
+			$(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \
 			  > @objroot@$${t}.out 2>&1; \
 			if test -e "@srcroot@$${t}.exp"; then \
 				diff -u @srcroot@$${t}.exp \
@@ -161,8 +173,7 @@
 	rm -rf @objroot@autom4te.cache
 	rm -f @objroot@config.log
 	rm -f @objroot@config.status
-	rm -f @objroot@cfghdrs.stamp
-	rm -f @objroot@cfgoutputs.stamp
+	rm -f @objroot@config.stamp
 	rm -f @cfghdrs_out@
 	rm -f @cfgoutputs_out@
 
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index bf16596..21f502c 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -150,7 +150,7 @@
               [attribute])
 if test "x${attribute}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
-  if test "x$GCC" = "xyes" ; then
+  if test "x$GCC" = "xyes" -a "x${abi}" = "xelf"; then
     JE_CFLAGS_APPEND([-fvisibility=internal])
   fi
 fi
@@ -166,17 +166,20 @@
   *-*-darwin*)
 	CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
 	abi="macho"
+	AC_DEFINE([JEMALLOC_PURGE_MSYNC_KILLPAGES])
 	RPATH=""
 	;;
   *-*-freebsd*)
 	CFLAGS="$CFLAGS"
 	abi="elf"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
 	RPATH="-Wl,-rpath,"
 	;;
   *-*-linux*)
 	CFLAGS="$CFLAGS"
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED])
 	RPATH="-Wl,-rpath,"
 	;;
   *-*-netbsd*)
@@ -191,6 +194,7 @@
                           [CFLAGS="$CFLAGS"; abi="elf"],
                           [abi="aout"])
 	AC_MSG_RESULT([$abi])
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
 	RPATH="-Wl,-rpath,"
 	;;
   *-*-solaris2*)
@@ -245,7 +249,11 @@
 AC_ARG_WITH([jemalloc_prefix],
   [AS_HELP_STRING([--with-jemalloc-prefix=<prefix>], [Prefix to prepend to all public APIs])],
   [JEMALLOC_PREFIX="$with_jemalloc_prefix"],
-  [JEMALLOC_PREFIX=]
+  [if test "x$abi" != "xmacho" ; then
+  JEMALLOC_PREFIX=""
+else
+  JEMALLOC_PREFIX="je_"
+fi]
 )
 if test "x$JEMALLOC_PREFIX" != "x" ; then
   AC_DEFINE([JEMALLOC_PREFIX], [ ])
@@ -294,6 +302,7 @@
 )
 if test "x$enable_debug" = "x1" ; then
   AC_DEFINE([JEMALLOC_DEBUG], [ ])
+  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
 fi
 AC_SUBST([enable_debug])
 
@@ -379,7 +388,44 @@
 fi,
   LUNWIND="-lunwind"
 )
-dnl Finish prof-related definitions below, once TLS configuration is done.
+if test "x$enable_prof" = "x1" ; then
+  LIBS="$LIBS -lm"
+  AC_DEFINE([JEMALLOC_PROF], [ ])
+  if test "x$enable_prof_libunwind" = "x1" ; then
+    AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
+    if test "x$LUNWIND" = "x-lunwind" ; then
+      AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
+                   [enable_prof_libunwind="0"])
+    else
+      LIBS="$LIBS $LUNWIND"
+    fi
+    if test "x${enable_prof_libunwind}" = "x1" ; then
+      AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
+    fi
+  fi
+fi
+AC_SUBST([enable_prof])
+if test "x$enable_prof" = "x0" ; then
+  roff_prof=".\\\" "
+  roff_no_prof=""
+else
+  roff_prof=""
+  roff_no_prof=".\\\" "
+fi
+AC_SUBST([roff_prof])
+AC_SUBST([roff_no_prof])
+
+dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics
+dnl for backtracing.
+if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \
+ -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then
+  enable_prof_libgcc="1"
+  AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
+  AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
+  if test "x${enable_prof_libgcc}" = "x1" ; then
+    AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
+  fi
+fi
 
 dnl Enable tiny allocations by default.
 AC_ARG_ENABLE([tiny],
@@ -417,7 +463,19 @@
 ],
 [enable_tcache="1"]
 )
-dnl Finish tcache-related definitions below, once TLS configuration is done.
+if test "x$enable_tcache" = "x1" ; then
+  AC_DEFINE([JEMALLOC_TCACHE], [ ])
+fi
+AC_SUBST([enable_tcache])
+if test "x$enable_tcache" = "x0" ; then
+  roff_tcache=".\\\" "
+  roff_no_tcache=""
+else
+  roff_tcache=""
+  roff_no_tcache=".\\\" "
+fi
+AC_SUBST([roff_tcache])
+AC_SUBST([roff_no_tcache])
 
 dnl Do not enable mmap()ped swap files by default.
 AC_ARG_ENABLE([swap],
@@ -650,71 +708,52 @@
 AC_SUBST([enable_tls])
 if test "x${enable_tls}" = "x0" ; then
   AC_DEFINE_UNQUOTED([NO_TLS], [ ])
-  roff_tls=".\\\" "
-else
-  roff_tls=""
 fi
-AC_SUBST([roff_tls])
 
-dnl Finish tcache-related definitions, now that TLS configuration is done.
-if test "x$enable_tls" = "x0" ; then
-  enable_tcache="0"
-fi
-if test "x$enable_tcache" = "x1" ; then
-  AC_DEFINE([JEMALLOC_TCACHE], [ ])
-fi
-AC_SUBST([enable_tcache])
-if test "x$enable_tcache" = "x0" ; then
-  roff_tcache=".\\\" "
-  roff_no_tcache=""
-else
-  roff_tcache=""
-  roff_no_tcache=".\\\" "
-fi
-AC_SUBST([roff_tcache])
-AC_SUBST([roff_no_tcache])
+dnl ============================================================================
+dnl Darwin-related configuration.
 
-dnl Finish prof-related definitions, now that TLS configuration is done.
-if test "x$enable_tls" = "x0" ; then
-  enable_prof="0"
-fi
-if test "x$enable_prof" = "x1" ; then
-  LIBS="$LIBS -lm"
-  AC_DEFINE([JEMALLOC_PROF], [ ])
-  if test "x$enable_prof_libunwind" = "x1" ; then
-    AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
-    if test "x$LUNWIND" = "x-lunwind" ; then
-      AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
-                   [enable_prof_libunwind="0"])
-    else
-      LIBS="$LIBS $LUNWIND"
-    fi
-    if test "x${enable_prof_libunwind}" = "x1" ; then
-      AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
-    fi
-  fi
-fi
-AC_SUBST([enable_prof])
-if test "x$enable_prof" = "x0" ; then
-  roff_prof=".\\\" "
-  roff_no_prof=""
-else
-  roff_prof=""
-  roff_no_prof=".\\\" "
-fi
-AC_SUBST([roff_prof])
-AC_SUBST([roff_no_prof])
+if test "x${abi}" = "xmacho" ; then
+  AC_DEFINE([JEMALLOC_IVSALLOC])
+  AC_DEFINE([JEMALLOC_ZONE])
 
-dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics
-dnl for backtracing.
-if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \
- -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then
-  enable_prof_libgcc="1"
-  AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
-  AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
-  if test "x${enable_prof_libgcc}" = "x1" ; then
-    AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
-  fi
+  dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
+  dnl releases.  malloc_zone_t and malloc_introspection_t have new fields in
+  dnl 10.6, which is the only source-level indication of the change.
+  AC_MSG_CHECKING([malloc zone version])
+  AC_TRY_COMPILE([#include <stdlib.h>
+#include <malloc/malloc.h>], [
+	static malloc_zone_t zone;
+	static struct malloc_introspection_t zone_introspect;
+
+	zone.size = NULL;
+	zone.malloc = NULL;
+	zone.calloc = NULL;
+	zone.valloc = NULL;
+	zone.free = NULL;
+	zone.realloc = NULL;
+	zone.destroy = NULL;
+	zone.zone_name = "jemalloc_zone";
+	zone.batch_malloc = NULL;
+	zone.batch_free = NULL;
+	zone.introspect = &zone_introspect;
+	zone.version = 6;
+	zone.memalign = NULL;
+	zone.free_definite_size = NULL;
+
+	zone_introspect.enumerator = NULL;
+	zone_introspect.good_size = NULL;
+	zone_introspect.check = NULL;
+	zone_introspect.print = NULL;
+	zone_introspect.log = NULL;
+	zone_introspect.force_lock = NULL;
+	zone_introspect.force_unlock = NULL;
+	zone_introspect.statistics = NULL;
+	zone_introspect.zone_locked = NULL;
+], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6])
+    AC_MSG_RESULT([6])],
+   [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3])
+   AC_MSG_RESULT([3])])
 fi
 
 dnl ============================================================================
@@ -773,4 +812,5 @@
 AC_MSG_RESULT([dss                : ${enable_dss}])
 AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}])
 AC_MSG_RESULT([lazy_lock          : ${enable_lazy_lock}])
+AC_MSG_RESULT([tls                : ${enable_tls}])
 AC_MSG_RESULT([===============================================================================])
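
With the default above, OS X builds get JEMALLOC_PREFIX="je_", so applications
call the prefixed entry points.  A hedged usage sketch; the installed header
path and the exact mangled names are assumptions here, since they also depend
on --with-install-suffix and the JEMALLOC_P() name mangling:

    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>	/* assumed install location */

    int
    main(void)
    {
        void *p = je_malloc(4096);		/* prefixed malloc() */
        size_t sz = je_malloc_usable_size(p);	/* prefixed malloc_usable_size() */

        je_free(p);				/* prefixed free() */
        return (sz >= 4096 ? 0 : 1);
    }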
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index d2d5b77..dfc4d76 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -464,7 +464,7 @@
 @roff_swap@This option is enabled by default.
 .It P
 The
-.Fn malloc_stats_print
+.Fn @jemalloc_prefix@malloc_stats_print
 function is called at program exit via an
 .Xr atexit 3
 function.
@@ -626,7 +626,7 @@
 size.
 .Sh MALLCTL NAMESPACE
 The following names are defined in the namespace accessible via the
-.Fn mallctl*
+.Fn @jemalloc_prefix@mallctl*
 functions.
 Value types are specified in parentheses, and their readable/writable statuses
 are encoded as rw, r-, -w, or --.
@@ -648,7 +648,7 @@
 .It Sy "epoch (uint64_t) rw"
 .Bd -ragged -offset indent -compact
 If a value is passed in, refresh the data from which the
-.Fn mallctl*
+.Fn @jemalloc_prefix@mallctl*
 functions report values, and increment the epoch.
 Return the current epoch.
 This is useful for detecting whether another thread caused a refresh.
@@ -669,18 +669,17 @@
 @roff_tcache@find manual flushing useful.
 .Ed
 .\"-----------------------------------------------------------------------------
-@roff_tls@.It Sy "thread.arena (unsigned) rw"
-@roff_tls@.Bd -ragged -offset indent -compact
-@roff_tls@Get or set the arena associated with the calling thread.
-@roff_tls@The arena index must be less than the maximum number of arenas (see
-@roff_tls@the
-@roff_tls@.Dq arenas.narenas
-@roff_tls@mallctl).
-@roff_tls@If the specified arena was not initialized beforehand (see the
-@roff_tls@.Dq arenas.initialized
-@roff_tls@mallctl), it will be automatically initialized as a side effect of
-@roff_tls@calling this interface.
-@roff_tls@.Ed
+.It Sy "thread.arena (unsigned) rw"
+.Bd -ragged -offset indent -compact
+Get or set the arena associated with the calling thread.
+The arena index must be less than the maximum number of arenas (see the
+.Dq arenas.narenas
+mallctl).
+If the specified arena was not initialized beforehand (see the
+.Dq arenas.initialized
+mallctl), it will be automatically initialized as a side effect of calling this
+interface.
+.Ed
 .\"-----------------------------------------------------------------------------
 .It Sy "config.debug (bool) r-"
 .Bd -ragged -offset indent -compact
@@ -1442,7 +1441,7 @@
 A memory allocation failure occurred.
 .It Bq Er EFAULT
 An interface with side effects failed in some way not directly related to
-.Fn mallctl*
+.Fn @jemalloc_prefix@mallctl*
 read/write processing.
 .El
 .Sh ENVIRONMENT
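
Since the "thread.arena" entry above is no longer conditional on TLS, it can
be exercised on OS X as well.  A hedged example of the documented read/write
semantics; the "je_" prefix and header path are assumptions based on the new
OS X defaults:

    #include <jemalloc/jemalloc.h>	/* assumed install location */

    /* Read the calling thread's arena index, then bind the thread to arena 0. */
    static int
    rebind_to_arena0(void)
    {
        unsigned arena_ind, zero = 0;
        size_t sz = sizeof(arena_ind);

        if (je_mallctl("thread.arena", &arena_ind, &sz, NULL, 0) != 0)
            return (1);
        if (je_mallctl("thread.arena", NULL, NULL, &zero, sizeof(zero)) != 0)
            return (1);
        return (0);
    }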
diff --git a/jemalloc/include/jemalloc/internal/chunk.h b/jemalloc/include/jemalloc/internal/chunk.h
index 1f6abf7..d795529 100644
--- a/jemalloc/include/jemalloc/internal/chunk.h
+++ b/jemalloc/include/jemalloc/internal/chunk.h
@@ -39,13 +39,17 @@
 extern chunk_stats_t	stats_chunks;
 #endif
 
+#ifdef JEMALLOC_IVSALLOC
+extern rtree_t		*chunks_rtree;
+#endif
+
 extern size_t		chunksize;
 extern size_t		chunksize_mask; /* (chunksize - 1). */
 extern size_t		chunk_npages;
 extern size_t		arena_chunk_header_npages;
 extern size_t		arena_maxclass; /* Max size class for arenas. */
 
-void	*chunk_alloc(size_t size, bool *zero);
+void	*chunk_alloc(size_t size, bool base, bool *zero);
 void	chunk_dealloc(void *chunk, size_t size);
 bool	chunk_boot(void);
 
diff --git a/jemalloc/include/jemalloc/internal/chunk_mmap.h b/jemalloc/include/jemalloc/internal/chunk_mmap.h
index dc52448..07b50a4 100644
--- a/jemalloc/include/jemalloc/internal/chunk_mmap.h
+++ b/jemalloc/include/jemalloc/internal/chunk_mmap.h
@@ -13,6 +13,8 @@
 void	*chunk_alloc_mmap_noreserve(size_t size);
 void	chunk_dealloc_mmap(void *chunk, size_t size);
 
+bool	chunk_mmap_boot(void);
+
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 2c3f32f..a8d27fa 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -27,6 +27,13 @@
 #define	JEMALLOC_MANGLE
 #include "../jemalloc@install_suffix@.h"
 
+#ifdef JEMALLOC_ZONE
+#include <mach/mach_error.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#include <malloc/malloc.h>
+#endif
+
 #ifdef JEMALLOC_LAZY_LOCK
 #include <dlfcn.h>
 #endif
@@ -159,6 +166,16 @@
 #define	STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
 #define	STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
 
+#ifdef PAGE_SHIFT
+#  undef PAGE_SHIFT
+#endif
+#ifdef PAGE_SIZE
+#  undef PAGE_SIZE
+#endif
+#ifdef PAGE_MASK
+#  undef PAGE_MASK
+#endif
+
 #ifdef DYNAMIC_PAGE_SHIFT
 #  define PAGE_SHIFT	lg_pagesize
 #  define PAGE_SIZE	pagesize
@@ -184,9 +201,13 @@
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/prof.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
 
 #undef JEMALLOC_H_TYPES
 /******************************************************************************/
@@ -203,9 +224,13 @@
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/prof.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
 
 #undef JEMALLOC_H_STRUCTS
 /******************************************************************************/
@@ -240,8 +265,19 @@
  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
  * for allocations.
  */
-extern __thread arena_t	*arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+extern __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define ARENA_GET()	arenas_tls
+#  define ARENA_SET(v)	do {						\
+	arenas_tls = (v);						\
+} while (0)
+#else
+extern pthread_key_t	arenas_tsd;
+#  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
+#  define ARENA_SET(v)	do {						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
+} while (0)
 #endif
+
 /*
  * Arenas that are used to service external requests.  Not all elements of the
  * arenas array are necessarily used; arenas are created lazily as needed.
@@ -250,9 +286,9 @@
 extern unsigned		narenas;
 
 arena_t	*arenas_extend(unsigned ind);
-#ifndef NO_TLS
 arena_t	*choose_arena_hard(void);
-#endif
+void	jemalloc_prefork(void);
+void	jemalloc_postfork(void);
 
 #include "jemalloc/internal/prn.h"
 #include "jemalloc/internal/ckh.h"
@@ -265,9 +301,13 @@
 #include "jemalloc/internal/base.h"
 #include "jemalloc/internal/chunk.h"
 #include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/prof.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
 
 #undef JEMALLOC_H_EXTERNS
 /******************************************************************************/
@@ -285,11 +325,30 @@
 #include "jemalloc/internal/huge.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
+size_t	pow2_ceil(size_t x);
 void	malloc_write(const char *s);
 arena_t	*choose_arena(void);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+/* Compute the smallest power of 2 that is >= x. */
+JEMALLOC_INLINE size_t
+pow2_ceil(size_t x)
+{
+
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+#if (LG_SIZEOF_PTR == 3)
+	x |= x >> 32;
+#endif
+	x++;
+	return (x);
+}
+
 /*
  * Wrapper around malloc_message() that avoids the need for
  * JEMALLOC_P(malloc_message)(...) throughout the code.
@@ -310,76 +369,33 @@
 {
 	arena_t *ret;
 
-	/*
-	 * We can only use TLS if this is a PIC library, since for the static
-	 * library version, libc's malloc is used by TLS allocation, which
-	 * introduces a bootstrapping issue.
-	 */
-#ifndef NO_TLS
-	ret = arenas_map;
+	ret = ARENA_GET();
 	if (ret == NULL) {
 		ret = choose_arena_hard();
 		assert(ret != NULL);
 	}
-#else
-	if (isthreaded && narenas > 1) {
-		unsigned long ind;
 
-		/*
-		 * Hash pthread_self() to one of the arenas.  There is a prime
-		 * number of arenas, so this has a reasonable chance of
-		 * working.  Even so, the hashing can be easily thwarted by
-		 * inconvenient pthread_self() values.  Without specific
-		 * knowledge of how pthread_self() calculates values, we can't
-		 * easily do much better than this.
-		 */
-		ind = (unsigned long) pthread_self() % narenas;
-
-		/*
-		 * Optimistially assume that arenas[ind] has been initialized.
-		 * At worst, we find out that some other thread has already
-		 * done so, after acquiring the lock in preparation.  Note that
-		 * this lazy locking also has the effect of lazily forcing
-		 * cache coherency; without the lock acquisition, there's no
-		 * guarantee that modification of arenas[ind] by another thread
-		 * would be seen on this CPU for an arbitrary amount of time.
-		 *
-		 * In general, this approach to modifying a synchronized value
-		 * isn't a good idea, but in this case we only ever modify the
-		 * value once, so things work out well.
-		 */
-		ret = arenas[ind];
-		if (ret == NULL) {
-			/*
-			 * Avoid races with another thread that may have already
-			 * initialized arenas[ind].
-			 */
-			malloc_mutex_lock(&arenas_lock);
-			if (arenas[ind] == NULL)
-				ret = arenas_extend((unsigned)ind);
-			else
-				ret = arenas[ind];
-			malloc_mutex_unlock(&arenas_lock);
-		}
-	} else
-		ret = arenas[0];
-#endif
-
-	assert(ret != NULL);
 	return (ret);
 }
 #endif
 
+#include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/hash.h"
 #include "jemalloc/internal/prof.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
 
 #ifndef JEMALLOC_ENABLE_INLINE
 void	*imalloc(size_t size);
 void	*icalloc(size_t size);
 void	*ipalloc(size_t alignment, size_t size);
 size_t	isalloc(const void *ptr);
+#  ifdef JEMALLOC_IVSALLOC
+size_t	ivsalloc(const void *ptr);
+#  endif
 void	*iralloc(void *ptr, size_t size);
 void	idalloc(void *ptr);
 #endif
@@ -526,6 +542,19 @@
 	return (ret);
 }
 
+#ifdef JEMALLOC_IVSALLOC
+JEMALLOC_INLINE size_t
+ivsalloc(const void *ptr)
+{
+
+	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
+	if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
+		return (0);
+
+	return (isalloc(ptr));
+}
+#endif
+
 JEMALLOC_INLINE void *
 iralloc(void *ptr, size_t size)
 {
diff --git a/jemalloc/include/jemalloc/internal/mutex.h b/jemalloc/include/jemalloc/internal/mutex.h
index 108bfa8..8113415 100644
--- a/jemalloc/include/jemalloc/internal/mutex.h
+++ b/jemalloc/include/jemalloc/internal/mutex.h
@@ -3,6 +3,12 @@
 
 typedef pthread_mutex_t malloc_mutex_t;
 
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#  define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#else
+#  define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
diff --git a/jemalloc/include/jemalloc/internal/rtree.h b/jemalloc/include/jemalloc/internal/rtree.h
new file mode 100644
index 0000000..9d58eba
--- /dev/null
+++ b/jemalloc/include/jemalloc/internal/rtree.h
@@ -0,0 +1,161 @@
+/*
+ * This radix tree implementation is tailored to the singular purpose of
+ * tracking which chunks are currently owned by jemalloc.  This functionality
+ * is mandatory for OS X, where jemalloc must be able to respond to object
+ * ownership queries.
+ *
+ *******************************************************************************
+ */
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct rtree_s rtree_t;
+
+/*
+ * Size of each radix tree node (must be a power of 2).  This impacts tree
+ * depth.
+ */
+#if (LG_SIZEOF_PTR == 2)
+#  define RTREE_NODESIZE (1U << 14)
+#else
+#  define RTREE_NODESIZE CACHELINE
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct rtree_s {
+	malloc_mutex_t	mutex;
+	void		**root;
+	unsigned	height;
+	unsigned	level2bits[1]; /* Dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+rtree_t	*rtree_new(unsigned bits);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+#ifndef JEMALLOC_DEBUG
+void	*rtree_get_locked(rtree_t *rtree, uintptr_t key);
+#endif
+void	*rtree_get(rtree_t *rtree, uintptr_t key);
+bool	rtree_set(rtree_t *rtree, uintptr_t key, void *val);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_))
+#define	RTREE_GET_GENERATE(f)						\
+/* The least significant bits of the key are ignored. */		\
+JEMALLOC_INLINE void *							\
+f(rtree_t *rtree, uintptr_t key)					\
+{									\
+	void *ret;							\
+	uintptr_t subkey;						\
+	unsigned i, lshift, height, bits;				\
+	void **node, **child;						\
+									\
+	RTREE_LOCK(&rtree->mutex);					\
+	for (i = lshift = 0, height = rtree->height, node = rtree->root;\
+	    i < height - 1;						\
+	    i++, lshift += bits, node = child) {			\
+		bits = rtree->level2bits[i];				\
+		subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
+		    3)) - bits);					\
+		child = (void**)node[subkey];				\
+		if (child == NULL) {					\
+			RTREE_UNLOCK(&rtree->mutex);			\
+			return (NULL);					\
+		}							\
+	}								\
+									\
+	/*								\
+	 * node is a leaf, so it contains values rather than node	\
+	 * pointers.							\
+	 */								\
+	bits = rtree->level2bits[i];					\
+	subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -	\
+	    bits);							\
+	ret = node[subkey];						\
+	RTREE_UNLOCK(&rtree->mutex);					\
+									\
+	RTREE_GET_VALIDATE						\
+	return (ret);							\
+}
+
+#ifdef JEMALLOC_DEBUG
+#  define RTREE_LOCK(l)		malloc_mutex_lock(l)
+#  define RTREE_UNLOCK(l)	malloc_mutex_unlock(l)
+#  define RTREE_GET_VALIDATE
+RTREE_GET_GENERATE(rtree_get_locked)
+#  undef RTREE_LOCK
+#  undef RTREE_UNLOCK
+#  undef RTREE_GET_VALIDATE
+#endif
+
+#define	RTREE_LOCK(l)
+#define	RTREE_UNLOCK(l)
+#ifdef JEMALLOC_DEBUG
+   /*
+    * Suppose that it were possible for a jemalloc-allocated chunk to be
+    * munmap()ped, followed by a different allocator in another thread re-using
+    * overlapping virtual memory, all without invalidating the cached rtree
+    * value.  The result would be a false positive (the rtree would claim that
+    * jemalloc owns memory that it had actually discarded).  This scenario
+    * seems impossible, but the following assertion is a prudent sanity check.
+    */
+#  define RTREE_GET_VALIDATE						\
+	assert(rtree_get_locked(rtree, key) == ret);
+#else
+#  define RTREE_GET_VALIDATE
+#endif
+RTREE_GET_GENERATE(rtree_get)
+#undef RTREE_LOCK
+#undef RTREE_UNLOCK
+#undef RTREE_GET_VALIDATE
+
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, void *val)
+{
+	uintptr_t subkey;
+	unsigned i, lshift, height, bits;
+	void **node, **child;
+
+	malloc_mutex_lock(&rtree->mutex);
+	for (i = lshift = 0, height = rtree->height, node = rtree->root;
+	    i < height - 1;
+	    i++, lshift += bits, node = child) {
+		bits = rtree->level2bits[i];
+		subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+		    bits);
+		child = (void**)node[subkey];
+		if (child == NULL) {
+			child = (void**)base_alloc(sizeof(void *) <<
+			    rtree->level2bits[i+1]);
+			if (child == NULL) {
+				malloc_mutex_unlock(&rtree->mutex);
+				return (true);
+			}
+			memset(child, 0, sizeof(void *) <<
+			    rtree->level2bits[i+1]);
+			node[subkey] = child;
+		}
+	}
+
+	/* node is a leaf, so it contains values rather than node pointers. */
+	bits = rtree->level2bits[i];
+	subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
+	node[subkey] = val;
+	malloc_mutex_unlock(&rtree->mutex);
+
+	return (false);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
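
The ivsalloc() inline in jemalloc_internal.h.in and the chunk_alloc()/
chunk_dealloc() hooks in src/chunk.c tie this tree together.  Condensed from
those hunks, the ownership protocol looks as follows (internal API; this only
compiles inside jemalloc, shown here purely for orientation):

    /* chunk_boot(): one radix bit per significant chunk-address bit. */
    chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);

    /* chunk_alloc() (non-base) and chunk_dealloc(): maintain the ownership map. */
    rtree_set(chunks_rtree, (uintptr_t)ret, ret);
    rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);

    /* ivsalloc(): return 0 if ptr is not inside a jemalloc-owned chunk. */
    if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
        return (0);
    return (isalloc(ptr));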
diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h
index a8be436..df302fb 100644
--- a/jemalloc/include/jemalloc/internal/tcache.h
+++ b/jemalloc/include/jemalloc/internal/tcache.h
@@ -65,8 +65,21 @@
 extern ssize_t	opt_lg_tcache_gc_sweep;
 
 /* Map of thread-specific caches. */
+#ifndef NO_TLS
 extern __thread tcache_t	*tcache_tls
     JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define TCACHE_GET()	tcache_tls
+#  define TCACHE_SET(v)	do {						\
+	tcache_tls = (v);						\
+	pthread_setspecific(tcache_tsd, (void *)(v));			\
+} while (0)
+#else
+extern pthread_key_t		tcache_tsd;
+#  define TCACHE_GET()	((tcache_t *)pthread_getspecific(tcache_tsd))
+#  define TCACHE_SET(v)	do {						\
+	pthread_setspecific(tcache_tsd, (void *)(v));			\
+} while (0)
+#endif
 
 /*
  * Number of tcache bins.  There are nbins small-object bins, plus 0 or more
@@ -122,14 +135,23 @@
 	if ((isthreaded & opt_tcache) == false)
 		return (NULL);
 
-	tcache = tcache_tls;
-	if ((uintptr_t)tcache <= (uintptr_t)1) {
+	tcache = TCACHE_GET();
+	if ((uintptr_t)tcache <= (uintptr_t)2) {
 		if (tcache == NULL) {
 			tcache = tcache_create(choose_arena());
 			if (tcache == NULL)
 				return (NULL);
-		} else
+		} else {
+			if (tcache == (void *)(uintptr_t)1) {
+				/*
+				 * Make a note that an allocator function was
+				 * called after the tcache_thread_cleanup() was
+				 * called.
+				 */
+				TCACHE_SET((uintptr_t)2);
+			}
 			return (NULL);
+		}
 	}
 
 	return (tcache);
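
The 0/1/2 sentinel handling above pairs with tcache_thread_cleanup() in
src/tcache.c further down.  Summarized with illustrative names (the patch
itself uses the raw casts; these #defines are hypothetical, not part of the
change):

    /*
     * TSD sentinel values; NULL means "no tcache yet", any other value is a
     * live tcache_t pointer.
     */
    #define TCACHE_STATE_DISABLED		((tcache_t *)(uintptr_t)1)
    #define TCACHE_STATE_REINCARNATED	((tcache_t *)(uintptr_t)2)
    /*
     * tcache_get():            DISABLED -> REINCARNATED (allocation after cleanup)
     * tcache_thread_cleanup(): live pointer -> destroy, then DISABLED
     *                          REINCARNATED -> DISABLED (re-arm for one more pass)
     *                          DISABLED -> no-op
     */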
diff --git a/jemalloc/include/jemalloc/internal/zone.h b/jemalloc/include/jemalloc/internal/zone.h
new file mode 100644
index 0000000..859b529
--- /dev/null
+++ b/jemalloc/include/jemalloc/internal/zone.h
@@ -0,0 +1,23 @@
+#ifndef JEMALLOC_ZONE
+#  error "This source file is for zones on Darwin (OS X)."
+#endif
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+malloc_zone_t *create_zone(void);
+void	szone2ozone(malloc_zone_t *zone);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in
index 8b98d67..eed33a6 100644
--- a/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -92,6 +92,34 @@
 /* TLS is used to map arenas and magazine caches to threads. */
 #undef NO_TLS
 
+/*
+ * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
+ * within jemalloc-owned chunks before dereferencing them.
+ */
+#undef JEMALLOC_IVSALLOC
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+#undef JEMALLOC_ZONE
+#undef JEMALLOC_ZONE_VERSION
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ *   madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
+ *                                 such that new pages will be demand-zeroed if
+ *                                 the address region is later touched.
+ *   madvise(..., MADV_FREE) : On FreeBSD, this marks pages as being unused,
+ *                             such that they will be discarded rather than
+ *                             swapped out.
+ *   msync(..., MS_KILLPAGES) : On Darwin, this behaves similarly to
+ *                              madvise(..., MADV_FREE) on FreeBSD.
+ */
+#undef JEMALLOC_PURGE_MADVISE_DONTNEED
+#undef JEMALLOC_PURGE_MADVISE_FREE
+#undef JEMALLOC_PURGE_MSYNC_KILLPAGES
+
 /* sizeof(void *) == 2^LG_SIZEOF_PTR. */
 #undef LG_SIZEOF_PTR
 
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index ee859fc..db3d401 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -181,9 +181,6 @@
 static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t size, size_t oldsize);
 static bool	arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
-#ifdef JEMALLOC_TINY
-static size_t	pow2_ceil(size_t x);
-#endif
 static bool	small_size2bin_init(void);
 #ifdef JEMALLOC_DEBUG
 static void	small_size2bin_validate(void);
@@ -426,7 +423,7 @@
 
 		zero = false;
 		malloc_mutex_unlock(&arena->lock);
-		chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
+		chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero);
 		malloc_mutex_lock(&arena->lock);
 		if (chunk == NULL)
 			return (NULL);
@@ -606,10 +603,18 @@
 	ql_new(&mapelms);
 
 	flag_zeroed =
-#ifdef JEMALLOC_SWAP
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+   /*
+    * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
+    * mappings, but not for file-backed mappings.
+    */
+#  ifdef JEMALLOC_SWAP
 	    swap_enabled ? 0 :
-#endif
+#  endif
 	    CHUNK_MAP_ZEROED;
+#else
+	    0;
+#endif
 
 	/*
 	 * If chunk is the spare, temporarily re-allocate it, 1) so that its
@@ -649,9 +654,6 @@
 				/*
 				 * Update internal elements in the page map, so
 				 * that CHUNK_MAP_ZEROED is properly set.
-				 * madvise(..., MADV_DONTNEED) results in
-				 * zero-filled pages for anonymous mappings,
-				 * but not for file-backed mappings.
 				 */
 				mapelm->bits = (npages << PAGE_SHIFT) |
 				    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED |
@@ -715,8 +717,20 @@
 		assert(ndirty >= npages);
 		ndirty -= npages;
 #endif
+
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
 		madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
 		    (npages << PAGE_SHIFT), MADV_DONTNEED);
+#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+		madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+		    (npages << PAGE_SHIFT), MADV_FREE);
+#elif defined(JEMALLOC_PURGE_MSYNC_KILLPAGES)
+		msync((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+		    (npages << PAGE_SHIFT), MS_KILLPAGES);
+#else
+#  error "No method defined for purging unused dirty pages."
+#endif
+
 #ifdef JEMALLOC_STATS
 		nmadvise++;
 #endif
@@ -2239,26 +2253,6 @@
 	return (false);
 }
 
-#ifdef JEMALLOC_TINY
-/* Compute the smallest power of 2 that is >= x. */
-static size_t
-pow2_ceil(size_t x)
-{
-
-	x--;
-	x |= x >> 1;
-	x |= x >> 2;
-	x |= x >> 4;
-	x |= x >> 8;
-	x |= x >> 16;
-#if (SIZEOF_PTR == 8)
-	x |= x >> 32;
-#endif
-	x++;
-	return (x);
-}
-#endif
-
 #ifdef JEMALLOC_DEBUG
 static void
 small_size2bin_validate(void)
diff --git a/jemalloc/src/base.c b/jemalloc/src/base.c
index 605197e..cc85e84 100644
--- a/jemalloc/src/base.c
+++ b/jemalloc/src/base.c
@@ -32,7 +32,7 @@
 	assert(minsize != 0);
 	csize = CHUNK_CEILING(minsize);
 	zero = false;
-	base_pages = chunk_alloc(csize, &zero);
+	base_pages = chunk_alloc(csize, true, &zero);
 	if (base_pages == NULL)
 		return (true);
 	base_next_addr = base_pages;
diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c
index e6e3bcd..5cb9961 100644
--- a/jemalloc/src/chunk.c
+++ b/jemalloc/src/chunk.c
@@ -14,6 +14,10 @@
 chunk_stats_t	stats_chunks;
 #endif
 
+#ifdef JEMALLOC_IVSALLOC
+rtree_t		*chunks_rtree;
+#endif
+
 /* Various chunk-related settings. */
 size_t		chunksize;
 size_t		chunksize_mask; /* (chunksize - 1). */
@@ -30,7 +34,7 @@
  * advantage of them if they are returned.
  */
 void *
-chunk_alloc(size_t size, bool *zero)
+chunk_alloc(size_t size, bool base, bool *zero)
 {
 	void *ret;
 
@@ -63,6 +67,14 @@
 	/* All strategies for allocation failed. */
 	ret = NULL;
 RETURN:
+#ifdef JEMALLOC_IVSALLOC
+	if (base == false && ret != NULL) {
+		if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
+			chunk_dealloc(ret, size);
+			return (NULL);
+		}
+	}
+#endif
 #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 	if (ret != NULL) {
 #  ifdef JEMALLOC_PROF
@@ -104,6 +116,9 @@
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
+#ifdef JEMALLOC_IVSALLOC
+	rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
+#endif
 #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 	malloc_mutex_lock(&chunks_mtx);
 	stats_chunks.curchunks -= (size / chunksize);
@@ -126,21 +141,27 @@
 {
 
 	/* Set variables according to the value of opt_lg_chunk. */
-	chunksize = (1LU << opt_lg_chunk);
+	chunksize = (ZU(1) << opt_lg_chunk);
 	assert(chunksize >= PAGE_SIZE);
 	chunksize_mask = chunksize - 1;
 	chunk_npages = (chunksize >> PAGE_SHIFT);
 
+#ifdef JEMALLOC_IVSALLOC
+	chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
+	if (chunks_rtree == NULL)
+		return (true);
+#endif
 #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 	if (malloc_mutex_init(&chunks_mtx))
 		return (true);
 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
 #endif
-
 #ifdef JEMALLOC_SWAP
 	if (chunk_swap_boot())
 		return (true);
 #endif
+	if (chunk_mmap_boot())
+		return (true);
 #ifdef JEMALLOC_DSS
 	if (chunk_dss_boot())
 		return (true);
diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c
index d9f9e86..a3d09e9 100644
--- a/jemalloc/src/chunk_mmap.c
+++ b/jemalloc/src/chunk_mmap.c
@@ -6,19 +6,22 @@
 
 /*
  * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
- * potentially avoid some system calls.  We can get away without TLS here,
- * since the state of mmap_unaligned only affects performance, rather than
- * correct function.
+ * potentially avoid some system calls.
  */
-static
 #ifndef NO_TLS
-       __thread
+static __thread bool	mmap_unaligned_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#define	MMAP_UNALIGNED_GET()	mmap_unaligned_tls
+#define	MMAP_UNALIGNED_SET(v)	do {					\
+	mmap_unaligned_tls = (v);					\
+} while (0)
+#else
+static pthread_key_t	mmap_unaligned_tsd;
+#define	MMAP_UNALIGNED_GET()	((bool)pthread_getspecific(mmap_unaligned_tsd))
+#define	MMAP_UNALIGNED_SET(v)	do {					\
+	pthread_setspecific(mmap_unaligned_tsd, (void *)(v));		\
+} while (0)
 #endif
-                bool	mmap_unaligned
-#ifndef NO_TLS
-                                       JEMALLOC_ATTR(tls_model("initial-exec"))
-#endif
-                                                                               ;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
@@ -128,7 +131,7 @@
 	 * method.
 	 */
 	if (unaligned == false)
-		mmap_unaligned = false;
+		MMAP_UNALIGNED_SET(false);
 
 	return (ret);
 }
@@ -166,7 +169,7 @@
 	 * fast method next time.
 	 */
 
-	if (mmap_unaligned == false) {
+	if (MMAP_UNALIGNED_GET() == false) {
 		size_t offset;
 
 		ret = pages_map(NULL, size, noreserve);
@@ -175,7 +178,7 @@
 
 		offset = CHUNK_ADDR2OFFSET(ret);
 		if (offset != 0) {
-			mmap_unaligned = true;
+			MMAP_UNALIGNED_SET(true);
 			/* Try to extend chunk boundary. */
 			if (pages_map((void *)((uintptr_t)ret + size),
 			    chunksize - offset, noreserve) == NULL) {
@@ -184,7 +187,8 @@
 				 * the reliable-but-expensive method.
 				 */
 				pages_unmap(ret, size);
-				ret = chunk_alloc_mmap_slow(size, true, noreserve);
+				ret = chunk_alloc_mmap_slow(size, true,
+				    noreserve);
 			} else {
 				/* Clean up unneeded leading space. */
 				pages_unmap(ret, chunksize - offset);
@@ -216,3 +220,17 @@
 
 	pages_unmap(chunk, size);
 }
+
+bool
+chunk_mmap_boot(void)
+{
+
+#ifdef NO_TLS
+	if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) {
+		malloc_write("<jemalloc>: Error in pthread_key_create()\n");
+		return (true);
+	}
+#endif
+
+	return (false);
+}
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index 128883f..6491306 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -41,9 +41,7 @@
 #ifdef JEMALLOC_TCACHE
 CTL_PROTO(tcache_flush)
 #endif
-#ifndef NO_TLS
 CTL_PROTO(thread_arena)
-#endif
 CTL_PROTO(config_debug)
 CTL_PROTO(config_dss)
 CTL_PROTO(config_dynamic_page_shift)
@@ -213,11 +211,9 @@
 };
 #endif
 
-#ifndef NO_TLS
 static const ctl_node_t	thread_node[] = {
 	{NAME("arena"),		CTL(thread_arena)}
 };
-#endif
 
 static const ctl_node_t	config_node[] = {
 	{NAME("debug"),			CTL(config_debug)},
@@ -457,9 +453,7 @@
 #ifdef JEMALLOC_TCACHE
 	{NAME("tcache"),	CHILD(tcache)},
 #endif
-#ifndef NO_TLS
 	{NAME("thread"),	CHILD(thread)},
-#endif
 	{NAME("config"),	CHILD(config)},
 	{NAME("opt"),		CHILD(opt)},
 	{NAME("arenas"),	CHILD(arenas)},
@@ -1040,13 +1034,13 @@
 
 	VOID();
 
-	tcache = tcache_tls;
+	tcache = TCACHE_GET();
 	if (tcache == NULL) {
 		ret = 0;
 		goto RETURN;
 	}
 	tcache_destroy(tcache);
-	tcache_tls = NULL;
+	TCACHE_SET(NULL);
 
 	ret = 0;
 RETURN:
@@ -1054,7 +1048,6 @@
 }
 #endif
 
-#ifndef NO_TLS
 static int
 thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen)
@@ -1085,14 +1078,13 @@
 		}
 
 		/* Set new arena association. */
-		arenas_map = arena;
+		ARENA_SET(arena);
 	}
 
 	ret = 0;
 RETURN:
 	return (ret);
 }
-#endif
 
 /******************************************************************************/
 
diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c
index 49962ea..be35d16 100644
--- a/jemalloc/src/huge.c
+++ b/jemalloc/src/huge.c
@@ -37,7 +37,7 @@
 	if (node == NULL)
 		return (NULL);
 
-	ret = chunk_alloc(csize, &zero);
+	ret = chunk_alloc(csize, false, &zero);
 	if (ret == NULL) {
 		base_node_dealloc(node);
 		return (NULL);
@@ -99,7 +99,7 @@
 		return (NULL);
 
 	zero = false;
-	ret = chunk_alloc(alloc_size, &zero);
+	ret = chunk_alloc(alloc_size, false, &zero);
 	if (ret == NULL) {
 		base_node_dealloc(node);
 		return (NULL);
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index b36590d..ebce3ca 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -89,12 +89,12 @@
 malloc_mutex_t		arenas_lock;
 arena_t			**arenas;
 unsigned		narenas;
-#ifndef NO_TLS
 static unsigned		next_arena;
-#endif
 
 #ifndef NO_TLS
-__thread arena_t	*arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+__thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#else
+pthread_key_t		arenas_tsd;
 #endif
 
 /* Set to true once the allocator has been initialized. */
@@ -104,7 +104,7 @@
 static pthread_t malloc_initializer = (unsigned long)0;
 
 /* Used to avoid initialization races. */
-static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
 
 #ifdef DYNAMIC_PAGE_SHIFT
 size_t		pagesize;
@@ -146,8 +146,6 @@
 static void	stats_print_atexit(void);
 static unsigned	malloc_ncpus(void);
 static bool	malloc_init_hard(void);
-static void	jemalloc_prefork(void);
-static void	jemalloc_postfork(void);
 
 /******************************************************************************/
 /* malloc_message() setup. */
@@ -200,7 +198,6 @@
 	return (arenas[0]);
 }
 
-#ifndef NO_TLS
 /*
  * Choose an arena based on a per-thread value (slow-path code only, called
  * only by choose_arena()).
@@ -219,11 +216,10 @@
 	} else
 		ret = arenas[0];
 
-	arenas_map = ret;
+	ARENA_SET(ret);
 
 	return (ret);
 }
-#endif
 
 static void
 stats_print_atexit(void)
@@ -697,14 +693,12 @@
 		return (true);
 	}
 
-#ifndef NO_TLS
 	/*
 	 * Assign the initial arena to the initial thread, in order to avoid
 	 * spurious creation of an extra arena if the application switches to
 	 * threaded mode.
 	 */
-	arenas_map = arenas[0];
-#endif
+	ARENA_SET(arenas[0]);
 
 	malloc_mutex_init(&arenas_lock);
 
@@ -748,35 +742,13 @@
 			narenas = 1;
 	}
 
-#ifdef NO_TLS
-	if (narenas > 1) {
-		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
-		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
-		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
-		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
-		    223, 227, 229, 233, 239, 241, 251, 257, 263};
-		unsigned nprimes, parenas;
-
-		/*
-		 * Pick a prime number of hash arenas that is more than narenas
-		 * so that direct hashing of pthread_self() pointers tends to
-		 * spread allocations evenly among the arenas.
-		 */
-		assert((narenas & 1) == 0); /* narenas must be even. */
-		nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
-		parenas = primes[nprimes - 1]; /* In case not enough primes. */
-		for (i = 1; i < nprimes; i++) {
-			if (primes[i] > narenas) {
-				parenas = primes[i];
-				break;
-			}
-		}
-		narenas = parenas;
-	}
-#endif
-
-#ifndef NO_TLS
 	next_arena = (narenas > 0) ? 1 : 0;
+
+#ifdef NO_TLS
+	if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
 #endif
 
 	/* Allocate and initialize arenas. */
@@ -793,11 +765,35 @@
 	/* Copy the pointer to the one arena that was already initialized. */
 	arenas[0] = init_arenas[0];
 
+#ifdef JEMALLOC_ZONE
+	/* Register the custom zone. */
+	malloc_zone_register(create_zone());
+
+	/*
+	 * Convert the default szone to an "overlay zone" that is capable of
+	 * deallocating szone-allocated objects, but allocating new objects
+	 * from jemalloc.
+	 */
+	szone2ozone(malloc_default_zone());
+#endif
+
 	malloc_initialized = true;
 	malloc_mutex_unlock(&init_lock);
 	return (false);
 }
 
+
+#ifdef JEMALLOC_ZONE
+JEMALLOC_ATTR(constructor)
+void
+jemalloc_darwin_init(void)
+{
+
+	if (malloc_init_hard())
+		abort();
+}
+#endif
+
 /*
  * End initialization functions.
  */
@@ -1219,8 +1215,12 @@
 {
 	size_t ret;
 
+#ifdef JEMALLOC_IVSALLOC
+	ret = ivsalloc(ptr);
+#else
 	assert(ptr != NULL);
 	ret = isalloc(ptr);
+#endif
 
 	return (ret);
 }
@@ -1298,11 +1298,13 @@
  * is threaded here.
  */
 
-static void
+void
 jemalloc_prefork(void)
 {
 	unsigned i;
 
+	assert(isthreaded);
+
 	/* Acquire all mutexes in a safe order. */
 
 	malloc_mutex_lock(&arenas_lock);
@@ -1324,11 +1326,13 @@
 #endif
 }
 
-static void
+void
 jemalloc_postfork(void)
 {
 	unsigned i;
 
+	assert(isthreaded);
+
 	/* Release all mutexes, now that fork() has completed. */
 
 #ifdef JEMALLOC_SWAP
@@ -1349,3 +1353,5 @@
 	}
 	malloc_mutex_unlock(&arenas_lock);
 }
+
+/******************************************************************************/
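
jemalloc_prefork() and jemalloc_postfork() lose their static qualifier above;
the zone force_lock/force_unlock hooks prototyped in src/zone.c (whose bodies
fall outside this excerpt) are the natural callers.  A hypothetical sketch of
that wiring, consistent with the new assert(isthreaded) checks; the function
bodies below are assumptions, not quoted from the patch:

    /* Hypothetical; the real definitions live in the elided part of src/zone.c. */
    static void
    zone_force_lock(malloc_zone_t *zone)
    {
        if (isthreaded)
            jemalloc_prefork();
    }

    static void
    zone_force_unlock(malloc_zone_t *zone)
    {
        if (isthreaded)
            jemalloc_postfork();
    }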
diff --git a/jemalloc/src/mutex.c b/jemalloc/src/mutex.c
index 3b6081a..337312b 100644
--- a/jemalloc/src/mutex.c
+++ b/jemalloc/src/mutex.c
@@ -59,7 +59,11 @@
 
 	if (pthread_mutexattr_init(&attr) != 0)
 		return (true);
+#ifdef PTHREAD_MUTEX_ADAPTIVE_NP
 	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+#else
+	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
+#endif
 	if (pthread_mutex_init(mutex, &attr) != 0) {
 		pthread_mutexattr_destroy(&attr);
 		return (true);
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 6d6910e..e70b132 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -45,7 +45,19 @@
  * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
  * objects.
  */
+#ifndef NO_TLS
 static __thread ckh_t	*bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define BT2CNT_GET()	bt2cnt_tls
+#  define BT2CNT_SET(v)	do {						\
+	bt2cnt_tls = (v);						\
+	pthread_setspecific(bt2cnt_tsd, (void *)(v));			\
+} while (0)
+#else
+#  define BT2CNT_GET()	((ckh_t *)pthread_getspecific(bt2cnt_tsd))
+#  define BT2CNT_SET(v)	do {						\
+	pthread_setspecific(bt2cnt_tsd, (void *)(v));			\
+} while (0)
+#endif
 
 /*
  * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
@@ -57,12 +69,45 @@
 /* (1U << opt_lg_prof_bt_max). */
 static unsigned		prof_bt_max;
 
-static __thread uint64_t prof_sample_prn_state
+typedef struct prof_sample_state_s prof_sample_state_t;
+struct prof_sample_state_s {
+	uint64_t	prn_state;
+	uint64_t	threshold;
+	uint64_t	accum;
+};
+
+#ifndef NO_TLS
+static __thread prof_sample_state_t prof_sample_state_tls
     JEMALLOC_ATTR(tls_model("initial-exec"));
-static __thread uint64_t prof_sample_threshold
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-static __thread uint64_t prof_sample_accum
-    JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define PROF_SAMPLE_STATE_GET(r)	do {				\
+	r = &prof_sample_state_tls;					\
+} while (0)
+#else
+static pthread_key_t	prof_sample_state_tsd;
+/* Used only if an OOM error occurs in PROF_SAMPLE_STATE_GET(). */
+prof_sample_state_t prof_sample_state_oom;
+#  define PROF_SAMPLE_STATE_GET(r)	do {				\
+	r = (prof_sample_state_t *)pthread_getspecific(			\
+	    prof_sample_state_tsd);					\
+	if (r == NULL) {						\
+		r = ipalloc(CACHELINE, sizeof(prof_sample_state_t));	\
+		if (r == NULL) {					\
+			malloc_write("<jemalloc>: Error in heap "	\
+			    "profiler: out of memory; subsequent heap "	\
+			    "profiles may be inaccurate\n");		\
+			if (opt_abort)					\
+				abort();				\
+			/* Failure is not an option... */		\
+			r = &prof_sample_state_oom;			\
+		}							\
+		pthread_setspecific(prof_sample_state_tsd, (void *)r);	\
+	}								\
+} while (0)
+#  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
+#  define ARENA_SET(v)	do {						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
+} while (0)
+#endif
 
 static malloc_mutex_t	prof_dump_seq_mtx;
 static uint64_t		prof_dump_seq;
@@ -116,6 +161,9 @@
     size_t *hash2);
 static bool	prof_bt_keycomp(const void *k1, const void *k2);
 static void	bt2cnt_thread_cleanup(void *arg);
+#ifdef NO_TLS
+static void	prof_sample_state_thread_cleanup(void *arg);
+#endif
 
 /******************************************************************************/
 
@@ -436,7 +484,7 @@
 prof_lookup(prof_bt_t *bt)
 {
 	prof_thr_cnt_t *ret;
-	ckh_t *bt2cnt = bt2cnt_tls;
+	ckh_t *bt2cnt = BT2CNT_GET();
 
 	if (bt2cnt == NULL) {
 		/* Initialize an empty cache for this thread. */
@@ -448,8 +496,8 @@
 			idalloc(bt2cnt);
 			return (NULL);
 		}
-		bt2cnt_tls = bt2cnt;
-		pthread_setspecific(bt2cnt_tsd, bt2cnt);
+
+		BT2CNT_SET(bt2cnt);
 	}
 
 	if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
@@ -519,15 +567,17 @@
 {
 	uint64_t r;
 	double u;
+	prof_sample_state_t *prof_sample_state;
 
 	/*
 	 * Compute prof_sample_threshold as a geometrically distributed random
 	 * variable with mean (2^opt_lg_prof_sample).
 	 */
-	prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU,
-	    1058392653243283975);
+	PROF_SAMPLE_STATE_GET(prof_sample_state);
+	prn64(r, 53, prof_sample_state->prn_state,
+	    (uint64_t)1125899906842625LLU, 1058392653243283975);
 	u = (double)r * (1.0/9007199254740992.0L);
-	prof_sample_threshold = (uint64_t)(log(u) /
+	prof_sample_state->threshold = (uint64_t)(log(u) /
 	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
 	    + (uint64_t)1U;
 }
@@ -551,26 +601,31 @@
 		prof_backtrace(&bt, 2, prof_bt_max);
 		ret = prof_lookup(&bt);
 	} else {
-		if (prof_sample_threshold == 0) {
+		prof_sample_state_t *prof_sample_state;
+
+		PROF_SAMPLE_STATE_GET(prof_sample_state);
+		if (prof_sample_state->threshold == 0) {
 			/*
 			 * Initialize.  Seed the prng differently for each
 			 * thread.
 			 */
-			prof_sample_prn_state = (uint64_t)(uintptr_t)&size;
+			prof_sample_state->prn_state =
+			    (uint64_t)(uintptr_t)&size;
 			prof_sample_threshold_update();
 		}
 
 		/*
 		 * Determine whether to capture a backtrace based on whether
 		 * size is enough for prof_accum to reach
-		 * prof_sample_threshold.  However, delay updating these
+		 * prof_sample_state->threshold.  However, delay updating these
 		 * variables until prof_{m,re}alloc(), because we don't know
 		 * for sure that the allocation will succeed.
 		 *
 		 * Use subtraction rather than addition to avoid potential
 		 * integer overflow.
 		 */
-		if (size >= prof_sample_threshold - prof_sample_accum) {
+		if (size >= prof_sample_state->threshold -
+		    prof_sample_state->accum) {
 			bt_init(&bt, vec);
 			prof_backtrace(&bt, 2, prof_bt_max);
 			ret = prof_lookup(&bt);
@@ -621,21 +676,26 @@
 static inline void
 prof_sample_accum_update(size_t size)
 {
+	prof_sample_state_t *prof_sample_state;
 
 	/* Sampling logic is unnecessary if the interval is 1. */
 	assert(opt_lg_prof_sample != 0);
 
 	/* Take care to avoid integer overflow. */
-	if (size >= prof_sample_threshold - prof_sample_accum) {
-		prof_sample_accum -= (prof_sample_threshold - size);
+	PROF_SAMPLE_STATE_GET(prof_sample_state);
+	if (size >= prof_sample_state->threshold - prof_sample_state->accum) {
+		prof_sample_state->accum -= (prof_sample_state->threshold -
+		    size);
 		/* Compute new prof_sample_threshold. */
 		prof_sample_threshold_update();
-		while (prof_sample_accum >= prof_sample_threshold) {
-			prof_sample_accum -= prof_sample_threshold;
+		while (prof_sample_state->accum >=
+		    prof_sample_state->threshold) {
+			prof_sample_state->accum -=
+			    prof_sample_state->threshold;
 			prof_sample_threshold_update();
 		}
 	} else
-		prof_sample_accum += size;
+		prof_sample_state->accum += size;
 }
 
 void
@@ -1244,7 +1304,7 @@
 {
 	ckh_t *bt2cnt;
 
-	bt2cnt = bt2cnt_tls;
+	bt2cnt = BT2CNT_GET();
 	if (bt2cnt != NULL) {
 		ql_head(prof_thr_cnt_t) cnts_ql;
 		size_t tabind;
@@ -1278,7 +1338,7 @@
 		 */
 		ckh_delete(bt2cnt);
 		idalloc(bt2cnt);
-		bt2cnt_tls = NULL;
+		BT2CNT_SET(NULL);
 
 		/* Delete cnt's. */
 		while ((cnt = ql_last(&cnts_ql, link)) != NULL) {
@@ -1288,6 +1348,17 @@
 	}
 }
 
+#ifdef NO_TLS
+static void
+prof_sample_state_thread_cleanup(void *arg)
+{
+	prof_sample_state_t *prof_sample_state = (prof_sample_state_t *)arg;
+
+	if (prof_sample_state != &prof_sample_state_oom)
+		idalloc(prof_sample_state);
+}
+#endif
+
 void
 prof_boot0(void)
 {
@@ -1332,6 +1403,14 @@
 			    "<jemalloc>: Error in pthread_key_create()\n");
 			abort();
 		}
+#ifdef NO_TLS
+		if (pthread_key_create(&prof_sample_state_tsd,
+		    prof_sample_state_thread_cleanup) != 0) {
+			malloc_write(
+			    "<jemalloc>: Error in pthread_key_create()\n");
+			abort();
+		}
+#endif
 
 		prof_bt_max = (1U << opt_lg_prof_bt_max);
 		if (malloc_mutex_init(&prof_dump_seq_mtx))
diff --git a/jemalloc/src/rtree.c b/jemalloc/src/rtree.c
new file mode 100644
index 0000000..a583751
--- /dev/null
+++ b/jemalloc/src/rtree.c
@@ -0,0 +1,42 @@
+#define	RTREE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+rtree_t *
+rtree_new(unsigned bits)
+{
+	rtree_t *ret;
+	unsigned bits_per_level, height, i;
+
+	bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
+	height = bits / bits_per_level;
+	if (height * bits_per_level != bits)
+		height++;
+	assert(height * bits_per_level >= bits);
+
+	ret = (rtree_t*)base_alloc(sizeof(rtree_t) + (sizeof(unsigned) *
+	    (height - 1)));
+	if (ret == NULL)
+		return (NULL);
+	memset(ret, 0, sizeof(rtree_t) + (sizeof(unsigned) * (height - 1)));
+
+	malloc_mutex_init(&ret->mutex);
+	ret->height = height;
+	if (bits_per_level * height > bits)
+		ret->level2bits[0] = bits % bits_per_level;
+	else
+		ret->level2bits[0] = bits_per_level;
+	for (i = 1; i < height; i++)
+		ret->level2bits[i] = bits_per_level;
+
+	ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]);
+	if (ret->root == NULL) {
+		/*
+		 * We leak the rtree here, since there's no generic base
+		 * deallocation.
+		 */
+		return (NULL);
+	}
+	memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
+
+	return (ret);
+}
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index ace24ce..8634383 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -9,13 +9,15 @@
 ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
 
 /* Map of thread-specific caches. */
+#ifndef NO_TLS
 __thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
 
 /*
  * Same contents as tcache, but initialized such that the TSD destructor is
  * called when a thread exits, so that the cache can be cleaned up.
  */
-static pthread_key_t		tcache_tsd;
+pthread_key_t		tcache_tsd;
 
 size_t				nhbins;
 size_t				tcache_maxclass;
@@ -239,8 +241,7 @@
 	for (; i < nhbins; i++)
 		tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
 
-	tcache_tls = tcache;
-	pthread_setspecific(tcache_tsd, tcache);
+	TCACHE_SET(tcache);
 
 	return (tcache);
 }
@@ -328,11 +329,24 @@
 {
 	tcache_t *tcache = (tcache_t *)arg;
 
-	assert(tcache == tcache_tls);
-	if (tcache != NULL) {
+	if (tcache == (void *)(uintptr_t)1) {
+		/*
+		 * The previous time this destructor was called, we set the key
+		 * to 1 so that other destructors wouldn't cause re-creation of
+		 * the tcache.  This time, do nothing, so that the destructor
+		 * will not be called again.
+		 */
+	} else if (tcache == (void *)(uintptr_t)2) {
+		/*
+		 * Another destructor called an allocator function after this
+		 * destructor was called.  Reset tcache to 1 in order to
+		 * receive another callback.
+		 */
+		TCACHE_SET((uintptr_t)1);
+	} else if (tcache != NULL) {
 		assert(tcache != (void *)(uintptr_t)1);
 		tcache_destroy(tcache);
-		tcache_tls = (void *)(uintptr_t)1;
+		TCACHE_SET((uintptr_t)1);
 	}
 }
 
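[Editor's note: the sentinel values 1 and 2 above exploit the POSIX rule that a TSD destructor is re-run only for keys left non-NULL after a pass, and only up to PTHREAD_DESTRUCTOR_ITERATIONS times. A hedged sketch of the same handshake, with hypothetical names rather than the actual TCACHE_* macros; the allocator side, which presumably sees 1 and later stores 2 when another destructor allocates, lives in the internal headers and is not shown in this hunk.]

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

static pthread_key_t	cache_tsd;

/* Hypothetical stand-ins for tcache_t and tcache_destroy(). */
typedef struct { void *slots[8]; } cache_t;

static void
cache_destroy(cache_t *cache)
{

	free(cache);
}

/*
 * Destructor mirroring the scheme above: 1 means "already destroyed, do
 * not recreate"; 2 means "another key's destructor used the allocator
 * after the first pass, so request one more callback".
 */
static void
cache_cleanup(void *arg)
{
	cache_t *cache = (cache_t *)arg;

	if (cache == (void *)(uintptr_t)1) {
		/* Final pass: leave the key NULL so no more callbacks occur. */
	} else if (cache == (void *)(uintptr_t)2) {
		/* Late allocator use detected; re-arm for one more pass. */
		pthread_setspecific(cache_tsd, (void *)(uintptr_t)1);
	} else if (cache != NULL) {
		cache_destroy(cache);
		pthread_setspecific(cache_tsd, (void *)(uintptr_t)1);
	}
}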
diff --git a/jemalloc/src/zone.c b/jemalloc/src/zone.c
new file mode 100644
index 0000000..2c1b231
--- /dev/null
+++ b/jemalloc/src/zone.c
@@ -0,0 +1,354 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifndef JEMALLOC_ZONE
+#  error "This source file is for zones on Darwin (OS X)."
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+static malloc_zone_t zone, szone;
+static struct malloc_introspection_t zone_introspect, ozone_introspect;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t	zone_size(malloc_zone_t *zone, void *ptr);
+static void	*zone_malloc(malloc_zone_t *zone, size_t size);
+static void	*zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
+static void	*zone_valloc(malloc_zone_t *zone, size_t size);
+static void	zone_free(malloc_zone_t *zone, void *ptr);
+static void	*zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void	*zone_memalign(malloc_zone_t *zone, size_t alignment,
+    size_t size);
+static void	zone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void	*zone_destroy(malloc_zone_t *zone);
+static size_t	zone_good_size(malloc_zone_t *zone, size_t size);
+static void	zone_force_lock(malloc_zone_t *zone);
+static void	zone_force_unlock(malloc_zone_t *zone);
+static size_t	ozone_size(malloc_zone_t *zone, void *ptr);
+static void	ozone_free(malloc_zone_t *zone, void *ptr);
+static void	*ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+static unsigned	ozone_batch_malloc(malloc_zone_t *zone, size_t size,
+    void **results, unsigned num_requested);
+static void	ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
+    unsigned num);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void	ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void	ozone_force_lock(malloc_zone_t *zone);
+static void	ozone_force_unlock(malloc_zone_t *zone);
+
+/******************************************************************************/
+/*
+ * Functions.
+ */
+
+static size_t
+zone_size(malloc_zone_t *zone, void *ptr)
+{
+
+	/*
+	 * There appear to be places within Darwin (such as setenv(3)) that
+	 * cause calls to this function with pointers that *no* zone owns.  If
+	 * we knew that all pointers were owned by *some* zone, we could split
+	 * our zone into two parts, and use one as the default allocator and
+	 * the other as the default deallocator/reallocator.  Since that will
+	 * not work in practice, we must check all pointers to ensure that they
+	 * reside within a mapped chunk before determining size.
+	 */
+	return (ivsalloc(ptr));
+}
+
+static void *
+zone_malloc(malloc_zone_t *zone, size_t size)
+{
+
+	return (JEMALLOC_P(malloc)(size));
+}
+
+static void *
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
+{
+
+	return (JEMALLOC_P(calloc)(num, size));
+}
+
+static void *
+zone_valloc(malloc_zone_t *zone, size_t size)
+{
+	void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+	JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+
+	return (ret);
+}
+
+static void
+zone_free(malloc_zone_t *zone, void *ptr)
+{
+
+	JEMALLOC_P(free)(ptr);
+}
+
+static void *
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	return (JEMALLOC_P(realloc)(ptr, size));
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+	void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+	JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+
+	return (ret);
+}
+
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	assert(ivsalloc(ptr) == size);
+	JEMALLOC_P(free)(ptr);
+}
+#endif
+
+static void *
+zone_destroy(malloc_zone_t *zone)
+{
+
+	/* This function should never be called. */
+	assert(false);
+	return (NULL);
+}
+
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size)
+{
+	size_t ret;
+	void *p;
+
+	/*
+	 * Actually create an object of the appropriate size, then find out
+	 * how large it could have been without moving up to the next size
+	 * class.
+	 */
+	p = JEMALLOC_P(malloc)(size);
+	if (p != NULL) {
+		ret = isalloc(p);
+		JEMALLOC_P(free)(p);
+	} else
+		ret = size;
+
+	return (ret);
+}
+
+static void
+zone_force_lock(malloc_zone_t *zone)
+{
+
+	if (isthreaded)
+		jemalloc_prefork();
+}
+
+static void
+zone_force_unlock(malloc_zone_t *zone)
+{
+
+	if (isthreaded)
+		jemalloc_postfork();
+}
+
+malloc_zone_t *
+create_zone(void)
+{
+
+	zone.size = (void *)zone_size;
+	zone.malloc = (void *)zone_malloc;
+	zone.calloc = (void *)zone_calloc;
+	zone.valloc = (void *)zone_valloc;
+	zone.free = (void *)zone_free;
+	zone.realloc = (void *)zone_realloc;
+	zone.destroy = (void *)zone_destroy;
+	zone.zone_name = "jemalloc_zone";
+	zone.batch_malloc = NULL;
+	zone.batch_free = NULL;
+	zone.introspect = &zone_introspect;
+	zone.version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone.memalign = zone_memalign;
+	zone.free_definite_size = zone_free_definite_size;
+#endif
+
+	zone_introspect.enumerator = NULL;
+	zone_introspect.good_size = (void *)zone_good_size;
+	zone_introspect.check = NULL;
+	zone_introspect.print = NULL;
+	zone_introspect.log = NULL;
+	zone_introspect.force_lock = (void *)zone_force_lock;
+	zone_introspect.force_unlock = (void *)zone_force_unlock;
+	zone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone_introspect.zone_locked = NULL;
+#endif
+
+	return (&zone);
+}
+
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+	size_t ret;
+
+	ret = ivsalloc(ptr);
+	if (ret == 0)
+		ret = szone.size(zone, ptr);
+
+	return (ret);
+}
+
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+
+	if (ivsalloc(ptr) != 0)
+		JEMALLOC_P(free)(ptr);
+	else {
+		size_t size = szone.size(zone, ptr);
+		if (size != 0)
+			(szone.free)(zone, ptr);
+	}
+}
+
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+	size_t oldsize;
+
+	if (ptr == NULL)
+		return (JEMALLOC_P(malloc)(size));
+
+	oldsize = ivsalloc(ptr);
+	if (oldsize != 0)
+		return (JEMALLOC_P(realloc)(ptr, size));
+	else {
+		oldsize = szone.size(zone, ptr);
+		if (oldsize == 0)
+			return (JEMALLOC_P(malloc)(size));
+		else {
+			void *ret = JEMALLOC_P(malloc)(size);
+			if (ret != NULL) {
+				memcpy(ret, ptr, (oldsize < size) ? oldsize :
+				    size);
+				(szone.free)(zone, ptr);
+			}
+			return (ret);
+		}
+	}
+}
+
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+    unsigned num_requested)
+{
+
+	/* Don't bother implementing this interface, since it isn't required. */
+	return (0);
+}
+
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++)
+		ozone_free(zone, to_be_freed[i]);
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	if (ivsalloc(ptr) != 0) {
+		assert(ivsalloc(ptr) == size);
+		JEMALLOC_P(free)(ptr);
+	} else {
+		assert(size == szone.size(zone, ptr));
+		szone.free_definite_size(zone, ptr, size);
+	}
+}
+#endif
+
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone.introspect->force_lock(zone);
+}
+
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone.introspect->force_unlock(zone);
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc.  This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+void
+szone2ozone(malloc_zone_t *zone)
+{
+
+	/*
+	 * Stash a copy of the original szone so that we can call its
+	 * functions as needed.  Note that, internally, the szone stores its
+	 * bookkeeping data structures immediately following the malloc_zone_t
+	 * header, so when calling szone functions, we need to pass a pointer
+	 * to the original zone structure.
+	 */
+	memcpy(&szone, zone, sizeof(malloc_zone_t));
+
+	zone->size = (void *)ozone_size;
+	zone->malloc = (void *)zone_malloc;
+	zone->calloc = (void *)zone_calloc;
+	zone->valloc = (void *)zone_valloc;
+	zone->free = (void *)ozone_free;
+	zone->realloc = (void *)ozone_realloc;
+	zone->destroy = (void *)zone_destroy;
+	zone->zone_name = "jemalloc_ozone";
+	zone->batch_malloc = ozone_batch_malloc;
+	zone->batch_free = ozone_batch_free;
+	zone->introspect = &ozone_introspect;
+	zone->version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone->memalign = zone_memalign;
+	zone->free_definite_size = ozone_free_definite_size;
+#endif
+
+	ozone_introspect.enumerator = NULL;
+	ozone_introspect.good_size = (void *)zone_good_size;
+	ozone_introspect.check = NULL;
+	ozone_introspect.print = NULL;
+	ozone_introspect.log = NULL;
+	ozone_introspect.force_lock = (void *)ozone_force_lock;
+	ozone_introspect.force_unlock = (void *)ozone_force_unlock;
+	ozone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	ozone_introspect.zone_locked = NULL;
+#endif
+}
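[Editor's note: create_zone() and szone2ozone() are only defined here; the OS X initialization path elsewhere in the patch is responsible for calling them. A hedged sketch of the expected wiring, using Darwin's <malloc/malloc.h> API; darwin_zone_init() is an illustrative name, not a function from this patch.]

#include <malloc/malloc.h>

/* Provided by src/zone.c above. */
malloc_zone_t	*create_zone(void);
void		szone2ozone(malloc_zone_t *zone);

static void
darwin_zone_init(void)
{

	/* Register the pure jemalloc zone so it participates in zone APIs. */
	malloc_zone_register(create_zone());

	/*
	 * Overlay the default scalable zone in place; its size/free/realloc
	 * entry points become the ozone_* wrappers defined above, so blocks
	 * it handed out before jemalloc loaded are still sized and freed
	 * correctly.
	 */
	szone2ozone(malloc_default_zone());
}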
diff --git a/jemalloc/test/thread_arena.c b/jemalloc/test/thread_arena.c
index 99e9669..d52435f 100644
--- a/jemalloc/test/thread_arena.c
+++ b/jemalloc/test/thread_arena.c
@@ -3,6 +3,7 @@
 #include <pthread.h>
 #include <string.h>
 
+#define	JEMALLOC_MANGLE
 #include "jemalloc/jemalloc.h"
 
 void *
@@ -13,10 +14,10 @@
 	size_t size;
 	int err;
 
-	malloc(1);
+	JEMALLOC_P(malloc)(1);
 
 	size = sizeof(arena_ind);
-	if ((err = mallctl("thread.arena", &arena_ind, &size,
+	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size,
 	    &main_arena_ind, sizeof(main_arena_ind)))) {
 		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
@@ -37,10 +38,11 @@
 
 	fprintf(stderr, "Test begin\n");
 
-	malloc(1);
+	JEMALLOC_P(malloc)(1);
 
 	size = sizeof(arena_ind);
-	if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) {
+	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
+	    0))) {
 		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		ret = 1;