Replace JEMALLOC_OPTIONS with MALLOC_CONF.

Replace the single-character run-time flags with key/value pairs, which
can be set via the malloc_conf global, /etc/malloc.conf, and the
MALLOC_CONF environment variable.

Replace the JEMALLOC_PROF_PREFIX environment variable with the
"opt.prof_prefix" option.

Replace umax2s() with u2s().
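
Illustrative usage (a sketch, not part of this patch): with an
unprefixed build, the new key/value interface can be exercised as
follows.  The "abort" and "lg_chunk" keys are among the "opt.*"
mallctls documented in the man page hunks below; the header path is
the standard installed jemalloc header.

    /*
     * Minimal sketch of the new option interface, assuming a build
     * without --with-jemalloc-prefix (unprefixed public symbols).
     */
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>	/* Declares malloc_conf. */

    int
    main(void)
    {
    	void *p;

    	/*
    	 * Options are parsed once, at the first allocation, so set
    	 * the global before any allocation occurs.  The same
    	 * settings could instead come from the /etc/malloc.conf
    	 * symlink or the MALLOC_CONF environment variable.
    	 */
    	malloc_conf = "abort:true,lg_chunk:24"; /* 2^24 = 16 MiB chunks. */

    	p = malloc(64);	/* First call triggers option parsing. */
    	free(p);
    	return (0);
    }

The same settings can come from the shell, e.g.
MALLOC_CONF="abort:true,lg_chunk:24" ./a.out.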
diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
index 1bf5158..c5697c6 100644
--- a/jemalloc/INSTALL
+++ b/jemalloc/INSTALL
@@ -27,9 +27,17 @@
     it is linked to.  This works only on ELF-based systems.
 
 --with-jemalloc-prefix=<prefix>
-    Prefix all public APIs with <prefix>, so that, for example, malloc()
-    becomes <prefix>malloc().  This makes it possible to use jemalloc at the
-    same time as the system allocator.
+    Prefix all public APIs with <prefix>.  For example, if <prefix> is
+    "prefix_", the API changes like the following occur:
+
+      malloc()         --> prefix_malloc()
+      malloc_conf      --> prefix_malloc_conf
+      /etc/malloc.conf --> /etc/prefix_malloc.conf
+      MALLOC_CONF      --> PREFIX_MALLOC_CONF
+
+    This makes it possible to use jemalloc at the same time as the
+    system allocator, or even to use multiple copies of jemalloc
+    simultaneously.
 
     By default, the prefix is "", except on OS X, where it is "je_".  On OS X,
     jemalloc overlays the default malloc zone, but makes no attempt to actually
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index b27955d..b613cb1 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -256,9 +256,13 @@
 fi]
 )
 if test "x$JEMALLOC_PREFIX" != "x" ; then
-  AC_DEFINE([JEMALLOC_PREFIX], [ ])
+  JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
+  AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
+  AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
   jemalloc_prefix="$JEMALLOC_PREFIX"
+  jemalloc_cprefix="$JEMALLOC_CPREFIX"
   AC_SUBST([jemalloc_prefix])
+  AC_SUBST([jemalloc_cprefix])
   AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix])
 fi
 
@@ -325,6 +329,15 @@
   AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
 fi
 AC_SUBST([enable_debug])
+if test "x$enable_debug" = "x0" ; then
+  roff_debug=".\\\" "
+  roff_no_debug=""
+else
+  roff_debug=""
+  roff_no_debug=".\\\" "
+fi
+AC_SUBST([roff_debug])
+AC_SUBST([roff_no_debug])
 
 dnl Only optimize if not debugging.
 if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index 5202a2b..1557ecb 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -38,7 +38,7 @@
 .\"     @(#)malloc.3	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
 .\"
-.Dd September 30, 2010
+.Dd October 23, 2010
 .Dt JEMALLOC 3
 .Os
 .Sh NAME
@@ -85,7 +85,7 @@
 .Ft int
 .Fn @jemalloc_prefix@mallctlbymib "const size_t *mib" "size_t miblen" "void *oldp" "size_t *oldlenp" "void *newp" "size_t newlen"
 .Ft const char *
-.Va @jemalloc_prefix@malloc_options ;
+.Va @jemalloc_prefix@malloc_conf ;
 .Ft void
 .Fn \*(lp*@jemalloc_prefix@malloc_message\*(rp "void *cbopaque" "const char *s"
 .Ss Experimental API
@@ -381,8 +381,8 @@
 .Fa ( size
 +
 .Fa extra )
-bytes, though an inability to allocate the extra byte(s) will not by itself
-result in failure.
+bytes, though inability to allocate the extra byte(s) will not by itself result
+in failure.
 Behavior is undefined if
 .Fa ( size
 +
@@ -402,292 +402,33 @@
 .Fa ptr
 to be made available for future allocations.
 .Sh TUNING
-Once, when the first call is made to one of these memory allocation
-routines, various flags will be set or reset, which affects the
-workings of this allocator implementation.
+Once, when the first call is made to one of the memory allocation routines, the
+allocator initializes its internals based in part on various options that can
+be specified at compile- or run-time.
 .Pp
-The
+The string pointed to by the global variable
+.Va @jemalloc_prefix@malloc_conf ,
+the
 .Dq name
 of the file referenced by the symbolic link named
-.Pa /etc/jemalloc.conf ,
-the value of the environment variable
-.Ev JEMALLOC_OPTIONS ,
-and the string pointed to by the global variable
-.Va @jemalloc_prefix@malloc_options
-will be interpreted, in that order, from left to right as flags.
+.Pa /etc/@jemalloc_prefix@malloc.conf ,
+and the value of the environment variable
+.Ev @jemalloc_cprefix@MALLOC_CONF ,
+will be interpreted, in that order, from left to right as options.
 .Pp
-Each flag is a single letter, optionally prefixed by a non-negative base 10
-integer repetition count.
+An options string is a comma-separated list of option:value pairs.
+There is one key corresponding to each
+.Dq opt.*
+mallctl.
 For example,
-.Dq 3N
-is equivalent to
-.Dq NNN .
-Some flags control parameter magnitudes, where uppercase increases the
-magnitude, and lowercase decreases the magnitude.
-Other flags control boolean parameters, where uppercase indicates that a
-behavior is set, or on, and lowercase means that a behavior is not set, or off.
-.Bl -tag -width indent
-.It A
-All warnings (except for the warning about unknown
-flags being set) become fatal.
-The process will call
-.Xr abort 3
-in these cases.
-@roff_prof@.It B
-@roff_prof@Double/halve the maximum backtrace depth when profiling memory
-@roff_prof@allocation activity.
-@roff_prof@The default is 128.
-.It C
-Double/halve the size of the maximum size class that is a multiple of the
-cacheline size (64).
-Above this size, subpage spacing (256 bytes) is used for size classes.
-The default value is 512 bytes.
-.It D
-Halve/double the per-arena minimum ratio of active to dirty pages.
-Some dirty unused pages may be allowed to accumulate, within the limit set by
-the ratio (or one chunk worth of dirty pages, whichever is greater), before
-informing the kernel about some of those pages via
-.Xr madvise 2 .
-This provides the kernel with sufficient information to recycle dirty pages if
-physical memory becomes scarce and the pages remain unused.
-The default minimum ratio is 32:1;
-.Ev JEMALLOC_OPTIONS=6D
-will disable dirty page purging.
-@roff_prof@.It E
-@roff_prof@Activate/deactivate profiling.
-@roff_prof@This is a secondary control mechanism that makes it possible to
-@roff_prof@start the application with profiling enabled (see the
-@roff_prof@.Dq F
-@roff_prof@option) but inactive, then toggle profiling at any time during
-@roff_prof@program execution with the
-@roff_prof@.Dq prof.active
-@roff_prof@mallctl.
-@roff_prof@This option is enabled by default.
-@roff_prof@.It F
-@roff_prof@Profile memory allocation activity, and use an
-@roff_prof@.Xr atexit 3
-@roff_prof@function to dump final memory usage to a file named according to
-@roff_prof@the pattern
-@roff_prof@.Pa <prefix>.<pid>.<seq>.f.heap ,
-@roff_prof@where
-@roff_prof@.Pa <prefix>
-@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
-@roff_prof@See the
-@roff_prof@.Dq B
-@roff_prof@option for backtrace depth control.
-@roff_prof@See the
-@roff_prof@.Dq E
-@roff_prof@option for on-the-fly activation/deactivation.
-@roff_prof@See the
-@roff_prof@.Dq S
-@roff_prof@option for probabilistic sampling control.
-@roff_prof@See the
-@roff_prof@.Dq R
-@roff_prof@option for control of cumulative sample reporting.
-@roff_prof@See the
-@roff_prof@.Dq T
-@roff_prof@option for control of per thread backtrace caching.
-@roff_prof@See the
-@roff_prof@.Dq I
-@roff_prof@option for information on interval-triggered profile dumping, and the
-@roff_prof@.Dq U
-@roff_prof@option for information on high-water-triggered profile dumping.
-@roff_prof@Profile output is compatible with the included pprof Perl script,
-@roff_prof@which originates from the google-perftools package
-@roff_prof@(http://code.google.com/p/google-perftools/).
-@roff_tcache@.It G
-@roff_tcache@Double/halve the approximate interval (counted in terms of
-@roff_tcache@thread-specific cache allocation/deallocation events) between full
-@roff_tcache@thread-specific cache garbage collection sweeps.
-@roff_tcache@Garbage collection is actually performed incrementally, one size
-@roff_tcache@class at a time, in order to avoid large collection pauses.
-@roff_tcache@The default sweep interval is 8192;
-@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
-@roff_tcache@will disable garbage collection.
-@roff_tcache@.It H
-@roff_tcache@Enable/disable thread-specific caching.
-@roff_tcache@When there are multiple threads, each thread uses a
-@roff_tcache@thread-specific cache for objects up to a certain size.
-@roff_tcache@Thread-specific caching allows many allocations to be satisfied
-@roff_tcache@without performing any thread synchronization, at the cost of
-@roff_tcache@increased memory use.
-@roff_tcache@See the
-@roff_tcache@.Dq G
-@roff_tcache@and
-@roff_tcache@.Dq M
-@roff_tcache@options for related tuning information.
-@roff_tcache@This option is enabled by default.
-@roff_prof@.It I
-@roff_prof@Double/halve the average interval between memory profile dumps, as
-@roff_prof@measured in bytes of allocation activity.
-@roff_prof@The actual interval between dumps may be sporadic because
-@roff_prof@decentralized allocation counters are used to avoid synchronization
-@roff_prof@bottlenecks.
-@roff_prof@Profiles are dumped to files named according to the pattern
-@roff_prof@.Pa <prefix>.<pid>.<seq>.i<iseq>.heap ,
-@roff_prof@where
-@roff_prof@.Pa <prefix>
-@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
-@roff_prof@By default, interval-triggered profile dumping is disabled.
-@roff_prof@This is internally encoded as (1 << -1), and each
-@roff_prof@.Dq I
-@roff_prof@that is specified increments the shift amount.
-@roff_prof@Therefore, e.g.
-@roff_prof@.Ev JEMALLOC_OPTIONS=31I
-@roff_prof@specifies a dump interval of 1 GiB.
-@roff_fill@.It J
-@roff_fill@Each byte of new memory allocated by
-@roff_fill@.Fn @jemalloc_prefix@malloc
-@roff_fill@or
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@will be initialized to 0xa5.
-@roff_fill@All memory returned by
-@roff_fill@.Fn @jemalloc_prefix@free
-@roff_fill@or
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@will be initialized to 0x5a.
-@roff_fill@This is intended for debugging and will impact performance
-@roff_fill@negatively.
-.It K
-Double/halve the virtual memory chunk size.
-The default chunk size is 4 MiB.
-@roff_prof@.It L
-@roff_prof@Use an
-@roff_prof@.Xr atexit 3
-@roff_prof@function to report memory leaks.
-@roff_prof@See the
-@roff_prof@.Dq B
-@roff_prof@option for backtrace depth control.
-@roff_prof@See the
-@roff_prof@.Dq F option for information on analyzing heap profile output.
-@roff_prof@This option is disabled by default.
-@roff_tcache@.It M
-@roff_tcache@Double/halve the maximum size class to cache.
-@roff_tcache@At a minimum, all small size classes are cached, and at a maximum
-@roff_tcache@all large size classes are cached.
-@roff_tcache@The default maximum is 32 KiB.
-.It N
-Double/halve the number of arenas.
-The default number of arenas is four times the number of CPUs, or one if there
-is a single CPU.
-@roff_swap@.It O
-@roff_swap@Over-commit memory as a side effect of using anonymous
-@roff_swap@.Xr mmap 2
-@roff_swap@@roff_dss@ and
-@roff_swap@@roff_dss@.Xr sbrk 2
-@roff_swap@for virtual memory allocation.
-@roff_swap@In order for overcommit to be disabled, the
-@roff_swap@.Dq swap.fds
-@roff_swap@mallctl must have been successfully written to.
-@roff_swap@This option is enabled by default.
-.It P
-The
-.Fn @jemalloc_prefix@malloc_stats_print
-function is called at program exit via an
-.Xr atexit 3
-function.
-@roff_stats@This has the potential to cause deadlock for a multi-threaded
-@roff_stats@process that exits while one or more threads are executing in the
-@roff_stats@memory allocation functions.
-@roff_stats@Therefore, this option should only be used with care; it is
-@roff_stats@primarily intended as a performance tuning aid during application
-@roff_stats@development.
-.It Q
-Double/halve the size of the maximum size class that is a multiple of the
-quantum (8 or 16 bytes, depending on architecture).
-Above this size, cacheline spacing is used for size classes.
-The default value is 128 bytes.
-@roff_prof@.It R
-@roff_prof@Enable/disable reporting of cumulative object/byte counts in profile
-@roff_prof@dumps.
-@roff_prof@If this option is enabled, every unique backtrace must be stored for
-@roff_prof@the duration of execution.
-@roff_prof@Depending on the application, this can impose a large memory
-@roff_prof@overhead, and the cumulative counts are not always of interest.
-@roff_prof@See the
-@roff_prof@.Dq T
-@roff_prof@option for control of per thread backtrace caching, which has
-@roff_prof@important interactions.
-@roff_prof@This option is enabled by default.
-@roff_prof@.It S
-@roff_prof@Double/halve the average interval between allocation samples, as
-@roff_prof@measured in bytes of allocation activity.
-@roff_prof@Increasing the sampling interval decreases profile fidelity, but
-@roff_prof@also decreases the computational overhead.
-@roff_prof@The default sample interval is one (i.e. all allocations are
-@roff_prof@sampled).
-@roff_prof@.It T
-@roff_prof@Double/halve the maximum per thread backtrace cache used for heap
-@roff_prof@profiling.
-@roff_prof@A backtrace can only be discarded if the
-@roff_prof@.Dq R
-@roff_prof@option is disabled, and no thread caches currently refer to the
-@roff_prof@backtrace.
-@roff_prof@Therefore, a backtrace cache limit should be imposed if the
-@roff_prof@intention is to limit how much memory is used by backtraces.
-@roff_prof@By default, no limit is imposed.
-@roff_prof@This is internally encoded as (1 << -1), and each
-@roff_prof@.Dq T
-@roff_prof@that is specified increments the shift amount.
-@roff_prof@Therefore, e.g.
-@roff_prof@.Ev JEMALLOC_OPTIONS=11T
-@roff_prof@specifies a backtrace cache limit of 1024 backtraces.
-@roff_prof@.It U
-@roff_prof@Trigger a memory profile dump every time the total virtual memory
-@roff_prof@exceeds the previous maximum.
-@roff_prof@Profiles are dumped to files named according to the pattern
-@roff_prof@.Pa <prefix>.<pid>.<seq>.u<useq>.heap ,
-@roff_prof@where
-@roff_prof@.Pa <prefix>
-@roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
-@roff_prof@This option is disabled by default.
-@roff_sysv@.It V
-@roff_sysv@Attempting to allocate zero bytes will return a
-@roff_sysv@.Dv NULL
-@roff_sysv@pointer instead of a valid pointer.
-@roff_sysv@(The default behavior is to make a minimal allocation and return a
-@roff_sysv@pointer to it.)
-@roff_sysv@This option is provided for System V compatibility.
-@roff_sysv@@roff_xmalloc@This option is incompatible with the
-@roff_sysv@@roff_xmalloc@.Dq X
-@roff_sysv@@roff_xmalloc@option.
-@roff_xmalloc@.It X
-@roff_xmalloc@Rather than return failure for any allocation function, display a
-@roff_xmalloc@diagnostic message on
-@roff_xmalloc@.Dv STDERR_FILENO
-@roff_xmalloc@and cause the program to drop core (using
-@roff_xmalloc@.Xr abort 3 ) .
-@roff_xmalloc@This option should be set at compile time by including the
-@roff_xmalloc@following in the source code:
-@roff_xmalloc@.Bd -literal -offset indent
-@roff_xmalloc@@jemalloc_prefix@malloc_options = "X";
-@roff_xmalloc@.Ed
-@roff_fill@.It Z
-@roff_fill@Each byte of new memory allocated by
-@roff_fill@.Fn @jemalloc_prefix@malloc
-@roff_fill@or
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@will be initialized to 0.
-@roff_fill@Note that this initialization only happens once for each byte, so
-@roff_fill@.Fn @jemalloc_prefix@realloc
-@roff_fill@calls do not zero memory that was previously allocated.
-@roff_fill@This is intended for debugging and will impact performance
-@roff_fill@negatively.
-.El
-.Pp
-@roff_fill@The
-@roff_fill@.Dq J
-@roff_fill@and
-@roff_fill@.Dq Z
-@roff_fill@options are intended for testing and debugging.
-@roff_fill@An application which changes its behavior when these options are used
-@roff_fill@is flawed.
+.Dq abort:true,narenas:1
+sets the
+.Dq opt.abort
+and
+.Dq opt.narenas
+options.
+Some options have boolean values (true/false), others have integer values (base
+8, 10, or 16, depending on prefix), and yet others have raw string values.
 .Sh IMPLEMENTATION NOTES
 @roff_dss@Traditionally, allocators have used
 @roff_dss@.Xr sbrk 2
@@ -715,7 +456,7 @@
 .Pp
 @roff_tcache@In addition to multiple arenas, this allocator supports
 @roff_tcache@thread-specific caching for small and large objects, in order to
-@roff_tcache@make it possible to completely avoid synchronization for most small
+@roff_tcache@make it possible to completely avoid synchronization for most
 @roff_tcache@allocation requests.
 @roff_tcache@Such caching allows very fast allocation in the common case, but it
 @roff_tcache@increases memory usage and fragmentation, since a bounded number of
@@ -744,31 +485,37 @@
 determine all metadata regarding small and large allocations in constant time.
 .Pp
 Small objects are managed in groups by page runs.
-Each run maintains a bitmap that tracks which regions are in use.
+Each run maintains a frontier and free list to track which regions are in use.
 @roff_tiny@Allocation requests that are no more than half the quantum (8 or 16,
 @roff_tiny@depending on architecture) are rounded up to the nearest power of
 @roff_tiny@two.
 Allocation requests that are
 @roff_tiny@more than half the quantum, but
 no more than the minimum cacheline-multiple size class (see the
-.Dq Q
+.Dq opt.lg_qspace_max
 option) are rounded up to the nearest multiple of the
 @roff_tiny@quantum.
 @roff_no_tiny@quantum (8 or 16, depending on architecture).
 Allocation requests that are more than the minimum cacheline-multiple size
 class, but no more than the minimum subpage-multiple size class (see the
-.Dq C
+.Dq opt.lg_cspace_max
 option) are rounded up to the nearest multiple of the cacheline size (64).
 Allocation requests that are more than the minimum subpage-multiple size class,
 but no more than the maximum subpage-multiple size class are rounded up to the
 nearest multiple of the subpage size (256).
 Allocation requests that are more than the maximum subpage-multiple size class,
 but small enough to fit in an arena-managed chunk (see the
-.Dq K
+.Dq opt.lg_chunk
 option), are rounded up to the nearest run size.
 Allocation requests that are too large to fit in an arena-managed chunk are
 rounded up to the nearest multiple of the chunk size.
 .Pp
+Allocations are packed tightly together, which can be an issue for
+multi-threaded applications.
+If you need to ensure that allocations do not suffer from cacheline sharing,
+round your allocation requests up to the nearest multiple of the cacheline
+size, or specify cacheline alignment when allocating.
+.Pp
 Assuming 4 MiB chunks, 4 KiB pages, and a 16 byte quantum on a 64-bit system,
 the size classes in each category are as follows:
 .TS
@@ -825,12 +572,6 @@
 ;;12 MiB
 ;;...
 .TE
-.Pp
-Allocations are packed tightly together, which can be an issue for
-multi-threaded applications.
-If you need to assure that allocations do not suffer from cacheline sharing,
-round your allocation requests up to the nearest multiple of the cacheline
-size, or specify cacheline alignment when allocating.
 .Sh MALLCTL NAMESPACE
 The following names are defined in the namespace accessible via the
 .Fn @jemalloc_prefix@mallctl*
@@ -845,6 +586,10 @@
 @roff_stats@<i> equal to
 @roff_stats@.Dq arenas.narenas
 @roff_stats@can be used to access the summation of statistics from all arenas.
+.Pp
+Take special note of the
+.Dq epoch
+mallctl, which controls refreshing of cached dynamic statistics.
 .Bl -ohang
 .\"-----------------------------------------------------------------------------
 .It Sy "version (const char *) r-"
@@ -861,48 +606,6 @@
 This is useful for detecting whether another thread caused a refresh.
 .Ed
 .\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "tcache.flush (void) --"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@Flush calling thread's tcache.
-@roff_tcache@This interface releases all cached objects and internal data
-@roff_tcache@structures associated with the calling thread's thread-specific
-@roff_tcache@cache.
-@roff_tcache@Ordinarily, this interface need not be called, since automatic
-@roff_tcache@periodic incremental garbage collection occurs, and the thread
-@roff_tcache@cache is automatically discarded when a thread exits.
-@roff_tcache@However, garbage collection is triggered by allocation activity,
-@roff_tcache@so it is possible for a thread that stops allocating/deallocating
-@roff_tcache@to retain its cache indefinitely, in which case the developer may
-@roff_tcache@find manual flushing useful.
-.Ed
-.\"-----------------------------------------------------------------------------
-.It Sy "thread.arena (unsigned) rw"
-.Bd -ragged -offset indent -compact
-Get or set the arena associated with the calling thread.
-The arena index must be less than the maximum number of arenas (see the
-.Dq arenas.narenas
-mallctl).
-If the specified arena was not initialized beforehand (see the
-.Dq arenas.initialized
-mallctl), it will be automatically initialized as a side effect of calling this
-interface.
-.Ed
-.\"-----------------------------------------------------------------------------
-@roff_stats@.It Sy "thread.allocated (uint64_t) r-"
-@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Get the total number of bytes ever allocated by the calling thread.
-@roff_stats@This counter has the potential to wrap around; it is up to the
-@roff_stats@application to appropriately interpret the counter in such cases.
-@roff_stats@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_stats@.It Sy "thread.deallocated (uint64_t) r-"
-@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Get the total number of bytes ever deallocated by the calling
-@roff_stats@thread.
-@roff_stats@This counter has the potential to wrap around; it is up to the
-@roff_stats@application to appropriately interpret the counter in such cases.
-@roff_stats@.Ed
-.\"-----------------------------------------------------------------------------
 .It Sy "config.debug (bool) r-"
 .Bd -ragged -offset indent -compact
 --enable-debug was specified during build configuration.
@@ -980,150 +683,384 @@
 .\"-----------------------------------------------------------------------------
 .It Sy "opt.abort (bool) r-"
 .Bd -ragged -offset indent -compact
-See the
-.Dq A
-option.
+Abort-on-warning enabled/disabled.
+If true, most warnings are fatal.
+The process will call
+.Xr abort 3
+in these cases.
+This option is
+@roff_debug@enabled
+@roff_no_debug@disabled
+by default.
 .Ed
 .\"-----------------------------------------------------------------------------
-@roff_fill@.It Sy "opt.junk (bool) r-"
-@roff_fill@.Bd -ragged -offset indent -compact
-@roff_fill@See the
-@roff_fill@.Dq J
-@roff_fill@option.
-@roff_fill@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_fill@.It Sy "opt.zero (bool) r-"
-@roff_fill@.Bd -ragged -offset indent -compact
-@roff_fill@See the
-@roff_fill@.Dq Z
-@roff_fill@option.
-@roff_fill@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_xmalloc@.It Sy "opt.xmalloc (bool) r-"
-@roff_xmalloc@.Bd -ragged -offset indent -compact
-@roff_xmalloc@See the
-@roff_xmalloc@.Dq X
-@roff_xmalloc@option.
-@roff_xmalloc@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.tcache (bool) r-"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@See the
-@roff_tcache@.Dq H
-@roff_tcache@option.
-@roff_tcache@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.lg_tcache_gc_sweep (ssize_t) r-"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@See the
-@roff_tcache@.Dq G
-@roff_tcache@option.
-@roff_tcache@.Ed
-.\"-----------------------------------------------------------------------------
-.It Sy "opt.stats_print (bool) r-"
-.Bd -ragged -offset indent -compact
-See the
-.Dq P
-option.
-.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq F
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq B
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof_accum (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq R
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.lg_prof_tcmax (ssize_t) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq T
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.lg_prof_sample (ssize_t) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq S
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq I
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof_udump (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq U
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_prof@.It Sy "opt.prof_leak (bool) r-"
-@roff_prof@.Bd -ragged -offset indent -compact
-@roff_prof@See the
-@roff_prof@.Dq L
-@roff_prof@option.
-@roff_prof@.Ed
-.\"-----------------------------------------------------------------------------
 .It Sy "opt.lg_qspace_max (size_t) r-"
 .Bd -ragged -offset indent -compact
-See the
-.Dq Q
-option.
+Size (log base 2) of the maximum size class that is a multiple of the quantum
+(8 or 16 bytes, depending on architecture).
+Above this size, cacheline spacing is used for size classes.
+The default value is 128 bytes (2^7).
 .Ed
 .\"-----------------------------------------------------------------------------
 .It Sy "opt.lg_cspace_max (size_t) r-"
 .Bd -ragged -offset indent -compact
-See the
-.Dq C
-option.
-.Ed
-.\"-----------------------------------------------------------------------------
-.It Sy "opt.lg_dirty_mult (ssize_t) r-"
-.Bd -ragged -offset indent -compact
-See the
-.Dq D
-option.
+Size (log base 2) of the maximum size class that is a multiple of the cacheline
+size (64).
+Above this size, subpage spacing (256 bytes) is used for size classes.
+The default value is 512 bytes (2^9).
 .Ed
 .\"-----------------------------------------------------------------------------
 .It Sy "opt.lg_chunk (size_t) r-"
 .Bd -ragged -offset indent -compact
-See the
-.Dq K
-option.
+Virtual memory chunk size (log base 2).
+The default chunk size is 4 MiB (2^22).
 .Ed
 .\"-----------------------------------------------------------------------------
+.It Sy "opt.narenas (size_t) r-"
+.Bd -ragged -offset indent -compact
+Maximum number of arenas to use.
+The default maximum number of arenas is four times the number of CPUs, or one
+if there is a single CPU.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "opt.lg_dirty_mult (ssize_t) r-"
+.Bd -ragged -offset indent -compact
+Per-arena minimum ratio (log base 2) of active to dirty pages.
+Some dirty unused pages may be allowed to accumulate, within the limit set by
+the ratio (or one chunk worth of dirty pages, whichever is greater), before
+informing the kernel about some of those pages via
+.Xr madvise 2
+or a similar system call.
+This provides the kernel with sufficient information to recycle dirty pages if
+physical memory becomes scarce and the pages remain unused.
+The default minimum ratio is 32:1 (2^5:1); an option value of -1 will disable
+dirty page purging.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "opt.stats_print (bool) r-"
+.Bd -ragged -offset indent -compact
+Enable/disable statistics printing at exit.
+If enabled, the
+.Fn @jemalloc_prefix@malloc_stats_print
+function is called at program exit via an
+.Xr atexit 3
+function.
+@roff_stats@This has the potential to cause deadlock for a multi-threaded
+@roff_stats@process that exits while one or more threads are executing in the
+@roff_stats@memory allocation functions.
+@roff_stats@Therefore, this option should only be used with care; it is
+@roff_stats@primarily intended as a performance tuning aid during application
+@roff_stats@development.
+This option is disabled by default.
+.Ed
+.\"-----------------------------------------------------------------------------
+@roff_fill@.It Sy "opt.junk (bool) r-"
+@roff_fill@.Bd -ragged -offset indent -compact
+@roff_fill@Junk filling enabled/disabled.
+@roff_fill@If enabled, each byte of uninitialized allocated memory will be
+@roff_fill@initialized to 0xa5.
+@roff_fill@All deallocated memory will be initialized to 0x5a.
+@roff_fill@This is intended for debugging and will impact performance
+@roff_fill@negatively.
+@roff_fill@This option is
+@roff_fill@@roff_debug@enabled
+@roff_fill@@roff_no_debug@disabled
+@roff_fill@by default.
+@roff_fill@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_fill@.It Sy "opt.zero (bool) r-"
+@roff_fill@.Bd -ragged -offset indent -compact
+@roff_fill@Zero filling enabled/disabled.
+@roff_fill@If enabled, each byte of uninitialized allocated memory will be
+@roff_fill@initialized to 0.
+@roff_fill@Note that this initialization only happens once for each byte, so
+@roff_fill@.Fn @jemalloc_prefix@realloc
+@roff_fill@calls do not zero memory that was previously allocated.
+@roff_fill@This is intended for debugging and will impact performance
+@roff_fill@negatively.
+@roff_fill@This option is disabled by default.
+@roff_fill@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_sysv@.It Sy "opt.sysv (bool) r-"
+@roff_sysv@.Bd -ragged -offset indent -compact
+@roff_sysv@If enabled, attempting to allocate zero bytes will return a
+@roff_sysv@.Dv NULL
+@roff_sysv@pointer instead of a valid pointer.
+@roff_sysv@(The default behavior is to make a minimal allocation and return a
+@roff_sysv@pointer to it.)
+@roff_sysv@This option is provided for System V compatibility.
+@roff_sysv@@roff_xmalloc@This option is incompatible with the
+@roff_sysv@@roff_xmalloc@.Dq opt.xmalloc
+@roff_sysv@@roff_xmalloc@option.
+@roff_sysv@This option is disabled by default.
+@roff_sysv@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_xmalloc@.It Sy "opt.xmalloc (bool) r-"
+@roff_xmalloc@.Bd -ragged -offset indent -compact
+@roff_xmalloc@Abort-on-out-of-memory enabled/disabled.
+@roff_xmalloc@If enabled, rather than returning failure for any allocation
+@roff_xmalloc@function, display a diagnostic message on
+@roff_xmalloc@.Dv STDERR_FILENO
+@roff_xmalloc@and cause the program to drop core (using
+@roff_xmalloc@.Xr abort 3 ) .
+@roff_xmalloc@If an application is designed to depend on this behavior, set the
+@roff_xmalloc@option at compile time by including the following in the source
+@roff_xmalloc@code:
+@roff_xmalloc@.Bd -literal -offset indent
+@roff_xmalloc@@jemalloc_prefix@malloc_conf = "xmalloc:true";
+@roff_xmalloc@.Ed
+@roff_xmalloc@.Pp
+@roff_xmalloc@This option is disabled by default.
+@roff_xmalloc@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "opt.tcache (bool) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Thread-specific caching enabled/disabled.
+@roff_tcache@When there are multiple threads, each thread uses a
+@roff_tcache@thread-specific cache for objects up to a certain size.
+@roff_tcache@Thread-specific caching allows many allocations to be satisfied
+@roff_tcache@without performing any thread synchronization, at the cost of
+@roff_tcache@increased memory use.
+@roff_tcache@See the
+@roff_tcache@.Dq opt.lg_tcache_gc_sweep
+@roff_tcache@and
+@roff_tcache@.Dq opt.lg_tcache_max
+@roff_tcache@options for related tuning information.
+@roff_tcache@This option is enabled by default.
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "opt.lg_tcache_gc_sweep (ssize_t) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Approximate interval (log base 2) between full thread-specific
+@roff_tcache@cache garbage collection sweeps, counted in terms of
+@roff_tcache@thread-specific cache allocation/deallocation events.
+@roff_tcache@Garbage collection is actually performed incrementally, one size
+@roff_tcache@class at a time, in order to avoid large collection pauses.
+@roff_tcache@The default sweep interval is 8192 (2^13); setting this option to
+@roff_tcache@-1 will disable garbage collection.
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "opt.lg_tcache_max (size_t) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Maximum size class (log base 2) to cache in the thread-specific
+@roff_tcache@cache.
+@roff_tcache@At a minimum, all small size classes are cached, and at a maximum
+@roff_tcache@all large size classes are cached.
+@roff_tcache@The default maximum is 32 KiB (2^15).
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Memory profiling enabled/disabled.
+@roff_prof@If enabled, profile memory allocation activity, and use an
+@roff_prof@.Xr atexit 3
+@roff_prof@function to dump final memory usage to a file named according to
+@roff_prof@the pattern
+@roff_prof@.Pa <prefix>.<pid>.<seq>.f.heap ,
+@roff_prof@where
+@roff_prof@.Pa <prefix>
+@roff_prof@is controlled by the
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_bt_max
+@roff_prof@option for backtrace depth control.
+@roff_prof@See the
+@roff_prof@.Dq opt.prof_active
+@roff_prof@option for on-the-fly activation/deactivation.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_sample
+@roff_prof@option for probabilistic sampling control.
+@roff_prof@See the
+@roff_prof@.Dq opt.prof_accum
+@roff_prof@option for control of cumulative sample reporting.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_tcmax
+@roff_prof@option for control of per thread backtrace caching.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_interval
+@roff_prof@option for information on interval-triggered profile dumping, and the
+@roff_prof@.Dq opt.prof_gdump
+@roff_prof@option for information on high-water-triggered profile dumping.
+@roff_prof@Profile output is compatible with the included pprof Perl script,
+@roff_prof@which originates from the google-perftools package
+@roff_prof@(http://code.google.com/p/google-perftools/).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_prefix (const char *) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Filename prefix for profile dumps.
+@roff_prof@If the prefix is set to the empty string, no automatic dumps will
+@roff_prof@occur; this is primarily useful for disabling the automatic final
+@roff_prof@heap dump (which also disables leak reporting, if enabled).
+@roff_prof@The default prefix is
+@roff_prof@.Pa jeprof .
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Maximum backtrace depth (log base 2) when profiling memory
+@roff_prof@allocation activity.
+@roff_prof@The default is 128 (2^7).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_active (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Profiling activated/deactivated.
+@roff_prof@This is a secondary control mechanism that makes it possible to
+@roff_prof@start the application with profiling enabled (see the
+@roff_prof@.Dq opt.prof
+@roff_prof@option) but inactive, then toggle profiling at any time during
+@roff_prof@program execution with the
+@roff_prof@.Dq prof.active
+@roff_prof@mallctl.
+@roff_prof@This option is enabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_sample (ssize_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Average interval (log base 2) between allocation samples, as
+@roff_prof@measured in bytes of allocation activity.
+@roff_prof@Increasing the sampling interval decreases profile fidelity, but
+@roff_prof@also decreases the computational overhead.
+@roff_prof@The default sample interval is 1 (2^0) (i.e. all allocations are
+@roff_prof@sampled).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_accum (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Reporting of cumulative object/byte counts in profile dumps
+@roff_prof@enabled/disabled.
+@roff_prof@If this option is enabled, every unique backtrace must be stored for
+@roff_prof@the duration of execution.
+@roff_prof@Depending on the application, this can impose a large memory
+@roff_prof@overhead, and the cumulative counts are not always of interest.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_tcmax
+@roff_prof@option for control of per thread backtrace caching, which has
+@roff_prof@important interactions.
+@roff_prof@This option is enabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_tcmax (ssize_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Maximum per thread backtrace cache (log base 2) used for heap
+@roff_prof@profiling.
+@roff_prof@A backtrace can only be discarded if the
+@roff_prof@.Dq opt.prof_accum
+@roff_prof@option is disabled, and no thread caches currently refer to the
+@roff_prof@backtrace.
+@roff_prof@Therefore, a backtrace cache limit should be imposed if the
+@roff_prof@intention is to limit how much memory is used by backtraces.
+@roff_prof@By default, no limit is imposed (encoded as -1).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Average interval (log base 2) between memory profile dumps, as
+@roff_prof@measured in bytes of allocation activity.
+@roff_prof@The actual interval between dumps may be sporadic because
+@roff_prof@decentralized allocation counters are used to avoid synchronization
+@roff_prof@bottlenecks.
+@roff_prof@Profiles are dumped to files named according to the pattern
+@roff_prof@.Pa <prefix>.<pid>.<seq>.i<iseq>.heap ,
+@roff_prof@where
+@roff_prof@.Pa <prefix>
+@roff_prof@is controlled by the
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
+@roff_prof@By default, interval-triggered profile dumping is disabled (encoded
+@roff_prof@as -1).
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_gdump (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Trigger a memory profile dump every time the total virtual memory
+@roff_prof@exceeds the previous maximum.
+@roff_prof@Profiles are dumped to files named according to the pattern
+@roff_prof@.Pa <prefix>.<pid>.<seq>.u<useq>.heap ,
+@roff_prof@where
+@roff_prof@.Pa <prefix>
+@roff_prof@is controlled by the
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
+@roff_prof@This option is disabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_leak (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Leak reporting enabled/disabled.
+@roff_prof@If enabled, use an
+@roff_prof@.Xr atexit 3
+@roff_prof@function to report memory leaks detected by allocation sampling.
+@roff_prof@See the
+@roff_prof@.Dq opt.lg_prof_bt_max
+@roff_prof@option for backtrace depth control.
+@roff_prof@See the
+@roff_prof@.Dq opt.prof
+@roff_prof@option for information on analyzing heap profile output.
+@roff_prof@This option is disabled by default.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
 .It Sy "opt.overcommit (bool) r-"
 .Bd -ragged -offset indent -compact
-See the
-.Dq O
-option.
+@roff_swap@Over-commit enabled/disabled.
+@roff_swap@If enabled, over-commit memory as a side effect of using anonymous
+@roff_swap@.Xr mmap 2
+@roff_swap@@roff_dss@ and
+@roff_swap@@roff_dss@.Xr sbrk 2
+@roff_swap@for virtual memory allocation.
+@roff_swap@In order for overcommit to be disabled, the
+@roff_swap@.Dq swap.fds
+@roff_swap@mallctl must have been successfully written to.
+@roff_swap@This option is enabled by default.
 .Ed
 .\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "tcache.flush (void) --"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Flush calling thread's tcache.
+@roff_tcache@This interface releases all cached objects and internal data
+@roff_tcache@structures associated with the calling thread's thread-specific
+@roff_tcache@cache.
+@roff_tcache@Ordinarily, this interface need not be called, since automatic
+@roff_tcache@periodic incremental garbage collection occurs, and the thread
+@roff_tcache@cache is automatically discarded when a thread exits.
+@roff_tcache@However, garbage collection is triggered by allocation activity,
+@roff_tcache@so it is possible for a thread that stops allocating/deallocating
+@roff_tcache@to retain its cache indefinitely, in which case the developer may
+@roff_tcache@find manual flushing useful.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "thread.arena (unsigned) rw"
+.Bd -ragged -offset indent -compact
+Get or set the arena associated with the calling thread.
+The arena index must be less than the maximum number of arenas (see the
+.Dq arenas.narenas
+mallctl).
+If the specified arena was not initialized beforehand (see the
+.Dq arenas.initialized
+mallctl), it will be automatically initialized as a side effect of calling this
+interface.
+.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "thread.allocated (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Get the total number of bytes ever allocated by the calling thread.
+@roff_stats@This counter has the potential to wrap around; it is up to the
+@roff_stats@application to appropriately interpret the counter in such cases.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "thread.deallocated (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Get the total number of bytes ever deallocated by the calling
+@roff_stats@thread.
+@roff_stats@This counter has the potential to wrap around; it is up to the
+@roff_stats@application to appropriately interpret the counter in such cases.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
 .It Sy "arenas.narenas (unsigned) r-"
 .Bd -ragged -offset indent -compact
 Maximum number of arenas.
-See the
-.Dq N
-option.
 .Ed
 .\"-----------------------------------------------------------------------------
 .It Sy "arenas.initialized (bool *) r-"
@@ -1269,7 +1206,7 @@
 @roff_prof@.Bd -ragged -offset indent -compact
 @roff_prof@Control whether sampling is currently active.
 @roff_prof@See the
-@roff_prof@.Dq E
+@roff_prof@.Dq opt.prof_active
 @roff_prof@option for additional information.
 @roff_prof@.Ed
 .\"-----------------------------------------------------------------------------
@@ -1281,8 +1218,8 @@
 @roff_prof@where
 @roff_prof@.Pa <prefix>
 @roff_prof@is controlled by the
-@roff_prof@JEMALLOC_PROF_PREFIX
-@roff_prof@environment variable.
+@roff_prof@.Dq opt.prof_prefix
+@roff_prof@option.
 @roff_prof@.Ed
 .\"-----------------------------------------------------------------------------
 @roff_prof@.It Sy "prof.interval (uint64_t) r-"
@@ -1290,7 +1227,7 @@
 @roff_prof@Average number of bytes allocated between interval-based profile
 @roff_prof@dumps.
 @roff_prof@See the
-@roff_prof@.Dq I
+@roff_prof@.Dq opt.lg_prof_interval
 @roff_prof@option for additional information.
 @roff_prof@.Ed
 .\"-----------------------------------------------------------------------------
@@ -1544,10 +1481,9 @@
 .\"-----------------------------------------------------------------------------
 .El
 .Sh DEBUGGING MALLOC PROBLEMS
-The first thing to do is to set the
-.Dq A
-option.
-This option forces a coredump (if possible) at the first sign of trouble,
+Start by setting the
+.Dq opt.abort
+option, which forces a coredump (if possible) at the first sign of trouble,
 rather than the normal policy of trying to continue if at all possible.
 .Pp
 It is probably also a good idea to recompile the program with suitable
@@ -1558,19 +1494,19 @@
 @roff_fill@the next section, it is likely because it depends on the storage
 @roff_fill@being filled with zero bytes.
 @roff_fill@Try running it with the
-@roff_fill@.Dq Z
+@roff_fill@.Dq opt.zero
 @roff_fill@option set;
 @roff_fill@if that improves the situation, this diagnosis has been confirmed.
 @roff_fill@If the program still misbehaves,
 @roff_fill@the likely problem is accessing memory outside the allocated area.
 @roff_fill@.Pp
 @roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the
-@roff_fill@.Dq J
+@roff_fill@.Dq opt.junk
 @roff_fill@option may help provoke the problem.
 @roff_fill@.Pp
-Unfortunately this implementation does not provide much detail about
-the problems it detects; the performance impact for storing such information
-would be prohibitive.
+This implementation does not provide much detail about the problems it detects,
+because the performance impact for storing such information would be
+prohibitive.
 There are a number of allocator implementations available on the Internet
 which focus on detecting and pinpointing problems by trading performance for
 extra sanity checks and detailed diagnostics.
@@ -1580,7 +1516,7 @@
 .Dv STDERR_FILENO .
 Errors will result in the process dumping core.
 If the
-.Dq A
+.Dq opt.abort
 option is set, all warnings are treated as errors.
 .Pp
 The
@@ -1736,33 +1672,24 @@
 moving the object.
 .El
 .Sh ENVIRONMENT
-The following environment variables affect the execution of the allocation
+The following environment variable affects the execution of the allocation
 functions:
-@roff_prof@.Bl -tag -width ".Ev JEMALLOC_PROF_PREFIX"
-@roff_no_prof@.Bl -tag -width ".Ev JEMALLOC_OPTIONS"
-.It Ev JEMALLOC_OPTIONS
+.Bl -tag -width ".Ev @jemalloc_cprefix@MALLOC_CONF"
+.It Ev @jemalloc_cprefix@MALLOC_CONF
 If the environment variable
-.Ev JEMALLOC_OPTIONS
-is set, the characters it contains will be interpreted as flags to the
-allocation functions.
-@roff_prof@.It Ev JEMALLOC_PROF_PREFIX
-@roff_prof@If the environment variable
-@roff_prof@.Ev JEMALLOC_PROF_PREFIX
-@roff_prof@is set, use it as the filename prefix for profile dumps; otherwise
-@roff_prof@use
-@roff_prof@.Pa jeprof
-@roff_prof@as the prefix.
+.Ev @jemalloc_cprefix@MALLOC_CONF
+is set, the characters it contains will be interpreted as options.
 .El
 .Sh EXAMPLES
 To dump core whenever a problem occurs:
 .Pp
 .Bd -literal -offset indent
-ln -s 'A' /etc/jemalloc.conf
+ln -s 'abort:true' /etc/@jemalloc_prefix@malloc.conf
 .Ed
 .Pp
-To specify in the source a chunk size that is twice the default:
+To specify in the source a chunk size that is 16 MiB:
 .Bd -literal -offset indent
-@jemalloc_prefix@malloc_options = "K";
+@jemalloc_prefix@malloc_conf = "lg_chunk:24";
 .Ed
 .Sh SEE ALSO
 .Xr madvise 2 ,
diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h
index 7bbf21e..8776ad1 100644
--- a/jemalloc/include/jemalloc/internal/ctl.h
+++ b/jemalloc/include/jemalloc/internal/ctl.h
@@ -82,9 +82,9 @@
 #define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
 	if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen)	\
 	    != 0) {							\
-		malloc_write("<jemalloc>: Invalid xmallctl(\"");	\
+		malloc_write("<jemalloc>: Failure in xmallctl(\"");	\
 		malloc_write(name);					\
-		malloc_write("\", ...) call\n");			\
+		malloc_write("\", ...)\n");				\
 		abort();						\
 	}								\
 } while (0)
@@ -92,9 +92,9 @@
 #define	xmallctlnametomib(name, mibp, miblenp) do {			\
 	if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) {	\
 		malloc_write(						\
-		    "<jemalloc>: Invalid xmallctlnametomib(\"");	\
+		    "<jemalloc>: Failure in xmallctlnametomib(\"");	\
 		malloc_write(name);					\
-		malloc_write("\", ...) call\n");			\
+		malloc_write("\", ...)\n");				\
 		abort();						\
 	}								\
 } while (0)
@@ -103,7 +103,7 @@
 	if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp,	\
 	    newlen) != 0) {						\
 		malloc_write(						\
-		    "<jemalloc>: Invalid xmallctlbymib() call\n");	\
+		    "<jemalloc>: Failure in xmallctlbymib()\n");	\
 		abort();						\
 	}								\
 } while (0)
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 99746bb..3d25300 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -24,6 +24,7 @@
 #include <inttypes.h>
 #include <string.h>
 #include <strings.h>
+#include <ctype.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <pthread.h>
@@ -61,7 +62,7 @@
 		malloc_write("<jemalloc>: ");				\
 		malloc_write(__FILE__);					\
 		malloc_write(":");					\
-		malloc_write(umax2s(__LINE__, 10, line_buf));		\
+		malloc_write(u2s(__LINE__, 10, line_buf));		\
 		malloc_write(": Failed assertion: ");			\
 		malloc_write("\"");					\
 		malloc_write(#e);					\
@@ -256,6 +257,7 @@
 #ifdef JEMALLOC_FILL
 extern bool	opt_zero;
 #endif
+extern size_t	opt_narenas;
 
 #ifdef DYNAMIC_PAGE_SHIFT
 extern size_t		pagesize;
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index 3e85bda..7864000 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -9,6 +9,7 @@
 typedef struct prof_tdata_s prof_tdata_t;
 
 /* Option defaults. */
+#define	PROF_PREFIX_DEFAULT		"jeprof"
 #define	LG_PROF_BT_MAX_DEFAULT		7
 #define	LG_PROF_SAMPLE_DEFAULT		0
 #define	LG_PROF_INTERVAL_DEFAULT	-1
@@ -164,10 +165,11 @@
 extern size_t	opt_lg_prof_bt_max;   /* Maximum backtrace depth. */
 extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
 extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
-extern bool	opt_prof_udump;       /* High-water memory dumping. */
+extern bool	opt_prof_gdump;       /* High-water memory dumping. */
 extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
 extern bool	opt_prof_accum;       /* Report cumulative bytes. */
 extern ssize_t	opt_lg_prof_tcmax;    /* lg(max per thread backtrace cache) */
+extern char	opt_prof_prefix[PATH_MAX + 1];
 
 /*
  * Profile dump interval, measured in bytes allocated.  Each arena triggers a
@@ -215,10 +217,11 @@
 prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
 void	prof_idump(void);
 bool	prof_mdump(const char *filename);
-void	prof_udump(void);
+void	prof_gdump(void);
 prof_tdata_t	*prof_tdata_init(void);
 void	prof_boot0(void);
-bool	prof_boot1(void);
+void	prof_boot1(void);
+bool	prof_boot2(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/jemalloc/include/jemalloc/internal/stats.h b/jemalloc/include/jemalloc/internal/stats.h
index cbf035f..3fc2080 100644
--- a/jemalloc/include/jemalloc/internal/stats.h
+++ b/jemalloc/include/jemalloc/internal/stats.h
@@ -154,7 +154,7 @@
 
 extern bool	opt_stats_print;
 
-char	*umax2s(uintmax_t x, unsigned base, char *s);
+char	*u2s(uint64_t x, unsigned base, char *s);
 #ifdef JEMALLOC_STATS
 void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
     const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h
index 168a306..1ad91a9 100644
--- a/jemalloc/include/jemalloc/internal/tcache.h
+++ b/jemalloc/include/jemalloc/internal/tcache.h
@@ -17,7 +17,7 @@
 /* Number of cache slots for large size classes. */
 #define	TCACHE_NSLOTS_LARGE		20
 
-/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */
+/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
 #define	LG_TCACHE_MAXCLASS_DEFAULT	15
 
 /*
@@ -61,7 +61,7 @@
 #ifdef JEMALLOC_H_EXTERNS
 
 extern bool	opt_tcache;
-extern ssize_t	opt_lg_tcache_maxclass;
+extern ssize_t	opt_lg_tcache_max;
 extern ssize_t	opt_lg_tcache_gc_sweep;
 
 /* Map of thread-specific caches. */
diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/jemalloc/include/jemalloc/jemalloc.h.in
index e398307..4dd3981 100644
--- a/jemalloc/include/jemalloc/jemalloc.h.in
+++ b/jemalloc/include/jemalloc/jemalloc.h.in
@@ -32,7 +32,7 @@
 #define	ALLOCM_ERR_OOM		1
 #define	ALLOCM_ERR_NOT_MOVED	2
 
-extern const char	*JEMALLOC_P(malloc_options);
+extern const char	*JEMALLOC_P(malloc_conf);
 extern void		(*JEMALLOC_P(malloc_message))(void *, const char *);
 
 void	*JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in
index fe35170..54e5d94 100644
--- a/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -13,6 +13,7 @@
  * the API prefixing.
  */
 #undef JEMALLOC_PREFIX
+#undef JEMALLOC_CPREFIX
 #if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
 #undef JEMALLOC_P
 #endif
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index d811f65..00f425f 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -290,7 +290,7 @@
 	assert((uintptr_t)ptr >= (uintptr_t)run +
 	    (uintptr_t)run->bin->reg0_offset);
 	/*
-	 * Freeing a pointer in the run's wilderness can cause assertion
+	 * Freeing a pointer past the run's frontier can cause assertion
 	 * failure.
 	 */
 	assert((uintptr_t)ptr < (uintptr_t)run->next);
@@ -2532,7 +2532,7 @@
 		if (nbins > 255) {
 		    char line_buf[UMAX2S_BUFSIZE];
 		    malloc_write("<jemalloc>: Too many small size classes (");
-		    malloc_write(umax2s(nbins, 10, line_buf));
+		    malloc_write(u2s(nbins, 10, line_buf));
 		    malloc_write(" > max 255)\n");
 		    abort();
 		}
@@ -2541,7 +2541,7 @@
 	if (nbins > 256) {
 	    char line_buf[UMAX2S_BUFSIZE];
 	    malloc_write("<jemalloc>: Too many small size classes (");
-	    malloc_write(umax2s(nbins, 10, line_buf));
+	    malloc_write(u2s(nbins, 10, line_buf));
 	    malloc_write(" > max 256)\n");
 	    abort();
 	}
diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c
index 0be24fb..00bf50a 100644
--- a/jemalloc/src/chunk.c
+++ b/jemalloc/src/chunk.c
@@ -78,7 +78,7 @@
 #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 	if (ret != NULL) {
 #  ifdef JEMALLOC_PROF
-		bool udump;
+		bool gdump;
 #  endif
 		malloc_mutex_lock(&chunks_mtx);
 #  ifdef JEMALLOC_STATS
@@ -88,17 +88,17 @@
 		if (stats_chunks.curchunks > stats_chunks.highchunks) {
 			stats_chunks.highchunks = stats_chunks.curchunks;
 #  ifdef JEMALLOC_PROF
-			udump = true;
+			gdump = true;
 #  endif
 		}
 #  ifdef JEMALLOC_PROF
 		else
-			udump = false;
+			gdump = false;
 #  endif
 		malloc_mutex_unlock(&chunks_mtx);
 #  ifdef JEMALLOC_PROF
-		if (opt_prof && opt_prof_udump && udump)
-			prof_udump();
+		if (opt_prof && opt_prof_gdump && gdump)
+			prof_gdump();
 #  endif
 	}
 #endif
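
The udump->gdump rename reflects what actually triggers the dump: a new
global high-water mark in cumulative chunk usage.  Enabling it now looks
like this (a sketch, assuming the default "" prefix):

	/* Dump whenever total chunk usage reaches a new maximum. */
	const char *malloc_conf = "prof:true,prof_gdump:true";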
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index dbc5cd4..c83ee4f 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -62,8 +62,15 @@
 CTL_PROTO(config_tls)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
+CTL_PROTO(opt_lg_qspace_max)
+CTL_PROTO(opt_lg_cspace_max)
+CTL_PROTO(opt_lg_chunk)
+CTL_PROTO(opt_narenas)
+CTL_PROTO(opt_lg_dirty_mult)
+CTL_PROTO(opt_stats_print)
 #ifdef JEMALLOC_FILL
 CTL_PROTO(opt_junk)
+CTL_PROTO(opt_zero)
 #endif
 #ifdef JEMALLOC_SYSV
 CTL_PROTO(opt_sysv)
@@ -71,29 +78,22 @@
 #ifdef JEMALLOC_XMALLOC
 CTL_PROTO(opt_xmalloc)
 #endif
-#ifdef JEMALLOC_ZERO
-CTL_PROTO(opt_zero)
-#endif
 #ifdef JEMALLOC_TCACHE
 CTL_PROTO(opt_tcache)
 CTL_PROTO(opt_lg_tcache_gc_sweep)
 #endif
 #ifdef JEMALLOC_PROF
 CTL_PROTO(opt_prof)
+CTL_PROTO(opt_prof_prefix)
 CTL_PROTO(opt_prof_active)
 CTL_PROTO(opt_lg_prof_bt_max)
 CTL_PROTO(opt_lg_prof_sample)
 CTL_PROTO(opt_lg_prof_interval)
-CTL_PROTO(opt_prof_udump)
+CTL_PROTO(opt_prof_gdump)
 CTL_PROTO(opt_prof_leak)
 CTL_PROTO(opt_prof_accum)
 CTL_PROTO(opt_lg_prof_tcmax)
 #endif
-CTL_PROTO(opt_stats_print)
-CTL_PROTO(opt_lg_qspace_max)
-CTL_PROTO(opt_lg_cspace_max)
-CTL_PROTO(opt_lg_dirty_mult)
-CTL_PROTO(opt_lg_chunk)
 #ifdef JEMALLOC_SWAP
 CTL_PROTO(opt_overcommit)
 #endif
@@ -247,38 +247,43 @@
 
 static const ctl_node_t opt_node[] = {
 	{NAME("abort"),			CTL(opt_abort)},
+	{NAME("lg_qspace_max"),		CTL(opt_lg_qspace_max)},
+	{NAME("lg_cspace_max"),		CTL(opt_lg_cspace_max)},
+	{NAME("lg_chunk"),		CTL(opt_lg_chunk)},
+	{NAME("narenas"),		CTL(opt_narenas)},
+	{NAME("lg_dirty_mult"),		CTL(opt_lg_dirty_mult)},
+	{NAME("stats_print"),		CTL(opt_stats_print)}
 #ifdef JEMALLOC_FILL
+	,
 	{NAME("junk"),			CTL(opt_junk)},
+	{NAME("zero"),			CTL(opt_zero)}
 #endif
 #ifdef JEMALLOC_SYSV
-	{NAME("sysv"),			CTL(opt_sysv)},
+	,
+	{NAME("sysv"),			CTL(opt_sysv)}
 #endif
 #ifdef JEMALLOC_XMALLOC
-	{NAME("xmalloc"),		CTL(opt_xmalloc)},
-#endif
-#ifdef JEMALLOC_ZERO
-	{NAME("zero"),			CTL(opt_zero)},
+	,
+	{NAME("xmalloc"),		CTL(opt_xmalloc)}
 #endif
 #ifdef JEMALLOC_TCACHE
+	,
 	{NAME("tcache"),		CTL(opt_tcache)},
-	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)},
+	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)}
 #endif
 #ifdef JEMALLOC_PROF
+	,
 	{NAME("prof"),			CTL(opt_prof)},
+	{NAME("prof_prefix"),		CTL(opt_prof_prefix)},
 	{NAME("prof_active"),		CTL(opt_prof_active)},
 	{NAME("lg_prof_bt_max"),	CTL(opt_lg_prof_bt_max)},
 	{NAME("lg_prof_sample"),	CTL(opt_lg_prof_sample)},
 	{NAME("lg_prof_interval"),	CTL(opt_lg_prof_interval)},
-	{NAME("prof_udump"),		CTL(opt_prof_udump)},
+	{NAME("prof_gdump"),		CTL(opt_prof_gdump)},
 	{NAME("prof_leak"),		CTL(opt_prof_leak)},
 	{NAME("prof_accum"),		CTL(opt_prof_accum)},
-	{NAME("lg_prof_tcmax"),		CTL(opt_lg_prof_tcmax)},
+	{NAME("lg_prof_tcmax"),		CTL(opt_lg_prof_tcmax)}
 #endif
-	{NAME("stats_print"),		CTL(opt_stats_print)},
-	{NAME("lg_qspace_max"),		CTL(opt_lg_qspace_max)},
-	{NAME("lg_cspace_max"),		CTL(opt_lg_cspace_max)},
-	{NAME("lg_dirty_mult"),		CTL(opt_lg_dirty_mult)},
-	{NAME("lg_chunk"),		CTL(opt_lg_chunk)}
 #ifdef JEMALLOC_SWAP
 	,
 	{NAME("overcommit"),		CTL(opt_overcommit)}
@@ -1201,8 +1206,15 @@
 /******************************************************************************/
 
 CTL_RO_GEN(opt_abort, opt_abort, bool)
+CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
+CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
+CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
+CTL_RO_GEN(opt_narenas, opt_narenas, size_t)
+CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
 #ifdef JEMALLOC_FILL
 CTL_RO_GEN(opt_junk, opt_junk, bool)
+CTL_RO_GEN(opt_zero, opt_zero, bool)
 #endif
 #ifdef JEMALLOC_SYSV
 CTL_RO_GEN(opt_sysv, opt_sysv, bool)
@@ -1210,29 +1222,22 @@
 #ifdef JEMALLOC_XMALLOC
 CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool)
 #endif
-#ifdef JEMALLOC_ZERO
-CTL_RO_GEN(opt_zero, opt_zero, bool)
-#endif
 #ifdef JEMALLOC_TCACHE
 CTL_RO_GEN(opt_tcache, opt_tcache, bool)
 CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
 #endif
 #ifdef JEMALLOC_PROF
 CTL_RO_GEN(opt_prof, opt_prof, bool)
+CTL_RO_GEN(opt_prof_prefix, opt_prof_prefix, const char *)
 CTL_RO_GEN(opt_prof_active, opt_prof_active, bool)
 CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
 CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
 CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
-CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
+CTL_RO_GEN(opt_prof_gdump, opt_prof_gdump, bool)
 CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
 CTL_RO_GEN(opt_prof_accum, opt_prof_accum, bool)
 CTL_RO_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t)
 #endif
-CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
-CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
-CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
-CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
-CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
 #ifdef JEMALLOC_SWAP
 CTL_RO_GEN(opt_overcommit, opt_overcommit, bool)
 #endif
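
Since the always-available options are now registered unconditionally,
their resolved values can be read back at run time.  A sketch, assuming
the default "" prefix (otherwise the call is <prefix>mallctl):

	size_t narenas;
	size_t sz = sizeof(narenas);

	if (mallctl("opt.narenas", &narenas, &sz, NULL, 0) == 0) {
		/* narenas holds the value chosen at startup. */
	}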
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index dedf011..012c4a8 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -41,8 +41,7 @@
 unsigned	ncpus;
 
 /* Runtime configuration options. */
-const char	*JEMALLOC_P(malloc_options)
-    JEMALLOC_ATTR(visibility("default"));
+const char	*JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default"));
 #ifdef JEMALLOC_DEBUG
 bool	opt_abort = true;
 #  ifdef JEMALLOC_FILL
@@ -63,7 +62,7 @@
 #ifdef JEMALLOC_FILL
 bool	opt_zero = false;
 #endif
-static int	opt_narenas_lshift = 0;
+size_t	opt_narenas = 0;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
@@ -74,6 +73,11 @@
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void	thread_allocated_cleanup(void *arg);
 #endif
+static bool	malloc_conf_next(char const **opts_p, char const **k_p,
+    size_t *klen_p, char const **v_p, size_t *vlen_p);
+static void	malloc_conf_error(const char *msg, const char *k, size_t klen,
+    const char *v, size_t vlen);
+static void	malloc_conf_init(void);
 static bool	malloc_init_hard(void);
 
 /******************************************************************************/
@@ -260,12 +264,323 @@
 }
 
 static bool
-malloc_init_hard(void)
+malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
+    char const **v_p, size_t *vlen_p)
+{
+	bool accept;
+	const char *opts = *opts_p;
+
+	*k_p = opts;
+
+	for (accept = false; accept == false;) {
+		switch (*opts) {
+			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'F': case 'G': case 'H': case 'I': case 'J':
+			case 'K': case 'L': case 'M': case 'N': case 'O':
+			case 'P': case 'Q': case 'R': case 'S': case 'T':
+			case 'U': case 'V': case 'W': case 'X': case 'Y':
+			case 'Z':
+			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case 'f': case 'g': case 'h': case 'i': case 'j':
+			case 'k': case 'l': case 'm': case 'n': case 'o':
+			case 'p': case 'q': case 'r': case 's': case 't':
+			case 'u': case 'v': case 'w': case 'x': case 'y':
+			case 'z':
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+			case '_':
+				opts++;
+				break;
+			case ':':
+				opts++;
+				*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
+				*v_p = opts;
+				accept = true;
+				break;
+			case '\0':
+				if (opts != *opts_p) {
+					malloc_write("<jemalloc>: Conf string "
+					    "ends with key\n");
+				}
+				return (true);
+			default:
+				malloc_write("<jemalloc>: Malformed conf "
+				    "string\n");
+				return (true);
+		}
+	}
+
+	for (accept = false; accept == false;) {
+		switch (*opts) {
+			case ',':
+				opts++;
+				/*
+				 * Look ahead one character here, because the
+				 * next time this function is called, it will
+				 * assume that end of input has been cleanly
+				 * reached if no input remains, but we have
+				 * optimistically already consumed the comma if
+				 * one exists.
+				 */
+				if (*opts == '\0') {
+					malloc_write("<jemalloc>: Conf string "
+					    "ends with comma\n");
+				}
+				*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
+				accept = true;
+				break;
+			case '\0':
+				*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
+				accept = true;
+				break;
+			default:
+				opts++;
+				break;
+		}
+	}
+
+	*opts_p = opts;
+	return (false);
+}
+
+static void
+malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
+    size_t vlen)
+{
+	char buf[PATH_MAX + 1];
+
+	malloc_write("<jemalloc>: ");
+	malloc_write(msg);
+	malloc_write(": ");
+	memcpy(buf, k, klen);
+	memcpy(&buf[klen], ":", 1);
+	memcpy(&buf[klen+1], v, vlen);
+	buf[klen+1+vlen] = '\0';
+	malloc_write(buf);
+	malloc_write("\n");
+}
+
+static void
+malloc_conf_init(void)
 {
 	unsigned i;
-	int linklen;
 	char buf[PATH_MAX + 1];
-	const char *opts;
+	const char *opts, *k, *v;
+	size_t klen, vlen;
+
+	for (i = 0; i < 3; i++) {
+		/* Get runtime configuration. */
+		switch (i) {
+		case 0:
+			if (JEMALLOC_P(malloc_conf) != NULL) {
+				/*
+				 * Use options that were compiled into the
+				 * program.
+				 */
+				opts = JEMALLOC_P(malloc_conf);
+			} else {
+				/* No configuration specified. */
+				buf[0] = '\0';
+				opts = buf;
+			}
+			break;
+		case 1: {
+			int linklen;
+			const char *linkname =
+#ifdef JEMALLOC_PREFIX
+			    "/etc/"JEMALLOC_PREFIX"malloc.conf"
+#else
+			    "/etc/malloc.conf"
+#endif
+			    ;
+
+			if ((linklen = readlink(linkname, buf,
+			    sizeof(buf) - 1)) != -1) {
+				/*
+				 * Use the contents of the "/etc/malloc.conf"
+				 * symbolic link's name.
+				 */
+				buf[linklen] = '\0';
+				opts = buf;
+			} else {
+				/* No configuration specified. */
+				buf[0] = '\0';
+				opts = buf;
+			}
+			break;
+		}
+		case 2: {
+			const char *envname =
+#ifdef JEMALLOC_PREFIX
+			    JEMALLOC_CPREFIX"MALLOC_CONF"
+#else
+			    "MALLOC_CONF"
+#endif
+			    ;
+
+			if ((opts = getenv(envname)) != NULL) {
+				/*
+				 * Do nothing; opts is already initialized to
+				 * the value of the MALLOC_CONF environment
+				 * variable.
+				 */
+			} else {
+				/* No configuration specified. */
+				buf[0] = '\0';
+				opts = buf;
+			}
+			break;
+		}
+		default:
+			/* NOTREACHED */
+			assert(false);
+			buf[0] = '\0';
+			opts = buf;
+		}
+
+		while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
+		    &vlen) == false) {
+#define	CONF_HANDLE_BOOL(n)						\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				if (strncmp("true", v, vlen) == 0 &&	\
+				    vlen == sizeof("true")-1)		\
+					opt_##n = true;			\
+				else if (strncmp("false", v, vlen) ==	\
+				    0 && vlen == sizeof("false")-1)	\
+					opt_##n = false;		\
+				else {					\
+					malloc_conf_error(		\
+					    "Invalid conf value",	\
+					    k, klen, v, vlen);		\
+				}					\
+				continue;				\
+			}
+#define	CONF_HANDLE_SIZE_T(n, min, max)					\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				unsigned long ul;			\
+				char *end;				\
+									\
+				errno = 0;				\
+				ul = strtoul(v, &end, 0);		\
+				if (errno != 0 || (uintptr_t)end -	\
+				    (uintptr_t)v != vlen) {		\
+					malloc_conf_error(		\
+					    "Invalid conf value",	\
+					    k, klen, v, vlen);		\
+				} else if (ul < min || ul > max) {	\
+					malloc_conf_error(		\
+					    "Out-of-range conf value",	\
+					    k, klen, v, vlen);		\
+				} else					\
+					opt_##n = ul;			\
+				continue;				\
+			}
+#define	CONF_HANDLE_SSIZE_T(n, min, max)				\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				long l;					\
+				char *end;				\
+									\
+				errno = 0;				\
+				l = strtol(v, &end, 0);			\
+				if (errno != 0 || (uintptr_t)end -	\
+				    (uintptr_t)v != vlen) {		\
+					malloc_conf_error(		\
+					    "Invalid conf value",	\
+					    k, klen, v, vlen);		\
+				} else if (l < (ssize_t)min || l >	\
+				    (ssize_t)max) {			\
+					malloc_conf_error(		\
+					    "Out-of-range conf value",	\
+					    k, klen, v, vlen);		\
+				} else					\
+					opt_##n = l;			\
+				continue;				\
+			}
+#define	CONF_HANDLE_CHAR_P(n, d)					\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				size_t cpylen = (vlen <=		\
+				    sizeof(opt_##n)-1) ? vlen :		\
+				    sizeof(opt_##n)-1;			\
+				strncpy(opt_##n, v, cpylen);		\
+				opt_##n[cpylen] = '\0';			\
+				continue;				\
+			}
+
+			CONF_HANDLE_BOOL(abort)
+			CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM,
+			    PAGE_SHIFT-1)
+			CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM,
+			    PAGE_SHIFT-1)
+			/*
+			 * Chunks always require at least one header page,
+			 * plus one data page.
+			 */
+			CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX)
+			CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_BOOL(stats_print)
+#ifdef JEMALLOC_FILL
+			CONF_HANDLE_BOOL(junk)
+			CONF_HANDLE_BOOL(zero)
+#endif
+#ifdef JEMALLOC_SYSV
+			CONF_HANDLE_BOOL(sysv)
+#endif
+#ifdef JEMALLOC_XMALLOC
+			CONF_HANDLE_BOOL(xmalloc)
+#endif
+#ifdef JEMALLOC_TCACHE
+			CONF_HANDLE_BOOL(tcache)
+			CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SSIZE_T(lg_tcache_max, -1,
+			    (sizeof(size_t) << 3) - 1)
+#endif
+#ifdef JEMALLOC_PROF
+			CONF_HANDLE_BOOL(prof)
+			CONF_HANDLE_CHAR_P(prof_prefix, "jeprof")
+			CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX)
+			CONF_HANDLE_BOOL(prof_active)
+			CONF_HANDLE_SSIZE_T(lg_prof_sample, 0,
+			    (sizeof(uint64_t) << 3) - 1)
+			CONF_HANDLE_BOOL(prof_accum)
+			CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SSIZE_T(lg_prof_interval, -1,
+			    (sizeof(uint64_t) << 3) - 1)
+			CONF_HANDLE_BOOL(prof_gdump)
+			CONF_HANDLE_BOOL(prof_leak)
+#endif
+#ifdef JEMALLOC_SWAP
+			CONF_HANDLE_BOOL(overcommit)
+#endif
+			malloc_conf_error("Invalid conf pair", k, klen, v,
+			    vlen);
+#undef CONF_HANDLE_BOOL
+#undef CONF_HANDLE_SIZE_T
+#undef CONF_HANDLE_SSIZE_T
+#undef CONF_HANDLE_CHAR_P
+		}
+
+		/* Validate configuration of options that are inter-related. */
+		if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) {
+			malloc_write("<jemalloc>: Invalid lg_[qc]space_max "
+			    "relationship; restoring defaults\n");
+			opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
+			opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
+		}
+	}
+}
+
+static bool
+malloc_init_hard(void)
+{
 	arena_t *init_arenas[1];
 
 	malloc_mutex_lock(&init_lock);
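
A reading aid, not part of the patch: malloc_conf_next() accepts
comma-separated key:value pairs, where keys match [A-Za-z0-9_]+ and values
run to the next comma or the end of the string, and each CONF_HANDLE_* use
above expands to an inline comparison against one key.
CONF_HANDLE_BOOL(abort) expands to approximately:

	/* Accepted form: "abort:true,lg_chunk:24,narenas:4" and the like. */
	if (sizeof("abort")-1 == klen && strncmp("abort", k, klen) == 0) {
		if (strncmp("true", v, vlen) == 0 && vlen == sizeof("true")-1)
			opt_abort = true;
		else if (strncmp("false", v, vlen) == 0 &&
		    vlen == sizeof("false")-1)
			opt_abort = false;
		else {
			malloc_conf_error("Invalid conf value", k, klen, v,
			    vlen);
		}
		continue;
	}

The size_t/ssize_t handlers have the same shape, with strtoul()/strtol()
plus range checks in place of the true/false comparison.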
@@ -308,302 +623,9 @@
 	}
 #endif
 
-	for (i = 0; i < 3; i++) {
-		unsigned j;
+	prof_boot0();
 
-		/* Get runtime configuration. */
-		switch (i) {
-		case 0:
-			if ((linklen = readlink("/etc/jemalloc.conf", buf,
-						sizeof(buf) - 1)) != -1) {
-				/*
-				 * Use the contents of the "/etc/jemalloc.conf"
-				 * symbolic link's name.
-				 */
-				buf[linklen] = '\0';
-				opts = buf;
-			} else {
-				/* No configuration specified. */
-				buf[0] = '\0';
-				opts = buf;
-			}
-			break;
-		case 1:
-			if ((opts = getenv("JEMALLOC_OPTIONS")) != NULL) {
-				/*
-				 * Do nothing; opts is already initialized to
-				 * the value of the JEMALLOC_OPTIONS
-				 * environment variable.
-				 */
-			} else {
-				/* No configuration specified. */
-				buf[0] = '\0';
-				opts = buf;
-			}
-			break;
-		case 2:
-			if (JEMALLOC_P(malloc_options) != NULL) {
-				/*
-				 * Use options that were compiled into the
-				 * program.
-				 */
-				opts = JEMALLOC_P(malloc_options);
-			} else {
-				/* No configuration specified. */
-				buf[0] = '\0';
-				opts = buf;
-			}
-			break;
-		default:
-			/* NOTREACHED */
-			assert(false);
-			buf[0] = '\0';
-			opts = buf;
-		}
-
-		for (j = 0; opts[j] != '\0'; j++) {
-			unsigned k, nreps;
-			bool nseen;
-
-			/* Parse repetition count, if any. */
-			for (nreps = 0, nseen = false;; j++, nseen = true) {
-				switch (opts[j]) {
-					case '0': case '1': case '2': case '3':
-					case '4': case '5': case '6': case '7':
-					case '8': case '9':
-						nreps *= 10;
-						nreps += opts[j] - '0';
-						break;
-					default:
-						goto MALLOC_OUT;
-				}
-			}
-MALLOC_OUT:
-			if (nseen == false)
-				nreps = 1;
-
-			for (k = 0; k < nreps; k++) {
-				switch (opts[j]) {
-				case 'a':
-					opt_abort = false;
-					break;
-				case 'A':
-					opt_abort = true;
-					break;
-#ifdef JEMALLOC_PROF
-				case 'b':
-					if (opt_lg_prof_bt_max > 0)
-						opt_lg_prof_bt_max--;
-					break;
-				case 'B':
-					if (opt_lg_prof_bt_max < LG_PROF_BT_MAX)
-						opt_lg_prof_bt_max++;
-					break;
-#endif
-				case 'c':
-					if (opt_lg_cspace_max - 1 >
-					    opt_lg_qspace_max &&
-					    opt_lg_cspace_max >
-					    LG_CACHELINE)
-						opt_lg_cspace_max--;
-					break;
-				case 'C':
-					if (opt_lg_cspace_max < PAGE_SHIFT
-					    - 1)
-						opt_lg_cspace_max++;
-					break;
-				case 'd':
-					if (opt_lg_dirty_mult + 1 <
-					    (sizeof(size_t) << 3))
-						opt_lg_dirty_mult++;
-					break;
-				case 'D':
-					if (opt_lg_dirty_mult >= 0)
-						opt_lg_dirty_mult--;
-					break;
-#ifdef JEMALLOC_PROF
-				case 'e':
-					opt_prof_active = false;
-					break;
-				case 'E':
-					opt_prof_active = true;
-					break;
-				case 'f':
-					opt_prof = false;
-					break;
-				case 'F':
-					opt_prof = true;
-					break;
-#endif
-#ifdef JEMALLOC_TCACHE
-				case 'g':
-					if (opt_lg_tcache_gc_sweep >= 0)
-						opt_lg_tcache_gc_sweep--;
-					break;
-				case 'G':
-					if (opt_lg_tcache_gc_sweep + 1 <
-					    (sizeof(size_t) << 3))
-						opt_lg_tcache_gc_sweep++;
-					break;
-				case 'h':
-					opt_tcache = false;
-					break;
-				case 'H':
-					opt_tcache = true;
-					break;
-#endif
-#ifdef JEMALLOC_PROF
-				case 'i':
-					if (opt_lg_prof_interval >= 0)
-						opt_lg_prof_interval--;
-					break;
-				case 'I':
-					if (opt_lg_prof_interval + 1 <
-					    (sizeof(uint64_t) << 3))
-						opt_lg_prof_interval++;
-					break;
-#endif
-#ifdef JEMALLOC_FILL
-				case 'j':
-					opt_junk = false;
-					break;
-				case 'J':
-					opt_junk = true;
-					break;
-#endif
-				case 'k':
-					/*
-					 * Chunks always require at least one
-					 * header page, plus one data page.
-					 */
-					if ((1U << (opt_lg_chunk - 1)) >=
-					    (2U << PAGE_SHIFT))
-						opt_lg_chunk--;
-					break;
-				case 'K':
-					if (opt_lg_chunk + 1 <
-					    (sizeof(size_t) << 3))
-						opt_lg_chunk++;
-					break;
-#ifdef JEMALLOC_PROF
-				case 'l':
-					opt_prof_leak = false;
-					break;
-				case 'L':
-					opt_prof_leak = true;
-					break;
-#endif
-#ifdef JEMALLOC_TCACHE
-				case 'm':
-					if (opt_lg_tcache_maxclass >= 0)
-						opt_lg_tcache_maxclass--;
-					break;
-				case 'M':
-					if (opt_lg_tcache_maxclass + 1 <
-					    (sizeof(size_t) << 3))
-						opt_lg_tcache_maxclass++;
-					break;
-#endif
-				case 'n':
-					opt_narenas_lshift--;
-					break;
-				case 'N':
-					opt_narenas_lshift++;
-					break;
-#ifdef JEMALLOC_SWAP
-				case 'o':
-					opt_overcommit = false;
-					break;
-				case 'O':
-					opt_overcommit = true;
-					break;
-#endif
-				case 'p':
-					opt_stats_print = false;
-					break;
-				case 'P':
-					opt_stats_print = true;
-					break;
-				case 'q':
-					if (opt_lg_qspace_max > LG_QUANTUM)
-						opt_lg_qspace_max--;
-					break;
-				case 'Q':
-					if (opt_lg_qspace_max + 1 <
-					    opt_lg_cspace_max)
-						opt_lg_qspace_max++;
-					break;
-#ifdef JEMALLOC_PROF
-				case 'r':
-					opt_prof_accum = false;
-					break;
-				case 'R':
-					opt_prof_accum = true;
-					break;
-				case 's':
-					if (opt_lg_prof_sample > 0)
-						opt_lg_prof_sample--;
-					break;
-				case 'S':
-					if (opt_lg_prof_sample + 1 <
-					    (sizeof(uint64_t) << 3))
-						opt_lg_prof_sample++;
-					break;
-				case 't':
-					if (opt_lg_prof_tcmax >= 0)
-						opt_lg_prof_tcmax--;
-					break;
-				case 'T':
-					if (opt_lg_prof_tcmax + 1 <
-					    (sizeof(size_t) << 3))
-						opt_lg_prof_tcmax++;
-					break;
-				case 'u':
-					opt_prof_udump = false;
-					break;
-				case 'U':
-					opt_prof_udump = true;
-					break;
-#endif
-#ifdef JEMALLOC_SYSV
-				case 'v':
-					opt_sysv = false;
-					break;
-				case 'V':
-					opt_sysv = true;
-					break;
-#endif
-#ifdef JEMALLOC_XMALLOC
-				case 'x':
-					opt_xmalloc = false;
-					break;
-				case 'X':
-					opt_xmalloc = true;
-					break;
-#endif
-#ifdef JEMALLOC_FILL
-				case 'z':
-					opt_zero = false;
-					break;
-				case 'Z':
-					opt_zero = true;
-					break;
-#endif
-				default: {
-					char cbuf[2];
-
-					cbuf[0] = opts[j];
-					cbuf[1] = '\0';
-					malloc_write(
-					    "<jemalloc>: Unsupported character "
-					    "in malloc options: '");
-					malloc_write(cbuf);
-					malloc_write("'\n");
-				}
-				}
-			}
-		}
-	}
+	malloc_conf_init();
 
 	/* Register fork handlers. */
 	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork,
@@ -638,7 +660,7 @@
 	}
 
 #ifdef JEMALLOC_PROF
-	prof_boot0();
+	prof_boot1();
 #endif
 
 	if (arena_boot()) {
@@ -692,7 +714,7 @@
 	malloc_mutex_init(&arenas_lock);
 
 #ifdef JEMALLOC_PROF
-	if (prof_boot1()) {
+	if (prof_boot2()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
@@ -704,31 +726,29 @@
 	ncpus = malloc_ncpus();
 	malloc_mutex_lock(&init_lock);
 
-	if (ncpus > 1) {
+	if (opt_narenas == 0) {
 		/*
 		 * For SMP systems, create more than one arena per CPU by
 		 * default.
 		 */
-		opt_narenas_lshift += 2;
+		if (ncpus > 1)
+			opt_narenas = ncpus << 2;
+		else
+			opt_narenas = 1;
 	}
+	narenas = opt_narenas;
+	/*
+	 * Make sure that the arenas array can be allocated.  In practice, this
+	 * limit is enough to allow the allocator to function, but the ctl
+	 * machinery will fail to allocate memory at far lower limits.
+	 */
+	if (narenas > chunksize / sizeof(arena_t *)) {
+		char buf[UMAX2S_BUFSIZE];
 
-	/* Determine how many arenas to use. */
-	narenas = ncpus;
-	if (opt_narenas_lshift > 0) {
-		if ((narenas << opt_narenas_lshift) > narenas)
-			narenas <<= opt_narenas_lshift;
-		/*
-		 * Make sure not to exceed the limits of what base_alloc() can
-		 * handle.
-		 */
-		if (narenas * sizeof(arena_t *) > chunksize)
-			narenas = chunksize / sizeof(arena_t *);
-	} else if (opt_narenas_lshift < 0) {
-		if ((narenas >> -opt_narenas_lshift) < narenas)
-			narenas >>= -opt_narenas_lshift;
-		/* Make sure there is at least one arena. */
-		if (narenas == 0)
-			narenas = 1;
+		narenas = chunksize / sizeof(arena_t *);
+		malloc_write("<jemalloc>: Reducing narenas to limit (");
+		malloc_write(u2s(narenas, 10, buf));
+		malloc_write(")\n");
 	}
 
 	next_arena = (narenas > 0) ? 1 : 0;
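
Condensed, the new arena sizing policy is (a sketch of the logic above,
not additional code):

	narenas = opt_narenas ? opt_narenas : ((ncpus > 1) ? ncpus << 2 : 1);
	if (narenas > chunksize / sizeof(arena_t *))
		narenas = chunksize / sizeof(arena_t *);	/* warns via u2s() */

So, for example, MALLOC_CONF=narenas:1 pins a single arena, replacing
repeated 'n' flags under the old scheme.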
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index fb0e765..84ce1ba 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -20,10 +20,11 @@
 size_t		opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
-bool		opt_prof_udump = false;
+bool		opt_prof_gdump = false;
 bool		opt_prof_leak = false;
 bool		opt_prof_accum = true;
 ssize_t		opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
+char		opt_prof_prefix[PATH_MAX + 1];
 
 uint64_t	prof_interval;
 bool		prof_promote;
@@ -64,7 +65,7 @@
 static malloc_mutex_t	enq_mtx;
 static bool		enq;
 static bool		enq_idump;
-static bool		enq_udump;
+static bool		enq_gdump;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
@@ -150,7 +151,7 @@
 static inline void
 prof_leave(void)
 {
-	bool idump, udump;
+	bool idump, gdump;
 
 	malloc_mutex_unlock(&bt2ctx_mtx);
 
@@ -158,14 +159,14 @@
 	enq = false;
 	idump = enq_idump;
 	enq_idump = false;
-	udump = enq_udump;
-	enq_udump = false;
+	gdump = enq_gdump;
+	enq_gdump = false;
 	malloc_mutex_unlock(&enq_mtx);
 
 	if (idump)
 		prof_idump();
-	if (udump)
-		prof_udump();
+	if (gdump)
+		prof_gdump();
 }
 
 #ifdef JEMALLOC_PROF_LIBGCC
@@ -681,22 +682,22 @@
 		return (false);
 	}
 
-	if (prof_write(umax2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
+	if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
 	    || prof_write(": ", propagate_err)
-	    || prof_write(umax2s(ctx->cnt_summed.curbytes, 10, buf),
+	    || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
 	    propagate_err)
 	    || prof_write(" [", propagate_err)
-	    || prof_write(umax2s(ctx->cnt_summed.accumobjs, 10, buf),
+	    || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
 	    propagate_err)
 	    || prof_write(": ", propagate_err)
-	    || prof_write(umax2s(ctx->cnt_summed.accumbytes, 10, buf),
+	    || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
 	    propagate_err)
 	    || prof_write("] @", propagate_err))
 		return (true);
 
 	for (i = 0; i < bt->len; i++) {
 		if (prof_write(" 0x", propagate_err)
-		    || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf),
+		    || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
 		    propagate_err))
 			return (true);
 	}
@@ -725,7 +726,7 @@
 	memcpy(&mpath[i], s, slen);
 	i += slen;
 
-	s = umax2s(getpid(), 10, buf);
+	s = u2s(getpid(), 10, buf);
 	slen = strlen(s);
 	memcpy(&mpath[i], s, slen);
 	i += slen;
@@ -799,13 +800,13 @@
 
 	/* Dump profile header. */
 	if (prof_write("heap profile: ", propagate_err)
-	    || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err)
+	    || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
 	    || prof_write(": ", propagate_err)
-	    || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err)
+	    || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
 	    || prof_write(" [", propagate_err)
-	    || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err)
+	    || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
 	    || prof_write(": ", propagate_err)
-	    || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err))
+	    || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
 		goto ERROR;
 
 	if (opt_lg_prof_sample == 0) {
@@ -813,7 +814,7 @@
 			goto ERROR;
 	} else {
 		if (prof_write("] @ heap_v2/", propagate_err)
-		    || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10,
+		    || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
 		    buf), propagate_err)
 		    || prof_write("\n", propagate_err))
 			goto ERROR;
@@ -837,12 +838,12 @@
 
 	if (leakcheck && cnt_all.curbytes != 0) {
 		malloc_write("<jemalloc>: Leak summary: ");
-		malloc_write(umax2s(cnt_all.curbytes, 10, buf));
+		malloc_write(u2s(cnt_all.curbytes, 10, buf));
 		malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
-		malloc_write(umax2s(cnt_all.curobjs, 10, buf));
+		malloc_write(u2s(cnt_all.curobjs, 10, buf));
 		malloc_write((cnt_all.curobjs != 1) ? " objects, " :
 		    " object, ");
-		malloc_write(umax2s(leak_nctx, 10, buf));
+		malloc_write(u2s(leak_nctx, 10, buf));
 		malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
 		malloc_write("<jemalloc>: Run pprof on \"");
 		malloc_write(filename);
@@ -872,31 +873,11 @@
 	 * Construct a filename of the form:
 	 *
 	 *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
-	 * or
-	 *   jeprof.<pid>.<seq>.v<vseq>.heap\0
 	 */
 
 	i = 0;
 
-	/*
-	 * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to
-	 * avoid overflowing DUMP_FILENAME_BUFSIZE.  The result may exceed
-	 * PATH_MAX, but creat(2) will catch that problem.
-	 */
-	if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL
-	    && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) {
-		slen = strlen(s);
-		memcpy(&filename[i], s, slen);
-		i += slen;
-
-		s = ".";
-	} else
-		s = "jeprof.";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = umax2s(getpid(), 10, buf);
+	s = opt_prof_prefix;
 	slen = strlen(s);
 	memcpy(&filename[i], s, slen);
 	i += slen;
@@ -906,7 +887,17 @@
 	memcpy(&filename[i], s, slen);
 	i += slen;
 
-	s = umax2s(prof_dump_seq, 10, buf);
+	s = u2s(getpid(), 10, buf);
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = ".";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = u2s(prof_dump_seq, 10, buf);
 	prof_dump_seq++;
 	slen = strlen(s);
 	memcpy(&filename[i], s, slen);
@@ -921,7 +912,7 @@
 	i++;
 
 	if (vseq != 0xffffffffffffffffLLU) {
-		s = umax2s(vseq, 10, buf);
+		s = u2s(vseq, 10, buf);
 		slen = strlen(s);
 		memcpy(&filename[i], s, slen);
 		i += slen;
@@ -943,10 +934,12 @@
 	if (prof_booted == false)
 		return;
 
-	malloc_mutex_lock(&prof_dump_seq_mtx);
-	prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
-	malloc_mutex_unlock(&prof_dump_seq_mtx);
-	prof_dump(filename, opt_prof_leak, false);
+	if (opt_prof_prefix[0] != '\0') {
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		prof_dump(filename, opt_prof_leak, false);
+	}
 }
 
 void
@@ -964,11 +957,13 @@
 	}
 	malloc_mutex_unlock(&enq_mtx);
 
-	malloc_mutex_lock(&prof_dump_seq_mtx);
-	prof_dump_filename(filename, 'i', prof_dump_iseq);
-	prof_dump_iseq++;
-	malloc_mutex_unlock(&prof_dump_seq_mtx);
-	prof_dump(filename, false, false);
+	if (opt_prof_prefix[0] != '\0') {
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename, 'i', prof_dump_iseq);
+		prof_dump_iseq++;
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		prof_dump(filename, false, false);
+	}
 }
 
 bool
@@ -981,6 +976,8 @@
 
 	if (filename == NULL) {
 		/* No filename specified, so automatically generate one. */
+		if (opt_prof_prefix[0] == '\0')
+			return (true);
 		malloc_mutex_lock(&prof_dump_seq_mtx);
 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
 		prof_dump_mseq++;
@@ -991,7 +988,7 @@
 }
 
 void
-prof_udump(void)
+prof_gdump(void)
 {
 	char filename[DUMP_FILENAME_BUFSIZE];
 
@@ -999,17 +996,19 @@
 		return;
 	malloc_mutex_lock(&enq_mtx);
 	if (enq) {
-		enq_udump = true;
+		enq_gdump = true;
 		malloc_mutex_unlock(&enq_mtx);
 		return;
 	}
 	malloc_mutex_unlock(&enq_mtx);
 
-	malloc_mutex_lock(&prof_dump_seq_mtx);
-	prof_dump_filename(filename, 'u', prof_dump_useq);
-	prof_dump_useq++;
-	malloc_mutex_unlock(&prof_dump_seq_mtx);
-	prof_dump(filename, false, false);
+	if (opt_prof_prefix[0] != '\0') {
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename, 'u', prof_dump_useq);
+		prof_dump_useq++;
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		prof_dump(filename, false, false);
+	}
 }
 
 static void
@@ -1122,6 +1121,14 @@
 prof_boot0(void)
 {
 
+	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
+	    sizeof(PROF_PREFIX_DEFAULT));
+}
+
+void
+prof_boot1(void)
+{
+
 	/*
 	 * opt_prof and prof_promote must be in their final state before any
 	 * arenas are initialized, so this function must be executed early.
@@ -1133,7 +1140,7 @@
 		 * automatically dumped.
 		 */
 		opt_prof = true;
-		opt_prof_udump = false;
+		opt_prof_gdump = false;
 		prof_interval = 0;
 	} else if (opt_prof) {
 		if (opt_lg_prof_interval >= 0) {
@@ -1147,7 +1154,7 @@
 }
 
 bool
-prof_boot1(void)
+prof_boot2(void)
 {
 
 	if (opt_prof) {
@@ -1171,7 +1178,7 @@
 			return (true);
 		enq = false;
 		enq_idump = false;
-		enq_udump = false;
+		enq_gdump = false;
 
 		if (atexit(prof_fdump) != 0) {
 			malloc_write("<jemalloc>: Error in atexit()\n");
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index 9b3271b..3dfe0d2 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -57,12 +57,12 @@
 
 /*
  * We don't want to depend on vsnprintf() for production builds, since that can
- * cause unnecessary bloat for static binaries.  umax2s() provides minimal
- * integer printing functionality, so that malloc_printf() use can be limited to
+ * cause unnecessary bloat for static binaries.  u2s() provides minimal integer
+ * printing functionality, so that malloc_printf() use can be limited to
  * JEMALLOC_STATS code.
  */
 char *
-umax2s(uintmax_t x, unsigned base, char *s)
+u2s(uint64_t x, unsigned base, char *s)
 {
 	unsigned i;
 
@@ -72,8 +72,8 @@
 	case 10:
 		do {
 			i--;
-			s[i] = "0123456789"[x % 10];
-			x /= 10;
+			s[i] = "0123456789"[x % (uint64_t)10];
+			x /= (uint64_t)10;
 		} while (x > 0);
 		break;
 	case 16:
@@ -86,8 +86,9 @@
 	default:
 		do {
 			i--;
-			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
-			x /= base;
+			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x %
+			    (uint64_t)base];
+			x /= (uint64_t)base;
 		} while (x > 0);
 	}
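
u2s() keeps umax2s()'s calling convention: it formats into the caller's
buffer starting from the end and returns a pointer to the first digit, so
the result must be used before the buffer is reused.  A usage sketch:

	char buf[UMAX2S_BUFSIZE];

	malloc_write(u2s(1234, 10, buf));	/* decimal */
	malloc_write(u2s(0x1234, 16, buf));	/* hexadecimal */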
 
@@ -374,6 +375,7 @@
 stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
     const char *opts)
 {
+	int err;
 	uint64_t epoch;
 	size_t u64sz;
 	char s[UMAX2S_BUFSIZE];
@@ -383,10 +385,27 @@
 	bool bins = true;
 	bool large = true;
 
-	/* Refresh stats, in case mallctl() was called by the application. */
+	/*
+	 * Refresh stats, in case mallctl() was called by the application.
+	 *
+	 * Check for OOM here, since refreshing the ctl cache can trigger
+	 * allocation.  In practice, none of the subsequent mallctl()-related
+	 * calls in this function will cause OOM if this one succeeds.
+	 */
 	epoch = 1;
 	u64sz = sizeof(uint64_t);
-	xmallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t));
+	err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch,
+	    sizeof(uint64_t));
+	if (err != 0) {
+		if (err == EAGAIN) {
+			malloc_write("<jemalloc>: Memory allocation failure in "
+			    "mallctl(\"epoch\", ...)\n");
+			return;
+		}
+		malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", "
+		    "...)\n");
+		abort();
+	}
 
 	if (write_cb == NULL) {
 		/*
@@ -430,10 +449,12 @@
 		bool bv;
 		unsigned uv;
 		ssize_t ssv;
-		size_t sv, bsz, ssz;
+		size_t sv, bsz, ssz, sssz, cpsz;
 
 		bsz = sizeof(bool);
 		ssz = sizeof(size_t);
+		sssz = sizeof(ssize_t);
+		cpsz = sizeof(const char *);
 
 		CTL_GET("version", &cpv, const char *);
 		write_cb(cbopaque, "Version: ");
@@ -444,116 +465,140 @@
 		write_cb(cbopaque, bv ? "enabled" : "disabled");
 		write_cb(cbopaque, "\n");
 
-		write_cb(cbopaque, "Boolean JEMALLOC_OPTIONS: ");
-		if ((err = JEMALLOC_P(mallctl)("opt.abort", &bv, &bsz, NULL, 0))
-		    == 0)
-			write_cb(cbopaque, bv ? "A" : "a");
-		if ((err = JEMALLOC_P(mallctl)("prof.active", &bv, &bsz,
-		    NULL, 0)) == 0)
-			write_cb(cbopaque, bv ? "E" : "e");
-		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
-		    == 0)
-			write_cb(cbopaque, bv ? "F" : "f");
-		if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL,
-		    0)) == 0)
-			write_cb(cbopaque, bv ? "H" : "h");
-		if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0))
-		    == 0)
-			write_cb(cbopaque, bv ? "J" : "j");
-		if ((err = JEMALLOC_P(mallctl)("opt.prof_leak", &bv, &bsz, NULL,
-		    0)) == 0)
-			write_cb(cbopaque, bv ? "L" : "l");
-		if ((err = JEMALLOC_P(mallctl)("opt.overcommit", &bv, &bsz,
-		    NULL, 0)) == 0)
-			write_cb(cbopaque, bv ? "O" : "o");
-		if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz,
-		    NULL, 0)) == 0)
-			write_cb(cbopaque, bv ? "P" : "p");
-		if ((err = JEMALLOC_P(mallctl)("opt.prof_accum", &bv, &bsz,
-		    NULL, 0)) == 0)
-			write_cb(cbopaque, bv ? "R" : "r");
-		if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz,
-		    NULL, 0)) == 0)
-			write_cb(cbopaque, bv ? "U" : "u");
-		if ((err = JEMALLOC_P(mallctl)("opt.sysv", &bv, &bsz, NULL, 0))
-		    == 0)
-			write_cb(cbopaque, bv ? "V" : "v");
-		if ((err = JEMALLOC_P(mallctl)("opt.xmalloc", &bv, &bsz, NULL,
-		    0)) == 0)
-			write_cb(cbopaque, bv ? "X" : "x");
-		if ((err = JEMALLOC_P(mallctl)("opt.zero", &bv, &bsz, NULL, 0))
-		    == 0)
-			write_cb(cbopaque, bv ? "Z" : "z");
-		write_cb(cbopaque, "\n");
+#define OPT_WRITE_BOOL(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz,	\
+		    NULL, 0)) == 0) {					\
+			write_cb(cbopaque, "  opt."#n": ");		\
+			write_cb(cbopaque, bv ? "true" : "false");	\
+			write_cb(cbopaque, "\n");			\
+		}
+#define OPT_WRITE_SIZE_T(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz,	\
+		    NULL, 0)) == 0) {					\
+			write_cb(cbopaque, "  opt."#n": ");		\
+			write_cb(cbopaque, u2s(sv, 10, s));		\
+			write_cb(cbopaque, "\n");			\
+		}
+#define OPT_WRITE_SSIZE_T(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz,	\
+		    NULL, 0)) == 0) {					\
+			if (ssv >= 0) {					\
+				write_cb(cbopaque, "  opt."#n": ");	\
+				write_cb(cbopaque, u2s(ssv, 10, s));	\
+			} else {					\
+				write_cb(cbopaque, "  opt."#n": -");	\
+				write_cb(cbopaque, u2s(-ssv, 10, s));	\
+			}						\
+			write_cb(cbopaque, "\n");			\
+		}
+#define OPT_WRITE_CHAR_P(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz,	\
+		    NULL, 0)) == 0) {					\
+			write_cb(cbopaque, "  opt."#n": \"");		\
+			write_cb(cbopaque, cpv);			\
+			write_cb(cbopaque, "\"\n");			\
+		}
+
+		write_cb(cbopaque, "Run-time option settings:\n");
+		OPT_WRITE_BOOL(abort)
+		OPT_WRITE_SIZE_T(lg_qspace_max)
+		OPT_WRITE_SIZE_T(lg_cspace_max)
+		OPT_WRITE_SIZE_T(lg_chunk)
+		OPT_WRITE_SIZE_T(narenas)
+		OPT_WRITE_SSIZE_T(lg_dirty_mult)
+		OPT_WRITE_BOOL(stats_print)
+		OPT_WRITE_BOOL(junk)
+		OPT_WRITE_BOOL(zero)
+		OPT_WRITE_BOOL(sysv)
+		OPT_WRITE_BOOL(xmalloc)
+		OPT_WRITE_BOOL(tcache)
+		OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep)
+		OPT_WRITE_SSIZE_T(lg_tcache_max)
+		OPT_WRITE_BOOL(prof)
+		OPT_WRITE_CHAR_P(prof_prefix)
+		OPT_WRITE_SIZE_T(lg_prof_bt_max)
+		OPT_WRITE_BOOL(prof_active)
+		OPT_WRITE_SSIZE_T(lg_prof_sample)
+		OPT_WRITE_BOOL(prof_accum)
+		OPT_WRITE_SSIZE_T(lg_prof_tcmax)
+		OPT_WRITE_SSIZE_T(lg_prof_interval)
+		OPT_WRITE_BOOL(prof_gdump)
+		OPT_WRITE_BOOL(prof_leak)
+		OPT_WRITE_BOOL(overcommit)
+
+#undef OPT_WRITE_BOOL
+#undef OPT_WRITE_SIZE_T
+#undef OPT_WRITE_SSIZE_T
+#undef OPT_WRITE_CHAR_P
 
 		write_cb(cbopaque, "CPUs: ");
-		write_cb(cbopaque, umax2s(ncpus, 10, s));
+		write_cb(cbopaque, u2s(ncpus, 10, s));
 		write_cb(cbopaque, "\n");
 
 		CTL_GET("arenas.narenas", &uv, unsigned);
 		write_cb(cbopaque, "Max arenas: ");
-		write_cb(cbopaque, umax2s(uv, 10, s));
+		write_cb(cbopaque, u2s(uv, 10, s));
 		write_cb(cbopaque, "\n");
 
 		write_cb(cbopaque, "Pointer size: ");
-		write_cb(cbopaque, umax2s(sizeof(void *), 10, s));
+		write_cb(cbopaque, u2s(sizeof(void *), 10, s));
 		write_cb(cbopaque, "\n");
 
 		CTL_GET("arenas.quantum", &sv, size_t);
 		write_cb(cbopaque, "Quantum size: ");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "\n");
 
 		CTL_GET("arenas.cacheline", &sv, size_t);
 		write_cb(cbopaque, "Cacheline size (assumed): ");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "\n");
 
 		CTL_GET("arenas.subpage", &sv, size_t);
 		write_cb(cbopaque, "Subpage spacing: ");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "\n");
 
 		if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
 		    NULL, 0)) == 0) {
 			write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
-			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, u2s(sv, 10, s));
 			write_cb(cbopaque, "..");
 
 			CTL_GET("arenas.tspace_max", &sv, size_t);
-			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, u2s(sv, 10, s));
 			write_cb(cbopaque, "]\n");
 		}
 
 		CTL_GET("arenas.qspace_min", &sv, size_t);
 		write_cb(cbopaque, "Quantum-spaced sizes: [");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "..");
 		CTL_GET("arenas.qspace_max", &sv, size_t);
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "]\n");
 
 		CTL_GET("arenas.cspace_min", &sv, size_t);
 		write_cb(cbopaque, "Cacheline-spaced sizes: [");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "..");
 		CTL_GET("arenas.cspace_max", &sv, size_t);
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "]\n");
 
 		CTL_GET("arenas.sspace_min", &sv, size_t);
 		write_cb(cbopaque, "Subpage-spaced sizes: [");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "..");
 		CTL_GET("arenas.sspace_max", &sv, size_t);
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, "]\n");
 
 		CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
 		if (ssv >= 0) {
 			write_cb(cbopaque,
 			    "Min active:dirty page ratio per arena: ");
-			write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+			write_cb(cbopaque, u2s((1U << ssv), 10, s));
 			write_cb(cbopaque, ":1\n");
 		} else {
 			write_cb(cbopaque,
@@ -563,7 +608,7 @@
 		    &ssz, NULL, 0)) == 0) {
 			write_cb(cbopaque,
 			    "Maximum thread-cached size class: ");
-			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, u2s(sv, 10, s));
 			write_cb(cbopaque, "\n");
 		}
 		if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
@@ -573,50 +618,51 @@
 			CTL_GET("opt.tcache", &tcache_enabled, bool);
 			write_cb(cbopaque, "Thread cache GC sweep interval: ");
 			write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
-			    umax2s(tcache_gc_sweep, 10, s) : "N/A");
+			    u2s(tcache_gc_sweep, 10, s) : "N/A");
 			write_cb(cbopaque, "\n");
 		}
 		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
 		   == 0 && bv) {
 			CTL_GET("opt.lg_prof_bt_max", &sv, size_t);
 			write_cb(cbopaque, "Maximum profile backtrace depth: ");
-			write_cb(cbopaque, umax2s((1U << sv), 10, s));
+			write_cb(cbopaque, u2s((1U << sv), 10, s));
 			write_cb(cbopaque, "\n");
 
 			CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t);
 			write_cb(cbopaque,
 			    "Maximum per thread backtrace cache: ");
 			if (ssv >= 0) {
-				write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+				write_cb(cbopaque, u2s((1U << ssv), 10, s));
 				write_cb(cbopaque, " (2^");
-				write_cb(cbopaque, umax2s(ssv, 10, s));
+				write_cb(cbopaque, u2s(ssv, 10, s));
 				write_cb(cbopaque, ")\n");
 			} else
 				write_cb(cbopaque, "N/A\n");
 
 			CTL_GET("opt.lg_prof_sample", &sv, size_t);
 			write_cb(cbopaque, "Average profile sample interval: ");
-			write_cb(cbopaque, umax2s((1U << sv), 10, s));
+			write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s));
 			write_cb(cbopaque, " (2^");
-			write_cb(cbopaque, umax2s(sv, 10, s));
+			write_cb(cbopaque, u2s(sv, 10, s));
 			write_cb(cbopaque, ")\n");
 
 			CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
 			write_cb(cbopaque, "Average profile dump interval: ");
 			if (ssv >= 0) {
-				write_cb(cbopaque, umax2s((1U << ssv), 10, s));
+				write_cb(cbopaque, u2s((((uint64_t)1U) << ssv),
+				    10, s));
 				write_cb(cbopaque, " (2^");
-				write_cb(cbopaque, umax2s(ssv, 10, s));
+				write_cb(cbopaque, u2s(ssv, 10, s));
 				write_cb(cbopaque, ")\n");
 			} else
 				write_cb(cbopaque, "N/A\n");
 		}
 		CTL_GET("arenas.chunksize", &sv, size_t);
 		write_cb(cbopaque, "Chunk size: ");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		CTL_GET("opt.lg_chunk", &sv, size_t);
 		write_cb(cbopaque, " (2^");
-		write_cb(cbopaque, umax2s(sv, 10, s));
+		write_cb(cbopaque, u2s(sv, 10, s));
 		write_cb(cbopaque, ")\n");
 	}
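
With the OPT_WRITE_* macros, the old "Boolean JEMALLOC_OPTIONS:" letter
string becomes one line per recognized option.  Illustrative output
(values are examples only, and conditional options appear only when
compiled in):

	Run-time option settings:
	  opt.abort: false
	  opt.lg_chunk: 22
	  opt.narenas: 16
	  opt.stats_print: true
	  opt.prof_prefix: "jeprof"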
 
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index 3fb8f2b..cbbe7a1 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -5,7 +5,7 @@
 /* Data. */
 
 bool	opt_tcache = true;
-ssize_t	opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT;
+ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
 ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
 
 /* Map of thread-specific caches. */
@@ -384,16 +384,16 @@
 
 	if (opt_tcache) {
 		/*
-		 * If necessary, clamp opt_lg_tcache_maxclass, now that
+		 * If necessary, clamp opt_lg_tcache_max, now that
 		 * small_maxclass and arena_maxclass are known.
 		 */
-		if (opt_lg_tcache_maxclass < 0 || (1U <<
-		    opt_lg_tcache_maxclass) < small_maxclass)
+		if (opt_lg_tcache_max < 0 || (1U <<
+		    opt_lg_tcache_max) < small_maxclass)
 			tcache_maxclass = small_maxclass;
-		else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass)
+		else if ((1U << opt_lg_tcache_max) > arena_maxclass)
 			tcache_maxclass = arena_maxclass;
 		else
-			tcache_maxclass = (1U << opt_lg_tcache_maxclass);
+			tcache_maxclass = (1U << opt_lg_tcache_max);
 
 		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
 
diff --git a/jemalloc/test/rallocm.c b/jemalloc/test/rallocm.c
index 7e8a271..a8cadeb 100644
--- a/jemalloc/test/rallocm.c
+++ b/jemalloc/test/rallocm.c
@@ -14,14 +14,14 @@
 
 	fprintf(stderr, "Test begin\n");
 
-	r = allocm(&p, &sz, 42, 0);
+	r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
 	if (r != ALLOCM_SUCCESS) {
 		fprintf(stderr, "Unexpected allocm() error\n");
 		abort();
 	}
 
 	q = p;
-	r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (q != p)
@@ -32,7 +32,7 @@
 	}
 
 	q = p;
-	r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (q != p)
@@ -43,7 +43,7 @@
 	}
 
 	q = p;
-	r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_ERR_NOT_MOVED)
 		fprintf(stderr, "Unexpected rallocm() result\n");
 	if (q != p)
@@ -54,7 +54,7 @@
 	}
 
 	q = p;
-	r = rallocm(&q, &tsz, sz + 5, 0, 0);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (q == p)
@@ -66,7 +66,7 @@
 	p = q;
 	sz = tsz;
 
-	r = rallocm(&q, &tsz, 8192, 0, 0);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (q == p)
@@ -78,7 +78,7 @@
 	p = q;
 	sz = tsz;
 
-	r = rallocm(&q, &tsz, 16384, 0, 0);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (tsz == sz) {
@@ -88,7 +88,7 @@
 	p = q;
 	sz = tsz;
 
-	r = rallocm(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (q != p)
@@ -99,7 +99,7 @@
 	}
 	sz = tsz;
 
-	r = rallocm(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE);
+	r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
 		fprintf(stderr, "Unexpected rallocm() error\n");
 	if (q != p)
@@ -110,7 +110,7 @@
 	}
 	sz = tsz;
 
-	dallocm(p, 0);
+	JEMALLOC_P(dallocm)(p, 0);
 
 	fprintf(stderr, "Test end\n");
 	return (0);