diff --git a/Makefile.in b/Makefile.in
index 1f1ffd3..6286455 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -50,7 +50,7 @@
 	@srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \
 	@srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \
 	@srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \
-	@srcroot@src/stats.c @srcroot@src/tcache.c
+	@srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c
 ifeq (macho, @abi@)
 CSRCS += @srcroot@src/zone.c
 endif
diff --git a/configure.ac b/configure.ac
index 5b6c6b3..5a11588 100644
--- a/configure.ac
+++ b/configure.ac
@@ -144,6 +144,18 @@
 fi
 AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
 
+AC_CHECK_SIZEOF([intmax_t])
+if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then
+  LG_SIZEOF_INTMAX_T=4
+elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then
+  LG_SIZEOF_INTMAX_T=3
+elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then
+  LG_SIZEOF_INTMAX_T=2
+else
+  AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T])
+
 AC_CANONICAL_HOST
 dnl CPU-specific settings.
 CPU_SPINWAIT=""
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index 8f72f7f..a48d09f 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -76,19 +76,17 @@
 #define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
 	if (je_mallctl(name, oldp, oldlenp, newp, newlen)		\
 	    != 0) {							\
-		malloc_write("<jemalloc>: Failure in xmallctl(\"");	\
-		malloc_write(name);					\
-		malloc_write("\", ...)\n");				\
+		malloc_printf(						\
+		    "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n",	\
+		    name);						\
 		abort();						\
 	}								\
 } while (0)
 
 #define	xmallctlnametomib(name, mibp, miblenp) do {			\
 	if (je_mallctlnametomib(name, mibp, miblenp) != 0) {		\
-		malloc_write(						\
-		    "<jemalloc>: Failure in xmallctlnametomib(\"");	\
-		malloc_write(name);					\
-		malloc_write("\", ...)\n");				\
+		malloc_printf("<jemalloc>: Failure in "			\
+		    "xmallctlnametomib(\"%s\", ...)\n", name);		\
 		abort();						\
 	}								\
 } while (0)
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h
index d695e77..2f501f5 100644
--- a/include/jemalloc/internal/hash.h
+++ b/include/jemalloc/internal/hash.h
@@ -48,14 +48,14 @@
 
 	data2 = (const unsigned char *)data;
 	switch(len & 7) {
-		case 7: h ^= ((uint64_t)(data2[6])) << 48;
-		case 6: h ^= ((uint64_t)(data2[5])) << 40;
-		case 5: h ^= ((uint64_t)(data2[4])) << 32;
-		case 4: h ^= ((uint64_t)(data2[3])) << 24;
-		case 3: h ^= ((uint64_t)(data2[2])) << 16;
-		case 2: h ^= ((uint64_t)(data2[1])) << 8;
-		case 1: h ^= ((uint64_t)(data2[0]));
-			h *= m;
+	case 7: h ^= ((uint64_t)(data2[6])) << 48;
+	case 6: h ^= ((uint64_t)(data2[5])) << 40;
+	case 5: h ^= ((uint64_t)(data2[4])) << 32;
+	case 4: h ^= ((uint64_t)(data2[3])) << 24;
+	case 3: h ^= ((uint64_t)(data2[2])) << 16;
+	case 2: h ^= ((uint64_t)(data2[1])) << 8;
+	case 1: h ^= ((uint64_t)(data2[0]));
+		h *= m;
 	}
 
 	h ^= h >> r;
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 3e445f7..3774bb5 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -149,39 +149,6 @@
 #include "jemalloc/internal/qr.h"
 #include "jemalloc/internal/ql.h"
 
-extern void	(*je_malloc_message)(void *wcbopaque, const char *s);
-
-/*
- * Define a custom assert() in order to reduce the chances of deadlock during
- * assertion failure.
- */
-#ifndef assert
-#  ifdef JEMALLOC_DEBUG
-#    define assert(e) do {						\
-	if (!(e)) {							\
-		char line_buf[UMAX2S_BUFSIZE];				\
-		malloc_write("<jemalloc>: ");				\
-		malloc_write(__FILE__);					\
-		malloc_write(":");					\
-		malloc_write(u2s(__LINE__, 10, line_buf));		\
-		malloc_write(": Failed assertion: ");			\
-		malloc_write("\"");					\
-		malloc_write(#e);					\
-		malloc_write("\"\n");					\
-		abort();						\
-	}								\
-} while (0)
-#  else
-#    define assert(e)
-#  endif
-#endif
-
-/* Use to assert a particular configuration, e.g., cassert(config_debug). */
-#define	cassert(c) do {							\
-	if ((c) == false)						\
-		assert(false);						\
-} while (0)
-
 /*
  * jemalloc can conceptually be broken into components (arena, tcache, etc.),
  * but there are circular dependencies that cannot be broken without
@@ -215,9 +182,6 @@
 #  define JEMALLOC_INLINE static inline
 #endif
 
-/* Size of stack-allocated buffer passed to buferror(). */
-#define	BUFERROR_BUF		64
-
 /* Smallest size class to support. */
 #define	LG_TINY_MIN		3
 #define	TINY_MIN		(1U << LG_TINY_MIN)
@@ -318,6 +282,7 @@
 #define	PAGE_CEILING(s)							\
 	(((s) + PAGE_MASK) & ~PAGE_MASK)
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
@@ -344,6 +309,7 @@
 /******************************************************************************/
 #define JEMALLOC_H_STRUCTS
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
@@ -443,10 +409,10 @@
 
 arena_t	*arenas_extend(unsigned ind);
 arena_t	*choose_arena_hard(void);
-int	buferror(int errnum, char *buf, size_t buflen);
 void	jemalloc_prefork(void);
 void	jemalloc_postfork(void);
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
@@ -473,6 +439,7 @@
 /******************************************************************************/
 #define JEMALLOC_H_INLINES
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
@@ -487,33 +454,13 @@
 #include "jemalloc/internal/huge.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
-size_t	pow2_ceil(size_t x);
 size_t	s2u(size_t size);
 size_t	sa2u(size_t size, size_t alignment, size_t *run_size_p);
-void	malloc_write(const char *s);
 arena_t	*choose_arena(void);
 thread_allocated_t	*thread_allocated_get(void);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-/* Compute the smallest power of 2 that is >= x. */
-JEMALLOC_INLINE size_t
-pow2_ceil(size_t x)
-{
-
-	x--;
-	x |= x >> 1;
-	x |= x >> 2;
-	x |= x >> 4;
-	x |= x >> 8;
-	x |= x >> 16;
-#if (LG_SIZEOF_PTR == 3)
-	x |= x >> 32;
-#endif
-	x++;
-	return (x);
-}
-
 /*
  * Compute usable size that would result from allocating an object with the
  * specified size.
@@ -620,17 +567,6 @@
 }
 
 /*
- * Wrapper around malloc_message() that avoids the need for
- * je_malloc_message(...) throughout the code.
- */
-JEMALLOC_INLINE void
-malloc_write(const char *s)
-{
-
-	je_malloc_message(NULL, s);
-}
-
-/*
  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
  * code if necessary).
  */
diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h
index db2192e..89d3b5c 100644
--- a/include/jemalloc/internal/private_namespace.h
+++ b/include/jemalloc/internal/private_namespace.h
@@ -23,6 +23,7 @@
 #define	arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index)
 #define	arenas_extend JEMALLOC_N(arenas_extend)
 #define	arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index)
+#define	arenas_tls JEMALLOC_N(arenas_tls)
 #define	atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
 #define	atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
 #define	atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
@@ -137,8 +138,30 @@
 #define	malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock)
 #define	malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
 #define	malloc_printf JEMALLOC_N(malloc_printf)
+#define	malloc_snprintf JEMALLOC_N(malloc_snprintf)
+#define	malloc_tprintf JEMALLOC_N(malloc_tprintf)
+#define	malloc_vcprintf JEMALLOC_N(malloc_vcprintf)
+#define	malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf)
+#define	malloc_vtprintf JEMALLOC_N(malloc_vtprintf)
 #define	malloc_write JEMALLOC_N(malloc_write)
 #define	mb_write JEMALLOC_N(mb_write)
+#define	opt_abort JEMALLOC_N(opt_abort)
+#define	opt_junk JEMALLOC_N(opt_junk)
+#define	opt_lg_chunk JEMALLOC_N(opt_lg_chunk)
+#define	opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult)
+#define	opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval)
+#define	opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample)
+#define	opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max)
+#define	opt_narenas JEMALLOC_N(opt_narenas)
+#define	opt_prof JEMALLOC_N(opt_prof)
+#define	opt_prof_accum JEMALLOC_N(opt_prof_accum)
+#define	opt_prof_active JEMALLOC_N(opt_prof_active)
+#define	opt_prof_gdump JEMALLOC_N(opt_prof_gdump)
+#define	opt_prof_leak JEMALLOC_N(opt_prof_leak)
+#define	opt_stats_print JEMALLOC_N(opt_stats_print)
+#define	opt_tcache JEMALLOC_N(opt_tcache)
+#define	opt_xmalloc JEMALLOC_N(opt_xmalloc)
+#define	opt_zero JEMALLOC_N(opt_zero)
 #define	pow2_ceil JEMALLOC_N(pow2_ceil)
 #define	prof_backtrace JEMALLOC_N(prof_backtrace)
 #define	prof_boot0 JEMALLOC_N(prof_boot0)
@@ -156,6 +179,7 @@
 #define	prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
 #define	prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
 #define	prof_tdata_init JEMALLOC_N(prof_tdata_init)
+#define	prof_tdata_tls JEMALLOC_N(prof_tdata_tls)
 #define	pthread_create JEMALLOC_N(pthread_create)
 #define	rtree_get JEMALLOC_N(rtree_get)
 #define	rtree_get_locked JEMALLOC_N(rtree_get_locked)
@@ -166,6 +190,7 @@
 #define	stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index)
 #define	stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index)
 #define	stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index)
+#define	stats_cactive JEMALLOC_N(stats_cactive)
 #define	stats_cactive_add JEMALLOC_N(stats_cactive_add)
 #define	stats_cactive_get JEMALLOC_N(stats_cactive_get)
 #define	stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
@@ -185,6 +210,7 @@
 #define	tcache_event JEMALLOC_N(tcache_event)
 #define	tcache_get JEMALLOC_N(tcache_get)
 #define	tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
+#define	tcache_tls JEMALLOC_N(tcache_tls)
 #define	thread_allocated_get JEMALLOC_N(thread_allocated_get)
 #define	thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard)
-#define	u2s JEMALLOC_N(u2s)
+#define	thread_allocated_tls JEMALLOC_N(thread_allocated_tls)
diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h
index 4af23c3..27f68e3 100644
--- a/include/jemalloc/internal/stats.h
+++ b/include/jemalloc/internal/stats.h
@@ -1,8 +1,6 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-#define	UMAX2S_BUFSIZE	65
-
 typedef struct tcache_bin_stats_s tcache_bin_stats_t;
 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 typedef struct malloc_large_stats_s malloc_large_stats_t;
@@ -135,11 +133,6 @@
 
 extern size_t	stats_cactive;
 
-char	*u2s(uint64_t x, unsigned base, char *s);
-void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
-    const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
-void	malloc_printf(const char *format, ...)
-    JEMALLOC_ATTR(format(printf, 1, 2));
 void	stats_print(void (*write)(void *, const char *), void *cbopaque,
     const char *opts);
 
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
new file mode 100644
index 0000000..a268109
--- /dev/null
+++ b/include/jemalloc/internal/util.h
@@ -0,0 +1,130 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Size of stack-allocated buffer passed to buferror(). */
+#define	BUFERROR_BUF		64
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifndef assert
+#define	assert(e) do {							\
+	if (config_debug && !(e)) {					\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
+		    __FILE__, __LINE__, #e);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+/* Use to assert a particular configuration, e.g., cassert(config_debug). */
+#define	cassert(c) do {							\
+	if ((c) == false)						\
+		assert(false);						\
+} while (0)
+
+#ifndef not_reached
+#define	not_reached() do {						\
+	if (config_debug) {						\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Unreachable code reached\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+#ifndef not_implemented
+#define	not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_printf("<jemalloc>: %s:%d: Not implemented\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+#define	assert_not_implemented(e) do {					\
+	if (config_debug && !(e))					\
+		not_implemented();					\
+} while (0)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern void	(*je_malloc_message)(void *wcbopaque, const char *s);
+
+int	buferror(int errnum, char *buf, size_t buflen);
+
+/*
+ * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
+ * point math.
+ */
+int	malloc_vsnprintf(char *str, size_t size, const char *format,
+    va_list ap);
+int	malloc_snprintf(char *str, size_t size, const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 3, 4));
+/*
+ * malloc_[v]tprintf() prints to a thread-local string buffer, so the result is
+ * overwritten by the next call to malloc_[v]{,c,t}printf().
+ */
+const char *	malloc_vtprintf(const char *format, va_list ap);
+const char *	malloc_tprintf(const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 1, 2));
+void	malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap);
+void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
+    const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
+void	malloc_printf(const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 1, 2));
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t	pow2_ceil(size_t x);
+void	malloc_write(const char *s);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
+/* Compute the smallest power of 2 that is >= x. */
+JEMALLOC_INLINE size_t
+pow2_ceil(size_t x)
+{
+
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+#if (LG_SIZEOF_PTR == 3)
+	x |= x >> 32;
+#endif
+	x++;
+	return (x);
+}
+
+/*
+ * Wrapper around malloc_message() that avoids the need for
+ * je_malloc_message(...) throughout the code.
+ */
+JEMALLOC_INLINE void
+malloc_write(const char *s)
+{
+
+	je_malloc_message(NULL, s);
+}
+
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in
index 049cf01..6b2b0d0 100644
--- a/include/jemalloc/jemalloc_defs.h.in
+++ b/include/jemalloc/jemalloc_defs.h.in
@@ -166,3 +166,6 @@
 
 /* sizeof(long) == 2^LG_SIZEOF_LONG. */
 #undef LG_SIZEOF_LONG
+
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#undef LG_SIZEOF_INTMAX_T
diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c
index 164e86e..c740928 100644
--- a/src/chunk_mmap.c
+++ b/src/chunk_mmap.c
@@ -61,9 +61,8 @@
 			char buf[BUFERROR_BUF];
 
 			buferror(errno, buf, sizeof(buf));
-			malloc_write("<jemalloc>: Error in munmap(): ");
-			malloc_write(buf);
-			malloc_write("\n");
+			malloc_printf("<jemalloc>: Error in munmap(): %s\n",
+			    buf);
 			if (opt_abort)
 				abort();
 		}
@@ -83,9 +82,7 @@
 		char buf[BUFERROR_BUF];
 
 		buferror(errno, buf, sizeof(buf));
-		malloc_write("<jemalloc>: Error in munmap(): ");
-		malloc_write(buf);
-		malloc_write("\n");
+		malloc_printf("<jemalloc>: Error in munmap(): %s\n", buf);
 		if (opt_abort)
 			abort();
 	}
diff --git a/src/huge.c b/src/huge.c
index f2fba86..2d51c52 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -239,9 +239,8 @@
 			char buf[BUFERROR_BUF];
 
 			buferror(errno, buf, sizeof(buf));
-			malloc_write("<jemalloc>: Error in mremap(): ");
-			malloc_write(buf);
-			malloc_write("\n");
+			malloc_printf("<jemalloc>: Error in mremap(): %s\n",
+			    buf);
 			if (opt_abort)
 				abort();
 			memcpy(ret, ptr, copysize);
diff --git a/src/jemalloc.c b/src/jemalloc.c
index ad1ee8e..e148ae0 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -55,7 +55,6 @@
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static void	wrtmessage(void *cbopaque, const char *s);
 static void	stats_print_atexit(void);
 static unsigned	malloc_ncpus(void);
 static void	arenas_cleanup(void *arg);
@@ -72,19 +71,6 @@
     bool enforce_min_alignment);
 
 /******************************************************************************/
-/* malloc_message() setup. */
-
-JEMALLOC_CATTR(visibility("hidden"), static)
-void
-wrtmessage(void *cbopaque, const char *s)
-{
-	UNUSED int result = write(STDERR_FILENO, s, strlen(s));
-}
-
-void	(*je_malloc_message)(void *, const char *s)
-    JEMALLOC_ATTR(visibility("default")) = wrtmessage;
-
-/******************************************************************************/
 /*
  * Begin miscellaneous support functions.
  */
@@ -178,25 +164,6 @@
 	return (ret);
 }
 
-/*
- * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so
- * provide a wrapper.
- */
-int
-buferror(int errnum, char *buf, size_t buflen)
-{
-#ifdef _GNU_SOURCE
-	char *b = strerror_r(errno, buf, buflen);
-	if (b != buf) {
-		strncpy(buf, b, buflen);
-		buf[buflen-1] = '\0';
-	}
-	return (0);
-#else
-	return (strerror_r(errno, buf, buflen));
-#endif
-}
-
 static void
 stats_print_atexit(void)
 {
@@ -324,68 +291,64 @@
 
 	for (accept = false; accept == false;) {
 		switch (*opts) {
-			case 'A': case 'B': case 'C': case 'D': case 'E':
-			case 'F': case 'G': case 'H': case 'I': case 'J':
-			case 'K': case 'L': case 'M': case 'N': case 'O':
-			case 'P': case 'Q': case 'R': case 'S': case 'T':
-			case 'U': case 'V': case 'W': case 'X': case 'Y':
-			case 'Z':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
-			case 'f': case 'g': case 'h': case 'i': case 'j':
-			case 'k': case 'l': case 'm': case 'n': case 'o':
-			case 'p': case 'q': case 'r': case 's': case 't':
-			case 'u': case 'v': case 'w': case 'x': case 'y':
-			case 'z':
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case '_':
-				opts++;
-				break;
-			case ':':
-				opts++;
-				*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
-				*v_p = opts;
-				accept = true;
-				break;
-			case '\0':
-				if (opts != *opts_p) {
-					malloc_write("<jemalloc>: Conf string "
-					    "ends with key\n");
-				}
-				return (true);
-			default:
-				malloc_write("<jemalloc>: Malformed conf "
-				    "string\n");
-				return (true);
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+		case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+		case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4': case '5':
+		case '6': case '7': case '8': case '9':
+		case '_':
+			opts++;
+			break;
+		case ':':
+			opts++;
+			*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
+			*v_p = opts;
+			accept = true;
+			break;
+		case '\0':
+			if (opts != *opts_p) {
+				malloc_write("<jemalloc>: Conf string ends "
+				    "with key\n");
+			}
+			return (true);
+		default:
+			malloc_write("<jemalloc>: Malformed conf string\n");
+			return (true);
 		}
 	}
 
 	for (accept = false; accept == false;) {
 		switch (*opts) {
-			case ',':
-				opts++;
-				/*
-				 * Look ahead one character here, because the
-				 * next time this function is called, it will
-				 * assume that end of input has been cleanly
-				 * reached if no input remains, but we have
-				 * optimistically already consumed the comma if
-				 * one exists.
-				 */
-				if (*opts == '\0') {
-					malloc_write("<jemalloc>: Conf string "
-					    "ends with comma\n");
-				}
-				*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
-				accept = true;
-				break;
-			case '\0':
-				*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
-				accept = true;
-				break;
-			default:
-				opts++;
-				break;
+		case ',':
+			opts++;
+			/*
+			 * Look ahead one character here, because the next time
+			 * this function is called, it will assume that end of
+			 * input has been cleanly reached if no input remains,
+			 * but we have optimistically already consumed the
+			 * comma if one exists.
+			 */
+			if (*opts == '\0') {
+				malloc_write("<jemalloc>: Conf string ends "
+				    "with comma\n");
+			}
+			*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
+			accept = true;
+			break;
+		case '\0':
+			*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
+			accept = true;
+			break;
+		default:
+			opts++;
+			break;
 		}
 	}
 
@@ -397,17 +360,9 @@
 malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
     size_t vlen)
 {
-	char buf[PATH_MAX + 1];
 
-	malloc_write("<jemalloc>: ");
-	malloc_write(msg);
-	malloc_write(": ");
-	memcpy(buf, k, klen);
-	memcpy(&buf[klen], ":", 1);
-	memcpy(&buf[klen+1], v, vlen);
-	buf[klen+1+vlen] = '\0';
-	malloc_write(buf);
-	malloc_write("\n");
+	malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k,
+	    (int)vlen, v);
 }
 
 static void
@@ -458,8 +413,7 @@
 				opts = buf;
 			}
 			break;
-		}
-		case 2: {
+		} case 2: {
 			const char *envname =
 #ifdef JEMALLOC_PREFIX
 			    JEMALLOC_CPREFIX"MALLOC_CONF"
@@ -480,8 +434,7 @@
 				opts = buf;
 			}
 			break;
-		}
-		default:
+		} default:
 			/* NOTREACHED */
 			assert(false);
 			buf[0] = '\0';
@@ -490,15 +443,15 @@
 
 		while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
 		    &vlen) == false) {
-#define	CONF_HANDLE_BOOL(n)						\
+#define	CONF_HANDLE_BOOL(o, n)						\
 			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
 			    klen) == 0) {				\
 				if (strncmp("true", v, vlen) == 0 &&	\
 				    vlen == sizeof("true")-1)		\
-					opt_##n = true;			\
+					o = true;			\
 				else if (strncmp("false", v, vlen) ==	\
 				    0 && vlen == sizeof("false")-1)	\
-					opt_##n = false;		\
+					o = false;			\
 				else {					\
 					malloc_conf_error(		\
 					    "Invalid conf value",	\
@@ -506,7 +459,7 @@
 				}					\
 				continue;				\
 			}
-#define	CONF_HANDLE_SIZE_T(n, min, max)					\
+#define	CONF_HANDLE_SIZE_T(o, n, min, max)				\
 			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
 			    klen) == 0) {				\
 				unsigned long ul;			\
@@ -524,10 +477,10 @@
 					    "Out-of-range conf value",	\
 					    k, klen, v, vlen);		\
 				} else					\
-					opt_##n = ul;			\
+					o = ul;				\
 				continue;				\
 			}
-#define	CONF_HANDLE_SSIZE_T(n, min, max)				\
+#define	CONF_HANDLE_SSIZE_T(o, n, min, max)				\
 			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
 			    klen) == 0) {				\
 				long l;					\
@@ -546,54 +499,58 @@
 					    "Out-of-range conf value",	\
 					    k, klen, v, vlen);		\
 				} else					\
-					opt_##n = l;			\
+					o = l;				\
 				continue;				\
 			}
-#define	CONF_HANDLE_CHAR_P(n, d)					\
+#define	CONF_HANDLE_CHAR_P(o, n, d)					\
 			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
 			    klen) == 0) {				\
 				size_t cpylen = (vlen <=		\
-				    sizeof(opt_##n)-1) ? vlen :		\
-				    sizeof(opt_##n)-1;			\
-				strncpy(opt_##n, v, cpylen);		\
-				opt_##n[cpylen] = '\0';			\
+				    sizeof(o)-1) ? vlen :		\
+				    sizeof(o)-1;			\
+				strncpy(o, v, cpylen);			\
+				o[cpylen] = '\0';			\
 				continue;				\
 			}
 
-			CONF_HANDLE_BOOL(abort)
+			CONF_HANDLE_BOOL(opt_abort, abort)
 			/*
 			 * Chunks always require at least one * header page,
 			 * plus one data page.
 			 */
-			CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1,
+			CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, PAGE_SHIFT+1,
 			    (sizeof(size_t) << 3) - 1)
-			CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX)
-			CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1,
-			    (sizeof(size_t) << 3) - 1)
-			CONF_HANDLE_BOOL(stats_print)
+			CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX)
+			CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult,
+			    -1, (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_BOOL(opt_stats_print, stats_print)
 			if (config_fill) {
-				CONF_HANDLE_BOOL(junk)
-				CONF_HANDLE_BOOL(zero)
+				CONF_HANDLE_BOOL(opt_junk, junk)
+				CONF_HANDLE_BOOL(opt_zero, zero)
 			}
 			if (config_xmalloc) {
-				CONF_HANDLE_BOOL(xmalloc)
+				CONF_HANDLE_BOOL(opt_xmalloc, xmalloc)
 			}
 			if (config_tcache) {
-				CONF_HANDLE_BOOL(tcache)
-				CONF_HANDLE_SSIZE_T(lg_tcache_max, -1,
+				CONF_HANDLE_BOOL(opt_tcache, tcache)
+				CONF_HANDLE_SSIZE_T(opt_lg_tcache_max,
+				    lg_tcache_max, -1,
 				    (sizeof(size_t) << 3) - 1)
 			}
 			if (config_prof) {
-				CONF_HANDLE_BOOL(prof)
-				CONF_HANDLE_CHAR_P(prof_prefix, "jeprof")
-				CONF_HANDLE_BOOL(prof_active)
-				CONF_HANDLE_SSIZE_T(lg_prof_sample, 0,
+				CONF_HANDLE_BOOL(opt_prof, prof)
+				CONF_HANDLE_CHAR_P(opt_prof_prefix, prof_prefix,
+				    "jeprof")
+				CONF_HANDLE_BOOL(opt_prof_active, prof_active)
+				CONF_HANDLE_SSIZE_T(opt_lg_prof_sample,
+				    lg_prof_sample, 0,
 				    (sizeof(uint64_t) << 3) - 1)
-				CONF_HANDLE_BOOL(prof_accum)
-				CONF_HANDLE_SSIZE_T(lg_prof_interval, -1,
+				CONF_HANDLE_BOOL(opt_prof_accum, prof_accum)
+				CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
+				    lg_prof_interval, -1,
 				    (sizeof(uint64_t) << 3) - 1)
-				CONF_HANDLE_BOOL(prof_gdump)
-				CONF_HANDLE_BOOL(prof_leak)
+				CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump)
+				CONF_HANDLE_BOOL(opt_prof_leak, prof_leak)
 			}
 			malloc_conf_error("Invalid conf pair", k, klen, v,
 			    vlen);
@@ -773,12 +730,9 @@
 	 * machinery will fail to allocate memory at far lower limits.
 	 */
 	if (narenas > chunksize / sizeof(arena_t *)) {
-		char buf[UMAX2S_BUFSIZE];
-
 		narenas = chunksize / sizeof(arena_t *);
-		malloc_write("<jemalloc>: Reducing narenas to limit (");
-		malloc_write(u2s(narenas, 10, buf));
-		malloc_write(")\n");
+		malloc_printf("<jemalloc>: Reducing narenas to limit (%u)\n",
+		    narenas);
 	}
 
 	/* Allocate and initialize arenas. */
diff --git a/src/prof.c b/src/prof.c
index d78658d..b57c5b8 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -74,16 +74,18 @@
     struct _Unwind_Context *context, void *arg);
 #endif
 static bool	prof_flush(bool propagate_err);
-static bool	prof_write(const char *s, bool propagate_err);
+static bool	prof_write(bool propagate_err, const char *s);
+static bool	prof_printf(bool propagate_err, const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 2, 3));
 static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
     size_t *leak_nctx);
 static void	prof_ctx_destroy(prof_ctx_t *ctx);
 static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
-static bool	prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
-    bool propagate_err);
+static bool	prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
+    prof_bt_t *bt);
 static bool	prof_dump_maps(bool propagate_err);
-static bool	prof_dump(const char *filename, bool leakcheck,
-    bool propagate_err);
+static bool	prof_dump(bool propagate_err, const char *filename,
+    bool leakcheck);
 static void	prof_dump_filename(char *filename, char v, int64_t vseq);
 static void	prof_fdump(void);
 static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
@@ -587,7 +589,7 @@
 }
 
 static bool
-prof_write(const char *s, bool propagate_err)
+prof_write(bool propagate_err, const char *s)
 {
 	unsigned i, slen, n;
 
@@ -616,6 +618,20 @@
 	return (false);
 }
 
+JEMALLOC_ATTR(format(printf, 2, 3))
+static bool
+prof_printf(bool propagate_err, const char *format, ...)
+{
+	bool ret;
+	va_list ap;
+
+	va_start(ap, format);
+	ret = prof_write(propagate_err, malloc_vtprintf(format, ap));
+	va_end(ap);
+
+	return (ret);
+}
+
 static void
 prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 {
@@ -744,9 +760,8 @@
 }
 
 static bool
-prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
+prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
 {
-	char buf[UMAX2S_BUFSIZE];
 	unsigned i;
 
 	cassert(config_prof);
@@ -758,27 +773,19 @@
 		return (false);
 	}
 
-	if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
-	    propagate_err)
-	    || prof_write(" [", propagate_err)
-	    || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
-	    propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
-	    propagate_err)
-	    || prof_write("] @", propagate_err))
+	if (prof_printf(propagate_err, "%"PRId64": %"PRId64
+	    " [%"PRIu64": %"PRIu64"] @",
+	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
+	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes))
 		return (true);
 
 	for (i = 0; i < bt->len; i++) {
-		if (prof_write(" 0x", propagate_err)
-		    || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
-		    propagate_err))
+		if (prof_printf(propagate_err, " %#"PRIxPTR,
+		    (uintptr_t)bt->vec[i]))
 			return (true);
 	}
 
-	if (prof_write("\n", propagate_err))
+	if (prof_write(propagate_err, "\n"))
 		return (true);
 
 	return (false);
@@ -788,39 +795,15 @@
 prof_dump_maps(bool propagate_err)
 {
 	int mfd;
-	char buf[UMAX2S_BUFSIZE];
-	char *s;
-	unsigned i, slen;
-	/*         /proc/<pid>/maps\0 */
-	char mpath[6     + UMAX2S_BUFSIZE
-			      + 5  + 1];
 
 	cassert(config_prof);
 
-	i = 0;
-
-	s = "/proc/";
-	slen = strlen(s);
-	memcpy(&mpath[i], s, slen);
-	i += slen;
-
-	s = u2s(getpid(), 10, buf);
-	slen = strlen(s);
-	memcpy(&mpath[i], s, slen);
-	i += slen;
-
-	s = "/maps";
-	slen = strlen(s);
-	memcpy(&mpath[i], s, slen);
-	i += slen;
-
-	mpath[i] = '\0';
-
-	mfd = open(mpath, O_RDONLY);
+	mfd = open(malloc_tprintf("/proc/%d/maps", (int)getpid()),
+	    O_RDONLY);
 	if (mfd != -1) {
 		ssize_t nread;
 
-		if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
+		if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
 		    propagate_err)
 			return (true);
 		nread = 0;
@@ -842,7 +825,7 @@
 }
 
 static bool
-prof_dump(const char *filename, bool leakcheck, bool propagate_err)
+prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 {
 	prof_cnt_t cnt_all;
 	size_t tabind;
@@ -854,7 +837,6 @@
 		prof_ctx_t	*p;
 		void		*v;
 	} ctx;
-	char buf[UMAX2S_BUFSIZE];
 	size_t leak_nctx;
 
 	cassert(config_prof);
@@ -863,9 +845,9 @@
 	prof_dump_fd = creat(filename, 0644);
 	if (prof_dump_fd == -1) {
 		if (propagate_err == false) {
-			malloc_write("<jemalloc>: creat(\"");
-			malloc_write(filename);
-			malloc_write("\", 0644) failed\n");
+			malloc_printf(
+			    "<jemalloc>: creat(\"%s\", 0644) failed\n",
+			    filename);
 			if (opt_abort)
 				abort();
 		}
@@ -879,31 +861,27 @@
 		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
 
 	/* Dump profile header. */
-	if (prof_write("heap profile: ", propagate_err)
-	    || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
-	    || prof_write(" [", propagate_err)
-	    || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
-		goto ERROR;
-
 	if (opt_lg_prof_sample == 0) {
-		if (prof_write("] @ heapprofile\n", propagate_err))
+		if (prof_printf(propagate_err,
+		    "heap profile: %"PRId64": %"PRId64
+		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
+		    cnt_all.curobjs, cnt_all.curbytes,
+		    cnt_all.accumobjs, cnt_all.accumbytes))
 			goto ERROR;
 	} else {
-		if (prof_write("] @ heap_v2/", propagate_err)
-		    || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
-		    buf), propagate_err)
-		    || prof_write("\n", propagate_err))
+		if (prof_printf(propagate_err,
+		    "heap profile: %"PRId64": %"PRId64
+		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
+		    cnt_all.curobjs, cnt_all.curbytes,
+		    cnt_all.accumobjs, cnt_all.accumbytes,
+		    ((uint64_t)1U << opt_lg_prof_sample)))
 			goto ERROR;
 	}
 
 	/* Dump  per ctx profile stats. */
 	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
 	    == false;) {
-		if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
+		if (prof_dump_ctx(propagate_err, ctx.p, bt.p))
 			goto ERROR;
 	}
 
@@ -917,17 +895,14 @@
 	prof_leave();
 
 	if (leakcheck && cnt_all.curbytes != 0) {
-		malloc_write("<jemalloc>: Leak summary: ");
-		malloc_write(u2s(cnt_all.curbytes, 10, buf));
-		malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
-		malloc_write(u2s(cnt_all.curobjs, 10, buf));
-		malloc_write((cnt_all.curobjs != 1) ? " objects, " :
-		    " object, ");
-		malloc_write(u2s(leak_nctx, 10, buf));
-		malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
-		malloc_write("<jemalloc>: Run pprof on \"");
-		malloc_write(filename);
-		malloc_write("\" for leak detail\n");
+		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
+		    PRId64" object%s, %zu context%s\n",
+		    cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
+		    cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
+		    leak_nctx, (leak_nctx != 1) ? "s" : "");
+		malloc_printf(
+		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
+		    filename);
 	}
 
 	return (false);
@@ -936,76 +911,24 @@
 	return (true);
 }
 
-#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX+ UMAX2S_BUFSIZE		\
-					       + 1			\
-						+ UMAX2S_BUFSIZE	\
-						     + 2		\
-						       + UMAX2S_BUFSIZE	\
-						             + 5  + 1)
+#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
 static void
 prof_dump_filename(char *filename, char v, int64_t vseq)
 {
-	char buf[UMAX2S_BUFSIZE];
-	char *s;
-	unsigned i, slen;
 
 	cassert(config_prof);
 
-	/*
-	 * Construct a filename of the form:
-	 *
-	 *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
-	 */
-
-	i = 0;
-
-	s = opt_prof_prefix;
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = ".";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = u2s(getpid(), 10, buf);
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = ".";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = u2s(prof_dump_seq, 10, buf);
-	prof_dump_seq++;
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = ".";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	filename[i] = v;
-	i++;
-
 	if (vseq != UINT64_C(0xffffffffffffffff)) {
-		s = u2s(vseq, 10, buf);
-		slen = strlen(s);
-		memcpy(&filename[i], s, slen);
-		i += slen;
+		/* "<prefix>.<pid>.<seq>.<v><vseq>.heap"; uses up one <seq>. */
+		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
+		    opt_prof_prefix, (int)getpid(), prof_dump_seq++, v, vseq);
+	} else {
+		/* "<prefix>.<pid>.<seq>.<v>.heap"; uses up one <seq>. */
+		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+		    "%s.%d.%"PRIu64".%c.heap",
+		    opt_prof_prefix, (int)getpid(), prof_dump_seq++, v);
 	}
-
-	s = ".heap";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	filename[i] = '\0';
 }
 
 static void
@@ -1022,14 +945,14 @@
 		malloc_mutex_lock(&prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
-		prof_dump(filename, opt_prof_leak, false);
+		prof_dump(false, filename, opt_prof_leak);
 	}
 }
 
 void
 prof_idump(void)
 {
-	char filename[DUMP_FILENAME_BUFSIZE];
+	char filename[PATH_MAX + 1];
 
 	cassert(config_prof);
 
@@ -1048,7 +971,7 @@
 		prof_dump_filename(filename, 'i', prof_dump_iseq);
 		prof_dump_iseq++;
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
-		prof_dump(filename, false, false);
+		prof_dump(false, filename, false);
 	}
 }
 
@@ -1072,7 +995,7 @@
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
 		filename = filename_buf;
 	}
-	return (prof_dump(filename, false, true));
+	return (prof_dump(true, filename, false));
 }
 
 void
@@ -1097,7 +1020,7 @@
 		prof_dump_filename(filename, 'u', prof_dump_useq);
 		prof_dump_useq++;
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
-		prof_dump(filename, false, false);
+		prof_dump(false, filename, false);
 	}
 }
 
diff --git a/src/stats.c b/src/stats.c
index f976378..38c8bb3 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -44,8 +44,6 @@
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static void	malloc_vcprintf(void (*write_cb)(void *, const char *),
-    void *cbopaque, const char *format, va_list ap);
 static void	stats_arena_bins_print(void (*write_cb)(void *, const char *),
     void *cbopaque, unsigned i);
 static void	stats_arena_lruns_print(void (*write_cb)(void *, const char *),
@@ -55,97 +53,6 @@
 
 /******************************************************************************/
 
-/* XXX Refactor by adding malloc_vsnprintf(). */
-/*
- * We don't want to depend on vsnprintf() for production builds, since that can
- * cause unnecessary bloat for static binaries.  u2s() provides minimal integer
- * printing functionality, so that malloc_printf() use can be limited to
- * JEMALLOC_STATS code.
- */
-char *
-u2s(uint64_t x, unsigned base, char *s)
-{
-	unsigned i;
-
-	i = UMAX2S_BUFSIZE - 1;
-	s[i] = '\0';
-	switch (base) {
-	case 10:
-		do {
-			i--;
-			s[i] = "0123456789"[x % (uint64_t)10];
-			x /= (uint64_t)10;
-		} while (x > 0);
-		break;
-	case 16:
-		do {
-			i--;
-			s[i] = "0123456789abcdef"[x & 0xf];
-			x >>= 4;
-		} while (x > 0);
-		break;
-	default:
-		do {
-			i--;
-			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x %
-			    (uint64_t)base];
-			x /= (uint64_t)base;
-		} while (x > 0);
-	}
-
-	return (&s[i]);
-}
-
-static void
-malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
-    const char *format, va_list ap)
-{
-	char buf[4096];
-
-	if (write_cb == NULL) {
-		/*
-		 * The caller did not provide an alternate write_cb callback
-		 * function, so use the default one.  malloc_write() is an
-		 * inline function, so use malloc_message() directly here.
-		 */
-		write_cb = je_malloc_message;
-		cbopaque = NULL;
-	}
-
-	vsnprintf(buf, sizeof(buf), format, ap);
-	write_cb(cbopaque, buf);
-}
-
-/*
- * Print to a callback function in such a way as to (hopefully) avoid memory
- * allocation.
- */
-JEMALLOC_ATTR(format(printf, 3, 4))
-void
-malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
-    const char *format, ...)
-{
-	va_list ap;
-
-	va_start(ap, format);
-	malloc_vcprintf(write_cb, cbopaque, format, ap);
-	va_end(ap);
-}
-
-/*
- * Print to stderr in such a way as to (hopefully) avoid memory allocation.
- */
-JEMALLOC_ATTR(format(printf, 1, 2))
-void
-malloc_printf(const char *format, ...)
-{
-	va_list ap;
-
-	va_start(ap, format);
-	malloc_vcprintf(NULL, NULL, format, ap);
-	va_end(ap);
-}
-
 static void
 stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
@@ -360,7 +267,6 @@
 	int err;
 	uint64_t epoch;
 	size_t u64sz;
-	char s[UMAX2S_BUFSIZE];
 	bool general = true;
 	bool merged = true;
 	bool unmerged = true;
@@ -403,22 +309,22 @@
 
 		for (i = 0; opts[i] != '\0'; i++) {
 			switch (opts[i]) {
-				case 'g':
-					general = false;
-					break;
-				case 'm':
-					merged = false;
-					break;
-				case 'a':
-					unmerged = false;
-					break;
-				case 'b':
-					bins = false;
-					break;
-				case 'l':
-					large = false;
-					break;
-				default:;
+			case 'g':
+				general = false;
+				break;
+			case 'm':
+				merged = false;
+				break;
+			case 'a':
+				unmerged = false;
+				break;
+			case 'b':
+				bins = false;
+				break;
+			case 'l':
+				large = false;
+				break;
+			default:;
 			}
 		}
 	}
@@ -438,46 +344,34 @@
 		cpsz = sizeof(const char *);
 
 		CTL_GET("version", &cpv, const char *);
-		write_cb(cbopaque, "Version: ");
-		write_cb(cbopaque, cpv);
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv);
 		CTL_GET("config.debug", &bv, bool);
-		write_cb(cbopaque, "Assertions ");
-		write_cb(cbopaque, bv ? "enabled" : "disabled");
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Assertions %s\n",
+		    bv ? "enabled" : "disabled");
 
 #define OPT_WRITE_BOOL(n)						\
 		if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0))	\
 		    == 0) {						\
-			write_cb(cbopaque, "  opt."#n": ");		\
-			write_cb(cbopaque, bv ? "true" : "false");	\
-			write_cb(cbopaque, "\n");			\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %s\n", bv ? "true" : "false");	\
 		}
 #define OPT_WRITE_SIZE_T(n)						\
 		if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0))	\
 		    == 0) {						\
-			write_cb(cbopaque, "  opt."#n": ");		\
-			write_cb(cbopaque, u2s(sv, 10, s));		\
-			write_cb(cbopaque, "\n");			\
+			malloc_cprintf(write_cb, cbopaque,		\
+			"  opt."#n": %zu\n", sv);			\
 		}
 #define OPT_WRITE_SSIZE_T(n)						\
 		if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0))	\
 		    == 0) {						\
-			if (ssv >= 0) {					\
-				write_cb(cbopaque, "  opt."#n": ");	\
-				write_cb(cbopaque, u2s(ssv, 10, s));	\
-			} else {					\
-				write_cb(cbopaque, "  opt."#n": -");	\
-				write_cb(cbopaque, u2s(-ssv, 10, s));	\
-			}						\
-			write_cb(cbopaque, "\n");			\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %zd\n", ssv);			\
 		}
 #define OPT_WRITE_CHAR_P(n)						\
 		if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0))	\
 		    == 0) {						\
-			write_cb(cbopaque, "  opt."#n": \"");		\
-			write_cb(cbopaque, cpv);			\
-			write_cb(cbopaque, "\"\n");			\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": \"%s\"\n", cpv);		\
 		}
 
 		write_cb(cbopaque, "Run-time option settings:\n");
@@ -505,68 +399,52 @@
 #undef OPT_WRITE_SSIZE_T
 #undef OPT_WRITE_CHAR_P
 
-		write_cb(cbopaque, "CPUs: ");
-		write_cb(cbopaque, u2s(ncpus, 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus);
 
 		CTL_GET("arenas.narenas", &uv, unsigned);
-		write_cb(cbopaque, "Max arenas: ");
-		write_cb(cbopaque, u2s(uv, 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv);
 
-		write_cb(cbopaque, "Pointer size: ");
-		write_cb(cbopaque, u2s(sizeof(void *), 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n",
+		    sizeof(void *));
 
 		CTL_GET("arenas.quantum", &sv, size_t);
-		write_cb(cbopaque, "Quantum size: ");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv);
 
 		CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
 		if (ssv >= 0) {
-			write_cb(cbopaque,
-			    "Min active:dirty page ratio per arena: ");
-			write_cb(cbopaque, u2s((1U << ssv), 10, s));
-			write_cb(cbopaque, ":1\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "Min active:dirty page ratio per arena: %u:1\n",
+			    (1U << ssv));
 		} else {
 			write_cb(cbopaque,
 			    "Min active:dirty page ratio per arena: N/A\n");
 		}
 		if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0))
 		    == 0) {
-			write_cb(cbopaque,
-			    "Maximum thread-cached size class: ");
-			write_cb(cbopaque, u2s(sv, 10, s));
-			write_cb(cbopaque, "\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "Maximum thread-cached size class: %zu\n", sv);
 		}
 		if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 &&
 		    bv) {
 			CTL_GET("opt.lg_prof_sample", &sv, size_t);
-			write_cb(cbopaque, "Average profile sample interval: ");
-			write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s));
-			write_cb(cbopaque, " (2^");
-			write_cb(cbopaque, u2s(sv, 10, s));
-			write_cb(cbopaque, ")\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "Average profile sample interval: %"PRIu64
+			    " (2^%zu)\n", (((uint64_t)1U) << sv), sv);
 
 			CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
-			write_cb(cbopaque, "Average profile dump interval: ");
 			if (ssv >= 0) {
-				write_cb(cbopaque, u2s((((uint64_t)1U) << ssv),
-				    10, s));
-				write_cb(cbopaque, " (2^");
-				write_cb(cbopaque, u2s(ssv, 10, s));
-				write_cb(cbopaque, ")\n");
-			} else
-				write_cb(cbopaque, "N/A\n");
+				malloc_cprintf(write_cb, cbopaque,
+				    "Average profile dump interval: %"PRIu64
+				    " (2^%zd)\n",
+				    (((uint64_t)1U) << ssv), ssv);
+			} else {
+				write_cb(cbopaque,
+				    "Average profile dump interval: N/A\n");
+			}
 		}
-		CTL_GET("arenas.chunksize", &sv, size_t);
-		write_cb(cbopaque, "Chunk size: ");
-		write_cb(cbopaque, u2s(sv, 10, s));
 		CTL_GET("opt.lg_chunk", &sv, size_t);
-		write_cb(cbopaque, " (2^");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, ")\n");
+		malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n",
+		    (ZU(1) << sv), sv);
 	}
 
 	if (config_stats) {
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..7c4c0d4
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,539 @@
+#define	assert(e) do {							\
+	if (config_debug && !(e)) {					\
+		malloc_write("<jemalloc>: Failed assertion\n");		\
+		abort();						\
+	}								\
+} while (0)
+
+#define	not_reached() do {						\
+	if (config_debug) {						\
+		malloc_write("<jemalloc>: Unreachable code reached\n");	\
+		abort();						\
+	}								\
+} while (0)
+
+#define	not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_write("<jemalloc>: Not implemented\n");		\
+		abort();						\
+	}								\
+} while (0)
+
+#define	JEMALLOC_UTIL_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	wrtmessage(void *cbopaque, const char *s);
+#define	U2S_BUFSIZE	((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
+static char	*u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
+    size_t *slen_p);
+#define	D2S_BUFSIZE	(1 + U2S_BUFSIZE)
+static char	*d2s(intmax_t x, char sign, char *s, size_t *slen_p);
+#define	O2S_BUFSIZE	(1 + U2S_BUFSIZE)
+static char	*o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
+#define	X2S_BUFSIZE	(2 + U2S_BUFSIZE)
+static char	*x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
+    size_t *slen_p);
+
+/******************************************************************************/
+
+/* malloc_message() setup. */
+JEMALLOC_CATTR(visibility("hidden"), static)
+void
+wrtmessage(void *cbopaque, const char *s)
+{
+	UNUSED int result = write(STDERR_FILENO, s, strlen(s));
+}
+
+void	(*je_malloc_message)(void *, const char *s)
+    JEMALLOC_ATTR(visibility("default")) = wrtmessage;
+
+/*
+ * Write the message for errnum into buf (buflen bytes).  glibc provides a
+ * non-standard, char *-returning strerror_r() when _GNU_SOURCE is defined.
+ */
+int
+buferror(int errnum, char *buf, size_t buflen)
+{
+#ifdef _GNU_SOURCE
+	char *b = strerror_r(errnum, buf, buflen);
+	if (b != buf && buflen != 0) {
+		strncpy(buf, b, buflen);
+		buf[buflen-1] = '\0';
+	}
+	return (0);
+#else
+	return (strerror_r(errnum, buf, buflen));
+#endif
+}
+
+static char *
+u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p)
+{
+	unsigned i;
+
+	i = U2S_BUFSIZE - 1;
+	s[i] = '\0';
+	switch (base) {
+	case 10:
+		do {
+			i--;
+			s[i] = "0123456789"[x % (uint64_t)10];
+			x /= (uint64_t)10;
+		} while (x > 0);
+		break;
+	case 16: {
+		const char *digits = (uppercase)
+		    ? "0123456789ABCDEF"
+		    : "0123456789abcdef";
+
+		do {
+			i--;
+			s[i] = digits[x & 0xf];
+			x >>= 4;
+		} while (x > 0);
+		break;
+	} default: {
+		const char *digits = (uppercase)
+		    ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    : "0123456789abcdefghijklmnopqrstuvwxyz";
+
+		assert(base >= 2 && base <= 36);
+		do {
+			i--;
+			s[i] = digits[x % (uint64_t)base];
+			x /= (uint64_t)base;
+		} while (x > 0);
+	}}
+
+	*slen_p = U2S_BUFSIZE - 1 - i;
+	return (&s[i]);
+}
+
+static char *
+d2s(intmax_t x, char sign, char *s, size_t *slen_p)
+{
+	bool neg = (x < 0);
+
+	/* Negate as unsigned so that INTMAX_MIN cannot overflow. */
+	s = u2s(neg ? -(uintmax_t)x : (uintmax_t)x, 10, false, s,
+	    slen_p);
+	if (neg)
+		sign = '-';
+	switch (sign) {
+	case '-':
+		if (neg == false)
+			break;
+		/* Fall through. */
+	case ' ':
+	case '+':
+		s--;
+		(*slen_p)++;
+		*s = sign;
+		break;
+	default: not_reached();
+	}
+	return (s);
+}
+
+static char *
+o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p)
+{
+
+	s = u2s(x, 8, false, s, slen_p);
+	if (alt_form && *s != '0') {
+		s--;
+		(*slen_p)++;
+		*s = '0';
+	}
+	return (s);
+}
+
+static char *
+x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p)
+{
+
+	s = u2s(x, 16, uppercase, s, slen_p);
+	if (alt_form) {
+		s -= 2;
+		(*slen_p) += 2;
+		memcpy(s, uppercase ? "0X" : "0x", 2);
+	}
+	return (s);
+}
+
+int
+malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+	int ret;
+	size_t i;
+	const char *f;
+	va_list tap;
+	/* vsnprintf() subset; returns the untruncated output length. */
+#define	APPEND_C(c) do {						\
+	if (i < size)							\
+		str[i] = (c);						\
+	i++;								\
+} while (0)
+#define	APPEND_S(s, slen) do {						\
+	if (i < size) {							\
+		size_t cpylen = (slen <= size - i) ? slen : size - i;	\
+		memcpy(&str[i], s, cpylen);				\
+	}								\
+	i += slen;							\
+} while (0)
+#define	APPEND_PADDED_S(s, slen, width, left_justify) do {		\
+	/* Left padding. */						\
+	size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ?	\
+	    (size_t)width - slen : 0);					\
+	if (left_justify == false && pad_len != 0) {			\
+		size_t j;						\
+		for (j = 0; j < pad_len; j++)				\
+			APPEND_C(' ');					\
+	}								\
+	/* Value. */							\
+	APPEND_S(s, slen);						\
+	/* Right padding. */						\
+	if (left_justify && pad_len != 0) {				\
+		size_t j;						\
+		for (j = 0; j < pad_len; j++)				\
+			APPEND_C(' ');					\
+	}								\
+} while (0)
+#define GET_ARG_NUMERIC(val, len) do {					\
+	switch (len) {							\
+	case '?':							\
+		val = va_arg(ap, int);					\
+		break;							\
+	case 'l':							\
+		val = va_arg(ap, long);					\
+		break;							\
+	case 'q':							\
+		val = va_arg(ap, long long);				\
+		break;							\
+	case 'j':							\
+		val = va_arg(ap, intmax_t);				\
+		break;							\
+	case 't':							\
+		val = va_arg(ap, ptrdiff_t);				\
+		break;							\
+	case 'z':							\
+		val = va_arg(ap, size_t);				\
+		break;							\
+	default: not_reached();						\
+	}								\
+} while (0)
+
+	if (config_debug)
+		va_copy(tap, ap);
+
+	i = 0;
+	f = format;
+	while (true) {
+		switch (*f) {
+		case '\0': goto OUT;
+		case '%': {
+			bool alt_form = false;
+			bool zero_pad = false;
+			bool left_justify = false;
+			bool plus_space = false;
+			bool plus_plus = false;
+			int prec = -1;
+			int width = -1;
+			char len = '?';
+
+			f++;
+			if (*f == '%') {
+				APPEND_C(*f); /* %% emits a literal '%'. */
+				f++;
+				break;
+			}
+			/* Flags. */
+			while (true) {
+				switch (*f) {
+				case '#':
+					assert(alt_form == false);
+					alt_form = true;
+					break;
+				case '0':
+					assert(zero_pad == false);
+					zero_pad = true;
+					break;
+				case '-':
+					assert(left_justify == false);
+					left_justify = true;
+					break;
+				case ' ':
+					assert(plus_space == false);
+					plus_space = true;
+					break;
+				case '+':
+					assert(plus_plus == false);
+					plus_plus = true;
+					break;
+				default: goto WIDTH;
+				}
+				f++;
+			}
+			/* Width. */
+			WIDTH:
+			switch (*f) {
+			case '*':
+				width = va_arg(ap, int);
+				f++;
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9': {
+				unsigned long uwidth;
+				errno = 0;
+				uwidth = strtoul(f, (char **)&f, 10);
+				assert(uwidth != ULONG_MAX || errno != ERANGE);
+				width = (int)uwidth;
+				if (*f == '.') {
+					f++;
+					goto PRECISION;
+				} else
+					goto LENGTH;
+				break;
+			} case '.':
+				f++;
+				goto PRECISION;
+			default: goto LENGTH;
+			}
+			/* Precision. */
+			PRECISION:
+			switch (*f) {
+			case '*':
+				prec = va_arg(ap, int);
+				f++;
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9': {
+				unsigned long uprec;
+				errno = 0;
+				uprec = strtoul(f, (char **)&f, 10);
+				assert(uprec != ULONG_MAX || errno != ERANGE);
+				prec = (int)uprec;
+				break;
+			}
+			default: break;
+			}
+			/* Length. */
+			LENGTH:
+			switch (*f) {
+			case 'l':
+				f++;
+				if (*f == 'l') {
+					len = 'q';
+					f++;
+				} else
+					len = 'l';
+				break;
+			case 'j':
+				len = 'j';
+				f++;
+				break;
+			case 't':
+				len = 't';
+				f++;
+				break;
+			case 'z':
+				len = 'z';
+				f++;
+				break;
+			default: break;
+			}
+			/* Conversion specifier. */
+			switch (*f) {
+				char *s;
+				size_t slen;
+			case 'd': case 'i': {
+				intmax_t val;
+				char buf[D2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len);
+				s = d2s(val, (plus_plus ? '+' : (plus_space ?
+				    ' ' : '-')), buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'o': {
+				uintmax_t val;
+				char buf[O2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len);
+				s = o2s(val, alt_form, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'u': {
+				uintmax_t val;
+				char buf[U2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len);
+				s = u2s(val, 10, false, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'x': case 'X': {
+				uintmax_t val;
+				char buf[X2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len);
+				s = x2s(val, alt_form, *f == 'X', buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'c': {
+				unsigned char val;
+				char buf[2];
+
+				assert(len == '?' || len == 'l');
+				assert_not_implemented(len != 'l');
+				val = va_arg(ap, int);
+				buf[0] = val;
+				buf[1] = '\0';
+				APPEND_PADDED_S(buf, 1, width, left_justify);
+				f++;
+				break;
+			} case 's':
+				assert(len == '?' || len == 'l');
+				assert_not_implemented(len != 'l');
+				s = va_arg(ap, char *);
+				slen = (prec == -1) ? strlen(s) : prec;
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			case 'p': {
+				uintmax_t val;
+				char buf[X2S_BUFSIZE];
+				/* The argument is a pointer, never an int. */
+				val = (uintmax_t)(uintptr_t)va_arg(ap, void *);
+				s = x2s(val, true, false, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			}
+			default: not_implemented();
+			}
+			break;
+		} default: {
+			APPEND_C(*f);
+			f++;
+			break;
+		}}
+	}
+	OUT:
+	if (i < size)
+		str[i] = '\0';
+	else if (size != 0)
+		str[size - 1] = '\0';
+	ret = i;
+
+	if (config_debug) {
+		char buf[ret + 2];
+		int tret;
+
+		/*
+		 * Verify that the result matches what vsnprintf() would have
+		 * created; contents are only comparable when not truncated.
+		 */
+		tret = vsnprintf(buf, sizeof(buf), format, tap);
+		assert(tret == ret);
+		assert((size_t)ret >= size || memcmp(str, buf, ret + 1) == 0);
+		va_end(tap);
+	}
+
+#undef APPEND_C
+#undef APPEND_S
+#undef APPEND_PADDED_S
+#undef GET_ARG_NUMERIC
+	return (ret);
+}
+
+JEMALLOC_ATTR(format(printf, 3, 4))
+int
+malloc_snprintf(char *str, size_t size, const char *format, ...)
+{
+	int ret;
+	va_list ap;
+
+	va_start(ap, format);
+	ret = malloc_vsnprintf(str, size, format, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+const char *
+malloc_vtprintf(const char *format, va_list ap)
+{
+	/* buf must be large enough for all possible uses within jemalloc. */
+	static __thread char buf[4096];
+
+	malloc_vsnprintf(buf, sizeof(buf), format, ap);
+
+	return (buf);
+}
+
+JEMALLOC_ATTR(format(printf, 1, 2))
+const char *
+malloc_tprintf(const char *format, ...)
+{
+	const char *ret;
+	va_list ap;
+
+	va_start(ap, format);
+	ret = malloc_vtprintf(format, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+void
+malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap)
+{
+
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so use the default one.  malloc_write() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write_cb = je_malloc_message;
+		cbopaque = NULL;
+	}
+
+	write_cb(cbopaque, malloc_vtprintf(format, ap));
+}
+
+/*
+ * Print to a callback function in such a way as to (hopefully) avoid memory
+ * allocation.
+ */
+JEMALLOC_ATTR(format(printf, 3, 4))
+void
+malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(write_cb, cbopaque, format, ap);
+	va_end(ap);
+}
+
+/* Print to stderr in such a way as to avoid memory allocation. */
+JEMALLOC_ATTR(format(printf, 1, 2))
+void
+malloc_printf(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(NULL, NULL, format, ap);
+	va_end(ap);
+}
diff --git a/test/allocm.c b/test/allocm.c
index 137e74c..3aa0fd2 100644
--- a/test/allocm.c
+++ b/test/allocm.c
@@ -77,14 +77,14 @@
 	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for nallocm(&nsz, %zu, 0x%x)\n",
+		    "Expected error for nallocm(&nsz, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	rsz = 0;
 	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	if (nsz != rsz)
@@ -105,7 +105,7 @@
 	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 
@@ -119,14 +119,14 @@
 	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for nallocm(&nsz, %zu, 0x%x)\n",
+		    "Expected error for nallocm(&nsz, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	rsz = 0;
 	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	if (nsz != rsz)
@@ -150,7 +150,7 @@
 				if (r != ALLOCM_SUCCESS) {
 					fprintf(stderr,
 					    "nallocm() error for size %zu"
-					    " (0x%zx): %d\n",
+					    " (%#zx): %d\n",
 					    sz, sz, r);
 					exit(1);
 				}
@@ -160,7 +160,7 @@
 				if (r != ALLOCM_SUCCESS) {
 					fprintf(stderr,
 					    "allocm() error for size %zu"
-					    " (0x%zx): %d\n",
+					    " (%#zx): %d\n",
 					    sz, sz, r);
 					exit(1);
 				}
diff --git a/test/posix_memalign.c b/test/posix_memalign.c
index 5abb420..0ea35c8 100644
--- a/test/posix_memalign.c
+++ b/test/posix_memalign.c
@@ -100,7 +100,7 @@
 				    alignment, size);
 				if (err) {
 					fprintf(stderr,
-					    "Error for size %zu (0x%zx): %s\n",
+					    "Error for size %zu (%#zx): %s\n",
 					    size, size, strerror(err));
 					exit(1);
 				}
