Merge branch 'disable-tls'
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index ca8deb1..6c414a7 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -65,7 +65,7 @@
 @roff_tcache@.Ft void
 @roff_tcache@.Fn @jemalloc_prefix@malloc_tcache_flush "void"
 .Ft void
-.Fn @jemalloc_prefix@malloc_stats_print "const char *opts"
+.Fn @jemalloc_prefix@malloc_stats_print "void (*write4)(const char *, const char *, const char *, const char *)" "const char *opts"
 .Ft const char *
 .Va @jemalloc_prefix@malloc_options ;
 .Ft void
@@ -179,7 +179,14 @@
 .Pp
 The
 .Fn @jemalloc_prefix@malloc_stats_print
-function prints human-readable summary statistics.
+function writes human-readable summary statistics via the
+.Fa write4
+callback function, or
+.Fn @jemalloc_prefix@malloc_message
+if
+.Fa write4
+is
+.Dv NULL .
 This function can be called repeatedly.
 General information that never changes
 during execution can be omitted by specifying
diff --git a/jemalloc/src/internal/jemalloc_arena.h b/jemalloc/src/internal/jemalloc_arena.h
index 61fec68..d707fc9 100644
--- a/jemalloc/src/internal/jemalloc_arena.h
+++ b/jemalloc/src/internal/jemalloc_arena.h
@@ -405,7 +405,8 @@
     arena_chunk_map_t *mapelm);
 void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 #ifdef JEMALLOC_STATS
-void	arena_stats_print(arena_t *arena, bool bins, bool large);
+void	arena_stats_print(arena_t *arena, bool bins, bool large,
+    void (*write4)(const char *, const char *, const char *, const char *));
 #endif
 void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
 bool	arena_new(arena_t *arena, unsigned ind);
diff --git a/jemalloc/src/internal/jemalloc_stats.h b/jemalloc/src/internal/jemalloc_stats.h
index f15d657..359c868 100644
--- a/jemalloc/src/internal/jemalloc_stats.h
+++ b/jemalloc/src/internal/jemalloc_stats.h
@@ -129,7 +129,10 @@
 
 char	*umax2s(uintmax_t x, unsigned base, char *s);
 #ifdef JEMALLOC_STATS
-void	malloc_printf(const char *format, ...);
+void malloc_cprintf(void (*write4)(const char *, const char *, const char *,
+    const char *), const char *format, ...) JEMALLOC_ATTR(format(printf, 2, 3));
+void	malloc_printf(const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 1, 2));
 #endif
 
 #endif /* JEMALLOC_H_EXTERNS */
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 7a9f906..58c5f1e 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -361,7 +361,7 @@
 		}
 	}
 #endif
-	JEMALLOC_P(malloc_stats_print)(NULL);
+	JEMALLOC_P(malloc_stats_print)(NULL, NULL);
 }
 
 static inline void *
diff --git a/jemalloc/src/jemalloc.h.in b/jemalloc/src/jemalloc.h.in
index f2a89f5..04eb8da 100644
--- a/jemalloc/src/jemalloc.h.in
+++ b/jemalloc/src/jemalloc.h.in
@@ -24,7 +24,8 @@
 #ifdef JEMALLOC_TCACHE
 void	JEMALLOC_P(malloc_tcache_flush)(void);
 #endif
-void	JEMALLOC_P(malloc_stats_print)(const char *opts);
+void	JEMALLOC_P(malloc_stats_print)(void (*write4)(const char *,
+    const char *, const char *, const char *), const char *opts);
 
 #ifdef __cplusplus
 };
diff --git a/jemalloc/src/jemalloc_arena.c b/jemalloc/src/jemalloc_arena.c
index 0f031c1..7b5fced 100644
--- a/jemalloc/src/jemalloc_arena.c
+++ b/jemalloc/src/jemalloc_arena.c
@@ -1548,42 +1548,47 @@
 
 #ifdef JEMALLOC_STATS
 void
-arena_stats_print(arena_t *arena, bool bins, bool large)
+arena_stats_print(arena_t *arena, bool bins, bool large,
+    void (*write4)(const char *, const char *, const char *, const char *))
 {
 
-	malloc_printf("dirty pages: %zu:%zu active:dirty, %llu sweep%s,"
+	malloc_cprintf(write4,
+	    "dirty pages: %zu:%zu active:dirty, %llu sweep%s,"
 	    " %llu madvise%s, %llu purged\n",
 	    arena->nactive, arena->ndirty,
 	    arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
 	    arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
 	    arena->stats.purged);
 
-	malloc_printf("            allocated      nmalloc      ndalloc\n");
-	malloc_printf("small:   %12zu %12llu %12llu\n",
+	malloc_cprintf(write4,
+	    "            allocated      nmalloc      ndalloc\n");
+	malloc_cprintf(write4, "small:   %12zu %12llu %12llu\n",
 	    arena->stats.allocated_small, arena->stats.nmalloc_small,
 	    arena->stats.ndalloc_small);
-	malloc_printf("medium:  %12zu %12llu %12llu\n",
+	malloc_cprintf(write4, "medium:  %12zu %12llu %12llu\n",
 	    arena->stats.allocated_medium, arena->stats.nmalloc_medium,
 	    arena->stats.ndalloc_medium);
-	malloc_printf("large:   %12zu %12llu %12llu\n",
+	malloc_cprintf(write4, "large:   %12zu %12llu %12llu\n",
 	    arena->stats.allocated_large, arena->stats.nmalloc_large,
 	    arena->stats.ndalloc_large);
-	malloc_printf("total:   %12zu %12llu %12llu\n",
+	malloc_cprintf(write4, "total:   %12zu %12llu %12llu\n",
 	    arena->stats.allocated_small + arena->stats.allocated_medium +
 	    arena->stats.allocated_large, arena->stats.nmalloc_small +
 	    arena->stats.nmalloc_medium + arena->stats.nmalloc_large,
 	    arena->stats.ndalloc_small + arena->stats.ndalloc_medium +
 	    arena->stats.ndalloc_large);
-	malloc_printf("mapped:  %12zu\n", arena->stats.mapped);
+	malloc_cprintf(write4, "mapped:  %12zu\n", arena->stats.mapped);
 
 	if (bins && arena->stats.nmalloc_small + arena->stats.nmalloc_medium >
 	    0) {
 		unsigned i, gap_start;
 #ifdef JEMALLOC_TCACHE
-		malloc_printf("bins:     bin    size regs pgs  requests    "
+		malloc_cprintf(write4,
+		    "bins:     bin    size regs pgs  requests    "
 		    "nfills  nflushes   newruns    reruns maxruns curruns\n");
 #else
-		malloc_printf("bins:     bin    size regs pgs  requests   "
+		malloc_cprintf(write4,
+		    "bins:     bin    size regs pgs  requests   "
 		    "newruns    reruns maxruns curruns\n");
 #endif
 		for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
@@ -1597,21 +1602,22 @@
 						 * Gap of more than one size
 						 * class.
 						 */
-						malloc_printf("[%u..%u]\n",
-						    gap_start, i - 1);
+						malloc_cprintf(write4,
+						    "[%u..%u]\n", gap_start,
+						    i - 1);
 					} else {
 						/* Gap of one size class. */
-						malloc_printf("[%u]\n",
+						malloc_cprintf(write4, "[%u]\n",
 						    gap_start);
 					}
 					gap_start = UINT_MAX;
 				}
-				malloc_printf(
+				malloc_cprintf(write4,
 				    "%13u %1s %5u %4u %3u %9llu %9llu"
 #ifdef JEMALLOC_TCACHE
 				    " %9llu %9llu"
 #endif
-				    " %9llu %7lu %7lu\n",
+				    " %9llu %7zu %7zu\n",
 				    i,
 				    i < ntbins ? "T" : i < ntbins + nqbins ?
 				    "Q" : i < ntbins + nqbins + ncbins ? "C" :
@@ -1634,10 +1640,11 @@
 		if (gap_start != UINT_MAX) {
 			if (i > gap_start + 1) {
 				/* Gap of more than one size class. */
-				malloc_printf("[%u..%u]\n", gap_start, i - 1);
+				malloc_cprintf(write4, "[%u..%u]\n", gap_start,
+				    i - 1);
 			} else {
 				/* Gap of one size class. */
-				malloc_printf("[%u]\n", gap_start);
+				malloc_cprintf(write4, "[%u]\n", gap_start);
 			}
 		}
 	}
@@ -1647,7 +1654,7 @@
 		ssize_t gap_start;
 		size_t nlclasses = (chunksize - PAGE_SIZE) >> PAGE_SHIFT;
 
-		malloc_printf(
+		malloc_cprintf(write4,
 		    "large:   size pages nrequests   maxruns   curruns\n");
 
 		for (i = 0, gap_start = -1; i < nlclasses; i++) {
@@ -1656,10 +1663,12 @@
 					gap_start = i;
 			} else {
 				if (gap_start != -1) {
-					malloc_printf("[%zu]\n", i - gap_start);
+					malloc_cprintf(write4, "[%zu]\n",
+					    i - gap_start);
 					gap_start = -1;
 				}
-				malloc_printf("%13zu %5zu %9llu %9zu %9zu\n",
+				malloc_cprintf(write4,
+				    "%13zu %5zu %9llu %9zu %9zu\n",
 				    (i+1) << PAGE_SHIFT, i+1,
 				    arena->stats.lstats[i].nrequests,
 				    arena->stats.lstats[i].highruns,
@@ -1667,7 +1676,7 @@
 			}
 		}
 		if (gap_start != -1)
-			malloc_printf("[%zu]\n", i - gap_start);
+			malloc_cprintf(write4, "[%zu]\n", i - gap_start);
 	}
 }
 #endif
diff --git a/jemalloc/src/jemalloc_stats.c b/jemalloc/src/jemalloc_stats.c
index e39bcfe..7cd486e 100644
--- a/jemalloc/src/jemalloc_stats.c
+++ b/jemalloc/src/jemalloc_stats.c
@@ -7,6 +7,13 @@
 bool	opt_stats_print = false;
 
 /******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+#ifdef JEMALLOC_STATS	/* Guard matches the definition below. */
+static void	malloc_vcprintf(void (*write4)(const char *, const char *,
+    const char *, const char *), const char *format, va_list ap);
+#endif
+
+/******************************************************************************/
 
 /*
  * We don't want to depend on vsnprintf() for production builds, since that can
@@ -48,31 +55,76 @@
 }
 
 #ifdef JEMALLOC_STATS
-/*
- * Print to stderr in such a way as to (hopefully) avoid memory allocation.
- */
-void
-malloc_printf(const char *format, ...)
+static void
+malloc_vcprintf(void (*write4)(const char *, const char *, const char *,
+    const char *), const char *format, va_list ap)
 {
 	char buf[4096];
+
+	if (write4 == NULL) {
+		/*
+		 * The caller did not provide an alternate write4 callback
+		 * function, so use the default one.  malloc_write4() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write4 = JEMALLOC_P(malloc_message);
+	}
+
+	vsnprintf(buf, sizeof(buf), format, ap);
+	write4(buf, "", "", "");
+}
+
+/*
+ * Print to a callback function in such a way as to (hopefully) avoid memory
+ * allocation.
+ */
+JEMALLOC_ATTR(format(printf, 2, 3))
+void
+malloc_cprintf(void (*write4)(const char *, const char *, const char *,
+    const char *), const char *format, ...)
+{
 	va_list ap;
 
 	va_start(ap, format);
-	vsnprintf(buf, sizeof(buf), format, ap);
+	malloc_vcprintf(write4, format, ap);
 	va_end(ap);
-	malloc_write4(buf, "", "", "");
 }
+
+/*
+ * Print via malloc_message() so as to (hopefully) avoid memory allocation.
+ */
+JEMALLOC_ATTR(format(printf, 1, 2))
+void
+malloc_printf(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(NULL, format, ap);
+	va_end(ap);
+}
+
 #endif
 
 JEMALLOC_ATTR(visibility("default"))
 void
-JEMALLOC_P(malloc_stats_print)(const char *opts)
+JEMALLOC_P(malloc_stats_print)(void (*write4)(const char *, const char *,
+    const char *, const char *), const char *opts)
 {
 	char s[UMAX2S_BUFSIZE];
 	bool general = true;
 	bool bins = true;
 	bool large = true;
 
+	if (write4 == NULL) {
+		/*
+		 * The caller did not provide an alternate write4 callback
+		 * function, so use the default one.  malloc_write4() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write4 = JEMALLOC_P(malloc_message);
+	}
+
 	if (opts != NULL) {
 		unsigned i;
 
@@ -92,92 +144,87 @@
 		}
 	}
 
-	malloc_write4("___ Begin jemalloc statistics ___\n", "", "", "");
+	write4("___ Begin jemalloc statistics ___\n", "", "", "");
 	if (general) {
-		malloc_write4("Assertions ",
+		write4("Assertions ",
 #ifdef NDEBUG
 		    "disabled",
 #else
 		    "enabled",
 #endif
 		    "\n", "");
-		malloc_write4("Boolean JEMALLOC_OPTIONS: ",
-		    opt_abort ? "A" : "a", "", "");
+		write4("Boolean JEMALLOC_OPTIONS: ", opt_abort ? "A" : "a",
+		    "", "");
 #ifdef JEMALLOC_FILL
-		malloc_write4(opt_junk ? "J" : "j", "", "", "");
+		write4(opt_junk ? "J" : "j", "", "", "");
 #endif
-		malloc_write4("P", "", "", "");
+		write4("P", "", "", "");
 #ifdef JEMALLOC_TCACHE
-		malloc_write4(opt_tcache_sort ? "S" : "s", "", "", "");
+		write4(opt_tcache_sort ? "S" : "s", "", "", "");
 #endif
 #ifdef JEMALLOC_TRACE
-		malloc_write4(opt_trace ? "T" : "t", "", "", "");
+		write4(opt_trace ? "T" : "t", "", "", "");
 #endif
 #ifdef JEMALLOC_SYSV
-		malloc_write4(opt_sysv ? "V" : "v", "", "", "");
+		write4(opt_sysv ? "V" : "v", "", "", "");
 #endif
 #ifdef JEMALLOC_XMALLOC
-		malloc_write4(opt_xmalloc ? "X" : "x", "", "", "");
+		write4(opt_xmalloc ? "X" : "x", "", "", "");
 #endif
 #ifdef JEMALLOC_FILL
-		malloc_write4(opt_zero ? "Z" : "z", "", "", "");
+		write4(opt_zero ? "Z" : "z", "", "", "");
 #endif
-		malloc_write4("\n", "", "", "");
+		write4("\n", "", "", "");
 
-		malloc_write4("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
-		malloc_write4("Max arenas: ", umax2s(narenas, 10, s), "\n", "");
-		malloc_write4("Pointer size: ", umax2s(sizeof(void *), 10, s),
-		    "\n", "");
-		malloc_write4("Quantum size: ", umax2s(QUANTUM, 10, s), "\n",
+		write4("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
+		write4("Max arenas: ", umax2s(narenas, 10, s), "\n", "");
+		write4("Pointer size: ", umax2s(sizeof(void *), 10, s), "\n",
 		    "");
-		malloc_write4("Cacheline size (assumed): ",
-		    umax2s(CACHELINE, 10, s), "\n", "");
-		malloc_write4("Subpage spacing: ", umax2s(SUBPAGE, 10, s),
+		write4("Quantum size: ", umax2s(QUANTUM, 10, s), "\n", "");
+		write4("Cacheline size (assumed): ", umax2s(CACHELINE, 10, s),
 		    "\n", "");
-		malloc_write4("Medium spacing: ", umax2s((1U << lg_mspace), 10,
-		    s), "\n", "");
+		write4("Subpage spacing: ", umax2s(SUBPAGE, 10, s), "\n", "");
+		write4("Medium spacing: ", umax2s((1U << lg_mspace), 10, s),
+		    "\n", "");
 #ifdef JEMALLOC_TINY
-		malloc_write4("Tiny 2^n-spaced sizes: [", umax2s((1U <<
-		    LG_TINY_MIN), 10, s), "..", "");
-		malloc_write4(umax2s((qspace_min >> 1), 10, s), "]\n", "", "");
+		write4("Tiny 2^n-spaced sizes: [", umax2s((1U << LG_TINY_MIN),
+		    10, s), "..", "");
+		write4(umax2s((qspace_min >> 1), 10, s), "]\n", "", "");
 #endif
-		malloc_write4("Quantum-spaced sizes: [", umax2s(qspace_min, 10,
-		    s), "..", "");
-		malloc_write4(umax2s(qspace_max, 10, s), "]\n", "", "");
-		malloc_write4("Cacheline-spaced sizes: [",
-		    umax2s(cspace_min, 10, s), "..", "");
-		malloc_write4(umax2s(cspace_max, 10, s), "]\n", "", "");
-		malloc_write4("Subpage-spaced sizes: [", umax2s(sspace_min, 10,
-		    s), "..", "");
-		malloc_write4(umax2s(sspace_max, 10, s), "]\n", "", "");
-		malloc_write4("Medium sizes: [", umax2s(medium_min, 10, s),
+		write4("Quantum-spaced sizes: [", umax2s(qspace_min, 10, s),
 		    "..", "");
-		malloc_write4(umax2s(medium_max, 10, s), "]\n", "", "");
+		write4(umax2s(qspace_max, 10, s), "]\n", "", "");
+		write4("Cacheline-spaced sizes: [", umax2s(cspace_min, 10, s),
+		    "..", "");
+		write4(umax2s(cspace_max, 10, s), "]\n", "", "");
+		write4("Subpage-spaced sizes: [", umax2s(sspace_min, 10, s),
+		    "..", "");
+		write4(umax2s(sspace_max, 10, s), "]\n", "", "");
+		write4("Medium sizes: [", umax2s(medium_min, 10, s), "..", "");
+		write4(umax2s(medium_max, 10, s), "]\n", "", "");
 		if (opt_lg_dirty_mult >= 0) {
-			malloc_write4(
-			    "Min active:dirty page ratio per arena: ",
+			write4("Min active:dirty page ratio per arena: ",
 			    umax2s((1U << opt_lg_dirty_mult), 10, s), ":1\n",
 			    "");
 		} else {
-			malloc_write4(
-			    "Min active:dirty page ratio per arena: N/A\n",
+			write4("Min active:dirty page ratio per arena: N/A\n",
 			    "", "", "");
 		}
 #ifdef JEMALLOC_TCACHE
-		malloc_write4("Thread cache slots per size class: ",
-		    tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A",
-		    "\n", "");
-		malloc_write4("Thread cache GC sweep interval: ",
+		write4("Thread cache slots per size class: ",
+		    tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A", "\n",
+		    "");
+		write4("Thread cache GC sweep interval: ",
 		    (tcache_nslots && tcache_gc_incr > 0) ?
 		    umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A",
 		    "", "");
-		malloc_write4(" (increment interval: ",
+		write4(" (increment interval: ",
 		    (tcache_nslots && tcache_gc_incr > 0) ?
 		    umax2s(tcache_gc_incr, 10, s) : "N/A",
 		    ")\n", "");
 #endif
-		malloc_write4("Chunk size: ", umax2s(chunksize, 10, s), "", "");
-		malloc_write4(" (2^", umax2s(opt_lg_chunk, 10, s), ")\n", "");
+		write4("Chunk size: ", umax2s(chunksize, 10, s), "", "");
+		write4(" (2^", umax2s(opt_lg_chunk, 10, s), ")\n", "");
 	}
 
 #ifdef JEMALLOC_STATS
@@ -208,8 +255,8 @@
 		mapped += base_mapped;
 		malloc_mutex_unlock(&base_mtx);
 
-		malloc_printf("Allocated: %zu, mapped: %zu\n", allocated,
-		    mapped);
+		malloc_cprintf(write4, "Allocated: %zu, mapped: %zu\n",
+		    allocated, mapped);
 
 		/* Print chunk stats. */
 		{
@@ -219,30 +266,30 @@
 			chunks_stats = stats_chunks;
 			malloc_mutex_unlock(&huge_mtx);
 
-			malloc_printf("chunks: nchunks   "
+			malloc_cprintf(write4, "chunks: nchunks   "
 			    "highchunks    curchunks\n");
-			malloc_printf("  %13llu%13lu%13lu\n",
+			malloc_cprintf(write4, "  %13llu%13lu%13lu\n",
 			    chunks_stats.nchunks, chunks_stats.highchunks,
 			    chunks_stats.curchunks);
 		}
 
-		/* Print chunk stats. */
+		/* Print huge allocation stats. */
-		malloc_printf(
+		malloc_cprintf(write4,
 		    "huge: nmalloc      ndalloc    allocated\n");
-		malloc_printf(" %12llu %12llu %12zu\n", huge_nmalloc,
+		malloc_cprintf(write4, " %12llu %12llu %12zu\n", huge_nmalloc,
 		    huge_ndalloc, huge_allocated);
 
 		/* Print stats for each arena. */
 		for (i = 0; i < narenas; i++) {
 			arena = arenas[i];
 			if (arena != NULL) {
-				malloc_printf("\narenas[%u]:\n", i);
+				malloc_cprintf(write4, "\narenas[%u]:\n", i);
 				malloc_mutex_lock(&arena->lock);
-				arena_stats_print(arena, bins, large);
+				arena_stats_print(arena, bins, large, write4);
 				malloc_mutex_unlock(&arena->lock);
 			}
 		}
 	}
 #endif /* #ifdef JEMALLOC_STATS */
-	malloc_write4("--- End jemalloc statistics ---\n", "", "", "");
+	write4("--- End jemalloc statistics ---\n", "", "", "");
 }