Various heap profiling improvements.
Add the --disable-prof-libgcc configure option, and add backtracing
based on libgcc, which is used by default.
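Fix a bug in hash(): the trailing (len & 7) bytes were read from the start of
the key instead of from just past the last full 8-byte word.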
Fix various configuration-dependent compilation errors.
diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
index 057d8d5..c02e252 100644
--- a/jemalloc/INSTALL
+++ b/jemalloc/INSTALL
@@ -48,12 +48,16 @@
Enable heap profiling and leak detection functionality. Use the 'B', 'F',
'I', 'L', and 'U' options to control these features.
+--disable-prof-libgcc
+ Disable the use of libgcc's backtracing functionality. Ordinarily, libgcc's
+ backtracing functionality is superior to the alternatives, but it may fail
+ to capture backtraces on some systems.
+
--enable-prof-libunwind
Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
- backtracing, rather than frame pointers. libunwind is quite slow in
- comparison to frame pointer-based backtracing, but it has the advantage of
- working on applications/libraries that were compiled with
- -fomit-frame-pointer.
+ backtracing. libunwind is quite slow, but it tends to work across a wider
+ variety of system configurations than the default backtracing code, which is
+ based on libgcc functionality or gcc intrinsics.
--disable-tiny
Disable tiny (sub-quantum-sized) object support. Technically it is not
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index 1c4e335..17c7aa6 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -373,6 +373,17 @@
],
[enable_prof="0"]
)
+AC_ARG_ENABLE([prof-libgcc],
+ [AS_HELP_STRING([--disable-prof-libgcc],
+ [Do not use libgcc for backtracing])],
+[if test "x$enable_prof_libgcc" = "xno" ; then
+ enable_prof_libgcc="0"
+else
+ enable_prof_libgcc="1"
+fi
+],
+[enable_prof_libgcc="1"]
+)
AC_ARG_ENABLE([prof-libunwind],
[AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
[if test "x$enable_prof_libunwind" = "xno" ; then
@@ -682,6 +693,18 @@
fi
AC_SUBST([roff_prof])
+dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics
+dnl for backtracing.
+if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \
+ -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then
+ enable_prof_libgcc="1"
+ AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
+ AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
+ if test "x${enable_prof_libgcc}" = "x1" ; then
+ AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
+ fi
+fi
+
dnl ============================================================================
dnl Configure libgd for mtrgraph.
bins="${objroot}bin/jemtr2mtr${install_suffix}"
diff --git a/jemalloc/src/internal/hash.h b/jemalloc/src/internal/hash.h
index 182babd..d12cdb8 100644
--- a/jemalloc/src/internal/hash.h
+++ b/jemalloc/src/internal/hash.h
@@ -30,8 +30,10 @@
const int r = 47;
uint64_t h = seed ^ (len * m);
const uint64_t *data = (const uint64_t *)key;
- const unsigned char *data2 = (const unsigned char*)data;
const uint64_t *end = data + (len/8);
+ const unsigned char *data2;
+
+ assert(((uintptr_t)key & 0x7) == 0);
while(data != end) {
uint64_t k = *data++;
@@ -44,6 +46,7 @@
h *= m;
}
+ data2 = (const unsigned char *)data;
switch(len & 7) {
case 7: h ^= ((uint64_t)(data2[6])) << 48;
case 6: h ^= ((uint64_t)(data2[5])) << 40;
diff --git a/jemalloc/src/internal/jemalloc_chunk.h b/jemalloc/src/internal/jemalloc_chunk.h
index 40541e7..00b2e1d 100644
--- a/jemalloc/src/internal/jemalloc_chunk.h
+++ b/jemalloc/src/internal/jemalloc_chunk.h
@@ -32,7 +32,7 @@
extern bool opt_overcommit;
#endif
-#ifdef JEMALLOC_STATS
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
/* Protects stats_chunks; currently not used for any other purpose. */
extern malloc_mutex_t chunks_mtx;
/* Chunk statistics. */
diff --git a/jemalloc/src/internal/jemalloc_internal.h.in b/jemalloc/src/internal/jemalloc_internal.h.in
index 88e33e3..8f52fa3 100644
--- a/jemalloc/src/internal/jemalloc_internal.h.in
+++ b/jemalloc/src/internal/jemalloc_internal.h.in
@@ -32,10 +32,8 @@
#endif
#include "internal/rb.h"
-#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS))
#include "internal/qr.h"
#include "internal/ql.h"
-#endif
extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
const char *p2, const char *p3, const char *p4);
@@ -106,7 +104,7 @@
#ifdef __sparc64__
# define LG_QUANTUM 4
#endif
-#ifdef __amd64__
+#if (defined(__amd64__) || defined(__x86_64__))
# define LG_QUANTUM 4
#endif
#ifdef __arm__
@@ -172,7 +170,6 @@
(((s) + PAGE_MASK) & ~PAGE_MASK)
#include "internal/prn.h"
-#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
@@ -185,6 +182,7 @@
#include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h"
+#include "internal/hash.h"
#include "internal/prof.h"
#undef JEMALLOC_H_TYPES
@@ -192,7 +190,6 @@
#define JEMALLOC_H_STRUCTS
#include "internal/prn.h"
-#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
@@ -205,6 +202,7 @@
#include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h"
+#include "internal/hash.h"
#include "internal/prof.h"
#undef JEMALLOC_H_STRUCTS
@@ -255,7 +253,6 @@
#endif
#include "internal/prn.h"
-#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
@@ -268,6 +265,7 @@
#include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h"
+#include "internal/hash.h"
#include "internal/prof.h"
#undef JEMALLOC_H_EXTERNS
@@ -275,7 +273,6 @@
#define JEMALLOC_H_INLINES
#include "internal/prn.h"
-#include "internal/hash.h"
#include "internal/mb.h"
#include "internal/ckh.h"
#include "internal/jemalloc_stats.h"
@@ -376,6 +373,7 @@
#include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_arena.h"
#include "internal/jemalloc_trace.h"
+#include "internal/hash.h"
#include "internal/prof.h"
#ifndef JEMALLOC_ENABLE_INLINE
diff --git a/jemalloc/src/internal/jemalloc_stats.h b/jemalloc/src/internal/jemalloc_stats.h
index 12f0676..36dc5fe 100644
--- a/jemalloc/src/internal/jemalloc_stats.h
+++ b/jemalloc/src/internal/jemalloc_stats.h
@@ -8,6 +8,8 @@
typedef struct malloc_bin_stats_s malloc_bin_stats_t;
typedef struct malloc_large_stats_s malloc_large_stats_t;
typedef struct arena_stats_s arena_stats_t;
+#endif
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
typedef struct chunk_stats_s chunk_stats_t;
#endif
diff --git a/jemalloc/src/internal/mb.h b/jemalloc/src/internal/mb.h
index 0a272e7..1707aa9 100644
--- a/jemalloc/src/internal/mb.h
+++ b/jemalloc/src/internal/mb.h
@@ -54,7 +54,7 @@
);
#endif
}
-#elif defined(__amd64_)
+#elif (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE void
mb_write(void)
{
diff --git a/jemalloc/src/internal/prof.h b/jemalloc/src/internal/prof.h
index 326d558..1721ad8 100644
--- a/jemalloc/src/internal/prof.h
+++ b/jemalloc/src/internal/prof.h
@@ -35,6 +35,15 @@
unsigned len;
};
+#ifdef JEMALLOC_PROF_LIBGCC
+/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
+typedef struct {
+ prof_bt_t *bt;
+ unsigned nignore;
+ unsigned max;
+} prof_unwind_data_t;
+#endif
+
struct prof_cnt_s {
/*
* Profiling counters. An allocation/deallocation pair can operate on
diff --git a/jemalloc/src/jemalloc_defs.h.in b/jemalloc/src/jemalloc_defs.h.in
index 247b596..942694f 100644
--- a/jemalloc/src/jemalloc_defs.h.in
+++ b/jemalloc/src/jemalloc_defs.h.in
@@ -54,6 +54,9 @@
/* Use libunwind for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBUNWIND
+/* Use libgcc for profile backtracing if defined. */
+#undef JEMALLOC_PROF_LIBGCC
+
/*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum.
diff --git a/jemalloc/src/jemalloc_stats.c b/jemalloc/src/jemalloc_stats.c
index 580f0fe..b0efe74 100644
--- a/jemalloc/src/jemalloc_stats.c
+++ b/jemalloc/src/jemalloc_stats.c
@@ -219,8 +219,8 @@
size_t);
if (config_tcache) {
malloc_cprintf(write4, w4opaque,
- "%13u %1s %5u %4u %3u %10"PRIu64" %9"PRIu64
- " %9"PRIu64" %9"PRIu64""
+ "%13u %1s %5zu %4u %3zu %10"PRIu64
+ " %9"PRIu64" %9"PRIu64" %9"PRIu64""
" %9"PRIu64" %7zu %7zu\n",
j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
@@ -232,8 +232,8 @@
highruns, curruns);
} else {
malloc_cprintf(write4, w4opaque,
- "%13u %1s %5u %4u %3u %10"PRIu64" %9"PRIu64
- " %9"PRIu64" %7zu %7zu\n",
+ "%13u %1s %5zu %4u %3zu %10"PRIu64
+ " %9"PRIu64" %9"PRIu64" %7zu %7zu\n",
j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
"Q" : j < ntbins_ + nqbins + ncbins ? "C" :
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index a7d9cc3..db56659 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -3,6 +3,10 @@
#ifdef JEMALLOC_PROF
/******************************************************************************/
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
@@ -82,7 +86,13 @@
static prof_bt_t *bt_dup(prof_bt_t *bt);
static void bt_init(prof_bt_t *bt, void **vec);
-static bool prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code prof_unwind_init_callback(
+ struct _Unwind_Context *context, void *arg);
+static _Unwind_Reason_Code prof_unwind_callback(
+ struct _Unwind_Context *context, void *arg);
+#endif
+static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
static void prof_flush(void);
@@ -160,8 +170,40 @@
prof_udump();
}
-#ifdef JEMALLOC_PROF_LIBUNWIND
-static bool
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+ /* No-op: lets _Unwind_Backtrace() be called without recording frames. */
+ return (_URC_NO_REASON);
+}
+
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+ prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+ /* Drop the first nignore frames, then append each frame's IP to bt. */
+ if (data->nignore > 0)
+ data->nignore--;
+ else {
+ data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+ data->bt->len++;
+ if (data->bt->len == data->max)
+ return (_URC_END_OF_STACK); /* bt now holds max frames. */
+ }
+
+ return (_URC_NO_REASON);
+}
+
+static void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+ prof_unwind_data_t data = {bt, nignore, max};
+ /* libgcc variant: prof_unwind_callback fills bt through data. */
+ _Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#elif defined(JEMALLOC_PROF_LIBUNWIND)
+static void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
unw_context_t uc;
@@ -180,7 +222,7 @@
for (i = 0; i < nignore + 1; i++) {
err = unw_step(&cursor);
if (err <= 0)
- return (false);
+ return;
}
/*
@@ -195,11 +237,9 @@
break;
}
}
-
- return (false);
}
#else
-static bool
+static void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
#define NIGNORE 3
@@ -207,16 +247,16 @@
if ((i) < NIGNORE + max) { \
void *p; \
if (__builtin_frame_address(i) == 0) \
- return (false); \
+ return; \
p = __builtin_return_address(i); \
if (p == NULL) \
- return (false); \
+ return; \
if (i >= NIGNORE) { \
bt->vec[(i) - NIGNORE] = p; \
bt->len = (i) - NIGNORE + 1; \
} \
} else \
- return (false);
+ return;
assert(max <= (1U << opt_lg_prof_bt_max));
@@ -376,9 +416,7 @@
BT_FRAME(128)
BT_FRAME(129)
BT_FRAME(130)
-
#undef BT_FRAME
- return (false);
}
#endif
@@ -1039,6 +1077,14 @@
}
}
+#ifdef JEMALLOC_PROF_LIBGCC
+ /*
+ * Cause the backtracing machinery to allocate its internal state
+ * before enabling profiling.
+ */
+ _Unwind_Backtrace(prof_unwind_init_callback, NULL);
+#endif
+
prof_booted = true;
return (false);