| #define JEMALLOC_PROF_C_ |
| #include "jemalloc/internal/jemalloc_internal.h" |
| /******************************************************************************/ |
| |
| #ifdef JEMALLOC_PROF_LIBUNWIND |
| #define UNW_LOCAL_ONLY |
| #include <libunwind.h> |
| #endif |
| |
| #ifdef JEMALLOC_PROF_LIBGCC |
| #include <unwind.h> |
| #endif |
| |
| /******************************************************************************/ |
| /* Data. */ |
| |
| malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) |
| |
| bool opt_prof = false; |
| bool opt_prof_active = true; |
| size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; |
| ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; |
| bool opt_prof_gdump = false; |
| bool opt_prof_final = true; |
| bool opt_prof_leak = false; |
| bool opt_prof_accum = false; |
| char opt_prof_prefix[ |
| /* Minimize memory bloat for non-prof builds. */ |
| #ifdef JEMALLOC_PROF |
| PATH_MAX + |
| #endif |
| 1]; |
| |
| uint64_t prof_interval = 0; |
| |
| /* |
| * Table of mutexes that are shared among ctx's. These are leaf locks, so |
| * there is no problem with using them for more than one ctx at the same time. |
 * The primary motivation for this sharing, though, is that ctx's are
 * ephemeral,
| * and destroying mutexes causes complications for systems that allocate when |
| * creating/destroying mutexes. |
| */ |
| static malloc_mutex_t *ctx_locks; |
| static unsigned cum_ctxs; /* Atomic counter. */ |
| |
| /* |
| * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data |
| * structure that knows about all backtraces currently captured. |
| */ |
| static ckh_t bt2ctx; |
| static malloc_mutex_t bt2ctx_mtx; |
| |
| static malloc_mutex_t prof_dump_seq_mtx; |
| static uint64_t prof_dump_seq; |
| static uint64_t prof_dump_iseq; |
| static uint64_t prof_dump_mseq; |
| static uint64_t prof_dump_useq; |
| |
| /* |
| * This buffer is rather large for stack allocation, so use a single buffer for |
| * all profile dumps. |
| */ |
| static malloc_mutex_t prof_dump_mtx; |
| static char prof_dump_buf[ |
| /* Minimize memory bloat for non-prof builds. */ |
| #ifdef JEMALLOC_PROF |
| PROF_DUMP_BUFSIZE |
| #else |
| 1 |
| #endif |
| ]; |
| static unsigned prof_dump_buf_end; |
| static int prof_dump_fd; |
| |
| /* Do not dump any profiles until bootstrapping is complete. */ |
| static bool prof_booted = false; |
| |
| /******************************************************************************/ |
| |
| void |
| bt_init(prof_bt_t *bt, void **vec) |
| { |
| |
| cassert(config_prof); |
| |
| bt->vec = vec; |
| bt->len = 0; |
| } |
| |
| static void |
| bt_destroy(prof_bt_t *bt) |
| { |
| |
| cassert(config_prof); |
| |
| idalloc(bt); |
| } |
| |
| static prof_bt_t * |
| bt_dup(prof_bt_t *bt) |
| { |
| prof_bt_t *ret; |
| |
| cassert(config_prof); |
| |
| /* |
| * Create a single allocation that has space for vec immediately |
| * following the prof_bt_t structure. The backtraces that get |
| * stored in the backtrace caches are copied from stack-allocated |
| * temporary variables, so size is known at creation time. Making this |
| * a contiguous object improves cache locality. |
| */ |
| ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + |
| (bt->len * sizeof(void *))); |
| if (ret == NULL) |
| return (NULL); |
| ret->vec = (void **)((uintptr_t)ret + |
| QUANTUM_CEILING(sizeof(prof_bt_t))); |
| memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); |
| ret->len = bt->len; |
| |
| return (ret); |
| } |
| |
| static inline void |
| prof_enter(prof_tdata_t *prof_tdata) |
| { |
| |
| cassert(config_prof); |
| |
| assert(prof_tdata->enq == false); |
| prof_tdata->enq = true; |
| |
| malloc_mutex_lock(&bt2ctx_mtx); |
| } |
| |
| static inline void |
| prof_leave(prof_tdata_t *prof_tdata) |
| { |
| bool idump, gdump; |
| |
| cassert(config_prof); |
| |
| malloc_mutex_unlock(&bt2ctx_mtx); |
| |
| assert(prof_tdata->enq); |
| prof_tdata->enq = false; |
| idump = prof_tdata->enq_idump; |
| prof_tdata->enq_idump = false; |
| gdump = prof_tdata->enq_gdump; |
| prof_tdata->enq_gdump = false; |
| |
| if (idump) |
| prof_idump(); |
| if (gdump) |
| prof_gdump(); |
| } |
| |
| #ifdef JEMALLOC_PROF_LIBUNWIND |
| void |
| prof_backtrace(prof_bt_t *bt) |
| { |
| int nframes; |
| |
| cassert(config_prof); |
| assert(bt->len == 0); |
| assert(bt->vec != NULL); |
| |
| nframes = unw_backtrace(bt->vec, PROF_BT_MAX); |
| if (nframes <= 0) |
| return; |
| bt->len = nframes; |
| } |
| #elif (defined(JEMALLOC_PROF_LIBGCC)) |
| static _Unwind_Reason_Code |
| prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) |
| { |
| |
| cassert(config_prof); |
| |
| return (_URC_NO_REASON); |
| } |
| |
| static _Unwind_Reason_Code |
| prof_unwind_callback(struct _Unwind_Context *context, void *arg) |
| { |
| prof_unwind_data_t *data = (prof_unwind_data_t *)arg; |
| void *ip; |
| |
| cassert(config_prof); |
| |
| ip = (void *)_Unwind_GetIP(context); |
| if (ip == NULL) |
| return (_URC_END_OF_STACK); |
| data->bt->vec[data->bt->len] = ip; |
| data->bt->len++; |
| if (data->bt->len == data->max) |
| return (_URC_END_OF_STACK); |
| |
| return (_URC_NO_REASON); |
| } |
| |
| void |
| prof_backtrace(prof_bt_t *bt) |
| { |
| prof_unwind_data_t data = {bt, PROF_BT_MAX}; |
| |
| cassert(config_prof); |
| |
| _Unwind_Backtrace(prof_unwind_callback, &data); |
| } |
| #elif (defined(JEMALLOC_PROF_GCC)) |
| void |
| prof_backtrace(prof_bt_t *bt) |
| { |
| #define BT_FRAME(i) \ |
| if ((i) < PROF_BT_MAX) { \ |
| void *p; \ |
| if (__builtin_frame_address(i) == 0) \ |
| return; \ |
| p = __builtin_return_address(i); \ |
| if (p == NULL) \ |
| return; \ |
| bt->vec[(i)] = p; \ |
| bt->len = (i) + 1; \ |
| } else \ |
| return; |
| |
| cassert(config_prof); |
| |
| BT_FRAME(0) |
| BT_FRAME(1) |
| BT_FRAME(2) |
| BT_FRAME(3) |
| BT_FRAME(4) |
| BT_FRAME(5) |
| BT_FRAME(6) |
| BT_FRAME(7) |
| BT_FRAME(8) |
| BT_FRAME(9) |
| |
| BT_FRAME(10) |
| BT_FRAME(11) |
| BT_FRAME(12) |
| BT_FRAME(13) |
| BT_FRAME(14) |
| BT_FRAME(15) |
| BT_FRAME(16) |
| BT_FRAME(17) |
| BT_FRAME(18) |
| BT_FRAME(19) |
| |
| BT_FRAME(20) |
| BT_FRAME(21) |
| BT_FRAME(22) |
| BT_FRAME(23) |
| BT_FRAME(24) |
| BT_FRAME(25) |
| BT_FRAME(26) |
| BT_FRAME(27) |
| BT_FRAME(28) |
| BT_FRAME(29) |
| |
| BT_FRAME(30) |
| BT_FRAME(31) |
| BT_FRAME(32) |
| BT_FRAME(33) |
| BT_FRAME(34) |
| BT_FRAME(35) |
| BT_FRAME(36) |
| BT_FRAME(37) |
| BT_FRAME(38) |
| BT_FRAME(39) |
| |
| BT_FRAME(40) |
| BT_FRAME(41) |
| BT_FRAME(42) |
| BT_FRAME(43) |
| BT_FRAME(44) |
| BT_FRAME(45) |
| BT_FRAME(46) |
| BT_FRAME(47) |
| BT_FRAME(48) |
| BT_FRAME(49) |
| |
| BT_FRAME(50) |
| BT_FRAME(51) |
| BT_FRAME(52) |
| BT_FRAME(53) |
| BT_FRAME(54) |
| BT_FRAME(55) |
| BT_FRAME(56) |
| BT_FRAME(57) |
| BT_FRAME(58) |
| BT_FRAME(59) |
| |
| BT_FRAME(60) |
| BT_FRAME(61) |
| BT_FRAME(62) |
| BT_FRAME(63) |
| BT_FRAME(64) |
| BT_FRAME(65) |
| BT_FRAME(66) |
| BT_FRAME(67) |
| BT_FRAME(68) |
| BT_FRAME(69) |
| |
| BT_FRAME(70) |
| BT_FRAME(71) |
| BT_FRAME(72) |
| BT_FRAME(73) |
| BT_FRAME(74) |
| BT_FRAME(75) |
| BT_FRAME(76) |
| BT_FRAME(77) |
| BT_FRAME(78) |
| BT_FRAME(79) |
| |
| BT_FRAME(80) |
| BT_FRAME(81) |
| BT_FRAME(82) |
| BT_FRAME(83) |
| BT_FRAME(84) |
| BT_FRAME(85) |
| BT_FRAME(86) |
| BT_FRAME(87) |
| BT_FRAME(88) |
| BT_FRAME(89) |
| |
| BT_FRAME(90) |
| BT_FRAME(91) |
| BT_FRAME(92) |
| BT_FRAME(93) |
| BT_FRAME(94) |
| BT_FRAME(95) |
| BT_FRAME(96) |
| BT_FRAME(97) |
| BT_FRAME(98) |
| BT_FRAME(99) |
| |
| BT_FRAME(100) |
| BT_FRAME(101) |
| BT_FRAME(102) |
| BT_FRAME(103) |
| BT_FRAME(104) |
| BT_FRAME(105) |
| BT_FRAME(106) |
| BT_FRAME(107) |
| BT_FRAME(108) |
| BT_FRAME(109) |
| |
| BT_FRAME(110) |
| BT_FRAME(111) |
| BT_FRAME(112) |
| BT_FRAME(113) |
| BT_FRAME(114) |
| BT_FRAME(115) |
| BT_FRAME(116) |
| BT_FRAME(117) |
| BT_FRAME(118) |
| BT_FRAME(119) |
| |
| BT_FRAME(120) |
| BT_FRAME(121) |
| BT_FRAME(122) |
| BT_FRAME(123) |
| BT_FRAME(124) |
| BT_FRAME(125) |
| BT_FRAME(126) |
| BT_FRAME(127) |
| #undef BT_FRAME |
| } |
| #else |
| void |
| prof_backtrace(prof_bt_t *bt) |
| { |
| |
| cassert(config_prof); |
| not_reached(); |
| } |
| #endif |
| |
| static malloc_mutex_t * |
| prof_ctx_mutex_choose(void) |
| { |
| unsigned nctxs = atomic_add_u(&cum_ctxs, 1); |
| |
| return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); |
| } |
| |
| static void |
| prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt) |
| { |
| |
| ctx->bt = bt; |
| ctx->lock = prof_ctx_mutex_choose(); |
| /* |
| * Set nlimbo to 1, in order to avoid a race condition with |
| * prof_ctx_merge()/prof_ctx_destroy(). |
| */ |
| ctx->nlimbo = 1; |
| ql_elm_new(ctx, dump_link); |
| memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); |
| ql_new(&ctx->cnts_ql); |
| } |
| |
| static void |
| prof_ctx_destroy(prof_ctx_t *ctx) |
| { |
| prof_tdata_t *prof_tdata; |
| |
| cassert(config_prof); |
| |
| /* |
| * Check that ctx is still unused by any thread cache before destroying |
| * it. prof_lookup() increments ctx->nlimbo in order to avoid a race |
| * condition with this function, as does prof_ctx_merge() in order to |
| * avoid a race between the main body of prof_ctx_merge() and entry |
| * into this function. |
| */ |
| prof_tdata = prof_tdata_get(false); |
| assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); |
| prof_enter(prof_tdata); |
| malloc_mutex_lock(ctx->lock); |
| if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && |
| ctx->nlimbo == 1) { |
| assert(ctx->cnt_merged.curbytes == 0); |
| assert(ctx->cnt_merged.accumobjs == 0); |
| assert(ctx->cnt_merged.accumbytes == 0); |
| /* Remove ctx from bt2ctx. */ |
| if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) |
| not_reached(); |
| prof_leave(prof_tdata); |
| /* Destroy ctx. */ |
| malloc_mutex_unlock(ctx->lock); |
| bt_destroy(ctx->bt); |
| idalloc(ctx); |
| } else { |
| /* |
| * Compensate for increment in prof_ctx_merge() or |
| * prof_lookup(). |
| */ |
| ctx->nlimbo--; |
| malloc_mutex_unlock(ctx->lock); |
| prof_leave(prof_tdata); |
| } |
| } |
| |
| static void |
| prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) |
| { |
| bool destroy; |
| |
| cassert(config_prof); |
| |
| /* Merge cnt stats and detach from ctx. */ |
| malloc_mutex_lock(ctx->lock); |
| ctx->cnt_merged.curobjs += cnt->cnts.curobjs; |
| ctx->cnt_merged.curbytes += cnt->cnts.curbytes; |
| ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; |
| ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; |
| ql_remove(&ctx->cnts_ql, cnt, cnts_link); |
| if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && |
| ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { |
| /* |
| * Increment ctx->nlimbo in order to keep another thread from |
| * winning the race to destroy ctx while this one has ctx->lock |
| * dropped. Without this, it would be possible for another |
| * thread to: |
| * |
| * 1) Sample an allocation associated with ctx. |
| * 2) Deallocate the sampled object. |
| * 3) Successfully prof_ctx_destroy(ctx). |
| * |
| * The result would be that ctx no longer exists by the time |
| * this thread accesses it in prof_ctx_destroy(). |
| */ |
| ctx->nlimbo++; |
| destroy = true; |
| } else |
| destroy = false; |
| malloc_mutex_unlock(ctx->lock); |
| if (destroy) |
| prof_ctx_destroy(ctx); |
| } |
| |
| static bool |
| prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey, |
| prof_ctx_t **p_ctx, bool *p_new_ctx) |
| { |
| union { |
| prof_ctx_t *p; |
| void *v; |
| } ctx; |
| union { |
| prof_bt_t *p; |
| void *v; |
| } btkey; |
| bool new_ctx; |
| |
| prof_enter(prof_tdata); |
| if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { |
| /* bt has never been seen before. Insert it. */ |
| ctx.v = imalloc(sizeof(prof_ctx_t)); |
| if (ctx.v == NULL) { |
| prof_leave(prof_tdata); |
| return (true); |
| } |
| btkey.p = bt_dup(bt); |
| if (btkey.v == NULL) { |
| prof_leave(prof_tdata); |
| idalloc(ctx.v); |
| return (true); |
| } |
| prof_ctx_init(ctx.p, btkey.p); |
| if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { |
| /* OOM. */ |
| prof_leave(prof_tdata); |
| idalloc(btkey.v); |
| idalloc(ctx.v); |
| return (true); |
| } |
| new_ctx = true; |
| } else { |
| /* |
| * Increment nlimbo, in order to avoid a race condition with |
| * prof_ctx_merge()/prof_ctx_destroy(). |
| */ |
| malloc_mutex_lock(ctx.p->lock); |
| ctx.p->nlimbo++; |
| malloc_mutex_unlock(ctx.p->lock); |
| new_ctx = false; |
| } |
| prof_leave(prof_tdata); |
| |
| *p_btkey = btkey.v; |
| *p_ctx = ctx.p; |
| *p_new_ctx = new_ctx; |
| return (false); |
| } |
| |
| prof_thr_cnt_t * |
| prof_lookup(prof_bt_t *bt) |
| { |
| union { |
| prof_thr_cnt_t *p; |
| void *v; |
| } ret; |
| prof_tdata_t *prof_tdata; |
| |
| cassert(config_prof); |
| |
| prof_tdata = prof_tdata_get(false); |
| if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) |
| return (NULL); |
| |
| if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { |
| void *btkey; |
| prof_ctx_t *ctx; |
| bool new_ctx; |
| |
| /* |
| * This thread's cache lacks bt. Look for it in the global |
| * cache. |
| */ |
| if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx)) |
| return (NULL); |
| |
		/* Link a prof_thr_cnt_t into ctx for this thread. */
| if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { |
| assert(ckh_count(&prof_tdata->bt2cnt) > 0); |
| /* |
| * Flush the least recently used cnt in order to keep |
| * bt2cnt from becoming too large. |
| */ |
| ret.p = ql_last(&prof_tdata->lru_ql, lru_link); |
| assert(ret.v != NULL); |
| if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, |
| NULL, NULL)) |
| not_reached(); |
| ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); |
| prof_ctx_merge(ret.p->ctx, ret.p); |
| /* ret can now be re-used. */ |
| } else { |
| assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); |
| /* Allocate and partially initialize a new cnt. */ |
| ret.v = imalloc(sizeof(prof_thr_cnt_t)); |
| if (ret.p == NULL) { |
| if (new_ctx) |
| prof_ctx_destroy(ctx); |
| return (NULL); |
| } |
| ql_elm_new(ret.p, cnts_link); |
| ql_elm_new(ret.p, lru_link); |
| } |
| /* Finish initializing ret. */ |
| ret.p->ctx = ctx; |
| ret.p->epoch = 0; |
| memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); |
| if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) { |
| if (new_ctx) |
| prof_ctx_destroy(ctx); |
| idalloc(ret.v); |
| return (NULL); |
| } |
| ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); |
| malloc_mutex_lock(ctx->lock); |
| ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link); |
| ctx->nlimbo--; |
| malloc_mutex_unlock(ctx->lock); |
| } else { |
| /* Move ret to the front of the LRU. */ |
| ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); |
| ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); |
| } |
| |
| return (ret.p); |
| } |
| |
| |
| void |
| prof_sample_threshold_update(prof_tdata_t *prof_tdata) |
| { |
| /* |
| * The body of this function is compiled out unless heap profiling is |
| * enabled, so that it is possible to compile jemalloc with floating |
| * point support completely disabled. Avoiding floating point code is |
| * important on memory-constrained systems, but it also enables a |
| * workaround for versions of glibc that don't properly save/restore |
| * floating point registers during dynamic lazy symbol loading (which |
| * internally calls into whatever malloc implementation happens to be |
| * integrated into the application). Note that some compilers (e.g. |
| * gcc 4.8) may use floating point registers for fast memory moves, so |
| * jemalloc must be compiled with such optimizations disabled (e.g. |
| * -mno-sse) in order for the workaround to be complete. |
| */ |
| #ifdef JEMALLOC_PROF |
| uint64_t r; |
| double u; |
| |
| if (!config_prof) |
| return; |
| |
| if (prof_tdata == NULL) |
| prof_tdata = prof_tdata_get(false); |
| |
| if (opt_lg_prof_sample == 0) { |
| prof_tdata->bytes_until_sample = 0; |
| return; |
| } |
| |
| /* |
| * Compute sample threshold as a geometrically distributed random |
| * variable with mean (2^opt_lg_prof_sample). |
| * |
| * __ __ |
| * | log(u) | 1 |
| * prof_tdata->threshold = | -------- |, where p = ------------------- |
| * | log(1-p) | opt_lg_prof_sample |
| * 2 |
| * |
| * For more information on the math, see: |
| * |
| * Non-Uniform Random Variate Generation |
| * Luc Devroye |
| * Springer-Verlag, New York, 1986 |
| * pp 500 |
| * (http://luc.devroye.org/rnbookindex.html) |
| */ |
| prng64(r, 53, prof_tdata->prng_state, |
| UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); |
| u = (double)r * (1.0/9007199254740992.0L); |
| prof_tdata->bytes_until_sample = (uint64_t)(log(u) / |
| log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) |
| + (uint64_t)1U; |
| #endif |
| } |
| |
| |
| #ifdef JEMALLOC_JET |
| size_t |
| prof_bt_count(void) |
| { |
| size_t bt_count; |
| prof_tdata_t *prof_tdata; |
| |
| prof_tdata = prof_tdata_get(false); |
| if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) |
| return (0); |
| |
| prof_enter(prof_tdata); |
| bt_count = ckh_count(&bt2ctx); |
| prof_leave(prof_tdata); |
| |
| return (bt_count); |
| } |
| #endif |
| |
| #ifdef JEMALLOC_JET |
| #undef prof_dump_open |
| #define prof_dump_open JEMALLOC_N(prof_dump_open_impl) |
| #endif |
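/*
 * When built with JEMALLOC_JET, the real implementation is exposed as
 * prof_dump_open_impl() and prof_dump_open is redirected through a function
 * pointer (see below) so that tests can interpose on dump file creation.
 */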
| static int |
| prof_dump_open(bool propagate_err, const char *filename) |
| { |
| int fd; |
| |
| fd = creat(filename, 0644); |
| if (fd == -1 && propagate_err == false) { |
| malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", |
| filename); |
| if (opt_abort) |
| abort(); |
| } |
| |
| return (fd); |
| } |
| #ifdef JEMALLOC_JET |
| #undef prof_dump_open |
| #define prof_dump_open JEMALLOC_N(prof_dump_open) |
| prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); |
| #endif |
| |
| static bool |
| prof_dump_flush(bool propagate_err) |
| { |
| bool ret = false; |
| ssize_t err; |
| |
| cassert(config_prof); |
| |
| err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); |
| if (err == -1) { |
| if (propagate_err == false) { |
| malloc_write("<jemalloc>: write() failed during heap " |
| "profile flush\n"); |
| if (opt_abort) |
| abort(); |
| } |
| ret = true; |
| } |
| prof_dump_buf_end = 0; |
| |
| return (ret); |
| } |
| |
| static bool |
| prof_dump_close(bool propagate_err) |
| { |
| bool ret; |
| |
| assert(prof_dump_fd != -1); |
| ret = prof_dump_flush(propagate_err); |
| close(prof_dump_fd); |
| prof_dump_fd = -1; |
| |
| return (ret); |
| } |
| |
| static bool |
| prof_dump_write(bool propagate_err, const char *s) |
| { |
| unsigned i, slen, n; |
| |
| cassert(config_prof); |
| |
| i = 0; |
| slen = strlen(s); |
| while (i < slen) { |
| /* Flush the buffer if it is full. */ |
| if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) |
| if (prof_dump_flush(propagate_err) && propagate_err) |
| return (true); |
| |
		if (prof_dump_buf_end + (slen - i) <= PROF_DUMP_BUFSIZE) {
| /* Finish writing. */ |
| n = slen - i; |
| } else { |
| /* Write as much of s as will fit. */ |
| n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; |
| } |
| memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); |
| prof_dump_buf_end += n; |
| i += n; |
| } |
| |
| return (false); |
| } |
| |
| JEMALLOC_ATTR(format(printf, 2, 3)) |
| static bool |
| prof_dump_printf(bool propagate_err, const char *format, ...) |
| { |
| bool ret; |
| va_list ap; |
| char buf[PROF_PRINTF_BUFSIZE]; |
| |
| va_start(ap, format); |
| malloc_vsnprintf(buf, sizeof(buf), format, ap); |
| va_end(ap); |
| ret = prof_dump_write(propagate_err, buf); |
| |
| return (ret); |
| } |
| |
| static void |
| prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx, |
| prof_ctx_list_t *ctx_ql) |
| { |
| prof_thr_cnt_t *thr_cnt; |
| prof_cnt_t tcnt; |
| |
| cassert(config_prof); |
| |
| malloc_mutex_lock(ctx->lock); |
| |
| /* |
| * Increment nlimbo so that ctx won't go away before dump. |
| * Additionally, link ctx into the dump list so that it is included in |
| * prof_dump()'s second pass. |
| */ |
| ctx->nlimbo++; |
| ql_tail_insert(ctx_ql, ctx, dump_link); |
| |
| memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); |
| ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { |
| volatile unsigned *epoch = &thr_cnt->epoch; |
| |
| while (true) { |
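			/*
			 * Writers increment epoch to an odd value before
			 * updating cnts and to an even value afterward;
			 * retry until an even epoch is observed both before
			 * and after the copy (seqlock-style consistent read).
			 */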
| unsigned epoch0 = *epoch; |
| |
| /* Make sure epoch is even. */ |
| if (epoch0 & 1U) |
| continue; |
| |
| memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); |
| |
| /* Terminate if epoch didn't change while reading. */ |
| if (*epoch == epoch0) |
| break; |
| } |
| |
| ctx->cnt_summed.curobjs += tcnt.curobjs; |
| ctx->cnt_summed.curbytes += tcnt.curbytes; |
| if (opt_prof_accum) { |
| ctx->cnt_summed.accumobjs += tcnt.accumobjs; |
| ctx->cnt_summed.accumbytes += tcnt.accumbytes; |
| } |
| } |
| |
| if (ctx->cnt_summed.curobjs != 0) |
| (*leak_nctx)++; |
| |
| /* Add to cnt_all. */ |
| cnt_all->curobjs += ctx->cnt_summed.curobjs; |
| cnt_all->curbytes += ctx->cnt_summed.curbytes; |
| if (opt_prof_accum) { |
| cnt_all->accumobjs += ctx->cnt_summed.accumobjs; |
| cnt_all->accumbytes += ctx->cnt_summed.accumbytes; |
| } |
| |
| malloc_mutex_unlock(ctx->lock); |
| } |
| |
| static bool |
| prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) |
| { |
| |
| if (opt_lg_prof_sample == 0) { |
| if (prof_dump_printf(propagate_err, |
| "heap profile: %"PRId64": %"PRId64 |
| " [%"PRIu64": %"PRIu64"] @ heapprofile\n", |
| cnt_all->curobjs, cnt_all->curbytes, |
| cnt_all->accumobjs, cnt_all->accumbytes)) |
| return (true); |
| } else { |
| if (prof_dump_printf(propagate_err, |
| "heap profile: %"PRId64": %"PRId64 |
| " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", |
| cnt_all->curobjs, cnt_all->curbytes, |
| cnt_all->accumobjs, cnt_all->accumbytes, |
| ((uint64_t)1U << opt_lg_prof_sample))) |
| return (true); |
| } |
| |
| return (false); |
| } |
| |
| static void |
| prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) |
| { |
| |
| ctx->nlimbo--; |
| ql_remove(ctx_ql, ctx, dump_link); |
| } |
| |
| static void |
| prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql) |
| { |
| |
| malloc_mutex_lock(ctx->lock); |
| prof_dump_ctx_cleanup_locked(ctx, ctx_ql); |
| malloc_mutex_unlock(ctx->lock); |
| } |
| |
| static bool |
| prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt, |
| prof_ctx_list_t *ctx_ql) |
| { |
| bool ret; |
| unsigned i; |
| |
| cassert(config_prof); |
| |
| /* |
| * Current statistics can sum to 0 as a result of unmerged per thread |
| * statistics. Additionally, interval- and growth-triggered dumps can |
| * occur between the time a ctx is created and when its statistics are |
| * filled in. Avoid dumping any ctx that is an artifact of either |
| * implementation detail. |
| */ |
| malloc_mutex_lock(ctx->lock); |
| if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || |
| (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { |
| assert(ctx->cnt_summed.curobjs == 0); |
| assert(ctx->cnt_summed.curbytes == 0); |
| assert(ctx->cnt_summed.accumobjs == 0); |
| assert(ctx->cnt_summed.accumbytes == 0); |
| ret = false; |
| goto label_return; |
| } |
| |
| if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64 |
| " [%"PRIu64": %"PRIu64"] @", |
| ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, |
| ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) { |
| ret = true; |
| goto label_return; |
| } |
| |
| for (i = 0; i < bt->len; i++) { |
| if (prof_dump_printf(propagate_err, " %#"PRIxPTR, |
| (uintptr_t)bt->vec[i])) { |
| ret = true; |
| goto label_return; |
| } |
| } |
| |
| if (prof_dump_write(propagate_err, "\n")) { |
| ret = true; |
| goto label_return; |
| } |
| |
| ret = false; |
| label_return: |
| prof_dump_ctx_cleanup_locked(ctx, ctx_ql); |
| malloc_mutex_unlock(ctx->lock); |
| return (ret); |
| } |
| |
| static bool |
| prof_dump_maps(bool propagate_err) |
| { |
| bool ret; |
| int mfd; |
| char filename[PATH_MAX + 1]; |
| |
| cassert(config_prof); |
| #ifdef __FreeBSD__ |
| malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map"); |
| #else |
| malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", |
| (int)getpid()); |
| #endif |
| mfd = open(filename, O_RDONLY); |
| if (mfd != -1) { |
| ssize_t nread; |
| |
| if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && |
| propagate_err) { |
| ret = true; |
| goto label_return; |
| } |
| nread = 0; |
| do { |
| prof_dump_buf_end += nread; |
| if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { |
| /* Make space in prof_dump_buf before read(). */ |
| if (prof_dump_flush(propagate_err) && |
| propagate_err) { |
| ret = true; |
| goto label_return; |
| } |
| } |
| nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], |
| PROF_DUMP_BUFSIZE - prof_dump_buf_end); |
| } while (nread > 0); |
| } else { |
| ret = true; |
| goto label_return; |
| } |
| |
| ret = false; |
| label_return: |
| if (mfd != -1) |
| close(mfd); |
| return (ret); |
| } |
| |
| static void |
| prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx, |
| const char *filename) |
| { |
| |
| if (cnt_all->curbytes != 0) { |
| malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %" |
| PRId64" object%s, %zu context%s\n", |
| cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", |
| cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", |
| leak_nctx, (leak_nctx != 1) ? "s" : ""); |
| malloc_printf( |
| "<jemalloc>: Run pprof on \"%s\" for leak detail\n", |
| filename); |
| } |
| } |
| |
| static bool |
| prof_dump(bool propagate_err, const char *filename, bool leakcheck) |
| { |
| prof_tdata_t *prof_tdata; |
| prof_cnt_t cnt_all; |
| size_t tabind; |
| union { |
| prof_ctx_t *p; |
| void *v; |
| } ctx; |
| size_t leak_nctx; |
| prof_ctx_list_t ctx_ql; |
| |
| cassert(config_prof); |
| |
| prof_tdata = prof_tdata_get(false); |
| if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) |
| return (true); |
| |
| malloc_mutex_lock(&prof_dump_mtx); |
| |
| /* Merge per thread profile stats, and sum them in cnt_all. */ |
| memset(&cnt_all, 0, sizeof(prof_cnt_t)); |
| leak_nctx = 0; |
| ql_new(&ctx_ql); |
| prof_enter(prof_tdata); |
| for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) |
| prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql); |
| prof_leave(prof_tdata); |
| |
| /* Create dump file. */ |
| if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) |
| goto label_open_close_error; |
| |
| /* Dump profile header. */ |
| if (prof_dump_header(propagate_err, &cnt_all)) |
| goto label_write_error; |
| |
| /* Dump per ctx profile stats. */ |
| while ((ctx.p = ql_first(&ctx_ql)) != NULL) { |
| if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql)) |
| goto label_write_error; |
| } |
| |
| /* Dump /proc/<pid>/maps if possible. */ |
| if (prof_dump_maps(propagate_err)) |
| goto label_write_error; |
| |
| if (prof_dump_close(propagate_err)) |
| goto label_open_close_error; |
| |
| malloc_mutex_unlock(&prof_dump_mtx); |
| |
| if (leakcheck) |
| prof_leakcheck(&cnt_all, leak_nctx, filename); |
| |
| return (false); |
| label_write_error: |
| prof_dump_close(propagate_err); |
| label_open_close_error: |
| while ((ctx.p = ql_first(&ctx_ql)) != NULL) |
| prof_dump_ctx_cleanup(ctx.p, &ctx_ql); |
| malloc_mutex_unlock(&prof_dump_mtx); |
| return (true); |
| } |
| |
| #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) |
| #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) |
| static void |
| prof_dump_filename(char *filename, char v, uint64_t vseq) |
| { |
| |
| cassert(config_prof); |
| |
| if (vseq != VSEQ_INVALID) { |
| /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ |
| malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, |
| "%s.%d.%"PRIu64".%c%"PRIu64".heap", |
| opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); |
| } else { |
| /* "<prefix>.<pid>.<seq>.<v>.heap" */ |
| malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, |
| "%s.%d.%"PRIu64".%c.heap", |
| opt_prof_prefix, (int)getpid(), prof_dump_seq, v); |
| } |
| prof_dump_seq++; |
| } |
| |
| static void |
| prof_fdump(void) |
| { |
| char filename[DUMP_FILENAME_BUFSIZE]; |
| |
| cassert(config_prof); |
| |
| if (prof_booted == false) |
| return; |
| |
| if (opt_prof_final && opt_prof_prefix[0] != '\0') { |
| malloc_mutex_lock(&prof_dump_seq_mtx); |
| prof_dump_filename(filename, 'f', VSEQ_INVALID); |
| malloc_mutex_unlock(&prof_dump_seq_mtx); |
| prof_dump(false, filename, opt_prof_leak); |
| } |
| } |
| |
| void |
| prof_idump(void) |
| { |
| prof_tdata_t *prof_tdata; |
| char filename[PATH_MAX + 1]; |
| |
| cassert(config_prof); |
| |
| if (prof_booted == false) |
| return; |
| prof_tdata = prof_tdata_get(false); |
| if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) |
| return; |
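	/*
	 * If the calling thread is inside a prof_enter()/prof_leave()
	 * critical section, defer the dump until prof_leave().
	 */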
| if (prof_tdata->enq) { |
| prof_tdata->enq_idump = true; |
| return; |
| } |
| |
| if (opt_prof_prefix[0] != '\0') { |
| malloc_mutex_lock(&prof_dump_seq_mtx); |
| prof_dump_filename(filename, 'i', prof_dump_iseq); |
| prof_dump_iseq++; |
| malloc_mutex_unlock(&prof_dump_seq_mtx); |
| prof_dump(false, filename, false); |
| } |
| } |
| |
| bool |
| prof_mdump(const char *filename) |
| { |
| char filename_buf[DUMP_FILENAME_BUFSIZE]; |
| |
| cassert(config_prof); |
| |
| if (opt_prof == false || prof_booted == false) |
| return (true); |
| |
| if (filename == NULL) { |
| /* No filename specified, so automatically generate one. */ |
| if (opt_prof_prefix[0] == '\0') |
| return (true); |
| malloc_mutex_lock(&prof_dump_seq_mtx); |
| prof_dump_filename(filename_buf, 'm', prof_dump_mseq); |
| prof_dump_mseq++; |
| malloc_mutex_unlock(&prof_dump_seq_mtx); |
| filename = filename_buf; |
| } |
| return (prof_dump(true, filename, false)); |
| } |
| |
| void |
| prof_gdump(void) |
| { |
| prof_tdata_t *prof_tdata; |
| char filename[DUMP_FILENAME_BUFSIZE]; |
| |
| cassert(config_prof); |
| |
| if (prof_booted == false) |
| return; |
| prof_tdata = prof_tdata_get(false); |
| if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) |
| return; |
| if (prof_tdata->enq) { |
| prof_tdata->enq_gdump = true; |
| return; |
| } |
| |
| if (opt_prof_prefix[0] != '\0') { |
| malloc_mutex_lock(&prof_dump_seq_mtx); |
| prof_dump_filename(filename, 'u', prof_dump_useq); |
| prof_dump_useq++; |
| malloc_mutex_unlock(&prof_dump_seq_mtx); |
| prof_dump(false, filename, false); |
| } |
| } |
| |
| static void |
| prof_bt_hash(const void *key, size_t r_hash[2]) |
| { |
| prof_bt_t *bt = (prof_bt_t *)key; |
| |
| cassert(config_prof); |
| |
| hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); |
| } |
| |
| static bool |
| prof_bt_keycomp(const void *k1, const void *k2) |
| { |
| const prof_bt_t *bt1 = (prof_bt_t *)k1; |
| const prof_bt_t *bt2 = (prof_bt_t *)k2; |
| |
| cassert(config_prof); |
| |
| if (bt1->len != bt2->len) |
| return (false); |
| return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); |
| } |
| |
| prof_tdata_t * |
| prof_tdata_init(void) |
| { |
| prof_tdata_t *prof_tdata; |
| |
| cassert(config_prof); |
| |
| /* Initialize an empty cache for this thread. */ |
| prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); |
| if (prof_tdata == NULL) |
| return (NULL); |
| |
| if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, |
| prof_bt_hash, prof_bt_keycomp)) { |
| idalloc(prof_tdata); |
| return (NULL); |
| } |
| ql_new(&prof_tdata->lru_ql); |
| |
| prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); |
| if (prof_tdata->vec == NULL) { |
| ckh_delete(&prof_tdata->bt2cnt); |
| idalloc(prof_tdata); |
| return (NULL); |
| } |
| |
| prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata; |
| prof_sample_threshold_update(prof_tdata); |
| |
| prof_tdata->enq = false; |
| prof_tdata->enq_idump = false; |
| prof_tdata->enq_gdump = false; |
| |
| prof_tdata_tsd_set(&prof_tdata); |
| |
| return (prof_tdata); |
| } |
| |
| void |
| prof_tdata_cleanup(void *arg) |
| { |
| prof_thr_cnt_t *cnt; |
| prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; |
| |
| cassert(config_prof); |
| |
| if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { |
| /* |
| * Another destructor deallocated memory after this destructor |
| * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY |
| * in order to receive another callback. |
| */ |
| prof_tdata = PROF_TDATA_STATE_PURGATORY; |
| prof_tdata_tsd_set(&prof_tdata); |
| } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { |
| /* |
| * The previous time this destructor was called, we set the key |
| * to PROF_TDATA_STATE_PURGATORY so that other destructors |
| * wouldn't cause re-creation of the prof_tdata. This time, do |
| * nothing, so that the destructor will not be called again. |
| */ |
| } else if (prof_tdata != NULL) { |
| /* |
| * Delete the hash table. All of its contents can still be |
| * iterated over via the LRU. |
| */ |
| ckh_delete(&prof_tdata->bt2cnt); |
| /* |
| * Iteratively merge cnt's into the global stats and delete |
| * them. |
| */ |
| while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { |
| ql_remove(&prof_tdata->lru_ql, cnt, lru_link); |
| prof_ctx_merge(cnt->ctx, cnt); |
| idalloc(cnt); |
| } |
| idalloc(prof_tdata->vec); |
| idalloc(prof_tdata); |
| prof_tdata = PROF_TDATA_STATE_PURGATORY; |
| prof_tdata_tsd_set(&prof_tdata); |
| } |
| } |
| |
| void |
| prof_boot0(void) |
| { |
| |
| cassert(config_prof); |
| |
| memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, |
| sizeof(PROF_PREFIX_DEFAULT)); |
| } |
| |
| void |
| prof_boot1(void) |
| { |
| |
| cassert(config_prof); |
| |
| /* |
| * opt_prof must be in its final state before any arenas are |
| * initialized, so this function must be executed early. |
| */ |
| |
| if (opt_prof_leak && opt_prof == false) { |
| /* |
| * Enable opt_prof, but in such a way that profiles are never |
| * automatically dumped. |
| */ |
| opt_prof = true; |
| opt_prof_gdump = false; |
| } else if (opt_prof) { |
| if (opt_lg_prof_interval >= 0) { |
| prof_interval = (((uint64_t)1U) << |
| opt_lg_prof_interval); |
| } |
| } |
| } |
| |
| bool |
| prof_boot2(void) |
| { |
| |
| cassert(config_prof); |
| |
| if (opt_prof) { |
| unsigned i; |
| |
| if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, |
| prof_bt_keycomp)) |
| return (true); |
| if (malloc_mutex_init(&bt2ctx_mtx)) |
| return (true); |
| if (prof_tdata_tsd_boot()) { |
| malloc_write( |
| "<jemalloc>: Error in pthread_key_create()\n"); |
| abort(); |
| } |
| |
| if (malloc_mutex_init(&prof_dump_seq_mtx)) |
| return (true); |
| if (malloc_mutex_init(&prof_dump_mtx)) |
| return (true); |
| |
| if (atexit(prof_fdump) != 0) { |
| malloc_write("<jemalloc>: Error in atexit()\n"); |
| if (opt_abort) |
| abort(); |
| } |
| |
| ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * |
| sizeof(malloc_mutex_t)); |
| if (ctx_locks == NULL) |
| return (true); |
| for (i = 0; i < PROF_NCTX_LOCKS; i++) { |
| if (malloc_mutex_init(&ctx_locks[i])) |
| return (true); |
| } |
| } |
| |
| #ifdef JEMALLOC_PROF_LIBGCC |
| /* |
| * Cause the backtracing machinery to allocate its internal state |
| * before enabling profiling. |
| */ |
| _Unwind_Backtrace(prof_unwind_init_callback, NULL); |
| #endif |
| |
| prof_booted = true; |
| |
| return (false); |
| } |
| |
| void |
| prof_prefork(void) |
| { |
| |
| if (opt_prof) { |
| unsigned i; |
| |
| malloc_mutex_prefork(&bt2ctx_mtx); |
| malloc_mutex_prefork(&prof_dump_seq_mtx); |
| for (i = 0; i < PROF_NCTX_LOCKS; i++) |
| malloc_mutex_prefork(&ctx_locks[i]); |
| } |
| } |
| |
| void |
| prof_postfork_parent(void) |
| { |
| |
| if (opt_prof) { |
| unsigned i; |
| |
| for (i = 0; i < PROF_NCTX_LOCKS; i++) |
| malloc_mutex_postfork_parent(&ctx_locks[i]); |
| malloc_mutex_postfork_parent(&prof_dump_seq_mtx); |
| malloc_mutex_postfork_parent(&bt2ctx_mtx); |
| } |
| } |
| |
| void |
| prof_postfork_child(void) |
| { |
| |
| if (opt_prof) { |
| unsigned i; |
| |
| for (i = 0; i < PROF_NCTX_LOCKS; i++) |
| malloc_mutex_postfork_child(&ctx_locks[i]); |
| malloc_mutex_postfork_child(&prof_dump_seq_mtx); |
| malloc_mutex_postfork_child(&bt2ctx_mtx); |
| } |
| } |
| |
| /******************************************************************************/ |