| /******************************************************************************/ |
| #ifdef JEMALLOC_H_TYPES |
| |
| typedef struct prof_bt_s prof_bt_t; |
| typedef struct prof_cnt_s prof_cnt_t; |
| typedef struct prof_thr_cnt_s prof_thr_cnt_t; |
| typedef struct prof_ctx_s prof_ctx_t; |
| typedef struct prof_tdata_s prof_tdata_t; |
| |
| /* Option defaults. */ |
| #define PROF_PREFIX_DEFAULT "jeprof" |
| #define LG_PROF_SAMPLE_DEFAULT 0 |
| #define LG_PROF_INTERVAL_DEFAULT -1 |
| |
| /* |
| * Hard limit on stack backtrace depth. The version of prof_backtrace() that |
| * is based on __builtin_return_address() necessarily has a hard-coded number |
| * of backtrace frame handlers, which must be kept in sync with this setting. |
| */ |
| #define PROF_BT_MAX 128 |
| |
| /* Maximum number of backtraces to store in each per-thread LRU cache. */ |
| #define PROF_TCMAX 1024 |
| |
| /* Initial hash table size. */ |
| #define PROF_CKH_MINITEMS 64 |
| |
| /* Size of memory buffer to use when writing dump files. */ |
| #define PROF_DUMP_BUF_SIZE 65536 |
| |
| #endif /* JEMALLOC_H_TYPES */ |
| /******************************************************************************/ |
| #ifdef JEMALLOC_H_STRUCTS |
| |
| struct prof_bt_s { |
| /* Backtrace, stored as len program counters. */ |
| void **vec; |
| unsigned len; |
| }; |
| |
| #ifdef JEMALLOC_PROF_LIBGCC |
| /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ |
| typedef struct { |
| prof_bt_t *bt; |
| unsigned nignore; |
| unsigned max; |
| } prof_unwind_data_t; |
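| |
| /* |
| * Illustrative sketch (not the actual implementation in prof.c) of a |
| * callback that _Unwind_Backtrace() would invoke with a pointer to this |
| * struct as its argument: skip the first nignore frames, then record |
| * instruction pointers until max frames have been captured. |
| * |
| * static _Unwind_Reason_Code |
| * example_unwind_callback(struct _Unwind_Context *context, void *arg) |
| * { |
| * prof_unwind_data_t *data = (prof_unwind_data_t *)arg; |
| * |
| * if (data->nignore > 0) |
| * data->nignore--; |
| * else { |
| * data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); |
| * data->bt->len++; |
| * if (data->bt->len == data->max) |
| * return (_URC_END_OF_STACK); |
| * } |
| * return (_URC_NO_REASON); |
| * } |
| */ |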
| #endif |
| |
| struct prof_cnt_s { |
| /* |
| * Profiling counters. An allocation/deallocation pair can operate on |
| * different prof_thr_cnt_t objects that are linked into the same |
| * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go |
| * negative. In principle it is possible for the *bytes counters to |
| * overflow/underflow, but a general solution would require something |
| * like 128-bit counters; this implementation doesn't bother to solve |
| * that problem. |
| */ |
| int64_t curobjs; |
| int64_t curbytes; |
| uint64_t accumobjs; |
| uint64_t accumbytes; |
| }; |
| |
| struct prof_thr_cnt_s { |
| /* Linkage into prof_ctx_t's cnts_ql. */ |
| ql_elm(prof_thr_cnt_t) cnts_link; |
| |
| /* Linkage into thread's LRU. */ |
| ql_elm(prof_thr_cnt_t) lru_link; |
| |
| /* |
| * Associated context. If a thread frees an object that it did not |
| * allocate, it is possible that the context is not cached in the |
| * thread's hash table, in which case it must be able to look up the |
| * context, insert a new prof_thr_cnt_t into the thread's hash table, |
| * and link it into the prof_ctx_t's cnts_ql. |
| */ |
| prof_ctx_t *ctx; |
| |
| /* |
| * Threads use memory barriers to update the counters. Since there is |
| * only ever one writer, the only challenge is for the reader to get a |
| * consistent read of the counters. |
| * |
| * The writer uses this series of operations: |
| * |
| * 1) Increment epoch to an odd number. |
| * 2) Update counters. |
| * 3) Increment epoch to an even number. |
| * |
| * The reader must ensure 1) that the epoch is even while it reads the |
| * counters, and 2) that the epoch does not change between when it starts |
| * and finishes reading the counters. |
| */ |
| unsigned epoch; |
| |
| /* Profiling counters. */ |
| prof_cnt_t cnts; |
| }; |
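| |
| /* |
| * Reader-side sketch (illustrative) of the protocol described above, given |
| * a prof_thr_cnt_t *cnt: copy the counters only while the epoch is even, |
| * and retry if the epoch changed during the copy. |
| * |
| * prof_cnt_t snapshot; |
| * volatile unsigned *epoch = &cnt->epoch; |
| * |
| * while (true) { |
| * unsigned epoch0 = *epoch; |
| * |
| * if ((epoch0 & 1U) == 0) { |
| * memcpy(&snapshot, &cnt->cnts, sizeof(prof_cnt_t)); |
| * if (*epoch == epoch0) |
| * break; |
| * } |
| * } |
| */ |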
| |
| struct prof_ctx_s { |
| /* Associated backtrace. */ |
| prof_bt_t *bt; |
| |
| /* Protects cnt_merged and cnts_ql. */ |
| malloc_mutex_t lock; |
| |
| /* Temporary storage for summation during dump. */ |
| prof_cnt_t cnt_summed; |
| |
| /* When threads exit, they merge their stats into cnt_merged. */ |
| prof_cnt_t cnt_merged; |
| |
| /* |
| * List of profile counters, one for each thread that has allocated in |
| * this context. |
| */ |
| ql_head(prof_thr_cnt_t) cnts_ql; |
| }; |
| |
| struct prof_tdata_s { |
| /* |
| * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a |
| * cache of backtraces, with associated thread-specific prof_thr_cnt_t |
| * objects. Other threads may read the prof_thr_cnt_t contents, but only |
| * the owning thread ever writes them. |
| * |
| * Upon thread exit, the thread must merge all the prof_thr_cnt_t |
| * counter data into the associated prof_ctx_t objects, and unlink/free |
| * the prof_thr_cnt_t objects. |
| */ |
| ckh_t bt2cnt; |
| |
| /* LRU for contents of bt2cnt. */ |
| ql_head(prof_thr_cnt_t) lru_ql; |
| |
| /* Backtrace vector, used for calls to prof_backtrace(). */ |
| void **vec; |
| |
| /* Sampling state. */ |
| uint64_t prng_state; |
| uint64_t threshold; |
| uint64_t accum; |
| }; |
| |
| #endif /* JEMALLOC_H_STRUCTS */ |
| /******************************************************************************/ |
| #ifdef JEMALLOC_H_EXTERNS |
| |
| extern bool opt_prof; |
| /* |
| * Even if opt_prof is true, sampling can be temporarily disabled by setting |
| * opt_prof_active to false. No locking is used when updating opt_prof_active, |
| * so there are no guarantees regarding how long it will take for all threads |
| * to notice state changes. |
| */ |
| extern bool opt_prof_active; |
| extern size_t opt_lg_prof_sample; /* lg(mean bytes between samples). */ |
| extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ |
| extern bool opt_prof_gdump; /* High-water memory dumping. */ |
| extern bool opt_prof_leak; /* Dump leak summary at exit. */ |
| extern bool opt_prof_accum; /* Report cumulative bytes. */ |
| extern char opt_prof_prefix[PATH_MAX + 1]; |
| |
| /* |
| * Profile dump interval, measured in bytes allocated. Each arena triggers a |
| * profile dump when it reaches this threshold. The effect is that the |
| * interval between profile dumps averages prof_interval, though actual |
| * intervals tend to be sporadic, with a maximum of approximately |
| * (prof_interval * narenas). |
| */ |
| extern uint64_t prof_interval; |
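| |
| /* |
| * For example (illustrative numbers): with lg_prof_interval:30 and four |
| * arenas, prof_interval is 1 GiB, so dumps average one per GiB allocated |
| * process-wide; but if allocation is spread evenly and the per-arena |
| * counters are in phase, up to ~4 GiB may be allocated between dumps. |
| */ |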
| |
| /* |
| * If true, promote small sampled objects to large objects, since small run |
| * headers do not have embedded profile context pointers. |
| */ |
| extern bool prof_promote; |
| |
| /* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ |
| #ifdef JEMALLOC_TLS |
| extern __thread prof_tdata_t *prof_tdata_tls |
| JEMALLOC_ATTR(tls_model("initial-exec")); |
| # define PROF_TCACHE_GET() prof_tdata_tls |
| # define PROF_TCACHE_SET(v) do { \ |
| prof_tdata_tls = (v); \ |
| pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ |
| } while (0) |
| #else |
| # define PROF_TCACHE_GET() \ |
| ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) |
| # define PROF_TCACHE_SET(v) do { \ |
| pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ |
| } while (0) |
| #endif |
| /* |
| * Same contents as prof_tdata_tls, but initialized such that the TSD |
| * destructor is called when a thread exits, so that the thread's |
| * prof_tdata_t contents can be merged, unlinked, and deallocated. |
| */ |
| extern pthread_key_t prof_tdata_tsd; |
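| |
| /* |
| * Bootstrap registers this key along the following lines (a sketch; the |
| * destructor name is illustrative, the real one lives in prof.c): |
| * |
| * if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) != 0) |
| * return (true); |
| */ |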
| |
| void bt_init(prof_bt_t *bt, void **vec); |
| void prof_backtrace(prof_bt_t *bt, unsigned nignore); |
| prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); |
| void prof_idump(void); |
| bool prof_mdump(const char *filename); |
| void prof_gdump(void); |
| prof_tdata_t *prof_tdata_init(void); |
| void prof_boot0(void); |
| void prof_boot1(void); |
| bool prof_boot2(void); |
| |
| #endif /* JEMALLOC_H_EXTERNS */ |
| /******************************************************************************/ |
| #ifdef JEMALLOC_H_INLINES |
| |
| #define PROF_ALLOC_PREP(nignore, size, ret) do { \ |
| prof_tdata_t *prof_tdata; \ |
| prof_bt_t bt; \ |
| \ |
| assert(size == s2u(size)); \ |
| \ |
| prof_tdata = PROF_TCACHE_GET(); \ |
| if (prof_tdata == NULL) { \ |
| prof_tdata = prof_tdata_init(); \ |
| if (prof_tdata == NULL) { \ |
| ret = NULL; \ |
| break; \ |
| } \ |
| } \ |
| \ |
| if (opt_prof_active == false) { \ |
| /* Sampling is currently inactive, so avoid sampling. */\ |
| ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ |
| } else if (opt_lg_prof_sample == 0) { \ |
| /* Don't bother with sampling logic, since sampling */\ |
| /* interval is 1. */\ |
| bt_init(&bt, prof_tdata->vec); \ |
| prof_backtrace(&bt, nignore); \ |
| ret = prof_lookup(&bt); \ |
| } else { \ |
| if (prof_tdata->threshold == 0) { \ |
| /* Initialize. Seed the prng differently for */\ |
| /* each thread. */\ |
| prof_tdata->prng_state = \ |
| (uint64_t)(uintptr_t)&size; \ |
| prof_sample_threshold_update(prof_tdata); \ |
| } \ |
| \ |
| /* Determine whether to capture a backtrace based on */\ |
| /* whether size is enough for prof_accum to reach */\ |
| /* prof_tdata->threshold. However, delay updating */\ |
| /* these variables until prof_{m,re}alloc(), because */\ |
| /* we don't know for sure that the allocation will */\ |
| /* succeed. */\ |
| /* */\ |
| /* Use subtraction rather than addition to avoid */\ |
| /* potential integer overflow. */\ |
| if (size >= prof_tdata->threshold - \ |
| prof_tdata->accum) { \ |
| bt_init(&bt, prof_tdata->vec); \ |
| prof_backtrace(&bt, nignore); \ |
| ret = prof_lookup(&bt); \ |
| } else \ |
| ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ |
| } \ |
| } while (0) |
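| |
| /* |
| * Usage sketch (illustrative; modeled on the allocation paths in |
| * jemalloc.c). The caller supplies the number of stack frames to ignore, |
| * the usable size, and an lvalue that receives the sampling decision: NULL |
| * signals an internal OOM (abort the allocation), (void *)1U means "do not |
| * sample", and any other value is the prof_thr_cnt_t to pass to |
| * prof_malloc(). |
| * |
| * void *p; |
| * prof_thr_cnt_t *cnt; |
| * size_t usize = s2u(size); |
| * |
| * PROF_ALLOC_PREP(1, usize, cnt); |
| * if (cnt == NULL) |
| * return (NULL); |
| * p = imalloc(usize); |
| * if (p == NULL) |
| * return (NULL); |
| * prof_malloc(p, usize, cnt); |
| * return (p); |
| */ |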
| |
| #ifndef JEMALLOC_ENABLE_INLINE |
| void prof_sample_threshold_update(prof_tdata_t *prof_tdata); |
| prof_ctx_t *prof_ctx_get(const void *ptr); |
| void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); |
| bool prof_sample_accum_update(size_t size); |
| void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); |
| void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, |
| size_t old_size, prof_ctx_t *old_ctx); |
| void prof_free(const void *ptr, size_t size); |
| #endif |
| |
| #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) |
| JEMALLOC_INLINE void |
| prof_sample_threshold_update(prof_tdata_t *prof_tdata) |
| { |
| uint64_t r; |
| double u; |
| |
| cassert(config_prof); |
| |
| /* |
| * Compute sample threshold as a geometrically distributed random |
| * variable with mean (2^opt_lg_prof_sample). |
| * |
| * prof_tdata->threshold = ceil(log(u) / log(1 - p)), |
| * |
| * where p = 1 / 2^opt_lg_prof_sample and u is drawn uniformly from (0, 1). |
| * |
| * For more information on the math, see: |
| * |
| * Non-Uniform Random Variate Generation |
| * Luc Devroye |
| * Springer-Verlag, New York, 1986 |
| * p. 500 |
| * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) |
| */ |
| prng64(r, 53, prof_tdata->prng_state, |
| UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); |
| u = (double)r * (1.0/9007199254740992.0L); |
| prof_tdata->threshold = (uint64_t)(log(u) / |
| log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) |
| + (uint64_t)1U; |
| } |
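| |
| /* |
| * Worked example (illustrative numbers): with opt_lg_prof_sample == 19, |
| * p = 2^-19, so a draw of u == 0.5 gives |
| * |
| * threshold = ceil(log(0.5) / log(1 - 2^-19)) ~= 363409, |
| * |
| * and averaging over u yields a mean threshold of 1/p = 2^19 = 524288 |
| * bytes, i.e. the configured mean sample interval. |
| */ |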
| |
| JEMALLOC_INLINE prof_ctx_t * |
| prof_ctx_get(const void *ptr) |
| { |
| prof_ctx_t *ret; |
| arena_chunk_t *chunk; |
| |
| cassert(config_prof); |
| assert(ptr != NULL); |
| |
| chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); |
| if (chunk != ptr) { |
| /* Region. */ |
| ret = arena_prof_ctx_get(ptr); |
| } else |
| ret = huge_prof_ctx_get(ptr); |
| |
| return (ret); |
| } |
| |
| JEMALLOC_INLINE void |
| prof_ctx_set(const void *ptr, prof_ctx_t *ctx) |
| { |
| arena_chunk_t *chunk; |
| |
| cassert(config_prof); |
| assert(ptr != NULL); |
| |
| chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); |
| if (chunk != ptr) { |
| /* Region. */ |
| arena_prof_ctx_set(ptr, ctx); |
| } else |
| huge_prof_ctx_set(ptr, ctx); |
| } |
| |
| JEMALLOC_INLINE bool |
| prof_sample_accum_update(size_t size) |
| { |
| prof_tdata_t *prof_tdata; |
| |
| cassert(config_prof); |
| /* Sampling logic is unnecessary if the interval is 1. */ |
| assert(opt_lg_prof_sample != 0); |
| |
| prof_tdata = PROF_TCACHE_GET(); |
| assert(prof_tdata != NULL); |
| |
| /* Take care to avoid integer overflow. */ |
| if (size >= prof_tdata->threshold - prof_tdata->accum) { |
| prof_tdata->accum -= (prof_tdata->threshold - size); |
| /* Compute new sample threshold. */ |
| prof_sample_threshold_update(prof_tdata); |
| while (prof_tdata->accum >= prof_tdata->threshold) { |
| prof_tdata->accum -= prof_tdata->threshold; |
| prof_sample_threshold_update(prof_tdata); |
| } |
| return (false); |
| } else { |
| prof_tdata->accum += size; |
| return (true); |
| } |
| } |
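| |
| /* |
| * Worked example (made-up numbers): with threshold == 1000, accum == 900, |
| * and size == 150, the test 150 >= (1000 - 900) succeeds, so the allocation |
| * is sampled and accum becomes 900 - (1000 - 150) == 50; the 50 bytes that |
| * overshot the threshold carry over toward the next sample. |
| */ |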
| |
| JEMALLOC_INLINE void |
| prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) |
| { |
| |
| cassert(config_prof); |
| assert(ptr != NULL); |
| assert(size == isalloc(ptr)); |
| |
| if (opt_lg_prof_sample != 0) { |
| if (prof_sample_accum_update(size)) { |
| /* |
| * Don't sample. For malloc()-like allocation, it is |
| * always possible to tell in advance how large an |
| * object's usable size will be, so there should never |
| * be a difference between the size passed to |
| * PROF_ALLOC_PREP() and prof_malloc(). |
| */ |
| assert((uintptr_t)cnt == (uintptr_t)1U); |
| } |
| } |
| |
| if ((uintptr_t)cnt > (uintptr_t)1U) { |
| prof_ctx_set(ptr, cnt->ctx); |
| |
| cnt->epoch++; |
| /*********/ |
| mb_write(); |
| /*********/ |
| cnt->cnts.curobjs++; |
| cnt->cnts.curbytes += size; |
| if (opt_prof_accum) { |
| cnt->cnts.accumobjs++; |
| cnt->cnts.accumbytes += size; |
| } |
| /*********/ |
| mb_write(); |
| /*********/ |
| cnt->epoch++; |
| /*********/ |
| mb_write(); |
| /*********/ |
| } else |
| prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); |
| } |
| |
| JEMALLOC_INLINE void |
| prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, |
| size_t old_size, prof_ctx_t *old_ctx) |
| { |
| prof_thr_cnt_t *told_cnt; |
| |
| cassert(config_prof); |
| assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); |
| |
| if (ptr != NULL) { |
| assert(size == isalloc(ptr)); |
| if (opt_lg_prof_sample != 0) { |
| if (prof_sample_accum_update(size)) { |
| /* |
| * Don't sample. The size passed to |
| * PROF_ALLOC_PREP() was larger than what |
| * actually got allocated, so a backtrace was |
| * captured for this allocation, even though |
| * its actual size was insufficient to cross |
| * the sample threshold. |
| */ |
| cnt = (prof_thr_cnt_t *)(uintptr_t)1U; |
| } |
| } |
| } |
| |
| if ((uintptr_t)old_ctx > (uintptr_t)1U) { |
| told_cnt = prof_lookup(old_ctx->bt); |
| if (told_cnt == NULL) { |
| /* |
| * It's too late to propagate OOM for this realloc(), |
| * so operate directly on old_ctx->cnt_merged. |
| */ |
| malloc_mutex_lock(&old_ctx->lock); |
| old_ctx->cnt_merged.curobjs--; |
| old_ctx->cnt_merged.curbytes -= old_size; |
| malloc_mutex_unlock(&old_ctx->lock); |
| told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; |
| } |
| } else |
| told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; |
| |
| if ((uintptr_t)told_cnt > (uintptr_t)1U) |
| told_cnt->epoch++; |
| if ((uintptr_t)cnt > (uintptr_t)1U) { |
| prof_ctx_set(ptr, cnt->ctx); |
| cnt->epoch++; |
| } else |
| prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); |
| /*********/ |
| mb_write(); |
| /*********/ |
| if ((uintptr_t)told_cnt > (uintptr_t)1U) { |
| told_cnt->cnts.curobjs--; |
| told_cnt->cnts.curbytes -= old_size; |
| } |
| if ((uintptr_t)cnt > (uintptr_t)1U) { |
| cnt->cnts.curobjs++; |
| cnt->cnts.curbytes += size; |
| if (opt_prof_accum) { |
| cnt->cnts.accumobjs++; |
| cnt->cnts.accumbytes += size; |
| } |
| } |
| /*********/ |
| mb_write(); |
| /*********/ |
| if ((uintptr_t)told_cnt > (uintptr_t)1U) |
| told_cnt->epoch++; |
| if ((uintptr_t)cnt > (uintptr_t)1U) |
| cnt->epoch++; |
| /*********/ |
| mb_write(); /* Not strictly necessary. */ |
| } |
| |
| JEMALLOC_INLINE void |
| prof_free(const void *ptr, size_t size) |
| { |
| prof_ctx_t *ctx = prof_ctx_get(ptr); |
| |
| cassert(config_prof); |
| |
| if ((uintptr_t)ctx > (uintptr_t)1) { |
| assert(size == isalloc(ptr)); |
| prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); |
| |
| if (tcnt != NULL) { |
| tcnt->epoch++; |
| /*********/ |
| mb_write(); |
| /*********/ |
| tcnt->cnts.curobjs--; |
| tcnt->cnts.curbytes -= size; |
| /*********/ |
| mb_write(); |
| /*********/ |
| tcnt->epoch++; |
| /*********/ |
| mb_write(); |
| /*********/ |
| } else { |
| /* |
| * OOM during free() cannot be propagated, so operate |
| * directly on ctx->cnt_merged. |
| */ |
| malloc_mutex_lock(&ctx->lock); |
| ctx->cnt_merged.curobjs--; |
| ctx->cnt_merged.curbytes -= size; |
| malloc_mutex_unlock(&ctx->lock); |
| } |
| } |
| } |
| #endif |
| |
| #endif /* JEMALLOC_H_INLINES */ |
| /******************************************************************************/ |