| |
| /*--------------------------------------------------------------------*/ |
| /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/ |
| /*--------------------------------------------------------------------*/ |
| |
| /* |
| This file is part of Valgrind, a dynamic binary instrumentation |
| framework. |
| |
| Copyright (C) 2016-2017 Philippe Waroquiers |
| |
| This code generalises the XTree idea that was implemented in |
| the massif tool in Valgrind versions <= 3.12, which is |
| Copyright (C) 2005-2017 Nicholas Nethercote |
| njn@valgrind.org |
| |
| The XTree implementation in this file is however implemented completely |
| differently. Some code has been re-used for the production of the |
| massif file header (e.g. FP_cmd function). |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License as |
| published by the Free Software Foundation; either version 2 of the |
| License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 02111-1307, USA. |
| |
| The GNU General Public License is contained in the file COPYING. |
| */ |
| |
| #include "pub_core_basics.h" |
| #include "pub_core_debuglog.h" |
| #include "pub_core_clientstate.h" |
| #include "pub_core_stacktrace.h" |
| #include "pub_core_execontext.h" |
| #include "pub_core_libcbase.h" |
| #include "pub_core_libcassert.h" |
| #include "pub_core_libcfile.h" |
| #include "pub_core_libcprint.h" |
| #include "pub_core_libcproc.h" |
| #include "pub_core_hashtable.h" |
| #include "pub_core_mallocfree.h" |
| #include "pub_core_options.h" |
| #include "pub_core_debuginfo.h" |
| #include "pub_core_deduppoolalloc.h" |
| #include "pub_core_xtree.h" /* self */ |
| |
| #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \ |
| VG_(dmsg)(__VA_ARGS__) \ |
| : 0) |
| |
| /* Defines the relevant part of an ec. This is shared between an xt |
| and its snapshots (see XT_shared XArray of xec). */ |
| typedef |
| struct _xec { |
| ExeContext* ec; |
| UShort top; // The first ips of ec to take into account. |
| UShort n_ips_sel; // The nr of ips from top to take into account. |
| } |
| xec; |
| |
| /* XT_shared maintains the information shared between an XT and all |
| its snapshots. */ |
| typedef |
| struct _XT_shared { |
| UWord nrRef; /* nr of XTrees referencing this shared memory. */ |
| |
| Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */ |
| const HChar* cc; /* cost centre for alloc */ |
| Free_Fn_t free_fn; /* free fn */ |
| |
| /* The data associated to each ec is stored in 2 arrays: |
| an xec array, shared between an xt and all its snapshots. |
| a data array, private to each XTree. |
| For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in |
| xec and data arrays where the ec information is located (this |
| indirection is used to avoid huge xec and data arrays, in |
| case an XTree contains data only for a small number of ec. |
| The offset in the xec and data array is used as xtree ec unique |
| id i.e. an xecu. */ |
| |
| UInt d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */ |
| UInt* d4ecu2xecu; |
| |
| /* ec information common to an xt and its snapshots. */ |
| XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */ |
| |
| /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1]. |
| See ips_order_cmp. */ |
| XArray* ips_order_xecu; |
| } XT_shared; |
| |
| /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec |
| (with the index ecu/4). */ |
| #define NO_OFFSET 0xffffffff |
| |
| static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn, |
| const HChar* cc, |
| void (*free_fn)(void*)) |
| { |
| XT_shared* shared; |
| |
| vg_assert(alloc_fn); |
| vg_assert(cc); |
| vg_assert(free_fn); |
| shared = alloc_fn(cc, sizeof(*shared)); |
| shared->nrRef = 0; |
| shared->alloc_fn = alloc_fn; |
| shared->cc = cc; |
| shared->free_fn = free_fn; |
| |
| shared->d4ecu2xecu_sz = 0; |
| shared->d4ecu2xecu = NULL; |
| shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec)); |
| shared->ips_order_xecu = NULL; // Allocated when needed. |
| |
| return shared; |
| } |
| |
| static void delete_XT_shared (XT_shared* shared) |
| { |
| vg_assert(shared->nrRef == 0); |
| shared->free_fn(shared->d4ecu2xecu); |
| VG_(deleteXA)(shared->xec); |
| if (shared->ips_order_xecu != NULL) |
| VG_(deleteXA)(shared->ips_order_xecu); |
| shared->free_fn(shared); |
| } |
| |
| /* Compare 2 entries in ips_order_xecu by StackTrace elements. |
| In case stack traces are of different length, an 'absent' ips is |
| considered smaller than any other address. */ |
| static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec |
| static Int ips_order_cmp(const void* vleft, const void* vright) |
| { |
| const Xecu left_xecu = *(const Xecu*)vleft; |
| const Xecu right_xecu = *(const Xecu*)vright; |
| const xec* left = VG_(indexXA)(xec_data_for_sort, left_xecu); |
| const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu); |
| const StackTrace left_ips = VG_(get_ExeContext_StackTrace)(left->ec) |
| + left->top; |
| const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec) |
| + right->top; |
| UInt i; |
| |
| const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel |
| ? left->n_ips_sel : right->n_ips_sel; |
| |
| // First see if we have a difference on the common nr of ips selected |
| for (i = 0; i < c_n_ips_sel; i++) { |
| if (left_ips[i] == right_ips[i]) continue; |
| if (left_ips[i] < right_ips[i]) return -1; |
| return 1; |
| } |
| // Common part is equal => compare lengths. |
| if (left->n_ips_sel < right->n_ips_sel) return -1; |
| if (left->n_ips_sel > right->n_ips_sel) return 1; |
| return 0; |
| } |
| |
| // If needed, build or refresh shared->ips_order_xecu |
| static void ensure_ips_order_xecu_valid(XT_shared* shared) |
| { |
| UInt i; |
| UInt n_xecu; |
| |
| if (shared->ips_order_xecu == NULL) { |
| shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc, |
| shared->free_fn, sizeof(Xecu)); |
| VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec)); |
| VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp); |
| } |
| |
| if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu)) |
| return; |
| |
| n_xecu = VG_(sizeXA)(shared->xec); |
| for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++) |
| VG_(addToXA)(shared->ips_order_xecu, &i); |
| |
| xec_data_for_sort = shared->xec; |
| VG_(sortXA)(shared->ips_order_xecu); |
| } |
| |
| static void addRef_XT_shared (XT_shared* shared) |
| { |
| shared->nrRef++; |
| } |
| |
| static UWord release_XT_shared(XT_shared* shared) |
| { |
| UWord nrRef; |
| |
| vg_assert(shared->nrRef > 0); |
| nrRef = --shared->nrRef; |
| if (nrRef == 0) |
| delete_XT_shared(shared); |
| return nrRef; |
| } |
| |
| |
| struct _XTree { |
| Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */ |
| const HChar* cc; /* cost centre for alloc */ |
| Free_Fn_t free_fn; /* free fn */ |
| Word dataSzB; /* data size in bytes */ |
| XT_init_data_t init_data_fn; |
| XT_add_data_t add_data_fn; |
| XT_sub_data_t sub_data_fn; |
| XT_filter_IPs_t filter_IPs_fn; |
| |
| XT_shared* shared; |
| |
| HChar* tmp_data; /* temporary buffer, to insert new elements. */ |
| XArray* data; /* of elements of size dataSzB */ |
| }; |
| |
| |
| XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn, |
| const HChar* cc, |
| Free_Fn_t free_fn, |
| Word dataSzB, |
| XT_init_data_t init_data_fn, |
| XT_add_data_t add_data_fn, |
| XT_sub_data_t sub_data_fn, |
| XT_filter_IPs_t filter_IPs_fn) |
| { |
| XTree* xt; |
| |
| /* check user-supplied info .. */ |
| vg_assert(alloc_fn); |
| vg_assert(free_fn); |
| vg_assert(dataSzB >= 0); |
| vg_assert(init_data_fn); |
| vg_assert(add_data_fn); |
| vg_assert(sub_data_fn); |
| |
| xt = alloc_fn(cc, sizeof(struct _XTree) ); |
| xt->alloc_fn = alloc_fn; |
| xt->cc = cc; |
| xt->free_fn = free_fn; |
| xt->dataSzB = dataSzB; |
| xt->init_data_fn = init_data_fn; |
| xt->add_data_fn = add_data_fn; |
| xt->sub_data_fn = sub_data_fn; |
| xt->filter_IPs_fn = filter_IPs_fn; |
| |
| xt->shared = new_XT_shared(alloc_fn, cc, free_fn); |
| addRef_XT_shared(xt->shared); |
| xt->tmp_data = alloc_fn(cc, xt->dataSzB); |
| xt->data = VG_(newXA)(alloc_fn, cc, free_fn, dataSzB); |
| |
| return xt; |
| } |
| |
| XTree* VG_(XT_snapshot)(XTree* xt) |
| { |
| XTree* nxt; |
| |
| vg_assert(xt); |
| |
| nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) ); |
| |
| *nxt = *xt; |
| addRef_XT_shared(nxt->shared); |
| nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB); |
| nxt->data = VG_(cloneXA)(nxt->cc, xt->data); |
| |
| return nxt; |
| } |
| |
| void VG_(XT_delete) ( XTree* xt ) |
| { |
| vg_assert(xt); |
| |
| release_XT_shared(xt->shared); |
| xt->free_fn(xt->tmp_data); |
| VG_(deleteXA)(xt->data); |
| xt->free_fn(xt); |
| } |
| |
| static Xecu find_or_insert (XTree* xt, ExeContext* ec) |
| { |
| |
| const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4; |
| XT_shared* shared = xt->shared; |
| |
| /* First grow the d4ecu2xecu array if needed. */ |
| if (d4ecu >= shared->d4ecu2xecu_sz) { |
| UInt old_sz = shared->d4ecu2xecu_sz; |
| UInt new_sz = (3 * d4ecu) / 2; |
| |
| if (new_sz < 1000) |
| new_sz = 1000; |
| shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu, |
| new_sz * sizeof(UInt)); |
| shared->d4ecu2xecu_sz = new_sz; |
| for (UInt i = old_sz; i < new_sz; i++) |
| shared->d4ecu2xecu[i] = NO_OFFSET; |
| } |
| |
| if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) { |
| xec xe; |
| |
| xe.ec = ec; |
| if (xt->filter_IPs_fn == NULL) { |
| xe.top = 0; |
| xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec); |
| } else { |
| UInt top; |
| UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec); |
| xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel, |
| &top, &n_ips_sel); |
| xe.top = (UShort)top; |
| xe.n_ips_sel = (UShort)n_ips_sel; |
| } |
| xt->init_data_fn(xt->tmp_data); |
| VG_(addToXA)(shared->xec, &xe); |
| shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data); |
| } |
| |
| return shared->d4ecu2xecu[d4ecu]; |
| } |
| |
| Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value) |
| { |
| Xecu xecu = find_or_insert(xt, ec); |
| void* data = VG_(indexXA)(xt->data, xecu); |
| |
| xt->add_data_fn(data, value); |
| return xecu; |
| } |
| |
| Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value) |
| { |
| Xecu xecu = find_or_insert(xt, ec); |
| void* data = VG_(indexXA)(xt->data, xecu); |
| |
| xt->sub_data_fn(data, value); |
| return xecu; |
| } |
| |
| void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value) |
| { |
| void* data = VG_(indexXA)(xt->data, xecu); |
| xt->add_data_fn(data, value); |
| } |
| |
| void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value) |
| { |
| void* data = VG_(indexXA)(xt->data, xecu); |
| xt->sub_data_fn(data, value); |
| } |
| |
| UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu) |
| { |
| xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu); |
| return (UInt)xe->n_ips_sel; |
| } |
| |
| ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu) |
| { |
| xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu); |
| return xe->ec; |
| } |
| |
| static VgFile* xt_open (const HChar* outfilename) |
| { |
| VgFile* fp; |
| |
| fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC, |
| VKI_S_IRUSR|VKI_S_IWUSR); |
| if (fp == NULL) { |
| VG_(message)(Vg_UserMsg, |
| "Error: can not open xtree output file `%s'\n", |
| outfilename); |
| } |
| return fp; |
| } |
| |
| #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); }) |
| |
| // Print "cmd:" line. |
| static void FP_cmd(VgFile* fp) |
| { |
| UInt i; |
| |
| FP("cmd: "); |
| FP("%s", VG_(args_the_exename)); |
| for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) { |
| HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i); |
| FP(" %s", arg); |
| } |
| FP("\n"); |
| } |
| |
| /* ----------- Callgrind output ------------------------------------------- */ |
| |
| /* Output a callgrind format element in compressed format: |
| "name=(pos)" or "name=(pos) value" (if value_new) |
| or not compressed format: "name=value" |
| VG_(clo_xtree_compress_strings) indicates if the compressed format is used. |
| name is the format element (e.g. fl, fn, cfi, cfn, ...). |
| pos is the value dictionary position, used for compressed format. |
| value_new is True if this is the first usage of value. */ |
| static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos, |
| const HChar* value, Bool value_new) |
| { |
| if (!VG_(clo_xtree_compress_strings)) |
| FP("%s=%s\n", name, value); |
| else if (value_new) |
| FP("%s=(%d) %s\n", name, pos, value); |
| else |
| FP("%s=(%d)\n", name, pos); |
| } |
| |
| void VG_(XT_callgrind_print) |
| (XTree* xt, |
| const HChar* outfilename, |
| const HChar* events, |
| const HChar* (*img_value)(const void* value)) |
| { |
| UInt n_xecu; |
| XT_shared* shared = xt->shared; |
| VgFile* fp = xt_open(outfilename); |
| DedupPoolAlloc* fnname_ddpa; |
| DedupPoolAlloc* filename_ddpa; |
| HChar* filename_buf = NULL; |
| UInt filename_buf_size = 0; |
| const HChar* filename_dir; |
| const HChar* filename_name; |
| |
| if (fp == NULL) |
| return; |
| |
| fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn, |
| "XT_callgrind_print.fn", xt->free_fn); |
| filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn, |
| "XT_callgrind_print.fl", xt->free_fn); |
| |
| FP("# callgrind format\n"); |
| FP("version: 1\n"); |
| FP("creator: xtree-1\n"); |
| FP("pid: %d\n", VG_(getpid)()); |
| FP_cmd(fp); |
| |
| /* Currently, we only need/support line positions. */ |
| FP("\npositions:%s\n", " line"); |
| |
| /* Produce one "event:" line for each event, and the "events:" line. */ |
| { |
| HChar strtok_events[VG_(strlen)(events)+1]; |
| HChar* e; |
| HChar* ssaveptr; |
| HChar* p; |
| |
| VG_(strcpy)(strtok_events, events); |
| for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr); |
| e != NULL; |
| e = VG_(strtok_r)(NULL, ",", &ssaveptr)) |
| FP("event: %s\n", e); |
| FP("events:"); |
| VG_(strcpy)(strtok_events, events); |
| for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr); |
| e != NULL; |
| e = VG_(strtok_r)(NULL, ",", &ssaveptr)) { |
| p = e; |
| while (*p) { |
| if (*p == ':') |
| *p = 0; |
| p++; |
| } |
| FP(" %s", e); |
| } |
| FP("\n"); |
| } |
| xt->init_data_fn(xt->tmp_data); // to compute totals |
| |
| n_xecu = VG_(sizeXA)(xt->data); |
| vg_assert (n_xecu <= VG_(sizeXA)(shared->xec)); |
| for (Xecu xecu = 0; xecu < n_xecu; xecu++) { |
| xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu); |
| if (xe->n_ips_sel == 0) |
| continue; |
| |
| const HChar* img = img_value(VG_(indexXA)(xt->data, xecu)); |
| |
| // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n] |
| // in the variables called_filename/called_linenum/called_fnname. |
| // The booleans called_filename_new/called_fnname_new are set to True |
| // the first time the called_filename/called_fnname are encountered. |
| // The called_filename_nr/called_fnname_nr are numbers identifying |
| // the strings called_filename/called_fnname. |
| #define CALLED_FLF(n) \ |
| if ((n) < 0 \ |
| || !VG_(get_filename_linenum)(ips[(n)], \ |
| &filename_name, \ |
| &filename_dir, \ |
| &called_linenum)) { \ |
| filename_name = "UnknownFile???"; \ |
| called_linenum = 0; \ |
| } \ |
| if ((n) < 0 \ |
| || !VG_(get_fnname)(ips[(n)], &called_fnname)) { \ |
| called_fnname = "UnknownFn???"; \ |
| } \ |
| { \ |
| UInt needed_size = VG_(strlen)(filename_dir) + 1 \ |
| + VG_(strlen)(filename_name) + 1; \ |
| if (filename_buf_size < needed_size) { \ |
| filename_buf_size = needed_size; \ |
| filename_buf = VG_(realloc)(xt->cc, filename_buf, \ |
| filename_buf_size); \ |
| } \ |
| } \ |
| VG_(strcpy)(filename_buf, filename_dir); \ |
| if (filename_buf[0] != '\0') { \ |
| VG_(strcat)(filename_buf, "/"); \ |
| } \ |
| VG_(strcat)(filename_buf, filename_name); \ |
| called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa, \ |
| filename_buf, \ |
| &called_filename_new); \ |
| called_filename = filename_buf; \ |
| called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa, \ |
| called_fnname, \ |
| &called_fnname_new); |
| |
| /* Instead of unknown fnname ???, CALLED_FLF could use instead: |
| VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]); |
| but that creates a lot of (useless) nodes at least for |
| valgrind self-hosting. */ |
| |
| if (img) { |
| const HChar* called_filename; |
| UInt called_filename_nr; |
| Bool called_filename_new; // True the first time we see this filename. |
| const HChar* called_fnname; |
| UInt called_fnname_nr; |
| Bool called_fnname_new; // True the first time we see this fnname. |
| UInt called_linenum; |
| UInt prev_linenum; |
| |
| const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top; |
| Int ips_idx = xe->n_ips_sel - 1; |
| |
| if (0) { |
| VG_(printf)("entry img %s\n", img); |
| VG_(pp_ExeContext)(xe->ec); |
| VG_(printf)("\n"); |
| } |
| xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu)); |
| CALLED_FLF(ips_idx); |
| for (; |
| ips_idx >= 0; |
| ips_idx--) { |
| FP_pos_str(fp, "fl", called_filename_nr, |
| called_filename, called_filename_new); |
| FP_pos_str(fp, "fn", called_fnname_nr, |
| called_fnname, called_fnname_new); |
| if (ips_idx == 0) |
| FP("%d %s\n", called_linenum, img); |
| else |
| FP("%d\n", called_linenum); //no self cost. |
| prev_linenum = called_linenum; |
| if (ips_idx >= 1) { |
| CALLED_FLF(ips_idx-1); |
| FP_pos_str(fp, "cfi", called_filename_nr, |
| called_filename, called_filename_new); |
| FP_pos_str(fp, "cfn", called_fnname_nr, |
| called_fnname, called_fnname_new); |
| called_filename_new = False; |
| called_fnname_new = False; |
| /* Giving a call count of 0 allows kcachegrind to hide the calls |
| column. A call count of 1 means kcachegrind would show in the |
| calls column the nr of stacktrace containing this arc, which |
| is very confusing. So, the less bad is to give a 0 call |
| count. */ |
| FP("calls=0 %d\n", called_linenum); |
| FP("%d %s\n", prev_linenum, img); |
| } |
| } |
| FP("\n"); |
| } |
| } |
| /* callgrind format is not really fully supporting (yet?) execution trees: |
| in an execution tree, self and inclusive costs are identical, and |
| cannot be added together. |
| If no totals: line is given, callgrind_annotate calculates the addition |
| of all costs, and so gives a wrong totals. |
| Giving a totals: line solves this, but the user must give the option |
| --inclusive=yes (kind of hack) to have all the functions given |
| in the output file. */ |
| FP("totals: %s\n", img_value(xt->tmp_data)); |
| VG_(fclose)(fp); |
| VG_(deleteDedupPA)(fnname_ddpa); |
| VG_(deleteDedupPA)(filename_ddpa); |
| VG_(free)(filename_buf); |
| } |
| |
| |
| /* ----------- Massif output ---------------------------------------------- */ |
| |
| /* For Massif output, some functions from the execontext are not output, a.o. |
| the allocation functions at the top of the stack and the functions below |
| main. So, the StackTrace of the execontexts in the xtree must be filtered. |
| Ms_Ec defines the subset of the stacktrace relevant for the report. */ |
| typedef |
| struct { |
| StackTrace ips; // ips and n_ips provides the subset of the xtree ec |
| UInt n_ips; // to use for a massif report. |
| |
| SizeT report_value; // The value to report for this stack trace. |
| } Ms_Ec; |
| |
| /* Ms_Group defines (at a certain depth) a group of ec context that |
| have the same IPs at the given depth, and have the same 'parent'. |
| total is the sum of the values of all group elements. |
| A Ms_Group can also represent a set of ec contexts that do not |
| have the same IP, but that have each a total which is below the |
| significant size. Such a group has a NULL ms_ec, a zero group_io. |
| n_ec is the nr of insignificant ec that have been collected inside this |
| insignificant group, and total is the sum of all non significant ec |
| at the given depth. */ |
| typedef |
| struct { |
| Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1] |
| Addr group_ip; |
| UInt n_ec; |
| SizeT total; |
| } Ms_Group; |
| |
| /* Compare 2 groups by total, to have bigger total first. */ |
| static Int ms_group_revcmp_total(const void* vleft, const void* vright) |
| { |
| const Ms_Group* left = (const Ms_Group*)vleft; |
| const Ms_Group* right = (const Ms_Group*)vright; |
| |
| // First reverse compare total |
| if (left->total > right->total) return -1; |
| if (left->total < right->total) return 1; |
| |
| /* Equal size => compare IPs. |
| This (somewhat?) helps to have deterministic test results. |
| If this would change between platforms, then we should compare |
| function names/filename/linenr */ |
| if (left->group_ip < right->group_ip) return -1; |
| if (left->group_ip > right->group_ip) return 1; |
| return 0; |
| } |
| |
| /* Scan the addresses in ms_ec at the given depth. |
| On return, |
| *groups points to an array of Ms_Group sorted by total. |
| *n_groups is the nr of groups |
| The caller is responsible to free the allocated group array. */ |
| static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz, |
| UInt* n_groups, Ms_Group** groups) |
| { |
| UInt i, g; |
| Addr cur_group_ip = 0; |
| |
| *n_groups = 0; |
| |
| /* Handle special case somewhat more efficiently */ |
| if (n_ec == 0) { |
| *groups = NULL; |
| return; |
| } |
| |
| /* Compute how many groups we have. */ |
| for (i = 0; i < n_ec; i++) { |
| if (ms_ec[i].n_ips > depth |
| && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) { |
| (*n_groups)++; |
| cur_group_ip = ms_ec[i].ips[depth]; |
| } |
| } |
| |
| /* make the group array. */ |
| *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group)); |
| i = 0; |
| for (g = 0; g < *n_groups; g++) { |
| while (ms_ec[i].n_ips <= depth) |
| i++; |
| cur_group_ip = ms_ec[i].ips[depth]; |
| (*groups)[g].group_ip = cur_group_ip; |
| (*groups)[g].ms_ec = &ms_ec[i]; |
| (*groups)[g].n_ec = 1; |
| (*groups)[g].total = ms_ec[i].report_value; |
| i++; |
| while (i < n_ec |
| && ms_ec[i].n_ips > depth |
| && cur_group_ip == ms_ec[i].ips[depth]) { |
| (*groups)[g].total += ms_ec[i].report_value; |
| i++; |
| (*groups)[g].n_ec++; |
| } |
| } |
| |
| /* Search for insignificant groups, collect them all together |
| in the first insignificant group, and compact the group array. */ |
| { |
| UInt insig1; // Position of first insignificant group. |
| UInt n_insig = 0; // Nr of insignificant groups found. |
| |
| for (g = 0; g < *n_groups; g++) { |
| if ((*groups)[g].total < sig_sz) { |
| if (n_insig == 0) { |
| // First insig group => transform it into the special group |
| (*groups)[g].ms_ec = NULL; |
| (*groups)[g].group_ip = 0; |
| (*groups)[g].n_ec = 0; |
| // start the sum of insig total as total |
| insig1 = g; |
| } else { |
| // Add this insig group total into insig1 first group |
| (*groups)[insig1].total += (*groups)[g].total; |
| } |
| n_insig++; |
| } else { |
| if (n_insig > 1) |
| (*groups)[g - n_insig + 1] = (*groups)[g]; |
| } |
| } |
| if (n_insig > 0) { |
| (*groups)[insig1].n_ec = n_insig; |
| *n_groups -= n_insig - 1; |
| } |
| DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig); |
| } |
| |
| /* Sort on total size, bigger size first. */ |
| VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total); |
| } |
| |
| static void ms_output_group (VgFile* fp, UInt depth, Ms_Group* group, |
| SizeT sig_sz, double sig_pct_threshold) |
| { |
| UInt i; |
| Ms_Group* groups; |
| UInt n_groups; |
| |
| // If this is an insignificant group, handle it specially |
| if (group->ms_ec == NULL) { |
| const HChar* s = ( 1 == group->n_ec? "," : "s, all" ); |
| vg_assert(group->group_ip == 0); |
| FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n", |
| depth+1, "", group->total, group->n_ec, s, sig_pct_threshold); |
| return; |
| } |
| |
| // Normal group => output the group and its subgroups. |
| ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz, |
| &n_groups, &groups); |
| |
| FP("%*s" "n%u: %ld %s\n", |
| depth + 1, "", |
| n_groups, |
| group->total, |
| VG_(describe_IP)(group->ms_ec->ips[depth] - 1, NULL)); |
| /* XTREE??? Massif original code removes 1 to get the IP description. I am |
| wondering if this is not something that predates revision r8818, |
| which introduced a -1 in the stack unwind (see m_stacktrace.c) |
| Kept for the moment to allow exact comparison with massif output, but |
| probably we should remove this, as we very probably end up 2 bytes before |
| the RA Return Address. */ |
| |
| /* Output sub groups of this group. */ |
| for (i = 0; i < n_groups; i++) |
| ms_output_group(fp, depth+1, &groups[i], sig_sz, sig_pct_threshold); |
| |
| VG_(free)(groups); |
| } |
| |
| /* Allocate and build an array of Ms_Ec sorted by addresses in the |
| Ms_Ec StackTrace. */ |
| static void prepare_ms_ec (XTree* xt, |
| ULong (*report_value)(const void* value), |
| ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec) |
| { |
| XT_shared* shared = xt->shared; |
| const UInt n_xecu = VG_(sizeXA)(shared->xec); |
| const UInt n_data_xecu = VG_(sizeXA)(xt->data); |
| Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec)); |
| UInt n_xecu_sel = 0; // Nr of xecu that are selected for output. |
| |
| vg_assert(n_data_xecu <= n_xecu); |
| |
| // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace. |
| ensure_ips_order_xecu_valid(shared); |
| |
| *top_total = 0; |
| DMSG(1, "iteration %u\n", n_xecu); |
| for (UInt i = 0; i < n_xecu; i++) { |
| Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i); |
| xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu); |
| |
| if (xecu >= n_data_xecu) |
| continue; // No data for this xecu in xt->data. |
| ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel; |
| if (ms_ec[n_xecu_sel].n_ips == 0) |
| continue; |
| |
| ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top; |
| ms_ec[n_xecu_sel].report_value |
| = (*report_value)(VG_(indexXA)(xt->data, xecu)); |
| *top_total += ms_ec[n_xecu_sel].report_value; |
| |
| n_xecu_sel++; |
| } |
| vg_assert(n_xecu_sel <= n_xecu); |
| |
| *vms_ec = ms_ec; |
| *vn_ec = n_xecu_sel; |
| } |
| |
| MsFile* VG_(XT_massif_open) |
| (const HChar* outfilename, |
| const HChar* desc, |
| const XArray* desc_args, |
| const HChar* time_unit) |
| { |
| UInt i; |
| VgFile* fp = xt_open(outfilename); |
| |
| if (fp == NULL) |
| return NULL; // xt_open reported the error. |
| |
| /* ------ file header ------------------------------- */ |
| FP("desc:"); |
| if (desc) |
| FP(" %s", desc); |
| i = 0; |
| if (desc_args) { |
| for (i = 0; i < VG_(sizeXA)(desc_args); i++) { |
| HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i); |
| FP(" %s", arg); |
| } |
| } |
| if (0 == i && desc == NULL) FP(" (none)"); |
| FP("\n"); |
| |
| FP_cmd(fp); |
| |
| FP("time_unit: %s\n", time_unit); |
| |
| return fp; |
| } |
| |
| void VG_(XT_massif_close)(MsFile* fp) |
| { |
| if (fp == NULL) |
| return; // Error should have been reported by VG_(XT_massif_open) |
| |
| VG_(fclose)(fp); |
| } |
| |
| void VG_(XT_massif_print) |
| (MsFile* fp, |
| XTree* xt, |
| const Massif_Header* header, |
| ULong (*report_value)(const void* value)) |
| { |
| UInt i; |
| |
| if (fp == NULL) |
| return; // Normally VG_(XT_massif_open) already reported an error. |
| |
| /* Compute/prepare Snapshot totals/data/... */ |
| ULong top_total; |
| |
| /* Following variables only used for detailed snapshot. */ |
| UInt n_ec = 0; |
| Ms_Ec* ms_ec = NULL; |
| const HChar* kind = |
| header->detailed ? (header->peak ? "peak" : "detailed") : "empty"; |
| |
| DMSG(1, "XT_massif_print %s\n", kind); |
| if (header->detailed) { |
| /* Prepare the Ms_Ec sorted array of stacktraces and the groups |
| at level 0. */ |
| prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec); |
| DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec); |
| } else if (xt == NULL) { |
| /* Non detailed, no xt => use the sz provided in the header. */ |
| top_total = header->sz_B; |
| } else { |
| /* For non detailed snapshot, compute total directly from the xec. */ |
| const XT_shared* shared = xt->shared; |
| const UInt n_xecu = VG_(sizeXA)(xt->data); |
| top_total = 0; |
| |
| for (UInt xecu = 0; xecu < n_xecu; xecu++) { |
| xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu); |
| if (xe->n_ips_sel == 0) |
| continue; |
| top_total += (*report_value)(VG_(indexXA)(xt->data, xecu)); |
| } |
| } |
| |
| /* ------ snapshot header --------------------------- */ |
| FP("#-----------\n"); |
| FP("snapshot=%d\n", header->snapshot_n); |
| FP("#-----------\n"); |
| FP("time=%lld\n", header->time); |
| |
| FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B |
| FP("mem_heap_extra_B=%llu\n", header->extra_B); |
| FP("mem_stacks_B=%llu\n", header->stacks_B); |
| FP("heap_tree=%s\n", kind); |
| |
| /* ------ detailed snapshot data ----------------------------- */ |
| if (header->detailed) { |
| UInt n_groups; |
| Ms_Group* groups; |
| |
| ULong sig_sz; |
| // Work out how big a child must be to be significant. If the current |
| // top_total is zero, then we set it to 1, which means everything will be |
| // judged insignificant -- this is sensible, as there's no point showing |
| // any detail for this case. Unless they used threshold=0, in which |
| // case we show them everything because that's what they asked for. |
| // |
| // Nb: We do this once now, rather than once per child, because if we do |
| // that the cost of all the divisions adds up to something significant. |
| if (0 == top_total && 0 != header->sig_threshold) |
| sig_sz = 1; |
| else |
| sig_sz = ((top_total + header->extra_B + header->stacks_B) |
| * header->sig_threshold) / 100; |
| |
| /* Produce the groups at depth 0 */ |
| DMSG(1, "XT_massif_print producing depth 0 groups\n"); |
| ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups); |
| |
| /* Output the top node. */ |
| FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc); |
| |
| /* Output depth 0 groups. */ |
| DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups); |
| for (i = 0; i < n_groups; i++) |
| ms_output_group(fp, 0, &groups[i], sig_sz, header->sig_threshold); |
| |
| VG_(free)(groups); |
| VG_(free)(ms_ec); |
| } |
| } |
| |
| Int VG_(XT_offset_main_or_below_main)(Addr* ips, Int n_ips) |
| { |
| /* Search for main or below main function. |
| To limit the nr of ips to examine, we maintain the deepest |
| offset where main was found, and we first search main |
| from there. |
| If no main is found, we will then do a search for main or |
| below main function till the top. */ |
| static Int deepest_main = 0; |
| Vg_FnNameKind kind = Vg_FnNameNormal; |
| Int mbm = n_ips - 1; // Position of deepest main or below main. |
| Vg_FnNameKind mbmkind = Vg_FnNameNormal; |
| Int i; |
| |
| for (i = n_ips - 1 - deepest_main; |
| i < n_ips; |
| i++) { |
| mbmkind = VG_(get_fnname_kind_from_IP)(ips[i]); |
| if (mbmkind != Vg_FnNameNormal) { |
| mbm = i; |
| break; |
| } |
| } |
| |
| /* Search for main or below main function till top. */ |
| for (i = mbm - 1; |
| i >= 0 && mbmkind != Vg_FnNameMain; |
| i--) { |
| kind = VG_(get_fnname_kind_from_IP)(ips[i]); |
| if (kind != Vg_FnNameNormal) { |
| mbm = i; |
| mbmkind = kind; |
| } |
| } |
| if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) { |
| if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main) |
| deepest_main = n_ips - 1 - mbm; |
| return mbm; |
| } else |
| return n_ips-1; |
| } |
| |
| void VG_(XT_filter_1top_and_maybe_below_main) |
| (Addr* ips, Int n_ips, |
| UInt* top, UInt* n_ips_sel) |
| { |
| Int mbm; |
| |
| *n_ips_sel = n_ips; |
| if (n_ips == 0) { |
| *top = 0; |
| return; |
| } |
| |
| /* Filter top function. */ |
| *top = 1; |
| |
| if (VG_(clo_show_below_main)) |
| mbm = n_ips - 1; |
| else |
| mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips); |
| |
| *n_ips_sel = mbm - *top + 1; |
| } |
| |
| void VG_(XT_filter_maybe_below_main) |
| (Addr* ips, Int n_ips, |
| UInt* top, UInt* n_ips_sel) |
| { |
| Int mbm; |
| |
| *n_ips_sel = n_ips; |
| *top = 0; |
| if (n_ips == 0) |
| return; |
| |
| if (VG_(clo_show_below_main)) |
| mbm = n_ips - 1; |
| else |
| mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips); |
| |
| *n_ips_sel = mbm - *top + 1; |
| } |
| |
| /*--------------------------------------------------------------------*/ |
| /*--- end m_xtree.c ---*/ |
| /*--------------------------------------------------------------------*/ |