Callgrind merge: code
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5780 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/callgrind/Makefile.am b/callgrind/Makefile.am
new file mode 100644
index 0000000..0d8cf57
--- /dev/null
+++ b/callgrind/Makefile.am
@@ -0,0 +1,59 @@
+include $(top_srcdir)/Makefile.tool.am
+
+bin_SCRIPTS = callgrind_annotate callgrind_control
+
+noinst_HEADERS = global.h costs.h events.h
+
+noinst_PROGRAMS =
+if VG_X86_LINUX
+noinst_PROGRAMS += callgrind-x86-linux
+endif
+if VG_AMD64_LINUX
+noinst_PROGRAMS += callgrind-amd64-linux
+endif
+if VG_PPC32_LINUX
+noinst_PROGRAMS += callgrind-ppc32-linux
+endif
+if VG_PPC64_LINUX
+noinst_PROGRAMS += callgrind-ppc64-linux
+endif
+
+CALLGRIND_SOURCES_COMMON = main.c events.c bb.c clo.c \
+ costs.c bbcc.c command.c debug.c fn.c \
+ sim.c callstack.c context.c dump.c jumps.c \
+ threads.c
+
+CALLGRIND_SOURCES_X86 = ../cachegrind/cg-x86.c
+CALLGRIND_SOURCES_AMD64 = ../cachegrind/cg-amd64.c
+CALLGRIND_SOURCES_PPC32 = ../cachegrind/cg-ppc32.c
+CALLGRIND_SOURCES_PPC64 = ../cachegrind/cg-ppc64.c
+
+CALLGRIND_CFLAGS_COMMON = -I../cachegrind
+
+callgrind_x86_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_X86)
+callgrind_x86_linux_CPPFLAGS = $(AM_CPPFLAGS_X86_LINUX)
+callgrind_x86_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_X86_LINUX)
+callgrind_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX)
+callgrind_x86_linux_LDADD = $(TOOL_LDADD_X86_LINUX)
+callgrind_x86_linux_LDFLAGS = $(TOOL_LDFLAGS_X86_LINUX)
+
+callgrind_amd64_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_AMD64)
+callgrind_amd64_linux_CPPFLAGS = $(AM_CPPFLAGS_AMD64_LINUX)
+callgrind_amd64_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_AMD64_LINUX)
+callgrind_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX)
+callgrind_amd64_linux_LDADD = $(TOOL_LDADD_AMD64_LINUX)
+callgrind_amd64_linux_LDFLAGS = $(TOOL_LDFLAGS_AMD64_LINUX)
+
+callgrind_ppc32_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_PPC32)
+callgrind_ppc32_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC32_LINUX)
+callgrind_ppc32_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_PPC32_LINUX)
+callgrind_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX)
+callgrind_ppc32_linux_LDADD = $(TOOL_LDADD_PPC32_LINUX)
+callgrind_ppc32_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC32_LINUX)
+
+callgrind_ppc64_linux_SOURCES = $(CALLGRIND_SOURCES_COMMON) $(CALLGRIND_SOURCES_PPC64)
+callgrind_ppc64_linux_CPPFLAGS = $(AM_CPPFLAGS_PPC64_LINUX)
+callgrind_ppc64_linux_CFLAGS = $(CALLGRIND_CFLAGS_COMMON) $(AM_CFLAGS_PPC64_LINUX)
+callgrind_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX)
+callgrind_ppc64_linux_LDADD = $(TOOL_LDADD_PPC64_LINUX)
+callgrind_ppc64_linux_LDFLAGS = $(TOOL_LDFLAGS_PPC64_LINUX)
\ No newline at end of file
diff --git a/callgrind/bb.c b/callgrind/bb.c
new file mode 100644
index 0000000..a6c8eba
--- /dev/null
+++ b/callgrind/bb.c
@@ -0,0 +1,338 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- bb.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+/*------------------------------------------------------------*/
+/*--- Basic block (BB) operations ---*/
+/*------------------------------------------------------------*/
+
+/* BB hash, resizable */
+bb_hash bbs;
+
+void CLG_(init_bb_hash)()
+{
+ Int i;
+
+ bbs.size = 8437;
+ bbs.entries = 0;
+ bbs.table = (BB**) CLG_MALLOC(bbs.size * sizeof(BB*));
+
+ for (i = 0; i < bbs.size; i++) bbs.table[i] = NULL;
+}
+
+bb_hash* CLG_(get_bb_hash)()
+{
+ return &bbs;
+}
+
+/* The hash stores BBs according to
+ * - ELF object (is 0 for code in anonymous mapping)
+ * - BB base as object file offset
+ */
+static __inline__
+UInt bb_hash_idx(obj_node* obj, OffT offset, UInt size)
+{
+ return (((Addr)obj) + offset) % size;
+}
+
+/* double size of bb table */
+static
+void resize_bb_table(void)
+{
+ Int i, new_size, conflicts1 = 0, conflicts2 = 0;
+ BB **new_table, *curr, *next;
+ UInt new_idx;
+
+ new_size = 2* bbs.size +3;
+ new_table = (BB**) CLG_MALLOC(new_size * sizeof(BB*));
+
+ if (!new_table) return;
+
+ for (i = 0; i < new_size; i++)
+ new_table[i] = NULL;
+
+ for (i = 0; i < bbs.size; i++) {
+ if (bbs.table[i] == NULL) continue;
+
+ curr = bbs.table[i];
+ while (NULL != curr) {
+ next = curr->next;
+
+ new_idx = bb_hash_idx(curr->obj, curr->offset, new_size);
+
+ curr->next = new_table[new_idx];
+ new_table[new_idx] = curr;
+ if (curr->next) {
+ conflicts1++;
+ if (curr->next->next)
+ conflicts2++;
+ }
+
+ curr = next;
+ }
+ }
+
+ VG_(free)(bbs.table);
+
+
+ CLG_DEBUG(0, "Resize BB Hash: %d => %d (entries %d, conflicts %d/%d)\n",
+ bbs.size, new_size,
+ bbs.entries, conflicts1, conflicts2);
+
+ bbs.size = new_size;
+ bbs.table = new_table;
+ CLG_(stat).bb_hash_resizes++;
+}
+
+
+/**
+ * Allocate new BB structure (including space for event type list)
+ * Not initialized:
+ * - instr_len, cost_count, instr[]
+ */
+static BB* new_bb(obj_node* obj, OffT offset,
+ UInt instr_count, UInt cjmp_count, Bool cjmp_inverted)
+{
+ BB* new;
+ UInt new_idx;
+
+ /* check fill degree of bb hash table and resize if needed (>80%) */
+ bbs.entries++;
+ if (10 * bbs.entries / bbs.size > 8)
+ resize_bb_table();
+
+ new = (BB*) CLG_MALLOC(sizeof(BB) +
+ instr_count * sizeof(InstrInfo) +
+ (cjmp_count+1) * sizeof(CJmpInfo));
+
+ new->obj = obj;
+ new->offset = offset;
+
+ new->instr_count = instr_count;
+ new->cjmp_count = cjmp_count;
+ new->cjmp_inverted = cjmp_inverted;
+ new->jmp = (CJmpInfo*) &(new->instr[instr_count]);
+ new->instr_len = 0;
+ new->cost_count = 0;
+ new->sect_kind = VG_(seginfo_sect_kind)(offset + obj->offset);
+ new->fn = 0;
+ new->line = 0;
+ new->is_entry = 0;
+ new->bbcc_list = 0;
+ new->last_bbcc = 0;
+
+ /* insert into BB hash table */
+ new_idx = bb_hash_idx(obj, offset, bbs.size);
+ new->next = bbs.table[new_idx];
+ bbs.table[new_idx] = new;
+
+ CLG_(stat).distinct_bbs++;
+
+#if CLG_ENABLE_DEBUG
+ CLG_DEBUGIF(3) {
+ VG_(printf)(" new_bb (instr %d, jmps %d, inv %s) [now %d]: ",
+ instr_count, cjmp_count,
+ cjmp_inverted ? "yes":"no",
+ CLG_(stat).distinct_bbs);
+ CLG_(print_bb)(0, new);
+ VG_(printf)("\n");
+ }
+#endif
+
+ CLG_(get_fn_node)(new);
+
+ return new;
+}
+
+
+/* get the BB structure for a BB start address */
+static __inline__
+BB* lookup_bb(obj_node* obj, OffT offset)
+{
+ BB* bb;
+ Int idx;
+
+ idx = bb_hash_idx(obj, offset, bbs.size);
+ bb = bbs.table[idx];
+
+ while(bb) {
+ if ((bb->obj == obj) && (bb->offset == offset)) break;
+ bb = bb->next;
+ }
+
+ CLG_DEBUG(5, " lookup_bb (Obj %s, off %p): %p\n",
+ obj->name, offset, bb);
+ return bb;
+}
+
+static __inline__
+obj_node* obj_of_address(Addr addr)
+{
+ obj_node* obj;
+ SegInfo* si;
+ OffT offset;
+
+ si = VG_(find_seginfo)(addr);
+ obj = CLG_(get_obj_node)( si );
+
+ /* Update symbol offset in object if remapped */
+ offset = si ? VG_(seginfo_sym_offset)(si):0;
+ if (obj->offset != offset) {
+ Addr start = si ? VG_(seginfo_start)(si) : 0;
+
+ CLG_DEBUG(0, "Mapping changed for '%s': %p -> %p\n",
+ obj->name, obj->start, start);
+
+ /* Size should be the same, and offset diff == start diff */
+ CLG_ASSERT( obj->size == (si ? VG_(seginfo_size)(si) : 0) );
+ CLG_ASSERT( obj->start - start == obj->offset - offset );
+ obj->offset = offset;
+ obj->start = start;
+ }
+
+ return obj;
+}
+
+/* Get the BB structure for a BB start address.
+ * If the BB has to be created, the IRBB is needed to
+ * compute the event type list for costs, and seen_before is
+ * set to False. Otherwise, seen_before is set to True.
+ *
+ * BBs are never discarded. There are 2 cases where this function
+ * is called from CLG_(instrument)() and a BB already exists:
+ * - The instrumented version was removed from Valgrinds TT cache
+ * - The ELF object of the BB was unmapped and mapped again.
+ * This involves a possibly different address, but is handled by
+ * looking up a BB keyed by (obj_node, file offset).
+ *
+ * bbIn==0 is possible for artifical BB without real code.
+ * Such a BB is created when returning to an unknown function.
+ */
+BB* CLG_(get_bb)(Addr addr, IRBB* bbIn, /*OUT*/ Bool *seen_before)
+{
+ BB* bb;
+ obj_node* obj;
+ UInt n_instrs, n_jmps;
+ Bool cjmp_inverted = False;
+
+ CLG_DEBUG(5, "+ get_bb(BB %p)\n", addr);
+
+ obj = obj_of_address(addr);
+ bb = lookup_bb(obj, addr - obj->offset);
+
+ n_instrs = 0;
+ n_jmps = 0;
+ CLG_(collectBlockInfo)(bbIn, &n_instrs, &n_jmps, &cjmp_inverted);
+
+ *seen_before = bb ? True : False;
+ if (*seen_before) {
+ if (bb->instr_count != n_instrs) {
+ VG_(message)(Vg_DebugMsg,
+ "ERROR: BB Retranslation Mismatch at BB %p", addr);
+ VG_(message)(Vg_DebugMsg,
+ " new: Obj %s, Off %p, BBOff %p, Instrs %u",
+ obj->name, obj->offset,
+ addr - obj->offset, n_instrs);
+ VG_(message)(Vg_DebugMsg,
+ " old: Obj %s, Off %p, BBOff %p, Instrs %u",
+ bb->obj->name, bb->obj->offset,
+ bb->offset, bb->instr_count);
+ CLG_ASSERT(bb->instr_count == n_instrs );
+ }
+ CLG_ASSERT(bb->cjmp_count == n_jmps );
+ CLG_(stat).bb_retranslations++;
+
+ CLG_DEBUG(5, "- get_bb(BB %p): seen before.\n", addr);
+ return bb;
+ }
+
+ bb = new_bb(obj, addr - obj->offset, n_instrs, n_jmps, cjmp_inverted);
+
+ CLG_DEBUG(5, "- get_bb(BB %p)\n", addr);
+
+ return bb;
+}
+
+/* Delete the BB info for the bb with unredirected entry-point
+ address 'addr'. */
+void CLG_(delete_bb)(Addr addr)
+{
+ BB *bb, *bp;
+ Int idx, size;
+
+ obj_node* obj = obj_of_address(addr);
+ OffT offset = addr - obj->offset;
+
+ idx = bb_hash_idx(obj, offset, bbs.size);
+ bb = bbs.table[idx];
+
+ /* bb points at the current bb under consideration, and bp is the
+ one before. */
+ bp = NULL;
+ while(bb) {
+ if ((bb->obj == obj) && (bb->offset == offset)) break;
+ bp = bb;
+ bb = bb->next;
+ }
+
+ if (bb == NULL) {
+ CLG_DEBUG(3, " delete_bb (Obj %s, off %p): NOT FOUND\n",
+ obj->name, offset);
+
+ /* we didn't find it. That's strange. */
+ return;
+ }
+
+ /* unlink it from hash table */
+
+ if (bp == NULL) {
+ /* we found the first one in the list. */
+ tl_assert(bb == bbs.table[idx]);
+ bbs.table[idx] = bb->next;
+ } else {
+ tl_assert(bb != bbs.table[idx]);
+ bp->next = bb->next;
+ }
+
+ CLG_DEBUG(3, " delete_bb (Obj %s, off %p): %p, BBCC head: %p\n",
+ obj->name, offset, bb, bb->bbcc_list);
+
+ if (bb->bbcc_list == 0) {
+ /* can be safely deleted */
+
+ /* Fill the block up with junk and then free it, so we will
+ hopefully get a segfault if it is used again by mistake. */
+ size = sizeof(BB)
+ + bb->instr_count * sizeof(InstrInfo)
+ + (bb->cjmp_count+1) * sizeof(CJmpInfo);
+ VG_(memset)( bb, 0xAA, size );
+ CLG_FREE(bb);
+ }
+ CLG_DEBUG(3, " delete_bb: BB in use, can not free!\n");
+}
diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c
new file mode 100644
index 0000000..d2eb4b9
--- /dev/null
+++ b/callgrind/bbcc.c
@@ -0,0 +1,883 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- bbcc.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+#include "costs.h"
+
+#include <pub_tool_threadstate.h>
+
+/*------------------------------------------------------------*/
+/*--- BBCC operations ---*/
+/*------------------------------------------------------------*/
+
+#define N_BBCC_INITIAL_ENTRIES 10437
+
+/* BBCC table (key is BB/Context), per thread, resizable */
+bbcc_hash current_bbccs;
+
+void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
+{
+ Int i;
+
+ CLG_ASSERT(bbccs != 0);
+
+ bbccs->size = N_BBCC_INITIAL_ENTRIES;
+ bbccs->entries = 0;
+ bbccs->table = (BBCC**) CLG_MALLOC(bbccs->size * sizeof(BBCC*));
+
+ for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
+}
+
+void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
+{
+ CLG_ASSERT(dst != 0);
+
+ dst->size = current_bbccs.size;
+ dst->entries = current_bbccs.entries;
+ dst->table = current_bbccs.table;
+}
+
+bbcc_hash* CLG_(get_current_bbcc_hash)()
+{
+ return &current_bbccs;
+}
+
+void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
+{
+ CLG_ASSERT(h != 0);
+
+ current_bbccs.size = h->size;
+ current_bbccs.entries = h->entries;
+ current_bbccs.table = h->table;
+}
+
+/*
+ * Zero all costs of a BBCC
+ */
+void CLG_(zero_bbcc)(BBCC* bbcc)
+{
+ Int i;
+ jCC* jcc;
+
+ CLG_ASSERT(bbcc->cxt != 0);
+ CLG_DEBUG(1, " zero_bbcc: BB %p, Cxt %d "
+ "(fn '%s', rec %d)\n",
+ bb_addr(bbcc->bb),
+ bbcc->cxt->base_number + bbcc->rec_index,
+ bbcc->cxt->fn[0]->name,
+ bbcc->rec_index);
+
+ if ((bbcc->ecounter_sum ==0) &&
+ (bbcc->ret_counter ==0)) return;
+
+ for(i=0;i<bbcc->bb->cost_count;i++)
+ bbcc->cost[i] = 0;
+ for(i=0;i <= bbcc->bb->cjmp_count;i++) {
+ bbcc->jmp[i].ecounter = 0;
+ for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from)
+ CLG_(init_cost)( CLG_(sets).full, jcc->cost );
+ }
+ bbcc->ecounter_sum = 0;
+ bbcc->ret_counter = 0;
+}
+
+
+
+void CLG_(forall_bbccs)(void (*func)(BBCC*))
+{
+ BBCC *bbcc, *bbcc2;
+ int i, j;
+
+ for (i = 0; i < current_bbccs.size; i++) {
+ if ((bbcc=current_bbccs.table[i]) == NULL) continue;
+ while (bbcc) {
+ /* every bbcc should have a rec_array */
+ CLG_ASSERT(bbcc->rec_array != 0);
+
+ for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) {
+ if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;
+
+ (*func)(bbcc2);
+ }
+ bbcc = bbcc->next;
+ }
+ }
+}
+
+
+/* All BBCCs for recursion level 0 are inserted into a
+ * thread specific hash table with key
+ * - address of BB structure (unique, as never freed)
+ * - current context (includes caller chain)
+ * BBCCs for other recursion levels are in bbcc->rec_array.
+ *
+ * The hash is used in setup_bb(), i.e. to find the cost
+ * counters to be changed in the execution of a BB.
+ */
+
+static __inline__
+UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
+{
+ CLG_ASSERT(bb != 0);
+ CLG_ASSERT(cxt != 0);
+
+ return ((Addr)bb + (Addr)cxt) % size;
+}
+
+
+/* Lookup for a BBCC in hash.
+ */
+static
+BBCC* lookup_bbcc(BB* bb, Context* cxt)
+{
+ BBCC* bbcc = bb->last_bbcc;
+ UInt idx;
+
+ /* check LRU */
+ if (bbcc->cxt == cxt) {
+ if (!CLG_(clo).separate_threads) {
+ /* if we don't dump threads separate, tid doesn't have to match */
+ return bbcc;
+ }
+ if (bbcc->tid == CLG_(current_tid)) return bbcc;
+ }
+
+ CLG_(stat).bbcc_lru_misses++;
+
+ idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
+ bbcc = current_bbccs.table[idx];
+ while (bbcc &&
+ (bb != bbcc->bb ||
+ cxt != bbcc->cxt)) {
+ bbcc = bbcc->next;
+ }
+
+ CLG_DEBUG(2," lookup_bbcc(BB %p, Cxt %d, fn '%s'): %p (tid %d)\n",
+ bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
+ bbcc, bbcc ? bbcc->tid : 0);
+
+ CLG_DEBUGIF(2)
+ if (bbcc) CLG_(print_bbcc)(-2,bbcc,False);
+
+ return bbcc;
+}
+
+
+/* double size of hash table 1 (addr->BBCC) */
+static void resize_bbcc_hash(void)
+{
+ Int i, new_size, conflicts1 = 0, conflicts2 = 0;
+ BBCC** new_table;
+ UInt new_idx;
+ BBCC *curr_BBCC, *next_BBCC;
+
+ new_size = 2*current_bbccs.size+3;
+ new_table = (BBCC**) CLG_MALLOC(new_size * sizeof(BBCC*));
+
+ if (!new_table) return;
+
+ for (i = 0; i < new_size; i++)
+ new_table[i] = NULL;
+
+ for (i = 0; i < current_bbccs.size; i++) {
+ if (current_bbccs.table[i] == NULL) continue;
+
+ curr_BBCC = current_bbccs.table[i];
+ while (NULL != curr_BBCC) {
+ next_BBCC = curr_BBCC->next;
+
+ new_idx = bbcc_hash_idx(curr_BBCC->bb,
+ curr_BBCC->cxt,
+ new_size);
+
+ curr_BBCC->next = new_table[new_idx];
+ new_table[new_idx] = curr_BBCC;
+ if (curr_BBCC->next) {
+ conflicts1++;
+ if (curr_BBCC->next->next)
+ conflicts2++;
+ }
+
+ curr_BBCC = next_BBCC;
+ }
+ }
+
+ VG_(free)(current_bbccs.table);
+
+
+ CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
+ current_bbccs.size, new_size,
+ current_bbccs.entries, conflicts1, conflicts2);
+
+ current_bbccs.size = new_size;
+ current_bbccs.table = new_table;
+ CLG_(stat).bbcc_hash_resizes++;
+}
+
+
+static __inline
+BBCC** new_recursion(int size)
+{
+ BBCC** bbccs;
+ int i;
+
+ bbccs = (BBCC**) CLG_MALLOC(sizeof(BBCC*) * size);
+ for(i=0;i<size;i++)
+ bbccs[i] = 0;
+
+ CLG_DEBUG(3," new_recursion(size %d): %p\n", size, bbccs);
+
+ return bbccs;
+}
+
+
+/*
+ * Allocate a new BBCC
+ *
+ * Uninitialized:
+ * cxt, rec_index, rec_array, next_bbcc, next1, next2
+ */
+static __inline__
+BBCC* new_bbcc(BB* bb)
+{
+ BBCC* new;
+ Int i;
+
+ /* We need cjmp_count+1 JmpData structs:
+ * the last is for the unconditional jump/call/ret at end of BB
+ */
+ new = (BBCC*)CLG_MALLOC(sizeof(BBCC) +
+ (bb->cjmp_count+1) * sizeof(JmpData));
+ new->bb = bb;
+ new->tid = CLG_(current_tid);
+
+ new->ret_counter = 0;
+ new->skipped = 0;
+ new->cost = CLG_(get_costarray)(bb->cost_count);
+ for(i=0;i<bb->cost_count;i++)
+ new->cost[i] = 0;
+ for(i=0; i<=bb->cjmp_count; i++) {
+ new->jmp[i].ecounter = 0;
+ new->jmp[i].jcc_list = 0;
+ }
+ new->ecounter_sum = 0;
+
+ /* Init pointer caches (LRU) */
+ new->lru_next_bbcc = 0;
+ new->lru_from_jcc = 0;
+ new->lru_to_jcc = 0;
+
+ CLG_(stat).distinct_bbccs++;
+
+ CLG_DEBUG(3, " new_bbcc(BB %p): %p (now %d)\n",
+ bb_addr(bb), new, CLG_(stat).distinct_bbccs);
+
+ return new;
+}
+
+
+/**
+ * Inserts a new BBCC into hashes.
+ * BBCC specific items must be set as this is used for the hash
+ * keys:
+ * fn : current function
+ * tid : current thread ID
+ * from : position where current function is called from
+ *
+ * Recursion level doesn't need to be set as this is not included
+ * in the hash key: Only BBCCs with rec level 0 are in hashes.
+ */
+static
+void insert_bbcc_into_hash(BBCC* bbcc)
+{
+ UInt idx;
+
+ CLG_ASSERT(bbcc->cxt != 0);
+
+ CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %p, fn '%s')\n",
+ bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);
+
+ /* check fill degree of hash and resize if needed (>90%) */
+ current_bbccs.entries++;
+ if (100 * current_bbccs.entries / current_bbccs.size > 90)
+ resize_bbcc_hash();
+
+ idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
+ bbcc->next = current_bbccs.table[idx];
+ current_bbccs.table[idx] = bbcc;
+
+ CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n",
+ current_bbccs.entries);
+}
+
+static Char* mangled_cxt(Context* cxt, int rec_index)
+{
+ static Char mangled[FN_NAME_LEN];
+ int i, p;
+
+ if (!cxt) return "(no context)";
+
+ p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
+ if (rec_index >0)
+ p += VG_(sprintf)(mangled+p, "'%d", rec_index +1);
+ for(i=1;i<cxt->size;i++)
+ p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);
+
+ return mangled;
+}
+
+
+/* Create a new BBCC as a copy of an existing one,
+ * but with costs set to 0 and jcc chains empty.
+ *
+ * This is needed when a BB is executed in another context than
+ * the one at instrumentation time of the BB.
+ *
+ * Use cases:
+ * rec_index == 0: clone from a BBCC with differing tid/cxt
+ * and insert into hashes
+ * rec_index >0 : clone from a BBCC with same tid/cxt and rec_index 0
+ * don't insert into hashes
+ */
+static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
+{
+ BBCC* new;
+
+ CLG_DEBUG(3,"+ clone_bbcc(BB %p, rec %d, fn %s)\n",
+ bb_addr(orig->bb), rec_index, cxt->fn[0]->name);
+
+ new = new_bbcc(orig->bb);
+
+ if (rec_index == 0) {
+
+ /* hash insertion is only allowed if tid or cxt is different */
+ CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
+ (orig->cxt != cxt));
+
+ new->rec_index = 0;
+ new->cxt = cxt;
+ new->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
+ new->rec_array[0] = new;
+
+ insert_bbcc_into_hash(new);
+ }
+ else {
+ if (CLG_(clo).separate_threads)
+ CLG_ASSERT(orig->tid == CLG_(current_tid));
+
+ CLG_ASSERT(orig->cxt == cxt);
+ CLG_ASSERT(orig->rec_array);
+ CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
+ CLG_ASSERT(orig->rec_array[rec_index] ==0);
+
+ /* new BBCC will only have differing recursion level */
+ new->rec_index = rec_index;
+ new->cxt = cxt;
+ new->rec_array = orig->rec_array;
+ new->rec_array[rec_index] = new;
+ }
+
+ /* update list of BBCCs for same BB */
+ new->next_bbcc = orig->bb->bbcc_list;
+ orig->bb->bbcc_list = new;
+
+
+ CLG_DEBUGIF(3)
+ CLG_(print_bbcc)(-2, new, False);
+
+ CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %p\n"
+ " orig %s\n"
+ " new %s\n",
+ orig, rec_index, bb_addr(orig->bb),
+ mangled_cxt(orig->cxt, orig->rec_index),
+ mangled_cxt(new->cxt, new->rec_index));
+
+ CLG_(stat).bbcc_clones++;
+
+ return new;
+};
+
+
+
+/* Get a pointer to the cost centre structure for given basic block
+ * address. If created, the BBCC is inserted into the BBCC hash.
+ * Also sets BB_seen_before by reference.
+ *
+ */
+BBCC* CLG_(get_bbcc)(BB* bb)
+{
+ BBCC* bbcc;
+
+ CLG_DEBUG(3, "+ get_bbcc(BB %p)\n", bb_addr(bb));
+
+ bbcc = bb->bbcc_list;
+
+ if (!bbcc) {
+ bbcc = new_bbcc(bb);
+
+ /* initialize BBCC */
+ bbcc->cxt = 0;
+ bbcc->rec_array = 0;
+ bbcc->rec_index = 0;
+
+ bbcc->next_bbcc = bb->bbcc_list;
+ bb->bbcc_list = bbcc;
+ bb->last_bbcc = bbcc;
+
+ CLG_DEBUGIF(3)
+ CLG_(print_bbcc)(-2, bbcc, False);
+ }
+
+ CLG_DEBUG(3, "- get_bbcc(BB %p): BBCC %p\n",
+ bb_addr(bb), bbcc);
+
+ return bbcc;
+}
+
+
+/* Callgrind manages its own call stack for each thread.
+ * When leaving a function, a underflow can happen when
+ * Callgrind's tracing was switched on in the middle of
+ * a run, i.e. when Callgrind was not able to trace the
+ * call instruction.
+ * This function tries to reconstruct the original call.
+ * As we know the return address (the address following
+ * the CALL instruction), we can detect the function
+ * we return back to, but the original call site is unknown.
+ * We suppose a call site at return address - 1.
+ * (TODO: other heuristic: lookup info of instrumented BBs).
+ */
+static void handleUnderflow(BB* bb)
+{
+ /* RET at top of call stack */
+ BBCC* source_bbcc;
+ BB* source_bb;
+ jCC* jcc;
+ Bool seen_before;
+ fn_node* caller;
+ int fn_number, *pactive;
+ call_entry* call_entry_up;
+
+ CLG_DEBUG(1," Callstack underflow !\n");
+
+ /* we emulate an old call from the function we return to
+ * by using (<return address> -1) */
+ source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
+ source_bbcc = CLG_(get_bbcc)(source_bb);
+
+ /* seen_before can be true if RET from a signal handler */
+ if (!seen_before) {
+ source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
+ }
+ else if (CLG_(current_state).collect)
+ source_bbcc->ecounter_sum++;
+
+ /* Force a new top context, will be set active by push_cxt() */
+ CLG_(current_fn_stack).top--;
+ CLG_(current_state).cxt = 0;
+ caller = CLG_(get_fn_node)(bb);
+ CLG_(push_cxt)( caller );
+
+ if (!seen_before) {
+ /* set rec array for source BBCC: this is at rec level 1 */
+ source_bbcc->rec_array = new_recursion(caller->separate_recursions);
+ source_bbcc->rec_array[0] = source_bbcc;
+
+ CLG_ASSERT(source_bbcc->cxt == 0);
+ source_bbcc->cxt = CLG_(current_state).cxt;
+ insert_bbcc_into_hash(source_bbcc);
+ }
+ CLG_ASSERT(CLG_(current_state).bbcc);
+
+ /* correct active counts */
+ fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
+ pactive = CLG_(get_fn_entry)(fn_number);
+ (*pactive)--;
+
+ /* This assertion is not correct for reentrant
+ * signal handlers */
+ /* CLG_ASSERT(*pactive == 0); */
+
+ CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
+ /* back to current context */
+ CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
+ CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
+ (Addr)-1, False);
+ call_entry_up =
+ &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
+ jcc = call_entry_up->jcc;
+ /* assume this call is lasting since last dump or
+ * for a signal handler since it's call */
+ if (CLG_(current_state).sig == 0)
+ CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
+ CLG_(get_current_thread)()->lastdump_cost );
+ else
+ CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
+}
+
+
+/*
+ * Helper function called at start of each instrumented BB to setup
+ * pointer to costs for current thread/context/recursion level
+ */
+
+VG_REGPARM(1)
+void CLG_(setup_bbcc)(BB* bb)
+{
+ BBCC *bbcc, *last_bbcc;
+ Bool call_emulation = False, delayed_push = False, skip = False;
+ Addr sp;
+ BB* last_bb;
+ ThreadId tid;
+ Int jmpkind, passed = 0, csp;
+ Bool ret_without_call = False;
+ Int popcount_on_return = 1;
+
+ CLG_DEBUG(3,"+ setup_bbcc(BB %p)\n", bb_addr(bb));
+
+ /* This is needed because thread switches can not reliable be tracked
+ * with callback CLG_(run_thread) only: we have otherwise no way to get
+ * the thread ID after a signal handler returns.
+ * This could be removed again if that bug is fixed in Valgrind.
+ * This is in the hot path but hopefully not to costly.
+ */
+ tid = VG_(get_running_tid)();
+#if 1
+ CLG_(switch_thread)(tid);
+#else
+ CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
+#endif
+
+ sp = VG_(get_SP)(tid);
+ last_bbcc = CLG_(current_state).bbcc;
+ last_bb = last_bbcc ? last_bbcc->bb : 0;
+
+ if (last_bb) {
+ passed = CLG_(current_state).jmps_passed;
+ if (passed == last_bb->cjmp_count) {
+ jmpkind = last_bb->jmpkind;
+
+ /* VEX always gives a Boring jump kind also when passed trough */
+ if ((jmpkind == Ijk_Boring) &&
+ (last_bb->offset + last_bb->instr_len == bb->offset))
+ jmpkind = JmpNone;
+ }
+ else
+ jmpkind = JmpCond;
+
+ /* if we are in a function which is skipped in the call graph, we
+ * do not increment the exe counter to produce cost (if simulation off),
+ * which would lead to dumping this BB to be skipped
+ */
+ if (CLG_(current_state).collect && !CLG_(current_state).nonskipped) {
+ last_bbcc->ecounter_sum++;
+ last_bbcc->jmp[passed].ecounter++;
+ if (!CLG_(clo).simulate_cache) {
+ /* update Ir cost */
+ int instr_count = last_bb->jmp[passed].instr+1;
+ CLG_(current_state).cost[CLG_(sets).off_sim_Ir] += instr_count;
+ }
+ }
+
+ CLG_DEBUGIF(4) {
+ CLG_(print_execstate)(-2, &CLG_(current_state) );
+ CLG_(print_bbcc_cost)(-2, last_bbcc);
+ }
+ }
+ else {
+ jmpkind = JmpNone;
+ }
+
+ /* Manipulate JmpKind if needed, only using BB specific info */
+
+ csp = CLG_(current_call_stack).sp;
+
+ /* A return not matching the top call in our callstack is a jump */
+ if ( (jmpkind == Ijk_Ret) && (csp >0)) {
+ Int csp_up = csp-1;
+ call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);
+
+ /* We have a real return if
+ * - the stack pointer (SP) left the current stack frame, or
+ * - SP has the same value as when reaching the current function
+ * and the address of this BB is the return address of last call
+ * (we even allow to leave multiple frames if the SP stays the
+ * same and we find a matching return address)
+ * The latter condition is needed because on PPC, SP can stay
+ * the same over CALL=b(c)l / RET=b(c)lr boundaries
+ */
+ if (sp < top_ce->sp) popcount_on_return = 0;
+ else if (top_ce->sp == sp) {
+ while(1) {
+ if (top_ce->ret_addr == bb_addr(bb)) break;
+ if (csp_up>0) {
+ csp_up--;
+ top_ce = &(CLG_(current_call_stack).entry[csp_up]);
+ if (top_ce->sp == sp) {
+ popcount_on_return++;
+ continue;
+ }
+ }
+ popcount_on_return = 0;
+ break;
+ }
+ }
+ if (popcount_on_return == 0) {
+ jmpkind = Ijk_Boring;
+ ret_without_call = True;
+ }
+ }
+
+ /* Should this jump be converted to call or pop/call ? */
+ if (( jmpkind != Ijk_Ret) &&
+ ( jmpkind != Ijk_Call) && last_bb) {
+
+ /* We simulate a JMP/Cont to be a CALL if
+ * - jump is in another ELF object or section kind
+ * - jump is to first instruction of a function (tail recursion)
+ */
+ if (ret_without_call ||
+ /* This is for detection of optimized tail recursion.
+ * On PPC, this is only detected as call when going to another
+ * function. The problem is that on PPC it can go wrong
+ * more easily (no stack frame setup needed)
+ */
+#if defined(VGA_ppc32)
+ (bb->is_entry && (last_bb->fn != bb->fn)) ||
+#else
+ bb->is_entry ||
+#endif
+ (last_bb->sect_kind != bb->sect_kind) ||
+ (last_bb->obj->number != bb->obj->number)) {
+
+ CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n",
+ last_bb->fn->name, last_bb->obj->name,
+ bb->fn->name, bb->obj->name,
+ ret_without_call?" (RET w/o CALL)":"");
+
+ if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) {
+
+ call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);
+
+ if (top_ce->jcc) {
+
+ CLG_DEBUG(1," Pop on Jump!\n");
+
+ /* change source for delayed push */
+ CLG_(current_state).bbcc = top_ce->jcc->from;
+ sp = top_ce->sp;
+ CLG_(pop_call_stack)();
+ }
+ else {
+ CLG_ASSERT(CLG_(current_state).nonskipped != 0);
+ }
+ }
+
+ jmpkind = Ijk_Call;
+ call_emulation = True;
+ }
+ }
+
+ if (jmpkind == Ijk_Call)
+ skip = CLG_(get_fn_node)(bb)->skip;
+
+ CLG_DEBUGIF(1) {
+ if (jmpkind == JmpCond)
+ VG_(printf)("Conditional");
+ else if (jmpkind == JmpNone)
+ VG_(printf)("None");
+ else
+ ppIRJumpKind( jmpkind );
+
+ VG_(printf)(" %08x -> %08x, SP %08x\n",
+ last_bb ? bb_jmpaddr(last_bb) : 0,
+ bb_addr(bb), sp);
+ }
+
+ /* Handle CALL/RET and update context to get correct BBCC */
+
+ if (jmpkind == Ijk_Ret) {
+
+ if ((csp == 0) ||
+ ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
+ ( *(CLG_(current_fn_stack).top-1)==0)) ) {
+
+ /* On an empty call stack or at a signal separation marker,
+ * a RETURN generates an call stack underflow.
+ */
+ handleUnderflow(bb);
+ CLG_(pop_call_stack)();
+ }
+ else {
+ CLG_ASSERT(popcount_on_return >0);
+ CLG_(unwind_call_stack)(sp, popcount_on_return);
+ }
+ }
+ else {
+ CLG_(unwind_call_stack)(sp, 0);
+
+ if (jmpkind == Ijk_Call) {
+ delayed_push = True;
+
+ csp = CLG_(current_call_stack).sp;
+ if (call_emulation && csp>0)
+ sp = CLG_(current_call_stack).entry[csp-1].sp;
+
+ }
+ }
+
+ /* Change new context if needed, taking delayed_push into account */
+ if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
+ CLG_(push_cxt)(CLG_(get_fn_node)(bb));
+ }
+ CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);
+
+ /* If there is a fresh instrumented BBCC, assign current context */
+ bbcc = CLG_(get_bbcc)(bb);
+ if (bbcc->cxt == 0) {
+ CLG_ASSERT(bbcc->rec_array == 0);
+
+ bbcc->cxt = CLG_(current_state).cxt;
+ bbcc->rec_array =
+ new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
+ bbcc->rec_array[0] = bbcc;
+
+ insert_bbcc_into_hash(bbcc);
+ }
+ else {
+ /* get BBCC with current context */
+
+ /* first check LRU of last bbcc executed */
+
+ if (last_bbcc) {
+ bbcc = last_bbcc->lru_next_bbcc;
+ if (bbcc &&
+ ((bbcc->bb != bb) ||
+ (bbcc->cxt != CLG_(current_state).cxt)))
+ bbcc = 0;
+ }
+ else
+ bbcc = 0;
+
+ if (!bbcc)
+ bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
+ if (!bbcc)
+ bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);
+
+ bb->last_bbcc = bbcc;
+ }
+
+ /* save for fast lookup */
+ if (last_bbcc)
+ last_bbcc->lru_next_bbcc = bbcc;
+
+ if ((*CLG_(current_fn_stack).top)->separate_recursions >1) {
+ UInt level, idx;
+ fn_node* top = *(CLG_(current_fn_stack).top);
+
+ level = *CLG_(get_fn_entry)(top->number);
+
+ if (delayed_push && !skip) {
+ if (CLG_(clo).skip_direct_recursion) {
+ /* do not increment rec. level if called from
+ * same function */
+ if (!CLG_(current_state).bbcc ||
+ (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0]))
+ level++;
+ }
+ else level++;
+ }
+ if (level> top->separate_recursions)
+ level = top->separate_recursions;
+
+ if (level == 0) {
+ /* can only happen if instrumentation just was switched on */
+ level = 1;
+ *CLG_(get_fn_entry)(top->number) = 1;
+ }
+
+ idx = level -1;
+ if (bbcc->rec_array[idx])
+ bbcc = bbcc->rec_array[idx];
+ else
+ bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);
+
+ CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
+ }
+
+ if (delayed_push) {
+ if (!skip && CLG_(current_state).nonskipped) {
+ /* a call from skipped to nonskipped */
+ CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
+ }
+ CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
+ bbcc, sp, skip);
+ }
+
+ if (CLG_(clo).collect_jumps &&
+ ((jmpkind == JmpCond) || (jmpkind == Ijk_Boring))) {
+
+ /* Handle conditional jumps followed, i.e. trace arcs
+ * This uses JCC structures, too */
+
+ jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
+ CLG_ASSERT(jcc != 0);
+ // Change from default, and check if already changed
+ if (jcc->jmpkind == Ijk_Call)
+ jcc->jmpkind = jmpkind;
+ else {
+ // FIXME: Why can this fail?
+ // CLG_ASSERT(jcc->jmpkind == jmpkind);
+ }
+
+ jcc->call_counter++;
+ if (jmpkind == JmpCond)
+ CLG_(stat).jcnd_counter++;
+ else
+ CLG_(stat).jump_counter++;
+ }
+
+ CLG_(current_state).bbcc = bbcc;
+
+ CLG_DEBUGIF(1) {
+ VG_(printf)(" ");
+ CLG_(print_bbcc_fn)(bbcc);
+ VG_(printf)("\n");
+ }
+
+ CLG_DEBUG(3,"- setup_bbcc (BB %p): Cost %p (Len %d), Instrs %d (Len %d)\n",
+ bb_addr(bb), bbcc->cost, bb->cost_count,
+ bb->instr_count, bb->instr_len);
+ CLG_DEBUGIF(3)
+ CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
+ CLG_DEBUG(3,"\n");
+
+ (*CLG_(cachesim).after_bbsetup)();
+
+ CLG_(stat).bb_executions++;
+}
diff --git a/callgrind/callgrind.h b/callgrind/callgrind.h
new file mode 100644
index 0000000..c153dbd
--- /dev/null
+++ b/callgrind/callgrind.h
@@ -0,0 +1,130 @@
+
+/*
+ ----------------------------------------------------------------
+
+ Notice that the following BSD-style license applies to this one
+ file (callgrind.h) only. The entire rest of Valgrind is licensed
+ under the terms of the GNU General Public License, version 2. See
+ the COPYING file in the source distribution for details.
+
+ ----------------------------------------------------------------
+
+ This file is part of callgrind, a valgrind skin for cache simulation
+ and call tree tracing.
+
+ Copyright (C) 2003,2004 Josef Weidendorfer. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ----------------------------------------------------------------
+
+ Notice that the above BSD-style license applies to this one file
+ (callgrind.h) only. The entire rest of Valgrind is licensed under
+ the terms of the GNU General Public License, version 2. See the
+ COPYING file in the source distribution for details.
+
+ ----------------------------------------------------------------
+*/
+
+#ifndef __CALLGRIND_H
+#define __CALLGRIND_H
+
+#include "valgrind.h"
+
+/* Client request codes understood by the callgrind tool. The base
+ value comes from the two-character tool code 'C','T' (from
+ "calltree", callgrind's former name), keeping these requests in a
+ tool-specific number space. */
+typedef
+ enum {
+ VG_USERREQ__DUMP_STATS = VG_USERREQ_TOOL_BASE('C','T'),
+ VG_USERREQ__ZERO_STATS,
+ VG_USERREQ__TOGGLE_COLLECT,
+ VG_USERREQ__DUMP_STATS_AT,
+ VG_USERREQ__START_INSTRUMENTATION,
+ VG_USERREQ__STOP_INSTRUMENTATION
+ } Vg_CalltreeClientRequest;
+
+/* In each macro below, _qzz_res receives the (ignored) request result
+ and the trailing (void)0 makes the expansion usable as a plain
+ statement. NOTE(review): VALGRIND_MAGIC_SEQUENCE is defined in
+ valgrind.h (not visible here); presumably it is a no-op when the
+ program runs outside Valgrind -- confirm against that header. */
+
+/* Dump current state of cost centers.
+ This will also atomically zero the cost centers */
+#define CALLGRIND_DUMP_STATS() \
+ do { \
+ unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__DUMP_STATS, \
+ 0, 0, 0, 0); \
+ (void)0; \
+ } while(0)
+
+/* Dump current state of cost centers, tagging the dump with the
+ string pos_str. This will also atomically zero the cost centers */
+#define CALLGRIND_DUMP_STATS_AT(pos_str) \
+ do { \
+ unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__DUMP_STATS_AT, \
+ pos_str, 0, 0, 0); \
+ (void)0; \
+ } while(0)
+
+/* Zero cost centers */
+#define CALLGRIND_ZERO_STATS() \
+ do { \
+ unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__ZERO_STATS, \
+ 0, 0, 0, 0); \
+ (void)0; \
+ } while(0)
+
+/* Toggle collection state,
+ * i.e. if events happening are collected into cost centers */
+#define CALLGRIND_TOGGLE_COLLECT() \
+ do { \
+ unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__TOGGLE_COLLECT, \
+ 0, 0, 0, 0); \
+ (void)0; \
+ } while(0)
+
+/* Start instrumentation if not already on */
+#define CALLGRIND_START_INSTRUMENTATION() \
+ do { \
+ unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__START_INSTRUMENTATION,\
+ 0, 0, 0, 0); \
+ (void)0; \
+ } while(0)
+
+/* Stop instrumentation if not already off */
+#define CALLGRIND_STOP_INSTRUMENTATION() \
+ do { \
+ unsigned int _qzz_res; \
+ VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__STOP_INSTRUMENTATION,\
+ 0, 0, 0, 0); \
+ (void)0; \
+ } while(0)
+
+#endif /* __CALLGRIND_H */
diff --git a/callgrind/callgrind_annotate.in b/callgrind/callgrind_annotate.in
new file mode 100644
index 0000000..6d36f06
--- /dev/null
+++ b/callgrind/callgrind_annotate.in
@@ -0,0 +1,1191 @@
+#! /usr/bin/perl -w
+##--------------------------------------------------------------------##
+##--- The cache simulation framework: instrumentation, recording ---##
+##--- and results printing. ---##
+##--- callgrind_annotate ---##
+##--------------------------------------------------------------------##
+
+# This file is part of Callgrind, a cache-simulator and call graph
+# tracer built on Valgrind.
+#
+# Copyright (C) 2003 Josef Weidendorfer
+# Josef.Weidendorfer@gmx.de
+#
+# This file is based heavily on vg_annotate, part of Valgrind.
+# Copyright (C) 2002 Nicholas Nethercote
+# njn25@cam.ac.uk
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307, USA.
+#
+# The GNU General Public License is contained in the file COPYING.
+
+#----------------------------------------------------------------------------
+# Annotator for cachegrind/callgrind.
+#
+# File format is described in /docs/techdocs.html.
+#
+# Performance improvements record, using cachegrind.out for cacheprof, doing no
+# source annotation (irrelevant ones removed):
+# user time
+# 1. turned off warnings in add_hash_a_to_b() 3.81 --> 3.48s
+# [now add_array_a_to_b()]
+# 6. make line_to_CC() return a ref instead of a hash 3.01 --> 2.77s
+#
+#10. changed file format to avoid file/fn name repetition 2.40s
+# (not sure why higher; maybe due to new '.' entries?)
+#11. changed file format to drop unnecessary end-line "."s 2.36s
+# (shrunk file by about 37%)
+#12. switched from hash CCs to array CCs 1.61s
+#13. only adding b[i] to a[i] if b[i] defined (was doing it if
+# either a[i] or b[i] was defined, but if b[i] was undefined
+# it just added 0) 1.48s
+#14. Stopped converting "." entries to undef and then back 1.16s
+#15. Using foreach $i (x..y) instead of for ($i = 0...) in
+# add_array_a_to_b() 1.11s
+#
+# Auto-annotating primes:
+#16. Finding count lengths by int((length-1)/3), not by
+# commifying (halves the number of commify calls) 1.68s --> 1.47s
+
+use strict;
+
+#----------------------------------------------------------------------------
+# Overview: the running example in the comments is for:
+# - events = A,B,C,D
+# - --show=C,A,D
+# - --sort=D,C
+#----------------------------------------------------------------------------
+
+#----------------------------------------------------------------------------
+# Global variables, main data structures
+#----------------------------------------------------------------------------
+# CCs are arrays, the counts corresponding to @events, with 'undef'
+# representing '.'. This makes things fast (faster than using hashes for CCs)
+# but we have to use @sort_order and @show_order below to handle the --sort and
+# --show options, which is a bit tricky.
+#----------------------------------------------------------------------------
+
+# Total counts for summary (an array reference).
+my $summary_CC;
+
+# Totals for each function, for overall summary.
+# hash(filename:fn_name => CC array)
+my %fn_totals;
+
+# Individual CCs, organised by filename and line_num for easy annotation.
+# hash(filename => hash(line_num => CC array))
+my %all_ind_CCs;
+
+# Files chosen for annotation on the command line.
+# key = basename (trimmed of any directory), value = full filename
+my %user_ann_files;
+
+# Generic description string.
+my $desc = "";
+
+# Command line of profiled program.
+my $cmd = "";
+
+# Info on the profiled process.
+my $pid = "";
+my $part = "";
+my $thread = "";
+
+# Positions used for cost lines; default: line numbers
+my $has_line = 1;
+my $has_addr = 0;
+
+# Events in input file, eg. (A,B,C,D)
+# ($events holds the raw "events:" header line; @events its split form.)
+my @events;
+my $events;
+
+# Events to show, from command line, eg. (C,A,D)
+my @show_events;
+
+# Map from @show_events indices to @events indices, eg. (2,0,3). Gives the
+# order in which we must traverse @events in order to show the @show_events,
+# eg. (@events[$show_order[1]], @events[$show_order[2]]...) = @show_events.
+# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
+my @show_order;
+
+# Print out the function totals sorted by these events, eg. (D,C).
+my @sort_events;
+
+# Map from @sort_events indices to @events indices, eg. (3,2). Same idea as
+# for @show_order.
+my @sort_order;
+
+# Thresholds, one for each sort event. If none were given via --sort,
+# read_input_file() fills in the single --threshold value for the primary
+# sort event and 0 for the rest. We print out functions and do
+# auto-annotations until we've handled this proportion of all the events
+# thresholded.
+my @thresholds;
+
+my $default_threshold = 99;
+
+my $single_threshold = $default_threshold;
+
+# If on, automatically annotates all files that are involved in getting over
+# all the threshold counts.
+my $auto_annotate = 0;
+
+# Number of lines to show around each annotated line.
+my $context = 8;
+
+# Directories in which to look for annotation files.
+my @include_dirs = ("");
+
+# Verbose mode (always enabled; no command-line option changes it)
+my $verbose = "1";
+
+# Inclusive statistics (with subroutine events)
+my $inclusive = 0;
+
+# Inclusive totals for each function, for overall summary.
+# hash(filename:fn_name => CC array)
+my %cfn_totals;
+
+# hash( file:func => [ called file:func ])
+my $called_funcs;
+
+# hash( file:func => [ calling file:func ])
+my $calling_funcs;
+
+# hash( file:func,line => [called file:func ])
+my $called_from_line;
+
+# hash( file:func,line => file:func )
+my %func_of_line;
+
+# hash (file:func => object name)
+my %obj_name;
+
+# Print out the callers of a function
+my $tree_caller = 0;
+
+# Print out the called functions
+my $tree_calling = 0;
+
+# hash( file:func,cfile:cfunc => call CC[])
+my %call_CCs;
+
+# hash( file:func,cfile:cfunc => call counter)
+my %call_counter;
+
+# hash(context, index) => realname for compressed traces
+my %compressed;
+
+# Input file name, will be set in process_cmd_line
+my $input_file = "";
+
+# Version number
+# (@VERSION@ is substituted at build time from this .in template.)
+my $version = "@VERSION@";
+
+# Usage message.
+# (User-visible; keep in sync with the option parsing in process_cmd_line.)
+my $usage = <<END
+usage: callgrind_annotate [options] [data-file [source-files]]
+
+ options for the user, with defaults in [ ], are:
+ -h --help show this message
+ -v --version show version
+ --show=A,B,C only show figures for events A,B,C [all]
+ --sort=A,B,C sort columns by events A,B,C [event column order]
+ --threshold=<0--100> percentage of counts (of primary sort event) we
+ are interested in [$default_threshold%]
+ --auto=yes|no annotate all source files containing functions
+ that helped reach the event count threshold [no]
+ --context=N print N lines of context before and after
+ annotated lines [8]
+ --inclusive=yes|no add subroutine costs to functions calls [no]
+ --tree=none|caller| print for each function their callers,
+ calling|both the called functions or both [none]
+ -I --include=<dir> add <dir> to list of directories to search for
+ source files
+
+END
+;
+
+# Used in various places of output.
+my $fancy = '-' x 80 . "\n";
+
+#-----------------------------------------------------------------------------
+# Argument and option handling
+#-----------------------------------------------------------------------------
+# Parse @ARGV. Options update the file-scope defaults above; the first
+# non-option argument names the profile data file, and any further
+# arguments select source files for annotation (they must be readable
+# in one of @include_dirs). Dies with $usage on malformed input.
+sub process_cmd_line()
+{
+    for my $arg (@ARGV) {
+
+        # Option handling
+        if ($arg =~ /^-/) {
+
+            # --version
+            if ($arg =~ /^-v$|^--version$/) {
+                die("callgrind_annotate-$version\n");
+
+            # --show=A,B,C
+            } elsif ($arg =~ /^--show=(.*)$/) {
+                @show_events = split(/,/, $1);
+
+            # --sort=A,B,C, each event optionally carrying a per-event
+            # ":threshold" (e.g. --sort=Ir:95); a trailing '%' is tolerated.
+            } elsif ($arg =~ /^--sort=(.*)$/) {
+                @sort_events = split(/,/, $1);
+                foreach my $i (0 .. scalar @sort_events - 1) {
+                    if ($sort_events[$i] =~ /.*:([\d\.]+)%?$/) {
+                        my $th = $1;
+                        ($th >= 0 && $th <= 100) or die($usage);
+                        $sort_events[$i] =~ s/:.*//;
+                        $thresholds[$i] = $th;
+                    } else {
+                        $thresholds[$i] = 0;
+                    }
+                }
+
+            # --threshold=X (tolerates a trailing '%')
+            } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
+                # Validate before committing the value.
+                ($1 >= 0 && $1 <= 100) or die($usage);
+                $single_threshold = $1;
+
+            # --auto=yes|no
+            } elsif ($arg =~ /^--auto=(yes|no)$/) {
+                $auto_annotate = ($1 eq "yes") ? 1 : 0;
+
+            # --context=N
+            } elsif ($arg =~ /^--context=([\d\.]+)$/) {
+                $context = $1;
+                if ($context < 0) {
+                    die($usage);
+                }
+
+            # --inclusive=yes|no
+            } elsif ($arg =~ /^--inclusive=(yes|no)$/) {
+                $inclusive = ($1 eq "yes") ? 1 : 0;
+
+            # --tree=none|caller|calling|both
+            } elsif ($arg =~ /^--tree=(none|caller|calling|both)$/) {
+                $tree_caller = 1 if ($1 eq "caller" || $1 eq "both");
+                $tree_calling = 1 if ($1 eq "calling" || $1 eq "both");
+
+            # -I=<dir> / --include=<dir>
+            } elsif ($arg =~ /^(-I|--include)=(.*)$/) {
+                my $inc = $2;
+                $inc =~ s|/$||; # trim trailing '/'
+                push(@include_dirs, "$inc/");
+
+            } else { # -h and --help fall under this case
+                die($usage);
+            }
+
+        # Argument handling -- annotation file checking and selection.
+        # Stick filenames into a hash for quick 'n easy lookup throughout
+        } else {
+            if ($input_file eq "") {
+                $input_file = $arg;
+            }
+            else {
+                my $readable = 0;
+                foreach my $include_dir (@include_dirs) {
+                    if (-r $include_dir . $arg) {
+                        $readable = 1;
+                    }
+                }
+                $readable or die("File $arg not found in any of: @include_dirs\n");
+                $user_ann_files{$arg} = 1;
+            }
+        }
+    }
+
+    if ($input_file eq "") {
+        # NOTE(review): defaults to cachegrind's output name, not
+        # callgrind.out* -- confirm this is the intended default.
+        $input_file = (<cachegrind.out*>)[0];
+        if (!defined $input_file) {
+            $input_file = "cachegrind.out";
+        }
+        print "Reading data from '$input_file'...\n";
+    }
+}
+
+#-----------------------------------------------------------------------------
+# Reading of input file
+#-----------------------------------------------------------------------------
+# Return the numerically larger of two values.
+sub max ($$)
+{
+    my ($first, $second) = @_;
+    return $second if $second > $first;
+    return $first;
+}
+
+# Element-wise add CC array $src into CC array $dst.
+# Entries that are undefined or "." in $src are skipped, so an
+# undefined slot in $dst is not needlessly turned into 0.
+# Warnings are disabled around the loop: letting an undefined $dst
+# slot read as 0 is intentional, and measurably faster than testing
+# definedness of $dst by hand.
+sub add_array_a_to_b ($$)
+{
+    my ($src, $dst) = @_;
+
+    my $len = max(scalar @$src, scalar @$dst);
+    $^W = 0;
+    for (my $i = 0; $i < $len; $i++) {
+        next unless defined $src->[$i];
+        next if "." eq $src->[$i];
+        $dst->[$i] += $src->[$i];
+    }
+    $^W = 1;
+}
+
+# Turn one line of whitespace-separated event counts into a CC array
+# reference. "." entries are kept as-is (treated as undef by the
+# adders) and trailing missing entries are simply absent.
+sub line_to_CC ($)
+{
+    my ($line) = @_;
+    my @counts = split /\s+/, $line;
+    (scalar @counts <= scalar @events) or
+        die("Line $.: too many event counts\n");
+    return \@counts;
+}
+
+# Resolve a possibly-compressed position name of the form "(N) name".
+# The first occurrence of index N carries the real name and is
+# remembered per context ("fn"/"fl"/"ob"); later occurrences of "(N)"
+# alone are looked up. Uncompressed names are returned unchanged.
+sub uncompressed_name($$)
+{
+    my ($context, $name) = @_;
+
+    return $name unless $name =~ /^\((\d+)\)\s*(.*)$/;
+
+    my ($index, $realname) = ($1, $2);
+    if ($realname eq "") {
+        # Bare "(N)": fetch the name registered earlier.
+        $realname = $compressed{$context,$index};
+    }
+    else {
+        # "(N) name": register the mapping for later references.
+        $compressed{$context,$index} = $realname;
+    }
+    return $realname;
+}
+
+# Parse the profile data file: first the header (version/pid/cmd/
+# positions/events), then the body of position/cost lines, filling the
+# file-scope tables (%fn_totals, %all_ind_CCs, %call_CCs, ...) that
+# the printing routines below consume. Also finalises @show_order,
+# @sort_order and @thresholds once the "events:" line is known.
+sub read_input_file()
+{
+ open(INPUTFILE, "< $input_file") || die "File $input_file not opened\n";
+
+ my $line;
+ # NOTE(review): $line above is never used in this sub.
+
+ # Read header
+ while(<INPUTFILE>) {
+
+ # remove comments
+ s/#.*$//;
+
+ if (/^$/) { ; }
+
+ elsif (/^version:\s*(\d+)/) {
+ # Can't read format with major version > 1
+ ($1<2) or die("Can't read format with major version $1.\n");
+ }
+
+ elsif (/^pid:\s+(.*)$/) { $pid = $1; }
+ elsif (/^thread:\s+(.*)$/) { $thread = $1; }
+ elsif (/^part:\s+(.*)$/) { $part = $1; }
+ elsif (/^desc:\s+(.*)$/) {
+ my $dline = $1;
+ # suppress profile options in description output
+ if ($dline =~ /^Option:/) {;}
+ else { $desc .= "$dline\n"; }
+ }
+ elsif (/^cmd:\s+(.*)$/) { $cmd = $1; }
+ elsif (/^positions:\s+(.*)$/) {
+ my $positions = $1;
+ $has_line = ($positions =~ /line/);
+ $has_addr = ($positions =~ /(addr|instr)/);
+ }
+ elsif (/^events:\s+(.*)$/) {
+ $events = $1;
+
+ # events line is last in header
+ last;
+ }
+ else {
+ warn("WARNING: header line $. malformed, ignoring\n");
+ if ($verbose) { chomp; warn(" line: '$_'\n"); }
+ }
+ }
+
+ # Check for needed header entries
+ ($cmd ne "") or die("Line $.: missing command line\n");
+
+ # Read "events:" line. We make a temporary hash in which the Nth event's
+ # value is N, which is useful for handling --show/--sort options below.
+ ($events ne "") or die("Line $.: missing events line\n");
+ @events = split(/\s+/, $events);
+ my %events;
+ my $n = 0;
+ foreach my $event (@events) {
+ $events{$event} = $n;
+ $n++
+ }
+
+ # If no --show arg give, default to showing all events in the file.
+ # If --show option is used, check all specified events appeared in the
+ # "events:" line. Then initialise @show_order.
+ if (@show_events) {
+ foreach my $show_event (@show_events) {
+ (defined $events{$show_event}) or
+ die("--show event `$show_event' did not appear in input\n");
+ }
+ } else {
+ @show_events = @events;
+ }
+ foreach my $show_event (@show_events) {
+ push(@show_order, $events{$show_event});
+ }
+
+ # Do as for --show, but if no --sort arg given, default to sorting by
+ # column order (ie. first column event is primary sort key, 2nd column is
+ # 2ndary key, etc).
+ if (@sort_events) {
+ foreach my $sort_event (@sort_events) {
+ (defined $events{$sort_event}) or
+ die("--sort event `$sort_event' did not appear in input\n");
+ }
+ } else {
+ @sort_events = @events;
+ }
+ foreach my $sort_event (@sort_events) {
+ push(@sort_order, $events{$sort_event});
+ }
+
+ # If multiple threshold args weren't given via --sort, stick in the single
+ # threshold (either from --threshold if used, or the default otherwise) for
+ # the primary sort event, and 0% for the rest.
+ if (not @thresholds) {
+ foreach my $e (@sort_order) {
+ push(@thresholds, 0);
+ }
+ $thresholds[0] = $single_threshold;
+ }
+
+ # Parser state: current object/file/function position and the CC
+ # accumulators they map to. "c"-prefixed variables track the callee
+ # side of a pending call ("cob"/"cfi"/"cfn"/"calls" lines).
+ my $curr_obj = "";
+ my $curr_file;
+ my $curr_fn;
+ my $curr_name;
+ my $curr_line_num = 0;
+
+ my $curr_cobj = "";
+ my $curr_cfile = "";
+ my $curr_cfunc = "";
+ my $curr_cname;
+ my $curr_call_counter = 0;
+ my $curr_cfn_CC = [];
+
+ my $curr_fn_CC = [];
+ my $curr_file_ind_CCs = {}; # hash(line_num => CC)
+
+ # Read body of input file.
+ while (<INPUTFILE>) {
+ s/#.*$//; # remove comments
+ # Positions may be relative: "+n"/"-n" are offsets from, and "*"
+ # repeats, the previous cost line's position.
+ s/^\+(\d+)/$curr_line_num+$1/e;
+ s/^\-(\d+)/$curr_line_num-$1/e;
+ s/^\*/$curr_line_num/e;
+ if (s/^(\d+|0x\w+)\s+//) {
+ $curr_line_num = $1;
+ if ($has_addr) {
+ if ($has_line) {
+ # With "instr line" positions the second field is the
+ # line number, again possibly relative.
+ s/^\+(\d+)/$curr_line_num+$1/e;
+ s/^\-(\d+)/$curr_line_num-$1/e;
+ s/^\*/$curr_line_num/e;
+
+ if (s/^(\d+)\s+//) { $curr_line_num = $1; }
+ }
+ else { $curr_line_num = 0; }
+ }
+ my $CC = line_to_CC($_);
+
+ if ($curr_call_counter>0) {
+ # This cost line directly follows a "calls=" line: it is the
+ # cost of a call made from here, not of this function's own
+ # code. Accumulate it into the per-call tables.
+# print "Read ($curr_name => $curr_cname) $curr_call_counter\n";
+
+ if (defined $call_CCs{$curr_name,$curr_cname}) {
+ add_array_a_to_b($CC, $call_CCs{$curr_name,$curr_cname});
+ $call_counter{$curr_name,$curr_cname} += $curr_call_counter;
+ }
+ else {
+ $call_CCs{$curr_name,$curr_cname} = $CC;
+ $call_counter{$curr_name,$curr_cname} = $curr_call_counter;
+ }
+
+ my $tmp = $called_from_line->{$curr_file,$curr_line_num};
+ if (!defined $tmp) {
+ $func_of_line{$curr_file,$curr_line_num} = $curr_name;
+ }
+ $tmp = {} unless defined $tmp;
+ $$tmp{$curr_cname} = 1;
+ $called_from_line->{$curr_file,$curr_line_num} = $tmp;
+ $call_CCs{$curr_name,$curr_cname,$curr_line_num} = $CC;
+ $call_counter{$curr_name,$curr_cname,$curr_line_num} = $curr_call_counter;
+
+ $curr_call_counter = 0;
+
+ # inclusive costs
+ $curr_cfn_CC = $cfn_totals{$curr_cname};
+ $curr_cfn_CC = [] unless (defined $curr_cfn_CC);
+ add_array_a_to_b($CC, $curr_cfn_CC);
+ $cfn_totals{$curr_cname} = $curr_cfn_CC;
+
+ if ($inclusive) {
+ add_array_a_to_b($CC, $curr_fn_CC);
+ }
+ next;
+ }
+
+ add_array_a_to_b($CC, $curr_fn_CC);
+
+ # If curr_file is selected, add CC to curr_file list. We look for
+ # full filename matches; or, if auto-annotating, we have to
+ # remember everything -- we won't know until the end what's needed.
+ if ($auto_annotate || defined $user_ann_files{$curr_file}) {
+ my $tmp = $curr_file_ind_CCs->{$curr_line_num};
+ $tmp = [] unless defined $tmp;
+ add_array_a_to_b($CC, $tmp);
+ $curr_file_ind_CCs->{$curr_line_num} = $tmp;
+ }
+
+ } elsif (s/^fn=(.*)$//) {
+ # Commit result from previous function
+ $fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name);
+
+ # Setup new one
+ $curr_fn = uncompressed_name("fn",$1);
+ $curr_name = "$curr_file:$curr_fn";
+ $obj_name{$curr_name} = $curr_obj;
+ $curr_fn_CC = $fn_totals{$curr_name};
+ $curr_fn_CC = [] unless (defined $curr_fn_CC);
+
+ } elsif (s/^ob=(.*)$//) {
+ $curr_obj = uncompressed_name("ob",$1);
+
+ } elsif (s/^fl=(.*)$//) {
+ $all_ind_CCs{$curr_file} = $curr_file_ind_CCs
+ if (defined $curr_file);
+
+ $curr_file = uncompressed_name("fl",$1);
+ $curr_file_ind_CCs = $all_ind_CCs{$curr_file};
+ $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
+
+ } elsif (s/^(fi|fe)=(.*)$//) {
+ # Included-file position: same function continues in another
+ # source file (e.g. inlined code).
+ (defined $curr_name) or die("Line $.: Unexpected fi/fe line\n");
+ $fn_totals{$curr_name} = $curr_fn_CC;
+ $all_ind_CCs{$curr_file} = $curr_file_ind_CCs;
+
+ $curr_file = uncompressed_name("fl",$2);
+ $curr_name = "$curr_file:$curr_fn";
+ $curr_file_ind_CCs = $all_ind_CCs{$curr_file};
+ $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
+ $curr_fn_CC = $fn_totals{$curr_name};
+ $curr_fn_CC = [] unless (defined $curr_fn_CC);
+
+ } elsif (s/^\s*$//) {
+ # blank, do nothing
+
+ } elsif (s/^cob=(.*)$//) {
+ $curr_cobj = uncompressed_name("ob",$1);
+
+ } elsif (s/^cfi=(.*)$//) {
+ $curr_cfile = uncompressed_name("fl",$1);
+
+ } elsif (s/^cfn=(.*)$//) {
+ # Callee function: record caller/callee relation both ways.
+ # An earlier "cfi=" sets the callee's file; it defaults to the
+ # caller's file and is reset after use.
+ $curr_cfunc = uncompressed_name("fn",$1);
+ if ($curr_cfile eq "") {
+ $curr_cname = "$curr_file:$curr_cfunc";
+ }
+ else {
+ $curr_cname = "$curr_cfile:$curr_cfunc";
+ $curr_cfile = "";
+ }
+
+ my $tmp = $calling_funcs->{$curr_cname};
+ $tmp = {} unless defined $tmp;
+ $$tmp{$curr_name} = 1;
+ $calling_funcs->{$curr_cname} = $tmp;
+
+ my $tmp2 = $called_funcs->{$curr_name};
+ $tmp2 = {} unless defined $tmp2;
+ $$tmp2{$curr_cname} = 1;
+ $called_funcs->{$curr_name} = $tmp2;
+
+ } elsif (s/^calls=(\d+)//) {
+ $curr_call_counter = $1;
+
+ } elsif (s/^(jump|jcnd)=//) {
+ #ignore jump information
+
+ } elsif (s/^totals:\s+//) {
+ #ignore
+
+ } elsif (s/^summary:\s+//) {
+ $summary_CC = line_to_CC($_);
+
+ } else {
+ warn("WARNING: line $. malformed, ignoring\n");
+ if ($verbose) { chomp; warn(" line: '$_'\n"); }
+ }
+ }
+
+ # Check if summary line was present
+ if (not defined $summary_CC) {
+ warn("WARNING: missing final summary line, no summary will be printed\n");
+ }
+ else {
+ # Finish up handling final filename/fn_name counts
+ # NOTE(review): these final totals are only committed when a summary
+ # line was present -- confirm that is intentional.
+ $fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC
+ if (defined $curr_file && defined $curr_fn);
+ $all_ind_CCs{$curr_file} =
+ $curr_file_ind_CCs if (defined $curr_file);
+
+ (scalar(@$summary_CC) == @events)
+ or die("Line $.: summary event and total event mismatch\n");
+ }
+
+ # Correct inclusive totals
+ if ($inclusive) {
+ foreach my $name (keys %cfn_totals) {
+ $fn_totals{$name} = $cfn_totals{$name};
+ }
+ }
+
+ close(INPUTFILE);
+}
+
+#-----------------------------------------------------------------------------
+# Print options used
+#-----------------------------------------------------------------------------
+# Echo the profile description, the profiled target and all effective
+# option settings, between fancy separator lines.
+sub print_options ()
+{
+    print($fancy);
+    print($desc);
+
+    # Describe the profiled target, with PID/part/thread when known.
+    my $target = $cmd;
+    if ($pid ne "") {
+        my $extra = " (PID $pid";
+        $extra .= ", part $part" if $part ne "";
+        $extra .= ", thread $thread" if $thread ne "";
+        $target .= $extra . ")";
+    }
+    print("Profiled target: $target\n");
+    print("Events recorded: @events\n");
+    print("Events shown: @show_events\n");
+    print("Event sort order: @sort_events\n");
+    print("Thresholds: @thresholds\n");
+
+    # First include dir goes on the header line, the rest underneath.
+    my @dirs = @include_dirs;
+    shift(@dirs); # drop the "" entry, which is always the first
+    @dirs = ("") unless @dirs;
+    print("Include dirs: " . shift(@dirs) . "\n");
+    print(" $_\n") for @dirs;
+
+    # Same layout for the user-selected annotation files.
+    my @files = keys %user_ann_files;
+    @files = ("") unless @files;
+    print("User annotated: " . shift(@files) . "\n");
+    print(" $_\n") for @files;
+
+    print("Auto-annotation: " . ($auto_annotate ? "on" : "off") . "\n");
+    print("\n");
+}
+
+#-----------------------------------------------------------------------------
+# Print summary and sorted function totals
+#-----------------------------------------------------------------------------
+# Compare two CC arrays for sorting: descending by each event in
+# @sort_order in turn, with undefined counts treated as -1 (so they
+# sort last).
+sub mycmp ($$)
+{
+    my ($left, $right) = @_;
+
+    foreach my $idx (@sort_order) {
+        my $lv = defined $left->[$idx] ? $left->[$idx] : -1;
+        my $rv = defined $right->[$idx] ? $right->[$idx] : -1;
+        # Reverse (descending) numeric order.
+        my $order = $rv <=> $lv;
+        return $order if $order != 0;
+    }
+    return 0; # equal on every sort event
+}
+
+# Insert thousands separators into an integer string,
+# e.g. 1234567 -> "1,234,567".
+sub commify ($) {
+    my ($num) = @_;
+    while ($num =~ s/^(\d+)(\d{3})/$1,$2/) { }
+    return $num;
+}
+
+# Work out how wide each count column must be so nothing wraps: at
+# least the event name's width, and wide enough for the largest count
+# after commify() has inserted its separators.
+sub compute_CC_col_widths (@)
+{
+    my @cost_centres = @_;
+
+    # Start from the event-name widths as minimums.
+    my $widths = [ map { length($_) } @events ];
+
+    # Widen per column for the largest commified count seen.
+    foreach my $cc (@cost_centres) {
+        for (my $col = 0; $col < scalar(@$cc); $col++) {
+            next unless defined $cc->[$col];
+            my $digits = length $cc->[$col];
+            my $printed = $digits + int(($digits - 1) / 3);
+            $widths->[$col] = max($widths->[$col], $printed);
+        }
+    }
+    return $widths;
+}
+
+# Print one CC, right-aligned to the given column widths, in --show
+# order; undefined counts print as ".".
+sub print_CC ($$)
+{
+    my ($cc, $widths) = @_;
+
+    foreach my $col (@show_order) {
+        my $text = defined $cc->[$col] ? commify($cc->[$col]) : ".";
+        my $pad = $widths->[$col] - length($text);
+        print(' ' x $pad, "$text ");
+    }
+}
+
+# Print the header row of event names, right-aligned to the same
+# column widths used for the counts, in --show order.
+sub print_events ($)
+{
+    my ($widths) = @_;
+
+    foreach my $col (@show_order) {
+        my $name = $events[$col];
+        my $pad = $widths->[$col] - length($name);
+        print(' ' x $pad, "$name ");
+    }
+}
+
+# Prints summary and function totals (with separate column widths, so that
+# function names aren't pushed over unnecessarily by huge summary figures).
+# Also returns a hash containing all the files that are involved in getting the
+# events count above the thresholds (ie. all the interesting ones).
+sub print_summary_and_fn_totals ()
+{
+ my @fn_fullnames = keys %fn_totals;
+
+ # Work out the size of each column for printing (summary and functions
+ # separately).
+ my $summary_CC_col_widths = compute_CC_col_widths($summary_CC);
+ my $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals);
+
+ # Header and counts for summary
+ print($fancy);
+ print_events($summary_CC_col_widths);
+ print("\n");
+ print($fancy);
+ print_CC($summary_CC, $summary_CC_col_widths);
+ print(" PROGRAM TOTALS\n");
+ print("\n");
+
+ # Header for functions
+ print($fancy);
+ print_events($fn_CC_col_widths);
+ print(" file:function\n");
+ print($fancy);
+
+ # Sort function names into order dictated by --sort option.
+ @fn_fullnames = sort {
+ mycmp($fn_totals{$a}, $fn_totals{$b})
+ } @fn_fullnames;
+
+
+ # Assertion
+ (scalar @sort_order == scalar @thresholds) or
+ die("sort_order length != thresholds length:\n",
+ " @sort_order\n @thresholds\n");
+
+ my $threshold_files = {};
+ # @curr_totals has the same shape as @sort_order and @thresholds
+ my @curr_totals = ();
+ foreach my $e (@thresholds) {
+ push(@curr_totals, 0);
+ }
+
+ # Print functions, stopping when the threshold has been reached.
+ foreach my $fn_name (@fn_fullnames) {
+
+ # Stop when we've reached all the thresholds
+ my $reached_all_thresholds = 1;
+ foreach my $i (0 .. scalar @thresholds - 1) {
+ my $prop = $curr_totals[$i] * 100;
+ if ($summary_CC->[$sort_order[$i]] >0) {
+ $prop = $prop / $summary_CC->[$sort_order[$i]];
+ }
+ # '&=' is numeric bitwise AND here, combining the 0/1 flags.
+ $reached_all_thresholds &= ($prop >= $thresholds[$i]);
+ }
+ last if $reached_all_thresholds;
+
+ if ($tree_caller || $tree_calling) { print "\n"; }
+
+ if ($tree_caller && ($fn_name ne "???:???")) {
+ # Print function callers
+ my $tmp1 = $calling_funcs->{$fn_name};
+ if (defined $tmp1) {
+ foreach my $calling (keys %$tmp1) {
+ if (defined $call_counter{$calling,$fn_name}) {
+ print_CC($call_CCs{$calling,$fn_name}, $fn_CC_col_widths);
+ print" < $calling (";
+ print $call_counter{$calling,$fn_name} . "x)";
+ if (defined $obj_name{$calling}) {
+ print " [$obj_name{$calling}]";
+ }
+ print "\n";
+ }
+ }
+ }
+ }
+
+ # Print function results
+ my $fn_CC = $fn_totals{$fn_name};
+ print_CC($fn_CC, $fn_CC_col_widths);
+ if ($tree_caller || $tree_calling) { print " * "; }
+ print(" $fn_name");
+ if (defined $obj_name{$fn_name}) {
+ print " [$obj_name{$fn_name}]";
+ }
+ print "\n";
+
+ if ($tree_calling && ($fn_name ne "???:???")) {
+ # Print called functions
+ my $tmp2 = $called_funcs->{$fn_name};
+ if (defined $tmp2) {
+ foreach my $called (keys %$tmp2) {
+ if (defined $call_counter{$fn_name,$called}) {
+ print_CC($call_CCs{$fn_name,$called}, $fn_CC_col_widths);
+ print" > $called (";
+ print $call_counter{$fn_name,$called} . "x)";
+ if (defined $obj_name{$called}) {
+ print " [$obj_name{$called}]";
+ }
+ print "\n";
+ }
+ }
+ }
+ }
+
+ # Update the threshold counts
+ my $filename = $fn_name;
+ $filename =~ s/:.+$//; # remove function name
+ $threshold_files->{$filename} = 1;
+ foreach my $i (0 .. scalar @sort_order - 1) {
+ # In inclusive mode the running total is reset each iteration to
+ # summary minus this function's (inclusive) cost, rather than
+ # accumulated, since inclusive costs overlap between functions.
+ if ($inclusive) {
+ $curr_totals[$i] = $summary_CC->[$sort_order[$i]] -
+ $fn_CC->[$sort_order[$i]]
+ if (defined $fn_CC->[$sort_order[$i]]);
+ } else {
+ $curr_totals[$i] += $fn_CC->[$sort_order[$i]]
+ if (defined $fn_CC->[$sort_order[$i]]);
+ }
+ }
+ }
+ print("\n");
+
+ return $threshold_files;
+}
+
+#-----------------------------------------------------------------------------
+# Annotate selected files
+#-----------------------------------------------------------------------------
+
+# Issue a warning that the source file is more recent than the input file.
+sub warning_on_src_more_recent_than_inputfile ($)
+{
+ my $src_file = $_[0];
+
+ # The banner below is user-visible output; $src_file and $input_file
+ # are interpolated into it.
+ my $warning = <<END
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Source file '$src_file' is more recent than input file '$input_file'.
+@ Annotations may not be correct.
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+END
+;
+ print($warning);
+}
+
+# If there is information about lines not in the file, issue a warning
+# explaining possible causes.
+sub warning_on_nonexistent_lines ($$$)
+{
+ # Warn that the profile recorded costs for line numbers beyond the end
+ # of $src_file, with a best-guess cause.  The third argument (the list
+ # of excess line numbers) is currently unused in the message body.
+ my ($src_more_recent_than_inputfile, $src_file, $excess_line_nums) = @_;
+ my $cause_and_solution;
+
+ # Pick the most plausible explanation for the bogus line numbers.
+ if ($src_more_recent_than_inputfile) {
+ $cause_and_solution = <<END
+@@ cause: '$src_file' has changed since information was gathered.
+@@ If so, a warning will have already been issued about this.
+@@ solution: Recompile program and rerun under "valgrind --cachesim=yes" to
+@@ gather new information.
+END
+ # We suppress warnings about .h files
+ } elsif ($src_file =~ /\.h$/) {
+ $cause_and_solution = <<END
+@@ cause: bug in the Valgrind's debug info reader that screws up with .h
+@@ files sometimes
+@@ solution: none, sorry
+END
+ } else {
+ $cause_and_solution = <<END
+@@ cause: not sure, sorry
+END
+ }
+
+ # Assemble and print the banner around the chosen cause/solution text.
+ my $warning = <<END
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@
+@@ Information recorded about lines past the end of '$src_file'.
+@@
+@@ Probable cause and solution:
+$cause_and_solution@@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+END
+;
+ print($warning);
+}
+
+# Annotate the source of every user-selected file plus (with
+# --auto=yes) every file that passed the cost threshold.  Reads the
+# globals %user_ann_files, %all_ind_CCs, @include_dirs, $context,
+# $auto_annotate, %call_CCs/%call_counter and the formatting helpers.
+sub annotate_ann_files($)
+{
+ my ($threshold_files) = @_;
+
+ my %all_ann_files;
+ my @unfound_auto_annotate_files;
+ my $printed_totals_CC = [];
+
+ # If auto-annotating, add interesting files (but not "???")
+ if ($auto_annotate) {
+ delete $threshold_files->{"???"};
+ %all_ann_files = (%user_ann_files, %$threshold_files)
+ } else {
+ %all_ann_files = %user_ann_files;
+ }
+
+ # Track if we did any annotations.
+ my $did_annotations = 0;
+
+ LOOP:
+ foreach my $src_file (keys %all_ann_files) {
+
+ # Try the file name relative to each include dir; first hit wins.
+ my $opened_file = "";
+ my $full_file_name = "";
+ foreach my $include_dir (@include_dirs) {
+ my $try_name = $include_dir . $src_file;
+ if (open(INPUTFILE, "< $try_name")) {
+ $opened_file = $try_name;
+ $full_file_name = ($include_dir eq ""
+ ? $src_file
+ : "$include_dir + $src_file");
+ last;
+ }
+ }
+
+ if (not $opened_file) {
+ # Failed to open the file. If chosen on the command line, die.
+ # If arose from auto-annotation, print a little message.
+ if (defined $user_ann_files{$src_file}) {
+ die("File $src_file not opened in any of: @include_dirs\n");
+
+ } else {
+ push(@unfound_auto_annotate_files, $src_file);
+ }
+
+ } else {
+ # File header (distinguish between user- and auto-selected files).
+ print("$fancy");
+ my $ann_type =
+ (defined $user_ann_files{$src_file} ? "User" : "Auto");
+ print("-- $ann_type-annotated source: $full_file_name\n");
+ print("$fancy");
+
+ # Get file's CCs
+ my $src_file_CCs = $all_ind_CCs{$src_file};
+ if (!defined $src_file_CCs) {
+ print(" No information has been collected for $src_file\n\n");
+ next LOOP;
+ }
+
+ $did_annotations = 1;
+
+ # Numeric, not lexicographic sort!
+ my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;
+
+ # If $src_file more recent than cachegrind.out, issue warning
+ my $src_more_recent_than_inputfile = 0;
+ if ((stat $opened_file)[9] > (stat $input_file)[9]) {
+ $src_more_recent_than_inputfile = 1;
+ warning_on_src_more_recent_than_inputfile($src_file);
+ }
+
+ # Work out the size of each column for printing
+ my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
+
+ # Events header
+ print_events($CC_col_widths);
+ print("\n\n");
+
+ # Shift out 0 if it's in the line numbers (from unknown entries,
+ # likely due to bugs in Valgrind's stabs debug info reader)
+ shift(@line_nums) if (0 == $line_nums[0]);
+
+ # Finds interesting line ranges -- all lines with a CC, and all
+ # lines within $context lines of a line with a CC.
+ # @pairs holds alternating lower/upper markers; overlapping
+ # windows are merged by the inner while loop.
+ my $n = @line_nums;
+ my @pairs;
+ for (my $i = 0; $i < $n; $i++) {
+ push(@pairs, $line_nums[$i] - $context); # lower marker
+ while ($i < $n-1 &&
+ $line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
+ $i++;
+ }
+ push(@pairs, $line_nums[$i] + $context); # upper marker
+ }
+
+ # Annotate chosen lines, tracking total counts of lines printed.
+ # $. (the input line counter on INPUTFILE) drives the skipping.
+ $pairs[0] = 1 if ($pairs[0] < 1);
+ while (@pairs) {
+ my $low = shift @pairs;
+ my $high = shift @pairs;
+ # Skip unannotated lines up to the start of this range.
+ while ($. < $low-1) {
+ my $tmp = <INPUTFILE>;
+ last unless (defined $tmp); # hack to detect EOF
+ }
+ my $src_line;
+ # Print line number, unless start of file
+ print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
+ while (($. < $high) && ($src_line = <INPUTFILE>)) {
+ if (defined $line_nums[0] && $. == $line_nums[0]) {
+ print_CC($src_file_CCs->{$.}, $CC_col_widths);
+ add_array_a_to_b($src_file_CCs->{$.},
+ $printed_totals_CC);
+ shift(@line_nums);
+
+ } else {
+ print_CC( [], $CC_col_widths);
+ }
+
+ print(" $src_line");
+
+ # Append per-call-site lines ("=> callee (Nx)") for calls
+ # made from this source line, if any were recorded.
+ my $tmp = $called_from_line->{$src_file,$.};
+ my $func = $func_of_line{$src_file,$.};
+ if (defined $tmp) {
+ foreach my $called (keys %$tmp) {
+ if (defined $call_CCs{$func,$called,$.}) {
+ print_CC($call_CCs{$func,$called,$.}, $CC_col_widths);
+ print " => $called (";
+ print $call_counter{$func,$called,$.} . "x)\n";
+ }
+ }
+ }
+ }
+ # Print line number, unless EOF
+ if ($src_line) {
+ print("-- line $high " . '-' x 40 . "\n");
+ } else {
+ last;
+ }
+ }
+
+ # If there was info on lines past the end of the file...
+ if (@line_nums) {
+ foreach my $line_num (@line_nums) {
+ print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
+ print(" <bogus line $line_num>\n");
+ }
+ print("\n");
+ warning_on_nonexistent_lines($src_more_recent_than_inputfile,
+ $src_file, \@line_nums);
+ }
+ print("\n");
+
+ # Print summary of counts attributed to file but not to any
+ # particular line (due to incomplete debug info).
+ if ($src_file_CCs->{0}) {
+ print_CC($src_file_CCs->{0}, $CC_col_widths);
+ print(" <counts for unidentified lines in $src_file>\n\n");
+ }
+
+ close(INPUTFILE);
+ }
+ }
+
+ # Print list of unfound auto-annotate selected files.
+ if (@unfound_auto_annotate_files) {
+ print("$fancy");
+ print("The following files chosen for auto-annotation could not be found:\n");
+ print($fancy);
+ foreach my $f (@unfound_auto_annotate_files) {
+ print(" $f\n");
+ }
+ print("\n");
+ }
+
+ # If we did any annotating, print what proportion of events were covered by
+ # annotated lines above.
+ if ($did_annotations) {
+ my $percent_printed_CC;
+ foreach (my $i = 0; $i < @$summary_CC; $i++) {
+ $percent_printed_CC->[$i] =
+ sprintf("%.0f",
+ $printed_totals_CC->[$i] / $summary_CC->[$i] * 100);
+ }
+ my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC);
+ print($fancy);
+ print_events($pp_CC_col_widths);
+ print("\n");
+ print($fancy);
+ print_CC($percent_printed_CC, $pp_CC_col_widths);
+ print(" percentage of events annotated\n\n");
+ }
+}
+
+#----------------------------------------------------------------------------
+# "main()"
+#----------------------------------------------------------------------------
+# Parse options, ingest the profile data file, then emit the three
+# report sections: the option echo, the summary/function totals (which
+# also yields the set of files passing the cost threshold), and the
+# per-file source annotations for those files.
+process_cmd_line();
+read_input_file();
+print_options();
+my $threshold_files = print_summary_and_fn_totals();
+annotate_ann_files($threshold_files);
+
+##--------------------------------------------------------------------##
+##--- end vg_annotate.in ---##
+##--------------------------------------------------------------------##
+
+
diff --git a/callgrind/callgrind_control.in b/callgrind/callgrind_control.in
new file mode 100644
index 0000000..869c9b3
--- /dev/null
+++ b/callgrind/callgrind_control.in
@@ -0,0 +1,485 @@
+#! /usr/bin/perl -w
+##--------------------------------------------------------------------##
+##--- Control supervision of applications run with callgrind ---##
+##--- callgrind_control ---##
+##--------------------------------------------------------------------##
+
+# This file is part of Callgrind, a cache-simulator and call graph
+# tracer built on Valgrind.
+#
+# Copyright (C) 2003,2004,2005 Josef Weidendorfer
+# Josef.Weidendorfer@gmx.de
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# Scan /tmp for callgrind.info.* files and collect into the global
+# @pids the PIDs of processes that are still alive and really running
+# under callgrind.  For each accepted PID, the info file is parsed
+# into the globals %mversion, %cmd, %control, %base and %result.
+sub getCallgrindPids {
+
+ @pids = ();
+ foreach $f (</tmp/callgrind.info.*>) {
+ ($pid) = ($f =~ /info\.(\d+)/);
+ # Skip file names without a PID; the defined check avoids an
+ # "uninitialized value" warning under -w when the match fails.
+ if (!defined $pid || $pid eq "") { next; }
+ $mapfile = "/proc/$pid/maps";
+ if (!-e $mapfile) { next; }
+
+ # Verify the process image really is callgrind; skip the PID when
+ # the map file cannot be opened (e.g. it belongs to another user).
+ open MAP, "<$mapfile" or next;
+ $found = 0;
+ while(<MAP>) {
+ # works both for VG 3.0 and VG 3.1
+ if (/callgrind/) { $found = 1; }
+ }
+ close MAP;
+ if ($found == 0) { next; }
+
+ open INFO, "<$f" or next;
+ while(<INFO>) {
+ if (/version: (\d+)/) { $mversion{$pid} = $1; }
+ if (/cmd: (.+)$/) { $cmd{$pid} = $1; }
+ if (/control: (.+)$/) { $control{$pid} = $1; }
+ if (/base: (.+)$/) { $base{$pid} = $1; }
+ if (/result: (.+)$/) { $result{$pid} = $1; }
+ }
+ close INFO;
+
+ # A missing "version:" line is treated as version 0 (accepted),
+ # matching the old behaviour but without the undef warning.
+ $mversion{$pid} = 0 unless defined $mversion{$pid};
+ if ($mversion{$pid} > 1) {
+ #print " Unsupported Callgrind Major Version $mversion.\n\n";
+ next;
+ }
+
+ push(@pids, $pid);
+ }
+}
+
+# Print the tool banner at most once per run; fully suppressed in
+# quiet mode (the printed flag is still set so callers see it as done).
+sub printHeader {
+ return if $headerPrinted;
+ $headerPrinted = 1;
+ return if $beQuiet;
+
+ print "Observe the status and control currently active callgrind runs.\n";
+ print "(C) 2003-2005, Josef Weidendorfer (Josef.Weidendorfer\@gmx.de)\n\n";
+}
+
+sub printVersion {
+ # @VERSION@ is substituted by configure at build time; exits the script.
+ print "callgrind_control-@VERSION@\n";
+ exit;
+}
+
+sub printHelp {
+ # Print the banner (if not yet printed) and the usage text, then exit.
+ printHeader;
+
+ print "Usage: callgrind_control [options] [ <PID>|<Name> ...]\n\n";
+ print "If no PIDs/Names are given, an action is applied to all currently\n";
+ print "active Callgrind runs. Default action is printing short information.\n\n";
+ print "Options:\n";
+ print " -h Print this help text\n";
+ print " -v Print version\n";
+ print " -q Be quiet\n";
+ print " -l Print more information\n";
+ print " -s Print status information\n";
+ print " -b Print backtrace information\n";
+ print " -e [A,..] Print event counters for A,.. [default: all]\n";
+ print " -d [str] Request a profile dump, include <str> as trigger hint\n";
+ print " -z Zero all cost counters\n";
+ print " -k Kill\n";
+ print " -i on/off Switch instrumentation state on/off\n";
+ print " -w <dir> Manually specify the working directory of a callgrind run\n";
+ print "\n";
+ exit;
+}
+
+
+#
+# Parts more or less copied from ct_annotate (author: Nicholas Nethercote)
+#
+
+# Build the event lookup tables from the collected-events string in the
+# global $events: %events maps an event name to its column index,
+# @show_events is the user selection (defaulting to all collected
+# events), and @show_order lists the selected column indices.
+sub prepareEvents {
+
+ @events = split(/\s+/, $events);
+ %events = ();
+ $n = 0;
+ foreach my $e (@events) { $events{$e} = $n++; }
+
+ if (@show_events) {
+ # Warn about requested events that are not in the collected set.
+ foreach my $show_event (@show_events) {
+ print "Warning: Event `$show_event' is not being collected\n"
+ unless defined $events{$show_event};
+ }
+ } else {
+ # No explicit selection: show everything that was collected.
+ @show_events = @events;
+ }
+
+ # Map the selected event names to their column positions.
+ @show_order = map { $events{$_} } @show_events;
+}
+
+# Return the larger of two numbers.
+sub max ($$)
+{
+ my ($p, $q) = @_;
+ return $p if $p > $q;
+ return $q;
+}
+
+sub line_to_CC ($)
+{
+ # Split a whitespace-separated line of event counts into an array ref.
+ # Uses the global @events as the maximum expected column count and the
+ # current input line number $. in the error message.
+ my @CC = (split /\s+/, $_[0]);
+ (@CC <= @events) or die("Line $.: too many event counts\n");
+ return \@CC;
+}
+
+# Format an integer with "," thousands separators, e.g. 1234567
+# becomes "1,234,567".
+sub commify ($) {
+ my ($num) = @_;
+ # Repeatedly insert a comma before the trailing group of three
+ # digits until no uncomma'd group of four-or-more digits remains.
+ while ($num =~ s/^(\d+)(\d{3})/$1,$2/) { }
+ return $num;
+}
+
+# Compute per-event print column widths from a list of cost-centre
+# array refs: each column is wide enough for its event name and for the
+# widest count, including the commas commify() will insert.
+sub compute_CC_col_widths (@)
+{
+ my @CCs = @_;
+ my $CC_col_widths = [];
+
+ # Initialise with minimum widths (from event names)
+ foreach my $event (@events) {
+ push(@$CC_col_widths, length($event));
+ }
+
+ # Find maximum width count for each column. @CC_col_width positions
+ # correspond to @CC positions.
+ foreach my $CC (@CCs) {
+ foreach my $i (0 .. scalar(@$CC)-1) {
+ if (defined $CC->[$i]) {
+ # Find length, accounting for commas that will be added
+ # (one comma per full group of three digits).
+ my $length = length $CC->[$i];
+ my $clength = $length + int(($length - 1) / 3);
+ $CC_col_widths->[$i] = max($CC_col_widths->[$i], $clength);
+ }
+ }
+ }
+ return $CC_col_widths;
+}
+
+# Print the CC with each column's size dictated by $CC_col_widths.
+sub print_CC ($$)
+{
+ # Print one cost centre: for each selected event (global @show_order)
+ # print the commified count right-aligned to its column width, or "."
+ # when no count was recorded for that event.
+ my ($CC, $CC_col_widths) = @_;
+
+ foreach my $i (@show_order) {
+ my $count = (defined $CC->[$i] ? commify($CC->[$i]) : ".");
+ my $space = ' ' x ($CC_col_widths->[$i] - length($count));
+ print("$space$count ");
+ }
+}
+
+# Print the names of the selected events (global @show_order into
+# global @events), each right-aligned to the column width supplied in
+# the array ref argument.
+sub print_events ($)
+{
+ my ($widths) = @_;
+
+ foreach my $i (@show_order) {
+ my $name = $events[$i];
+ my $pad = ' ' x ($widths->[$i] - length($name));
+ print("$pad$name ");
+ }
+}
+
+
+
+#
+# Main
+#
+
+# Detect all currently running callgrind processes first.
+getCallgrindPids;
+
+# Option state, filled in by the argument loop below.  The request
+# flags use 0 = not requested, 1 = option seen (optional argument may
+# follow), 2 = argument consumed.
+$requestEvents = 0;
+$requestDump = 0;
+$switchInstr = 0;
+$headerPrinted = 0;
+$beQuiet = 0;
+$dumpHint = "";
+$gotW = 0;
+$workingDir = "";
+
+# Parse command line arguments.  Non-option arguments are either the
+# optional argument of a preceding -d/-e/-i/-w, or a PID/name selecting
+# a subset of the detected callgrind runs (collected in %spids).
+%spids = ();
+foreach $arg (@ARGV) {
+ if ($arg =~ /^-/) {
+ # A new option closes any pending optional-argument state.
+ if ($requestDump == 1) { $requestDump = 2; }
+ if ($requestEvents == 1) { $requestEvents = 2; }
+ if ($gotW == 1) { $gotW = 2; }
+
+ if ($arg =~ /^-?-h/) { printHelp; }
+ if ($arg =~ /^-?-v/) { printVersion; }
+ if ($arg =~ /^-q/) { $beQuiet = 1; next; }
+ if ($arg =~ /^-l/) { $printLong = 1; next; }
+ if ($arg =~ /^-s/) { $printStatus = 1; next; }
+ if ($arg =~ /^-b/) { $printBacktrace = 1; next; }
+ if ($arg =~ /^-d/) { $requestDump = 1; next; }
+ if ($arg =~ /^-z/) { $requestZero = 1; next; }
+ if ($arg =~ /^-k/) { $requestKill = 1; next; }
+ if ($arg =~ /^-e/) { $requestEvents = 1; next; }
+ if ($arg =~ /^-i/) { $switchInstr = 1; next; }
+ if ($arg =~ /^-w/) { $gotW = 1; next; }
+
+ # Unrecognized option: printHelp exits.
+ printHeader;
+ print "Unknown option '$arg'.\n\n";
+ printHelp;
+ }
+
+ if ($arg =~ /^[A-Za-z_]/) {
+ # arguments of -d/-e/-i are non-numeric
+ if ($requestDump == 1) {
+ $requestDump = 2;
+ $dumpHint = $arg;
+ next;
+ }
+
+ if ($requestEvents == 1) {
+ $requestEvents = 2;
+ @show_events = split(/,/, $arg);
+ next;
+ }
+
+ if ($switchInstr == 1) {
+ $switchInstr = 2;
+ $switchInstrMode = "+";
+ if (($arg eq "off") || ($arg eq "no")) {
+ $switchInstrMode = "-";
+ }
+ next;
+ }
+ }
+
+ # -w takes a mandatory directory argument (numeric names allowed).
+ if ($gotW == 1) {
+ $gotW = 2;
+ $workingDir = $arg;
+ if (!-d $workingDir) {
+ print "Error: directory '$workingDir' does not exist.\n";
+ printHelp;
+ }
+ next;
+ }
+
+ # Otherwise the argument selects a run: by exact PID, or by a
+ # prefix match against the supervised commands.
+ if (defined $cmd{$arg}) { $spids{$arg} = 1; next; }
+ $nameFound = 0;
+ foreach $p (@pids) {
+ if ($cmd{$p} =~ /^$arg/) {
+ $nameFound = 1;
+ $spids{$p} = 1;
+ }
+ }
+ if ($nameFound) { next; }
+
+ printHeader;
+ print "Non-existent Callgrind task with PID/Name '$arg'.\n\n";
+ printHelp;
+}
+
+# With -w, bypass auto-detection entirely and control the run living in
+# the given working directory via a fake PID 0 entry.
+if ($workingDir ne "") {
+ # Generate dummy information for dummy pid 0
+ $pid = "0";
+ $mversion{$pid} = "@VERSION@";
+ $cmd{$pid} = "???";
+ $base{$pid} = $workingDir;
+ $control{$pid} = "$workingDir/callgrind.cmd";
+ # do not wait for any result...
+ $result{$pid} = "";
+
+ # Only handle this faked callgrind run
+ @pids = ($pid);
+}
+
+if (scalar @pids == 0) {
+ print "No active callgrind runs detected.\n";
+ #print "Detection fails when /proc/*/maps is not readable.\n";
+ print "[Detection can fail on some systems; to work around this,\n";
+ print " specify the working directory of a callgrind run with '-w']\n";
+ exit;
+}
+
+# If any PIDs/names were given, restrict the action to those runs.
+@spids = keys %spids;
+if (scalar @spids >0) { @pids = @spids; }
+
+# Translate the option flags into the single command string written to
+# each run's control file; later assignments override earlier ones.
+$command = "";
+$waitForAnswer = 0;
+if ($requestDump) {
+ $command = "Dump";
+ if ($dumpHint ne "") { $command .= " ".$dumpHint; }
+}
+if ($requestZero) { $command = "Zero"; }
+if ($requestKill) { $command = "Kill"; }
+if ($switchInstr) { $command = $switchInstrMode."Instrumentation"; }
+if ($printStatus || $printBacktrace || $requestEvents) {
+ $command = "Status";
+ $waitForAnswer = 1;
+}
+
+# For every selected run: send the command through its control file,
+# wait until callgrind has consumed it, and (for "Status") parse and
+# pretty-print the result file.
+foreach $pid (@pids) {
+ $pidstr = "PID $pid: ";
+ print $pidstr.$cmd{$pid};
+
+ if ($command eq "") {
+ if ($printLong) {
+ #print " " x length $pidstr;
+ print " (in $base{$pid})\n";
+ }
+ else {
+ print "\n";
+ }
+ next;
+ }
+ else {
+ if (! (open CONTROL, ">$control{$pid}")) {
+ print " [sending '$command' failed: permission denied]\n";
+ next;
+ }
+ print " [requesting '$command'...]\n";
+ print CONTROL $command;
+ close CONTROL;
+
+ # Callgrind removes the control file once the command is handled.
+ while(-e $control{$pid}) {
+ # sleep for 250 ms
+ select(undef, undef, undef, 0.25);
+ }
+ }
+
+ # The defined check avoids an undef warning when the info file had
+ # no "result:" line (e.g. the -w dummy entry).
+ if (!defined $result{$pid} || $result{$pid} eq "") { $waitForAnswer=0; }
+ if (!$waitForAnswer) { print " OK.\n"; next; }
+
+ if (! (open RESULT, "<$result{$pid}")) {
+ print " Warning: Can't open expected result file $result{$pid}.\n";
+ next;
+ }
+
+ # Reset all per-PID parse state so nothing leaks from the previous
+ # iteration.
+ @tids = ();
+ $ctid = 0;
+ %fcount = ();
+ %func = ();
+ %calls = ();
+ %events = ();
+ @events = ();
+ %totals = ();
+
+ $exec_bbs = 0;
+ $dist_bbs = 0;
+ $exec_calls = 0;
+ $dist_calls = 0;
+ $dist_ctxs = 0;
+ $dist_funcs = 0;
+ $threads = 0;
+ $events = "";
+ # Must be reset here: a stale "off" from a previous PID (or undef on
+ # the first) would otherwise poison the check below.
+ $instrumentation = "";
+
+ while(<RESULT>) {
+ if (/function-(\d+)-(\d+): (.+)$/) {
+ if ($ctid != $1) {
+ $ctid = $1;
+ push(@tids, $ctid);
+ $fcount{$ctid} = 0;
+ }
+ $fcount{$ctid}++;
+ $func{$ctid,$fcount{$ctid}} = $3;
+ }
+ elsif (/calls-(\d+)-(\d+): (.+)$/) {
+ if ($ctid != $1) { next; }
+ $calls{$ctid,$fcount{$ctid}} = $3;
+ }
+ elsif (/events-(\d+)-(\d+): (.+)$/) {
+ if ($ctid != $1) { next; }
+ $events{$ctid,$fcount{$ctid}} = line_to_CC($3);
+ }
+ elsif (/events-(\d+): (.+)$/) {
+ if (scalar @events == 0) { next; }
+ $totals{$1} = line_to_CC($2);
+ }
+ elsif (/executed-bbs: (\d+)/) { $exec_bbs = $1; }
+ elsif (/distinct-bbs: (\d+)/) { $dist_bbs = $1; }
+ elsif (/executed-calls: (\d+)/) { $exec_calls = $1; }
+ elsif (/distinct-calls: (\d+)/) { $dist_calls = $1; }
+ elsif (/distinct-functions: (\d+)/) { $dist_funcs = $1; }
+ elsif (/distinct-contexts: (\d+)/) { $dist_ctxs = $1; }
+ elsif (/events: (.+)$/) { $events = $1; prepareEvents; }
+ elsif (/threads: (\d+)$/) { $threads = $1; }
+ elsif (/instrumentation: (\w+)$/) { $instrumentation = $1; }
+ }
+
+ unlink $result{$pid};
+
+ # NOTE(review): this aborts the whole script, skipping any remaining
+ # selected PIDs -- presumably intentional, but worth confirming.
+ if ($instrumentation eq "off") {
+ print " No information available as instrumentation is switched off.\n\n";
+ exit;
+ }
+
+ if ($printStatus) {
+ if ($requestEvents <1) {
+ print " Number of threads: $threads\n";
+ print " Events collected: $events\n";
+ }
+
+ print " Functions: ".commify($dist_funcs);
+ print " (executed ".commify($exec_calls);
+ print ", contexts ".commify($dist_ctxs).")\n";
+
+ print " Basic blocks: ".commify($dist_bbs);
+ print " (executed ".commify($exec_bbs);
+ print ", call sites ".commify($dist_calls).")\n";
+ }
+
+ if ($requestEvents >0) {
+ $totals_width = compute_CC_col_widths(values %totals);
+ print "\n Totals:";
+ print_events($totals_width);
+ print("\n");
+ foreach $tid (@tids) {
+ print " Th".substr(" ".$tid,-2)." ";
+ print_CC($totals{$tid}, $totals_width);
+ print("\n");
+ }
+ }
+
+ if ($printBacktrace) {
+
+ if ($requestEvents >0) {
+ $totals_width = compute_CC_col_widths(values %events);
+ }
+
+ foreach $tid (@tids) {
+ print "\n Frame: ";
+ if ($requestEvents >0) {
+ print_events($totals_width);
+ }
+ print "Backtrace for Thread $tid\n";
+
+ # Walk the frames top-down; cap the printout at 100 frames.
+ $i = $fcount{$tid};
+ $c = 0;
+ while($i>0 && $c<100) {
+ $fc = substr(" $c",-2);
+ print " [$fc] ";
+ if ($requestEvents >0) {
+ print_CC($events{$tid,$i-1}, $totals_width);
+ }
+ print $func{$tid,$i};
+ if ($i > 1) {
+ print " (".$calls{$tid,$i-1}." x)";
+ }
+ print "\n";
+ $i--;
+ $c++;
+ }
+ print "\n";
+ }
+ }
+ print "\n";
+}
+
diff --git a/callgrind/callstack.c b/callgrind/callstack.c
new file mode 100644
index 0000000..6e14b2e
--- /dev/null
+++ b/callgrind/callstack.c
@@ -0,0 +1,424 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- ct_callstack.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+/*------------------------------------------------------------*/
+/*--- Call stack, operations ---*/
+/*------------------------------------------------------------*/
+
+/* Stack of current thread. Gets initialized when switching to 1st thread.
+ *
+ * The artificial call stack is an array of call_entry's, representing
+ * stack frames of the executing program.
+ * Array call_stack and call_stack_esp have same size and grow on demand.
+ * Array call_stack_esp holds SPs of corresponding stack frames.
+ *
+ */
+
+#define N_CALL_STACK_INITIAL_ENTRIES 500
+
+call_stack CLG_(current_call_stack);
+
+/* Initialize call stack <s> with N_CALL_STACK_INITIAL_ENTRIES capacity.
+ * Every enter_cost pointer is cleared; it is (re)filled lazily via
+ * CLG_(copy_cost_lz) when a frame is pushed. */
+void CLG_(init_call_stack)(call_stack* s)
+{
+ Int i;
+
+ CLG_ASSERT(s != 0);
+
+ s->size = N_CALL_STACK_INITIAL_ENTRIES;
+ s->entry = (call_entry*) CLG_MALLOC(s->size * sizeof(call_entry));
+ s->sp = 0;
+ s->entry[0].cxt = 0; /* for assertion in push_cxt() */
+
+ for(i=0; i<s->size; i++) s->entry[i].enter_cost = 0;
+}
+
+/* Return a pointer to the entry at position <sp> of the current call
+ * stack; <sp> must not be beyond the current stack pointer. */
+call_entry* CLG_(get_call_entry)(Int sp)
+{
+ call_stack* cs = &(CLG_(current_call_stack));
+
+ CLG_ASSERT(sp <= cs->sp);
+ return cs->entry + sp;
+}
+
+/* Save the current call stack's fields into *dst.  Note: only the
+ * descriptor is copied; the entry array is shared, not duplicated. */
+void CLG_(copy_current_call_stack)(call_stack* dst)
+{
+ call_stack* src = &(CLG_(current_call_stack));
+
+ CLG_ASSERT(dst != 0);
+
+ dst->size = src->size;
+ dst->entry = src->entry;
+ dst->sp = src->sp;
+}
+
+/* Make *s the current call stack (counterpart of
+ * CLG_(copy_current_call_stack); the entry array is adopted, not copied). */
+void CLG_(set_current_call_stack)(call_stack* s)
+{
+ call_stack* cur = &(CLG_(current_call_stack));
+
+ CLG_ASSERT(s != 0);
+
+ cur->size = s->size;
+ cur->entry = s->entry;
+ cur->sp = s->sp;
+}
+
+
+/* Grow the current call stack (by doubling) so that index <i> is
+ * valid, i.e. size >= i+1 is NOT guaranteed, only size >= i; callers
+ * pass the wanted index + 1.  Newly added entries get a cleared
+ * enter_cost, like in CLG_(init_call_stack). */
+static __inline__
+void ensure_stack_size(Int i)
+{
+ Int oldsize;
+ call_stack *cs = &CLG_(current_call_stack);
+
+ if (i < cs->size) return;
+
+ oldsize = cs->size;
+ cs->size *= 2;
+ while (i > cs->size) cs->size *= 2;
+
+ /* VG_(realloc) preserves the old entries. */
+ cs->entry = (call_entry*) VG_(realloc)(cs->entry,
+ cs->size * sizeof(call_entry));
+
+ for(i=oldsize; i<cs->size; i++)
+ cs->entry[i].enter_cost = 0;
+
+ CLG_(stat).call_stack_resizes++;
+
+ CLG_DEBUGIF(2)
+ VG_(printf)(" call stack enlarged to %d entries\n",
+ CLG_(current_call_stack).size);
+}
+
+
+
+/* Called when function entered nonrecursive */
+/* Called when function entered nonrecursive.
+ * Applies the per-function options: verbosity change, dump/zero
+ * triggers and collection-state toggling. */
+static void function_entered(fn_node* fn, BBCC* to)
+{
+ CLG_ASSERT(fn != 0);
+
+#if CLG_ENABLE_DEBUG
+ if (fn->verbosity >=0) {
+ /* Swap clo.verbose with the function's verbosity setting;
+ * function_left() performs the same swap to restore it. */
+ Int old = CLG_(clo).verbose;
+ CLG_(clo).verbose = fn->verbosity;
+ fn->verbosity = old;
+ VG_(message)(Vg_DebugMsg,
+ "Entering %s: Verbosity set to %d",
+ fn->name, CLG_(clo).verbose);
+ }
+#endif
+
+ /* --dump-before / --zero-before handling. */
+ if (fn->dump_before) {
+ Char trigger[FN_NAME_LEN];
+ VG_(sprintf)(trigger, "--dump-before=%s", fn->name);
+ CLG_(dump_profile)(trigger, True);
+ }
+ else if (fn->zero_before) {
+ CLG_(zero_all_cost)(True);
+ }
+
+ if (fn->toggle_collect) {
+ CLG_(current_state).collect = !CLG_(current_state).collect;
+ CLG_DEBUG(2," entering %s: toggled collection state to %s\n",
+ fn->name,
+ CLG_(current_state).collect ? "ON" : "OFF");
+ }
+}
+
+/* Called when function left (no recursive level active) */
+/* Called when function left (no recursive level active).
+ * Mirror of function_entered(): --dump-after trigger, collection-state
+ * toggle, and restoring the swapped verbosity. */
+static void function_left(fn_node* fn, BBCC* from)
+{
+ CLG_ASSERT(fn != 0);
+
+ if (fn->dump_after) {
+ Char trigger[FN_NAME_LEN];
+ VG_(sprintf)(trigger, "--dump-after=%s", fn->name);
+ CLG_(dump_profile)(trigger, True);
+ }
+ if (fn->toggle_collect) {
+ CLG_(current_state).collect = !CLG_(current_state).collect;
+ CLG_DEBUG(2," leaving %s: toggled collection state to %s\n",
+ fn->name,
+ CLG_(current_state).collect ? "ON" : "OFF");
+ }
+
+#if CLG_ENABLE_DEBUG
+ if (fn->verbosity >=0) {
+ /* Swap back: fn->verbosity held the saved clo.verbose value
+ * since function_entered(). */
+ Int old = CLG_(clo).verbose;
+ CLG_(clo).verbose = fn->verbosity;
+ fn->verbosity = old;
+ VG_(message)(Vg_DebugMsg,
+ "Leaving %s: Verbosity set back to %d",
+ fn->name, CLG_(clo).verbose);
+ }
+#endif
+}
+
+
+/* Push call on call stack.
+ *
+ * Increment the usage count for the function called.
+ * A jump from <from> to <to>, with <sp>.
+ * If <skip> is true, this is a call to a function to be skipped;
+ * for this, we set jcc = 0.
+ */
+void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip)
+{
+ jCC* jcc;
+ UInt* pdepth;
+ call_entry* current_entry;
+ Addr ret_addr;
+
+ /* Ensure a call stack of size <current_sp>+1.
+ * The +1 is needed as push_cxt will store the
+ * context at [current_sp]
+ */
+ ensure_stack_size(CLG_(current_call_stack).sp +1);
+ current_entry = &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp]);
+
+ if (skip) {
+ jcc = 0;
+ }
+ else {
+ fn_node* to_fn = to->cxt->fn[0];
+
+ if (CLG_(current_state).nonskipped) {
+ /* this is a jmp from skipped to nonskipped */
+ CLG_ASSERT(CLG_(current_state).nonskipped == from);
+ }
+
+ /* As push_cxt() has to be called before push_call_stack if not
+ * skipping, the old context should already be saved on the stack */
+ CLG_ASSERT(current_entry->cxt != 0);
+ /* Snapshot the current cost so the inclusive cost of this call
+ * can be computed as a difference in pop_call_stack(). */
+ CLG_(copy_cost_lz)( CLG_(sets).full, &(current_entry->enter_cost),
+ CLG_(current_state).cost );
+
+ jcc = CLG_(get_jcc)(from, jmp, to);
+ CLG_ASSERT(jcc != 0);
+
+ /* Track the per-function recursion depth. */
+ pdepth = CLG_(get_fn_entry)(to_fn->number);
+ if (CLG_(clo).skip_direct_recursion) {
+ /* only increment depth if another function is called */
+ if (jcc->from->cxt->fn[0] != to_fn) (*pdepth)++;
+ }
+ else (*pdepth)++;
+
+ if (*pdepth>1)
+ CLG_(stat).rec_call_counter++;
+
+ jcc->call_counter++;
+ CLG_(stat).call_counter++;
+
+ /* Per-function enter hooks fire only on the outermost entry. */
+ if (*pdepth == 1) function_entered(to_fn, to);
+ }
+
+ /* return address is only useful with a real call;
+ * used to detect RET w/o CALL */
+ ret_addr = (from->bb->jmpkind == Ijk_Call) ?
+ bb_addr(from->bb) + from->bb->instr_len : 0;
+
+ /* put jcc on call stack */
+ current_entry->jcc = jcc;
+ current_entry->sp = sp;
+ current_entry->ret_addr = ret_addr;
+ current_entry->nonskipped = CLG_(current_state).nonskipped;
+
+ CLG_(current_call_stack).sp++;
+
+ /* To allow for above assertion we set context of next frame to 0 */
+ CLG_ASSERT(CLG_(current_call_stack).sp < CLG_(current_call_stack).size);
+ current_entry++;
+ current_entry->cxt = 0;
+
+ if (!skip)
+ CLG_(current_state).nonskipped = 0;
+ else if (!CLG_(current_state).nonskipped) {
+ /* a call from nonskipped to skipped */
+ CLG_(current_state).nonskipped = from;
+ if (!CLG_(current_state).nonskipped->skipped) {
+ CLG_(init_cost_lz)( CLG_(sets).full,
+ &CLG_(current_state).nonskipped->skipped);
+ CLG_(stat).distinct_skips++;
+ }
+ }
+
+#if CLG_ENABLE_DEBUG
+ CLG_DEBUGIF(0) {
+ if (CLG_(clo).verbose<2) {
+ if (jcc && jcc->to && jcc->to->bb) {
+ /* Indentation strings; the stack depth modulo 4 selects the
+ * variant so the dots stay column-aligned. */
+ char spaces[][41] = { " . . . . . . . . . .",
+ " . . . . . . . . . . ",
+ " . . . . . . . . . . ",
+ ". . . . . . . . . . " };
+
+ int s = CLG_(current_call_stack).sp;
+ Int* pars = (Int*) sp;
+
+ BB* bb = jcc->to->bb;
+ if (s>40) s=40;
+ VG_(printf)("%s> %s(0x%x, 0x%x, ...) [%s / %p]\n", spaces[s%4]+40-s, bb->fn->name,
+ pars ? pars[1]:0,
+ pars ? pars[2]:0,
+ bb->obj->name + bb->obj->last_slash_pos,
+ bb->offset);
+ }
+ }
+ else if (CLG_(clo).verbose<4) {
+ VG_(printf)("+ %2d ", CLG_(current_call_stack).sp);
+ CLG_(print_short_jcc)(jcc);
+ VG_(printf)(", SP %p, RA %p\n", sp, ret_addr);
+ }
+ else {
+ VG_(printf)(" Pushed ");
+ CLG_(print_stackentry)(3, CLG_(current_call_stack).sp-1);
+ }
+ }
+#endif
+
+}
+
+
+/* Pop call stack and update inclusive sums.
+ * Returns modified fcc.
+ *
+ * If the JCC becomes inactive, call entries are freed if possible
+ */
+void CLG_(pop_call_stack)()
+{
+ jCC* jcc;
+ Int depth = 0;
+ call_entry* lower_entry;
+
+ if (CLG_(current_state).sig >0) {
+ /* Check if we leave a signal handler; this can happen when
+ * calling longjmp() in the handler */
+ CLG_(run_post_signal_on_call_stack_bottom)();
+ }
+
+ lower_entry =
+ &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp-1]);
+
+ CLG_DEBUG(4,"+ pop_call_stack: frame %d, jcc %p\n",
+ CLG_(current_call_stack).sp, lower_entry->jcc);
+
+ /* jCC item not any more on real stack: pop */
+ jcc = lower_entry->jcc;
+ CLG_(current_state).nonskipped = lower_entry->nonskipped;
+
+ /* jcc == 0 means the popped frame was a call into a skipped
+ * function; no costs were attributed to it. */
+ if (jcc) {
+ fn_node* to_fn = jcc->to->cxt->fn[0];
+ /* Undo the recursion-depth increment done in push_call_stack(). */
+ UInt* pdepth = CLG_(get_fn_entry)(to_fn->number);
+ if (CLG_(clo).skip_direct_recursion) {
+ /* only decrement depth if another function was called */
+ if (jcc->from->cxt->fn[0] != to_fn) (*pdepth)--;
+ }
+ else (*pdepth)--;
+ depth = *pdepth;
+
+ /* add cost difference to sum: inclusive cost of this call is
+ * the cost accumulated since the matching push. */
+ if ( CLG_(add_diff_cost_lz)( CLG_(sets).full, &(jcc->cost),
+ lower_entry->enter_cost,
+ CLG_(current_state).cost) ) {
+
+ /* only count this call if it attributed some cost.
+ * the ret_counter is used to check if a BBCC dump is needed.
+ */
+ jcc->from->ret_counter++;
+ }
+ CLG_(stat).ret_counter++;
+
+ /* restore context */
+ CLG_(current_state).cxt = lower_entry->cxt;
+ CLG_(current_fn_stack).top =
+ CLG_(current_fn_stack).bottom + lower_entry->fn_sp;
+ CLG_ASSERT(CLG_(current_state).cxt != 0);
+
+ /* Per-function leave hooks fire when the last recursion level
+ * is left. */
+ if (depth == 0) function_left(to_fn, jcc->from);
+ }
+
+ /* To allow for an assertion in push_call_stack() */
+ lower_entry->cxt = 0;
+
+ CLG_(current_call_stack).sp--;
+
+#if CLG_ENABLE_DEBUG
+ CLG_DEBUGIF(1) {
+ if (CLG_(clo).verbose<4) {
+ if (jcc) {
+ /* popped JCC target first */
+ VG_(printf)("- %2d %p => ",
+ CLG_(current_call_stack).sp,
+ bb_addr(jcc->to->bb));
+ CLG_(print_addr)(bb_jmpaddr(jcc->from->bb));
+ VG_(printf)(", SP %p\n",
+ CLG_(current_call_stack).entry[CLG_(current_call_stack).sp].sp);
+ CLG_(print_cost)(10, CLG_(sets).full, jcc->cost);
+ }
+ else
+ VG_(printf)("- %2d [Skipped JCC], SP %p\n",
+ CLG_(current_call_stack).sp,
+ CLG_(current_call_stack).entry[CLG_(current_call_stack).sp].sp);
+ }
+ else {
+ VG_(printf)(" Popped ");
+ CLG_(print_stackentry)(7, CLG_(current_call_stack).sp);
+ if (jcc) {
+ VG_(printf)(" returned to ");
+ CLG_(print_addr_ln)(bb_jmpaddr(jcc->from->bb));
+ }
+ }
+ }
+#endif
+
+}
+
+
+/* remove CallStack items to sync with current SP
+ */
+/* Pop call-stack entries until the recorded SPs are consistent with
+ * the current stack pointer <sp>; at least <minpops> frames with an
+ * SP equal to <sp> are popped as well. */
+void CLG_(unwind_call_stack)(Addr sp, Int minpops)
+{
+ Int csp;
+ CLG_DEBUG(4,"+ unwind_call_stack(sp %p, minpops %d): frame %d\n",
+ sp, minpops, CLG_(current_call_stack).sp);
+
+ /* We pop old stack frames.
+ * For a call, be p the stack address with return address.
+ * - call_stack_esp[] has SP after the CALL: p-4
+ * - current sp is after a RET: >= p
+ */
+
+ while( (csp=CLG_(current_call_stack).sp) >0) {
+ call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);
+
+ /* Pop frames whose saved SP lies below the current SP (their
+ * stack space is gone), plus up to minpops frames at equal SP. */
+ if ((top_ce->sp < sp) ||
+ ((top_ce->sp == sp) && minpops>0)) {
+
+ minpops--;
+ CLG_(pop_call_stack)();
+ csp=CLG_(current_call_stack).sp;
+ continue;
+ }
+ break;
+ }
+
+ CLG_DEBUG(4,"- unwind_call_stack\n");
+}
diff --git a/callgrind/clo.c b/callgrind/clo.c
new file mode 100644
index 0000000..184fed1
--- /dev/null
+++ b/callgrind/clo.c
@@ -0,0 +1,765 @@
+/*
+ This file is part of Callgrind, a Valgrind skin for call graph
+ profiling programs.
+
+ Copyright (C) 2002-2005, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+  This skin is derived from and contains a lot of code from Cachegrind
+ Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "config.h" // for VG_PREFIX
+
+#include "global.h"
+
+
+
+/*------------------------------------------------------------*/
+/*--- Function specific configuration options ---*/
+/*------------------------------------------------------------*/
+
+/* Special value for separate_callers: automatic = adaptive */
+#define CONFIG_AUTO -1
+
+#define CONFIG_DEFAULT -1
+#define CONFIG_FALSE 0
+#define CONFIG_TRUE 1
+
/* Logging configuration for a function.
 * Every field is tri-state: CONFIG_DEFAULT means "not set on the command
 * line, leave the fn_node's value untouched"; otherwise the field holds
 * CONFIG_FALSE/CONFIG_TRUE (for boolean options) or a plain integer
 * (group, separate_*, verbosity).  See update_fn_config1().
 */
struct _fn_config {
    Int dump_before;      /* dump profile when entering this function? */
    Int dump_after;       /* dump profile when leaving this function? */
    Int zero_before;      /* zero all costs when entering this function? */
    Int toggle_collect;   /* toggle cost collection on enter/leave? */

    Int skip;        /* Handle CALL to this function as JMP (= Skip)? */
    Int group;       /* don't change caller dependency inside group !=0 */
    Int pop_on_jump; /* see --pop-on-jump: workaround to handle
		      * runtime_resolve specially */

    Int separate_callers;    /* separate logging dependent on caller */
    Int separate_recursions; /* separate logging of rec. levels */

#if CLG_ENABLE_DEBUG
    Int verbosity; /* Change debug verbosity level while in function */
#endif
};
+
+/* Configurations for function name prefix patterns.
+ * Currently, only very limited patterns are possible:
+ * Exact prefix patterns and "*::" are allowed.
+ * E.g.
+ * - "abc" matches all functions starting with "abc".
+ * - "abc*::def" matches all functions starting with "abc" and
+ * starting with "def" after the first "::" separator.
+ * - "*::print(" matches C++ methods "print" in all classes
+ * without namespace. I.e. "*" doesn't match a "::".
+ *
+ * We build a trie from patterns, and for a given function, we
+ * go down the tree and apply all non-default configurations.
+ */
+
+
/* Number of hash buckets for the children of a trie node; children are
 * bucketed by (first character % NODE_DEGREE) and chained via <next>. */
#define NODE_DEGREE 30

/* node of compressed trie search structure */
typedef struct _config_node config_node;
struct _config_node {
    Int length;          /* number of characters of the pattern that this
			  * node matches (length of <name>) */

    fn_config* config;   /* config for a pattern ending exactly here, or 0 */
    config_node* sub_node[NODE_DEGREE]; /* children, hashed by first char */
    config_node* next;   /* next sibling in the same hash chain */
    config_node* wild_star; /* child for a '*' wildcard at this position */
    config_node* wild_char; /* child for a '?' wildcard at this position */

    Char name[1];        /* matched prefix; struct is over-allocated so the
			  * string (plus NUL) fits, see new_config() */
};
+
+/* root of trie */
+static config_node* fn_configs = 0;
+
+static __inline__
+fn_config* new_fnc(void)
+{
+ fn_config* new = (fn_config*) CLG_MALLOC(sizeof(fn_config));
+
+ new->dump_before = CONFIG_DEFAULT;
+ new->dump_after = CONFIG_DEFAULT;
+ new->zero_before = CONFIG_DEFAULT;
+ new->toggle_collect = CONFIG_DEFAULT;
+ new->skip = CONFIG_DEFAULT;
+ new->pop_on_jump = CONFIG_DEFAULT;
+ new->group = CONFIG_DEFAULT;
+ new->separate_callers = CONFIG_DEFAULT;
+ new->separate_recursions = CONFIG_DEFAULT;
+
+#if CLG_ENABLE_DEBUG
+ new->verbosity = CONFIG_DEFAULT;
+#endif
+
+ return new;
+}
+
+
+static config_node* new_config(Char* name, int length)
+{
+ int i;
+ config_node* node = (config_node*) CLG_MALLOC(sizeof(config_node) + length);
+
+ for(i=0;i<length;i++) {
+ if (name[i] == 0) break;
+ node->name[i] = name[i];
+ }
+ node->name[i] = 0;
+
+ node->length = length;
+ node->config = 0;
+ for(i=0;i<NODE_DEGREE;i++)
+ node->sub_node[i] = 0;
+ node->next = 0;
+ node->wild_char = 0;
+ node->wild_star = 0;
+
+ CLG_DEBUG(3, " new_config('%s', len %d)\n", node->name, length);
+
+ return node;
+}
+
+static __inline__
+Bool is_wild(Char n)
+{
+ return (n == '*') || (n == '?');
+}
+
+/* Recursively build up function matching tree (prefix tree).
+ * Returns function config object for pattern <name>
+ * and starting at tree node <*pnode>.
+ *
+ * Tree nodes (config_node) are created as needed,
+ * tree root is stored into <*pnode>, and the created
+ * leaf (fn_config) for the given pattern is returned.
+ */
+static fn_config* get_fnc2(config_node* node, Char* name)
+{
+ config_node *new_sub, *n, *nprev;
+ int offset, len;
+
+ CLG_DEBUG(3, " get_fnc2(%p, '%s')\n", node, name);
+
+ if (name[0] == 0) {
+ if (!node->config) node->config = new_fnc();
+ return node->config;
+ }
+
+ if (is_wild(*name)) {
+ if (*name == '*') {
+ while(name[1] == '*') name++;
+ new_sub = node->wild_star;
+ }
+ else
+ new_sub = node->wild_char;
+
+ if (!new_sub) {
+ new_sub = new_config(name, 1);
+ if (*name == '*')
+ node->wild_star = new_sub;
+ else
+ node->wild_char = new_sub;
+ }
+
+ return get_fnc2( new_sub, name+1);
+ }
+
+ n = node->sub_node[ name[0]%NODE_DEGREE ];
+ nprev = 0;
+ len = 0;
+ while(n) {
+ for(len=0; name[len] == n->name[len]; len++);
+ if (len>0) break;
+ nprev = n;
+ n = n->next;
+ }
+
+ if (!n) {
+ len = 1;
+ while(name[len] && (!is_wild(name[len]))) len++;
+ new_sub = new_config(name, len);
+ new_sub->next = node->sub_node[ name[0]%NODE_DEGREE ];
+ node->sub_node[ name[0]%NODE_DEGREE ] = new_sub;
+
+ if (name[len] == 0) {
+ new_sub->config = new_fnc();
+ return new_sub->config;
+ }
+
+ /* recurse on wildcard */
+ return get_fnc2( new_sub, name+len);
+ }
+
+ if (len < n->length) {
+
+ /* split up the subnode <n> */
+ config_node *new_node;
+ int i;
+
+ new_node = new_config(n->name, len);
+ if (nprev)
+ nprev->next = new_node;
+ else
+ node->sub_node[ n->name[0]%NODE_DEGREE ] = new_node;
+ new_node->next = n->next;
+
+ new_node->sub_node[ n->name[len]%NODE_DEGREE ] = n;
+
+ for(i=0, offset=len; offset < n->length; i++, offset++)
+ n->name[i] = n->name[offset];
+ n->name[i] = 0;
+ n->length = i;
+
+ name += len;
+ offset = 0;
+ while(name[offset] && (!is_wild(name[offset]))) offset++;
+ new_sub = new_config(name, offset);
+ /* this sub_node of new_node could already be set: chain! */
+ new_sub->next = new_node->sub_node[ name[0]%NODE_DEGREE ];
+ new_node->sub_node[ name[0]%NODE_DEGREE ] = new_sub;
+
+ if (name[offset]==0) {
+ new_sub->config = new_fnc();
+ return new_sub->config;
+ }
+
+ /* recurse on wildcard */
+ return get_fnc2( new_sub, name+offset);
+ }
+
+ name += n->length;
+
+ if (name[0] == 0) {
+ /* name and node name are the same */
+ if (!n->config) n->config = new_fnc();
+ return n->config;
+ }
+
+ offset = 1;
+ while(name[offset] && (!is_wild(name[offset]))) offset++;
+
+ new_sub = new_config(name, offset);
+ new_sub->next = n->sub_node[ name[offset]%NODE_DEGREE ];
+ n->sub_node[ name[offset]%NODE_DEGREE ] = new_sub;
+
+ return get_fnc2(new_sub, name+offset);
+}
+
+static void print_config_node(int s, config_node* node)
+{
+ config_node* n;
+ int i;
+
+ if (node != fn_configs) {
+ char sp[] = " ";
+
+ if (s>40) s=40;
+ VG_(printf)(sp+40-s);
+ VG_(printf)("'%s'/%d\n", node->name, node->length);
+ }
+ for(i=0;i<NODE_DEGREE;i++) {
+ n = node->sub_node[i];
+ while(n) {
+ print_config_node(s+1, n);
+ n = n->next;
+ }
+ }
+ if (node->wild_char) print_config_node(s+1, node->wild_char);
+ if (node->wild_star) print_config_node(s+1, node->wild_star);
+}
+
+/* get a function config for a name pattern (from command line) */
+static fn_config* get_fnc(Char* name)
+{
+ fn_config* fnc;
+
+ CLG_DEBUG(3, " +get_fnc(%s)\n", name);
+ if (fn_configs == 0)
+ fn_configs = new_config(name, 0);
+ fnc = get_fnc2(fn_configs, name);
+
+ CLG_DEBUGIF(3) {
+ CLG_DEBUG(3, " -get_fnc(%s):\n", name);
+ print_config_node(3, fn_configs);
+ }
+ return fnc;
+}
+
+
+
+static void update_fn_config1(fn_node* fn, fn_config* fnc)
+{
+ if (fnc->dump_before != CONFIG_DEFAULT)
+ fn->dump_before = (fnc->dump_before == CONFIG_TRUE);
+
+ if (fnc->dump_after != CONFIG_DEFAULT)
+ fn->dump_after = (fnc->dump_after == CONFIG_TRUE);
+
+ if (fnc->zero_before != CONFIG_DEFAULT)
+ fn->zero_before = (fnc->zero_before == CONFIG_TRUE);
+
+ if (fnc->toggle_collect != CONFIG_DEFAULT)
+ fn->toggle_collect = (fnc->toggle_collect == CONFIG_TRUE);
+
+ if (fnc->skip != CONFIG_DEFAULT)
+ fn->skip = (fnc->skip == CONFIG_TRUE);
+
+ if (fnc->pop_on_jump != CONFIG_DEFAULT)
+ fn->pop_on_jump = (fnc->pop_on_jump == CONFIG_TRUE);
+
+ if (fnc->group != CONFIG_DEFAULT)
+ fn->group = fnc->group;
+
+ if (fnc->separate_callers != CONFIG_DEFAULT)
+ fn->separate_callers = fnc->separate_callers;
+
+ if (fnc->separate_recursions != CONFIG_DEFAULT)
+ fn->separate_recursions = fnc->separate_recursions;
+
+#if CLG_ENABLE_DEBUG
+ if (fnc->verbosity != CONFIG_DEFAULT)
+ fn->verbosity = fnc->verbosity;
+#endif
+}
+
/* Recursively go down the function matching tree,
 * looking for a match to <name>. For every matching leaf,
 * <fn> is updated with the pattern config.
 *
 * All matching alternatives are followed, not just the first:
 * an exact literal child, a '?' child (consumes one character),
 * and a '*' child (tried against every suffix of <name>,
 * including the empty one).
 */
static void update_fn_config2(fn_node* fn, Char* name, config_node* node)
{
  config_node* n;

  CLG_DEBUG(3, " update_fn_config2('%s', node '%s'): \n",
	    name, node->name);
  /* pattern fully consumed at a node holding a config: apply it */
  if ((*name == 0) && node->config) {
    CLG_DEBUG(3, "Found!\n");
    update_fn_config1(fn, node->config);
    return;
  }

  /* follow the literal child whose stored prefix matches <name>;
   * children are hashed by first character, chained via <next> */
  n = node->sub_node[ name[0]%NODE_DEGREE ];
  while(n) {
    if (VG_(strncmp)(name, n->name, n->length)==0) break;
    n = n->next;
  }
  if (n) update_fn_config2(fn, name+n->length, n);

  /* '?' consumes exactly one character.
   * NOTE(review): if *name == 0 here, name+1 points past the NUL
   * terminator -- verify a wild_char child cannot be reached with an
   * already exhausted name. */
  if (node->wild_char)
    update_fn_config2(fn, name+1, node->wild_char);

  /* '*' may consume any number of characters (including zero):
   * try the star child against every suffix of <name> */
  if (node->wild_star) {
    while(*name) {
      update_fn_config2(fn, name, node->wild_star);
      name++;
    }
    update_fn_config2(fn, name, node->wild_star);
  }
}
+
+/* Update function config according to configs of name prefixes */
+void CLG_(update_fn_config)(fn_node* fn)
+{
+ CLG_DEBUG(3, " update_fn_config('%s')\n", fn->name);
+ if (fn_configs)
+ update_fn_config2(fn, fn->name, fn_configs);
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- Command line processing ---*/
+/*--------------------------------------------------------------------*/
+
+static Char* getUInt(Char* s, UInt* pn)
+{
+ UInt n = 0;
+ while((*s >='0') && (*s <='9')) {
+ n = 10*n + (*s-'0');
+ s++;
+ }
+ if (pn) *pn = n;
+ return s;
+}
+
+__attribute__((unused))
+static UWord getUWord(Char* s)
+{
+ UWord n = 0;
+ Bool isHex = False;
+
+ if ((s[0] == '0') && (s[1] == 'x')) {
+ isHex = True;
+ s += 2;
+ }
+
+ if (!isHex) {
+ while((*s >='0') && (*s <='9')) {
+ n = 10*n + (*s-'0');
+ s++;
+ }
+ }
+ else {
+ while(1) {
+ if ((*s >='0') && (*s <='9')) {
+ n = 16*n + (*s-'0');
+ s++;
+ continue;
+ }
+ if ((*s >='a') && (*s <='f')) {
+ n = 16*n + (*s-'a'+10);
+ s++;
+ continue;
+ }
+ if ((*s >='A') && (*s <='F')) {
+ n = 16*n + (*s-'A'+10);
+ s++;
+ continue;
+ }
+ break;
+ }
+ }
+
+ return n;
+}
+
/* Parse one callgrind command line option <arg>.
 * Returns True if the option was recognized -- either by callgrind
 * itself or, as a last resort, by the cache simulator.
 *
 * NOTE: the order of the prefix (VG_(strncmp)) checks is significant.
 * Options whose plain "name=" form shares a prefix with a
 * parameterized "name<n>=" form must be tested first, e.g.
 * "--separate-recs=" before "--separate-recs<n>=<f>", and
 * "--ct-verbose=" before "--ct-verbose<level>=<func>".
 */
Bool CLG_(process_cmd_line_option)(Char* arg)
{
   if (0 == VG_(strcmp)(arg, "--skip-plt=yes"))
      CLG_(clo).skip_plt = True;
   else if (0 == VG_(strcmp)(arg, "--skip-plt=no"))
      CLG_(clo).skip_plt = False;

   else if (0 == VG_(strcmp)(arg, "--collect-jumps=yes"))
      CLG_(clo).collect_jumps = True;
   else if (0 == VG_(strcmp)(arg, "--collect-jumps=no"))
      CLG_(clo).collect_jumps = False;
   /* compatibility alias, deprecated option */
   else if (0 == VG_(strcmp)(arg, "--trace-jump=yes"))
      CLG_(clo).collect_jumps = True;
   else if (0 == VG_(strcmp)(arg, "--trace-jump=no"))
      CLG_(clo).collect_jumps = False;

   else if (0 == VG_(strcmp)(arg, "--combine-dumps=yes"))
      CLG_(clo).combine_dumps = True;
   else if (0 == VG_(strcmp)(arg, "--combine-dumps=no"))
      CLG_(clo).combine_dumps = False;

   else if (0 == VG_(strcmp)(arg, "--collect-atstart=yes"))
      CLG_(clo).collect_atstart = True;
   else if (0 == VG_(strcmp)(arg, "--collect-atstart=no"))
      CLG_(clo).collect_atstart = False;

   else if (0 == VG_(strcmp)(arg, "--instr-atstart=yes"))
      CLG_(clo).instrument_atstart = True;
   else if (0 == VG_(strcmp)(arg, "--instr-atstart=no"))
      CLG_(clo).instrument_atstart = False;

   else if (0 == VG_(strcmp)(arg, "--separate-threads=yes"))
      CLG_(clo).separate_threads = True;
   else if (0 == VG_(strcmp)(arg, "--separate-threads=no"))
      CLG_(clo).separate_threads = False;

   else if (0 == VG_(strcmp)(arg, "--compress-strings=yes"))
      CLG_(clo).compress_strings = True;
   else if (0 == VG_(strcmp)(arg, "--compress-strings=no"))
      CLG_(clo).compress_strings = False;

   else if (0 == VG_(strcmp)(arg, "--compress-mangled=yes"))
      CLG_(clo).compress_mangled = True;
   else if (0 == VG_(strcmp)(arg, "--compress-mangled=no"))
      CLG_(clo).compress_mangled = False;

   else if (0 == VG_(strcmp)(arg, "--compress-pos=yes"))
      CLG_(clo).compress_pos = True;
   else if (0 == VG_(strcmp)(arg, "--compress-pos=no"))
      CLG_(clo).compress_pos = False;

   /* per-function pattern options: get_fnc() registers the pattern in
    * the matching trie and returns its (new or existing) config */
   else if (0 == VG_(strncmp)(arg, "--fn-skip=", 10)) {
      fn_config* fnc = get_fnc(arg+10);
      fnc->skip = CONFIG_TRUE;
   }

   else if (0 == VG_(strncmp)(arg, "--dump-before=", 14)) {
      fn_config* fnc = get_fnc(arg+14);
      fnc->dump_before = CONFIG_TRUE;
   }

   else if (0 == VG_(strncmp)(arg, "--zero-before=", 14)) {
      fn_config* fnc = get_fnc(arg+14);
      fnc->zero_before = CONFIG_TRUE;
   }

   else if (0 == VG_(strncmp)(arg, "--dump-after=", 13)) {
      fn_config* fnc = get_fnc(arg+13);
      fnc->dump_after = CONFIG_TRUE;
   }

   else if (0 == VG_(strncmp)(arg, "--toggle-collect=", 17)) {
      fn_config* fnc = get_fnc(arg+17);
      fnc->toggle_collect = CONFIG_TRUE;
      /* defaults to initial collection off */
      CLG_(clo).collect_atstart = False;
   }

   /* must come before the parameterized "--separate-recs<n>=" below */
   else if (0 == VG_(strncmp)(arg, "--separate-recs=", 16))
      CLG_(clo).separate_recursions = (Int)VG_(atoll)(&arg[16]);

   /* workaround to find runtime_resolve (needs special handling) */
   else if (0 == VG_(strncmp)(arg, "--pop-on-jump=", 14)) {
      fn_config* fnc = get_fnc(arg+14);
      fnc->pop_on_jump = CONFIG_TRUE;
   }

#if CLG_ENABLE_DEBUG
   /* must come before the parameterized "--ct-verbose<n>=" below */
   else if (0 == VG_(strncmp)(arg, "--ct-verbose=", 13))
      CLG_(clo).verbose = (Int)VG_(atoll)(&arg[13]);

   else if (0 == VG_(strncmp)(arg, "--ct-vstart=", 12))
      CLG_(clo).verbose_start = (ULong)VG_(atoll)(&arg[12]);

   /* "--ct-verbose<level>=<func>": per-function verbosity */
   else if (0 == VG_(strncmp)(arg, "--ct-verbose", 12)) {
      UInt n;
      fn_config* fnc;
      Char* s = getUInt(arg+12, &n);
      if ((n == 0) || *s != '=') return False;
      fnc = get_fnc(s+1);
      fnc->verbosity = n;
   }
#endif

   /* must come before the parameterized "--separate-callers<n>=" below */
   else if (0 == VG_(strncmp)(arg, "--separate-callers=", 19)) {
      if (0 == VG_(strcmp)(arg+19, "auto"))
	 CLG_(clo).separate_callers = CONFIG_AUTO;
      else
	 CLG_(clo).separate_callers = (Int)VG_(atoll)(&arg[19]);
   }

   /* "--fn-group<no>=<func>": put function into separation group <no> */
   else if (0 == VG_(strncmp)(arg, "--fn-group", 10)) {
      UInt n;
      fn_config* fnc;
      Char* s = getUInt(arg+10, &n);
      if ((n == 0) || *s != '=') return False;
      fnc = get_fnc(s+1);
      fnc->group = n;
   }

   /* "--separate-callers<n>=<func>" */
   else if (0 == VG_(strncmp)(arg, "--separate-callers", 18)) {
      UInt n;
      fn_config* fnc;
      Char* s = getUInt(arg+18, &n);
      if ((n == 0) || *s != '=') return False;
      fnc = get_fnc(s+1);
      fnc->separate_callers = n;
   }

   /* "--separate-recs<n>=<func>" */
   else if (0 == VG_(strncmp)(arg, "--separate-recs", 15)) {
      UInt n;
      fn_config* fnc;
      Char* s = getUInt(arg+15, &n);
      if ((n == 0) || *s != '=') return False;
      fnc = get_fnc(s+1);
      fnc->separate_recursions = n;
   }

   else if (0 == VG_(strncmp)(arg, "--base=", 7))
      CLG_(clo).filename_base = VG_(strdup)(arg+7);

   else if (0 == VG_(strcmp)(arg, "--mangle-names=yes"))
      CLG_(clo).mangle_names = True;
   else if (0 == VG_(strcmp)(arg, "--mangle-names=no"))
      CLG_(clo).mangle_names = False;

   else if (0 == VG_(strcmp)(arg, "--skip-direct-rec=yes"))
      CLG_(clo).skip_direct_recursion = True;
   else if (0 == VG_(strcmp)(arg, "--skip-direct-rec=no"))
      CLG_(clo).skip_direct_recursion = False;

   else if (0 == VG_(strcmp)(arg, "--dump-bbs=yes"))
      CLG_(clo).dump_bbs = True;
   else if (0 == VG_(strcmp)(arg, "--dump-bbs=no"))
      CLG_(clo).dump_bbs = False;

   else if (0 == VG_(strcmp)(arg, "--dump-line=yes"))
      CLG_(clo).dump_line = True;
   else if (0 == VG_(strcmp)(arg, "--dump-line=no"))
      CLG_(clo).dump_line = False;

   else if (0 == VG_(strcmp)(arg, "--dump-instr=yes"))
      CLG_(clo).dump_instr = True;
   else if (0 == VG_(strcmp)(arg, "--dump-instr=no"))
      CLG_(clo).dump_instr = False;

   else if (0 == VG_(strcmp)(arg, "--dump-bb=yes"))
      CLG_(clo).dump_bb = True;
   else if (0 == VG_(strcmp)(arg, "--dump-bb=no"))
      CLG_(clo).dump_bb = False;

   else if (0 == VG_(strncmp)(arg, "--dump-every-bb=", 16))
      CLG_(clo).dump_every_bb = (Int)VG_(atoll)(&arg[16]);


   else if (0 == VG_(strcmp)(arg, "--collect-alloc=yes"))
      CLG_(clo).collect_alloc = True;
   else if (0 == VG_(strcmp)(arg, "--collect-alloc=no"))
      CLG_(clo).collect_alloc = False;

   else if (0 == VG_(strcmp)(arg, "--collect-systime=yes"))
      CLG_(clo).collect_systime = True;
   else if (0 == VG_(strcmp)(arg, "--collect-systime=no"))
      CLG_(clo).collect_systime = False;

   else if (0 == VG_(strcmp)(arg, "--simulate-cache=yes"))
      CLG_(clo).simulate_cache = True;
   else if (0 == VG_(strcmp)(arg, "--simulate-cache=no"))
      CLG_(clo).simulate_cache = False;

   else {
      /* fall back to the cache simulator's option parser */
      Bool isCachesimOption = (*CLG_(cachesim).parse_opt)(arg);

      /* cache simulator is used if a simulator option is given */
      if (isCachesimOption)
	 CLG_(clo).simulate_cache = True;

      return isCachesimOption;
   }

   return True;
}
+
/* Print the command line options understood by callgrind (hooked into
 * valgrind's --help output), followed by the cache simulator's options.
 * Options guarded by CLG_EXPERIMENTAL are only advertised in
 * experimental builds.
 */
void CLG_(print_usage)(void)
{
   VG_(printf)(
"\n   dump creation options:\n"
"    --base=<prefix>           Prefix for profile files [" DEFAULT_DUMPNAME "]\n"
"    --dump-line=no|yes        Dump source lines of costs? [yes]\n"
"    --dump-instr=no|yes       Dump instruction address of costs? [no]\n"
"    --compress-strings=no|yes Compress strings in profile dump? [yes]\n"
"    --compress-pos=no|yes     Compress positions in profile dump? [yes]\n"
"    --combine-dumps=no|yes    Concat all dumps into same file [no]\n"
#if CLG_EXPERIMENTAL
"    --compress-events=no|yes  Compress events in profile dump? [no]\n"
"    --dump-bb=no|yes          Dump basic block address of costs? [no]\n"
"    --dump-bbs=no|yes         Dump basic block info? [no]\n"
"    --dump-skipped=no|yes     Dump info on skipped functions in calls? [no]\n"
"    --mangle-names=no|yes     Mangle separation into names? [yes]\n"
#endif

"\n   activity options (for interactivity use callgrind_control):\n"
"    --dump-every-bb=<count>   Dump every <count> basic blocks [0=never]\n"
"    --dump-before=<func>      Dump when entering function\n"
"    --zero-before=<func>      Zero all costs when entering function\n"
"    --dump-after=<func>       Dump when leaving function\n"
#if CLG_EXPERIMENTAL
"    --dump-objs=no|yes        Dump static object information [no]\n"
#endif

"\n   data collection options:\n"
"    --instr-atstart=no|yes    Do instrumentation at callgrind start [yes]\n"
"    --collect-atstart=no|yes  Collect at process/thread start [yes]\n"
"    --toggle-collect=<func>   Toggle collection on enter/leave function\n"
"    --collect-jumps=no|yes    Collect jumps? [no]\n"
#if CLG_EXPERIMENTAL
"    --collect-alloc=no|yes    Collect memory allocation info? [no]\n"
#endif
"    --collect-systime=no|yes  Collect system call time info? [no]\n"

"\n   cost entity separation options:\n"
"    --separate-threads=no|yes Separate data per thread [no]\n"
"    --separate-callers=<n>    Separate functions by call chain length [0]\n"
"    --separate-recs=<n>       Separate function recursions upto level [2]\n"
"    --skip-plt=no|yes         Ignore calls to/from PLT sections? [yes]\n"
"    --separate-recs<n>=<f>    Separate <n> recursions for function <f>\n"
"    --separate-callers<n>=<f> Separate <n> callers for function <f>\n"
"    --skip-direct-rec=no|yes  Ignore direct recursions? [yes]\n"
"    --fn-skip=<function>      Ignore calls to/from function?\n"
#if CLG_EXPERIMENTAL
"    --fn-group<no>=<func>     Put function into separation group <no>\n"
#endif
    );

   /* let the cache simulator advertise its own options */
   (*CLG_(cachesim).print_opts)();

//   VG_(printf)("\n"
//	       "  For full callgrind documentation, see\n"
//	       "  "VG_PREFIX"/share/doc/callgrind/html/callgrind.html\n\n");
}
+
/* Print the debug-only command line options (shown by valgrind's
 * --help-debug).  Without CLG_ENABLE_DEBUG there are none.
 */
void CLG_(print_debug_usage)(void)
{
   VG_(printf)(

#if CLG_ENABLE_DEBUG
"    --ct-verbose=<level>       Verbosity of standard debug output [0]\n"
"    --ct-vstart=<BB number>    Only be verbose after basic block [0]\n"
"    --ct-verbose<level>=<func> Verbosity while in <func>\n"
#else
"    (none)\n"
#endif

    );
}
+
+
+void CLG_(set_clo_defaults)(void)
+{
+ /* Default values for command line arguments */
+
+ /* dump options */
+ CLG_(clo).filename_base = 0;
+ CLG_(clo).combine_dumps = False;
+ CLG_(clo).compress_strings = True;
+ CLG_(clo).compress_mangled = False;
+ CLG_(clo).compress_events = False;
+ CLG_(clo).compress_pos = True;
+ CLG_(clo).mangle_names = True;
+ CLG_(clo).dump_line = True;
+ CLG_(clo).dump_instr = False;
+ CLG_(clo).dump_bb = False;
+ CLG_(clo).dump_bbs = False;
+
+ CLG_(clo).dump_every_bb = 0;
+
+ /* Collection */
+ CLG_(clo).separate_threads = False;
+ CLG_(clo).collect_atstart = True;
+ CLG_(clo).collect_jumps = False;
+ CLG_(clo).collect_alloc = False;
+ CLG_(clo).collect_systime = False;
+
+ CLG_(clo).skip_plt = True;
+ CLG_(clo).separate_callers = 0;
+ CLG_(clo).separate_recursions = 2;
+ CLG_(clo).skip_direct_recursion = False;
+
+ /* Instrumentation */
+ CLG_(clo).instrument_atstart = True;
+ CLG_(clo).simulate_cache = False;
+
+#if CLG_ENABLE_DEBUG
+ CLG_(clo).verbose = 0;
+ CLG_(clo).verbose_start = 0;
+#endif
+}
diff --git a/callgrind/command.c b/callgrind/command.c
new file mode 100644
index 0000000..23c14d9
--- /dev/null
+++ b/callgrind/command.c
@@ -0,0 +1,517 @@
+/*
+ This file is part of Callgrind, a Valgrind skin for call graph
+ profiling programs.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+  This skin is derived from and contains a lot of code from Cachegrind
+ Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/*
+ * Functions related to interactive commands via "callgrind.cmd"
+ */
+
+#include "config.h"
+#include "global.h"
+
+#include <pub_tool_threadstate.h> // VG_N_THREADS
+
+static Char outbuf[FILENAME_LEN + FN_NAME_LEN + OBJ_NAME_LEN];
+
+static Char* command_file = 0;
+static Char* command_file2 = 0;
+static Char* result_file = 0;
+static Char* info_file = 0;
+static Char* dump_base = 0;
+
+static Bool command_inited = False;
+
/* Set up the file names used for interactive control and write the
 * "info" file announcing this callgrind instance to external tools
 * (e.g. callgrind_control / KCachegrind).
 *
 * <dir> is the directory for command/result files, <dumps> the dump
 * file base (both strings stay owned by the caller; <dumps> is kept
 * as dump_base).  The "+10" in the buffer sizes leaves room for the
 * separator, the '.', the pid digits and the NUL terminator.
 */
void CLG_(init_command)(Char* dir, Char* dumps)
{
  Int fd, size;
  SysRes res;

  dump_base = dumps;

  /* "<dir>/<DEFAULT_COMMANDNAME>.<pid>" */
  size = VG_(strlen)(dir) + VG_(strlen)(DEFAULT_COMMANDNAME) +10;
  command_file = (char*) CLG_MALLOC(size);
  CLG_ASSERT(command_file != 0);
  VG_(sprintf)(command_file, "%s/%s.%d",
	       dir, DEFAULT_COMMANDNAME, VG_(getpid)());

  /* This is for compatibility with the "Force Now" Button of current
   * KCachegrind releases, as it doesn't use ".pid" to distinguish
   * different callgrind instances from same base directory.
   * Should be removed sometime in the future (29.10.03)
   */
  command_file2 = (char*) CLG_MALLOC(size);
  CLG_ASSERT(command_file2 != 0);
  VG_(sprintf)(command_file2, "%s/%s",
	       dir, DEFAULT_COMMANDNAME);

  /* "<dir>/<DEFAULT_RESULTNAME>.<pid>" */
  size = VG_(strlen)(dir) + VG_(strlen)(DEFAULT_RESULTNAME) +10;
  result_file = (char*) CLG_MALLOC(size);
  CLG_ASSERT(result_file != 0);
  VG_(sprintf)(result_file, "%s/%s.%d",
	       dir, DEFAULT_RESULTNAME, VG_(getpid)());

  /* info file is relative to the current directory, "<name>.<pid>" */
  info_file = (char*) CLG_MALLOC(VG_(strlen)(DEFAULT_INFONAME) + 10);
  CLG_ASSERT(info_file != 0);
  VG_(sprintf)(info_file, "%s.%d", DEFAULT_INFONAME, VG_(getpid)());

  CLG_DEBUG(1, "  dump file base: '%s'\n", dump_base);
  CLG_DEBUG(1, "  command file:   '%s'\n", command_file);
  CLG_DEBUG(1, "  result file:    '%s'\n", result_file);
  CLG_DEBUG(1, "  info file:      '%s'\n", info_file);

  /* create info file to indicate that we are running */
  res = VG_(open)(info_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
  if (res.isError) {
    /* truncating failed (file does not exist yet): create it */
    res = VG_(open)(info_file, VKI_O_CREAT|VKI_O_WRONLY,
		    VKI_S_IRUSR|VKI_S_IWUSR);
    if (res.isError) {
      /* not fatal: run without external control via the info file */
      VG_(message)(Vg_DebugMsg,
		   "warning: can't write info file '%s'", info_file);
      info_file = 0;
      fd = -1;
    }
  }
  /* note: fd is set on every path -- either to -1 above or here */
  if (!res.isError)
      fd = (Int) res.val;
  if (fd>=0) {
    Char buf[512];
    Int i;

    /* header comment naming the supervised client binary */
    VG_(sprintf)(buf,
		 "# This file is generated by Callgrind-" VERSION ".\n"
		 "# It is used to enable controlling the supervision of\n"
		 "#  '%s'\n"
		 "# by external tools.\n\n",
#if VG_CORE_INTERFACE_VERSION < 9
		 VG_(client_argv[0])
#else
		 VG_(args_the_exename)
#endif
		 );
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "version: " VERSION "\n");
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "base: %s\n", dir);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "dumps: %s\n", dump_base);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "control: %s\n", command_file);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "result: %s\n", result_file);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    /* "cmd:" line: full client command line, API depends on core version */
    VG_(strcpy)(buf, "cmd:");
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
#if VG_CORE_INTERFACE_VERSION < 9
    for (i = 0; i < VG_(client_argc); i++) {
      if (!VG_(client_argv[i])) continue;
      VG_(sprintf)(buf, " %s", VG_(client_argv[i]));
      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    }
#else
    VG_(sprintf)(buf, " %s", VG_(args_the_exename));
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    for (i = 0; i < VG_(args_for_client).used; i++) {
      if (!VG_(args_for_client).strs[i]) continue;
      VG_(sprintf)(buf, " %s", VG_(args_for_client).strs[i]);
      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    }
#endif
    VG_(write)(fd, "\n", 1);
    VG_(close)(fd);
  }

  command_inited = True;
}
+
+void CLG_(finish_command)()
+{
+ /* unlink info file */
+ if (info_file) VG_(unlink)(info_file);
+}
+
+
+static Int createRes(Int fd)
+{
+ SysRes res;
+
+ if (fd > -2) return fd;
+
+ /* fd == -2: No error, but we need to create the file */
+ res = VG_(open)(result_file,
+ VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
+ VKI_S_IRUSR|VKI_S_IWUSR);
+
+ /* VG_(open) can return any negative number on error. Remap errors to -1,
+ * to not confuse it with our special value -2
+ */
+ if (res.isError) fd = -1;
+ else fd = (Int) res.val;
+
+ return fd;
+}
+
/* Run Info: Fixed information for a callgrind run.
 * Writes version, pid, dump base and the client command line to the
 * result file, creating it on demand via createRes().  Returns the
 * (possibly newly opened) result fd, or <0 on error.
 */
static Int dump_info(Int fd)
{
   Char* buf = outbuf;
   int i;

   if ( (fd = createRes(fd)) <0) return fd;

   /* version */
   VG_(sprintf)(buf, "version: " VERSION "\n");
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

   /* "pid:" line */
   VG_(sprintf)(buf, "pid: %d\n", VG_(getpid)());
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

   /* "base:" line */
   VG_(sprintf)(buf, "base: %s\n", dump_base);
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

   /* "cmd:" line -- client command line; API depends on core version */
   VG_(strcpy)(buf, "cmd:");
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
#if VG_CORE_INTERFACE_VERSION < 9
   for (i = 0; i < VG_(client_argc); i++) {
       if (!VG_(client_argv[i])) continue;
       VG_(sprintf)(buf, " %s", VG_(client_argv[i]));
       VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   }
#else
   VG_(sprintf)(buf, " %s", VG_(args_the_exename));
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   for (i = 0; i < VG_(args_for_client).used; i++) {
       if (!VG_(args_for_client).strs[i]) continue;
       VG_(sprintf)(buf, " %s", VG_(args_for_client).strs[i]);
       VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   }
#endif

   return fd;
}
+
+
+/* Helper for dump_state */
+
+Int dump_fd;
+
+void static dump_state_of_thread(thread_info* ti)
+{
+ Char* buf = outbuf;
+ int t = CLG_(current_tid);
+ Int p, i;
+ static FullCost sum = 0, tmp = 0;
+ BBCC *from, *to;
+ call_entry* ce;
+
+ p = VG_(sprintf)(buf, "events-%d: ", t);
+ CLG_(init_cost_lz)( CLG_(sets).full, &sum );
+ CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
+ CLG_(add_diff_cost)( CLG_(sets).full, sum,
+ ti->lastdump_cost,
+ ti->states.entry[0]->cost);
+ CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
+ p += CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum);
+ p += VG_(sprintf)(buf+p, "\n");
+ VG_(write)(dump_fd, (void*)buf, p);
+
+ p = VG_(sprintf)(buf, "frames-%d: %d\n", t,
+ CLG_(current_call_stack).sp);
+ VG_(write)(dump_fd, (void*)buf, p);
+ ce = 0;
+ for(i = 0; i < CLG_(current_call_stack).sp; i++) {
+ ce = CLG_(get_call_entry)(i);
+ /* if this frame is skipped, we don't have counters */
+ if (!ce->jcc) continue;
+
+ from = ce->jcc->from;
+ p = VG_(sprintf)(buf, "function-%d-%d: %s\n",t, i,
+ from->cxt->fn[0]->name);
+ VG_(write)(dump_fd, (void*)buf, p);
+
+ p = VG_(sprintf)(buf, "calls-%d-%d: ",t, i);
+ p+= VG_(sprintf)(buf+p, "%llu\n", ce->jcc->call_counter);
+ VG_(write)(dump_fd, (void*)buf, p);
+
+ /* FIXME: EventSets! */
+ CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
+ CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
+ CLG_(add_diff_cost)( CLG_(sets).full, sum,
+ ce->enter_cost, CLG_(current_state).cost );
+ CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );
+
+ p = VG_(sprintf)(buf, "events-%d-%d: ",t, i);
+ p += CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum );
+ p += VG_(sprintf)(buf+p, "\n");
+ VG_(write)(dump_fd, (void*)buf, p);
+ }
+ if (ce && ce->jcc) {
+ to = ce->jcc->to;
+ p = VG_(sprintf)(buf, "function-%d-%d: %s\n",t, i,
+ to->cxt->fn[0]->name );
+ VG_(write)(dump_fd, (void*)buf, p);
+ }
+}
+
/* Dump info on current callgrind state.
 * Writes instrumentation state, global statistics, the event mapping,
 * the dump part counter, the thread list, and (via
 * dump_state_of_thread) per-thread counters and call stacks to the
 * result file.  Returns the result fd, or <0 on error.
 */
static Int dump_state(Int fd)
{
    Char* buf = outbuf;
    thread_info** th;
    int t, p;
    Int orig_tid = CLG_(current_tid);

    if ( (fd = createRes(fd)) <0) return fd;

    VG_(sprintf)(buf, "instrumentation: %s\n",
		 CLG_(instrument_state) ? "on":"off");
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    /* without instrumentation, the remaining state is meaningless */
    if (!CLG_(instrument_state)) return fd;

    VG_(sprintf)(buf, "executed-bbs: %llu\n", CLG_(stat).bb_executions);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "executed-calls: %llu\n", CLG_(stat).call_counter);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "distinct-calls: %d\n", CLG_(stat).distinct_jccs);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "distinct-functions: %d\n", CLG_(stat).distinct_fns);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    VG_(sprintf)(buf, "distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    /* "events:" line. Given here because it will be dynamic in the future */
    p = VG_(sprintf)(buf, "events: ");
    CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap));
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    VG_(write)(fd, "\n", 1);

    /* "part:" line (number of last part. Is 0 at start */
    VG_(sprintf)(buf, "\npart: %d\n", CLG_(get_dump_counter)());
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    /* threads (thread 0 is unused, see VG_N_THREADS iteration from 1) */
    th = CLG_(get_threads)();
    p = VG_(sprintf)(buf, "threads:");
    for(t=1;t<VG_N_THREADS;t++) {
	if (!th[t]) continue;
	p += VG_(sprintf)(buf+p, " %d", t);
    }
    p += VG_(sprintf)(buf+p, "\n");
    VG_(write)(fd, (void*)buf, p);

    VG_(sprintf)(buf, "current-tid: %d\n", orig_tid);
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

    /* current event counters, one section per thread; dump_fd hands
     * the target fd to the per-thread callback */
    dump_fd = fd;
    CLG_(forall_threads)(dump_state_of_thread);

    return fd;
}
+
/* Poll for and execute interactive commands from the command file.
 *
 * Each call checks ONE of the two command file variants (alternating
 * between the ".pid"-suffixed and plain name).  The file may contain
 * multiple newline-separated commands:
 *   "<n> v"  set debug verbosity to <n>
 *   "+i"/"-i" switch instrumentation on/off
 *   "d [msg]" dump profile,  "z" zero costs,  "k" kill the process,
 *   "i" write run info,  "s" write state,  "o" write option info
 * to the result file.  The command file is deleted after processing.
 */
void CLG_(check_command)()
{
    /* check for dumps needed */
    static Char buf[512];
    static Char cmdBuffer[512];
    Char *cmdPos = 0, *cmdNextLine = 0;
    Int fd, bytesRead = 0, do_kill = 0;
    static Char* cfile = 0;
    SysRes res;

    if (!command_inited) return;

    /* toggle between 2 command files, with/without ".pid" postfix */
    cfile = ((cfile == command_file) || (cfile == 0)) ?
	    command_file2 : command_file;


    res = VG_(open)(cfile, VKI_O_RDONLY,0);
    if (!res.isError) {
	fd = (Int) res.val;
	/* at most 500 bytes, leaving room for the forced terminator */
	bytesRead = VG_(read)(fd,cmdBuffer,500);
	cmdBuffer[500] = 0; /* no command overrun please */
	VG_(close)(fd);
	/* don't delete command file on read error (e.g. EAGAIN) */
	if (bytesRead>0) {
	    cmdPos = cmdBuffer;
	}
    }

    /* force creation of result file if needed */
    fd = -2;

    while((bytesRead>0) && *cmdPos) {

	/* Calculate pointer for next line; the '\n' (if any) is
	 * overwritten with 0 so *cmdPos is a NUL-terminated command */
	cmdNextLine = cmdPos+1;
	while((bytesRead>0) && *cmdNextLine && (*cmdNextLine != '\n')) {
	  cmdNextLine++;
	  bytesRead--;
	}
	if ((bytesRead>0) && (*cmdNextLine == '\n')) {
	  *cmdNextLine = 0;
	  cmdNextLine++;
	  bytesRead--;
	}

	/* Command with integer option: "<n> <cmd>" */
	if ((*cmdPos >= '0') && (*cmdPos <='9')) {
	  int value = *cmdPos-'0';
	  cmdPos++;
	  while((*cmdPos >= '0') && (*cmdPos <='9')) {
	    value = 10*value + (*cmdPos-'0');
	    cmdPos++;
	  }
	  while((*cmdPos == ' ') || (*cmdPos == '\t')) cmdPos++;

	  switch(*cmdPos) {
#if CLG_ENABLE_DEBUG
	    /* verbosity */
	  case 'V':
	  case 'v':
	    CLG_(clo).verbose = value;
	    break;
#endif
	  default:
	    break;
	  }

	  cmdPos = cmdNextLine;
	  continue;
	}

	/* Command with boolean/switch option: "+<cmd>" / "-<cmd>" */
	if ((*cmdPos=='+') ||
	    (*cmdPos=='-')) {
	  int value = (cmdPos[0] == '+');
	  cmdPos++;
	  while((*cmdPos == ' ') || (*cmdPos == '\t')) cmdPos++;

	  switch(*cmdPos) {
	  case 'I':
	  case 'i':
	    /* switch instrumentation on/off */
	    CLG_(set_instrument_state)("Command", value);
	    break;

	  default:
	    break;
	  }

	  cmdPos = cmdNextLine;
	  continue;
	}

	/* regular command */
	switch(*cmdPos) {
	case 'D':
	case 'd':
	  /* DUMP, optionally with a trigger message after a space */

	  /* skip command */
	  while(*cmdPos && (*cmdPos != ' ')) cmdPos++;
	  if (*cmdPos)
	    VG_(sprintf)(buf, "Dump Command:%s", cmdPos);
	  else
	    VG_(sprintf)(buf, "Dump Command");
	  CLG_(dump_profile)(buf, False);
	  break;

	case 'Z':
	case 'z':
	  /* zero all cost counters */
	  CLG_(zero_all_cost)(False);
	  break;

	case 'K':
	case 'k':
	  /* Kill: Delay to be able to remove command file before. */
	  do_kill = 1;
	  break;

	case 'I':
	case 'i':
	  /* write run info to the result file */
	  fd = dump_info(fd);
	  break;

	case 's':
	case 'S':
	  /* write current state to the result file */
	  fd = dump_state(fd);
	  break;

	case 'O':
	case 'o':
	  /* Options Info */
	  if ( (fd = createRes(fd)) <0) break;

	  VG_(sprintf)(buf, "\ndesc: Option: --skip-plt=%s\n",
		       CLG_(clo).skip_plt ? "yes" : "no");
	  VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
	  VG_(sprintf)(buf, "desc: Option: --collect-jumps=%s\n",
		       CLG_(clo).collect_jumps ? "yes" : "no");
	  VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
	  VG_(sprintf)(buf, "desc: Option: --separate-recs=%d\n",
		       CLG_(clo).separate_recursions);
	  VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
	  VG_(sprintf)(buf, "desc: Option: --separate-callers=%d\n",
		       CLG_(clo).separate_callers);
	  VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

	  break;

	default:
	  break;
	}

	cmdPos = cmdNextLine;
    }

    /* If command executed, delete command file */
    if (cmdPos) VG_(unlink)(cfile);
    if (fd>=0) VG_(close)(fd);

    if (do_kill) {
      VG_(message)(Vg_UserMsg,
		   "Killed because of command from %s", cfile);
      CLG_(fini)(0);
      VG_(exit)(1);
    }
}
diff --git a/callgrind/context.c b/callgrind/context.c
new file mode 100644
index 0000000..ade251f
--- /dev/null
+++ b/callgrind/context.c
@@ -0,0 +1,328 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- ct_context.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Context operations ---*/
+/*------------------------------------------------------------*/
+
+/* Initial capacities: function stack entries, and context hash table
+ * buckets (an odd/prime-ish size to spread the modulo hashing). */
+#define N_FNSTACK_INITIAL_ENTRIES 500
+#define N_CXT_INITIAL_ENTRIES 2537
+
+/* Function stack of the currently active thread. */
+fn_stack CLG_(current_fn_stack);
+
+/* Set up an empty function stack with the default capacity.
+ * An empty stack has top == bottom and a 0 sentinel as bottom entry. */
+void CLG_(init_fn_stack)(fn_stack* s)
+{
+   fn_node** frames;
+
+   CLG_ASSERT(s != 0);
+
+   frames = (fn_node**) CLG_MALLOC(N_FNSTACK_INITIAL_ENTRIES * sizeof(fn_node*));
+   frames[0] = 0;
+
+   s->size   = N_FNSTACK_INITIAL_ENTRIES;
+   s->bottom = frames;
+   s->top    = frames;
+}
+
+/* Shallow-copy the global current function stack descriptor into *dst.
+ * Only the descriptor is copied; *dst shares the entry array. */
+void CLG_(copy_current_fn_stack)(fn_stack* dst)
+{
+   fn_stack* src = &CLG_(current_fn_stack);
+
+   CLG_ASSERT(dst != 0);
+
+   dst->size   = src->size;
+   dst->bottom = src->bottom;
+   dst->top    = src->top;
+}
+
+/* Make *s the global current function stack (descriptor copy only;
+ * the entry array is shared with *s). */
+void CLG_(set_current_fn_stack)(fn_stack* s)
+{
+   fn_stack* dst = &CLG_(current_fn_stack);
+
+   CLG_ASSERT(s != 0);
+
+   dst->size   = s->size;
+   dst->bottom = s->bottom;
+   dst->top    = s->top;
+}
+
+/* Global hash table of all Context structures. */
+static cxt_hash cxts;
+
+/* Initialize the context hash table: default size, all buckets empty. */
+void CLG_(init_cxt_table)()
+{
+   Int b;
+
+   cxts.size    = N_CXT_INITIAL_ENTRIES;
+   cxts.entries = 0;
+   cxts.table   = (Context**) CLG_MALLOC(cxts.size * sizeof(Context*));
+
+   for (b = cxts.size - 1; b >= 0; b--)
+      cxts.table[b] = 0;
+}
+
+/* Give callers (e.g. dump code) read access to the context hash table. */
+cxt_hash* CLG_(get_cxt_hash)()
+{
+ return &cxts;
+}
+
+/* Grow the context hash table (a bit more than doubling, keeping an
+ * odd size) and rehash every chained entry into the new bucket array.
+ * On allocation failure the old table is kept unchanged. */
+static void resize_cxt_table(void)
+{
+    UInt b, grown_size, new_idx, conflicts1 = 0, conflicts2 = 0;
+    Context **grown, *c, *chain;
+
+    grown_size = 2* cxts.size +3;
+    grown = (Context**) CLG_MALLOC(grown_size * sizeof(Context*));
+    if (!grown) return;
+
+    for (b = 0; b < grown_size; b++)
+	grown[b] = NULL;
+
+    /* move each entry of each old chain into its new bucket,
+     * counting chains of length >1 / >2 for the debug statistics */
+    for (b = 0; b < cxts.size; b++) {
+	chain = cxts.table[b];
+	while (NULL != chain) {
+	    c = chain;
+	    chain = chain->next;
+
+	    new_idx = (UInt) (c->hash % grown_size);
+	    c->next = grown[new_idx];
+	    grown[new_idx] = c;
+	    if (c->next) {
+		conflicts1++;
+		if (c->next->next)
+		    conflicts2++;
+	    }
+	}
+    }
+
+    VG_(free)(cxts.table);
+
+    CLG_DEBUG(0, "Resize Context Hash: %d => %d (entries %d, conflicts %d/%d)\n",
+	      cxts.size, grown_size,
+	      cxts.entries, conflicts1, conflicts2);
+
+    cxts.size  = grown_size;
+    cxts.table = grown;
+    CLG_(stat).cxt_hash_resizes++;
+}
+
+/* Hash the top <size> entries of a function stack; <fn> points at the
+ * topmost entry and is walked downwards, stopping early at a 0 entry.
+ * Must stay in sync with new_cxt(), which replays the identical
+ * recurrence while copying the pointers. */
+__inline__
+static UWord cxt_hash_val(fn_node** fn, UInt size)
+{
+ UWord hash = 0;
+ UInt count = size;
+ while(*fn != 0) {
+ hash = (hash<<7) + (hash>>25) + (UWord)(*fn);
+ fn--;
+ count--;
+ if (count==0) break;
+ }
+ return hash;
+}
+
+/* Check whether Context <cxt> matches the stack top at <fn> (walked
+ * downwards), using <hash> as a cheap first filter.
+ * Note: only min(stack depth, cxt->size) entries are compared. */
+__inline__
+static Bool is_cxt(UWord hash, fn_node** fn, Context* cxt)
+{
+ int count;
+ fn_node** cxt_fn;
+
+ if (hash != cxt->hash) return False;
+
+ count = cxt->size;
+ cxt_fn = &(cxt->fn[0]);
+ while((*fn != 0) && (count>0)) {
+ if (*cxt_fn != *fn) return False;
+ fn--;
+ cxt_fn++;
+ count--;
+ }
+ return True;
+}
+
+/**
+ * Allocate and register a new Context structure for the stack top at
+ * <fn> (walked downwards). Returns 0 for an empty stack.
+ * The context depth is bounded by the top function's separate_callers
+ * setting; <recs> context numbers are reserved per recursion level.
+ */
+static Context* new_cxt(fn_node** fn)
+{
+ Context* new;
+ UInt idx, offset;
+ UWord hash;
+ int size, recs;
+ fn_node* top_fn;
+
+ CLG_ASSERT(fn);
+ top_fn = *fn;
+ if (top_fn == 0) return 0;
+
+ size = top_fn->separate_callers +1;
+ recs = top_fn->separate_recursions;
+ if (recs<1) recs=1;
+
+ /* check fill degree of context hash table and resize if needed (>80%) */
+ cxts.entries++;
+ if (10 * cxts.entries / cxts.size > 8)
+ resize_cxt_table();
+
+ /* NOTE(review): allocation result is not checked here (unlike in
+  * resize_cxt_table) -- confirm CLG_MALLOC aborts on failure. */
+ new = (Context*) CLG_MALLOC(sizeof(Context)+sizeof(fn_node*)*size);
+
+ // hash value calculation similar to cxt_hash_val(), but additionally
+ // copying function pointers in one run
+ hash = 0;
+ offset = 0;
+ while(*fn != 0) {
+ hash = (hash<<7) + (hash>>25) + (UWord)(*fn);
+ new->fn[offset] = *fn;
+ offset++;
+ fn--;
+ if (offset >= size) break;
+ }
+ /* stack was shallower than requested depth: shrink recorded size */
+ if (offset < size) size = offset;
+
+ new->size = size;
+ new->base_number = CLG_(stat).context_counter;
+ new->hash = hash;
+
+ /* reserve one context number per recursion level */
+ CLG_(stat).context_counter += recs;
+ CLG_(stat).distinct_contexts++;
+
+ /* insert into Context hash table */
+ idx = (UInt) (hash % cxts.size);
+ new->next = cxts.table[idx];
+ cxts.table[idx] = new;
+
+#if CLG_ENABLE_DEBUG
+ CLG_DEBUGIF(3) {
+ /* NOTE(review): "ox%p" looks like a typo for "0x%p" (debug output only) */
+ VG_(printf)(" new_cxt ox%p: ", new);
+ CLG_(print_cxt)(12, new, 0);
+ }
+#endif
+
+ return new;
+}
+
+/* Get (or create) the Context structure for the current context, i.e.
+ * the stack top at <fn> walked downwards. Returns 0 for an empty stack.
+ * Lookup order: per-function 1-entry LRU cache (fn->last_cxt), then
+ * the global hash table, then allocation via new_cxt(). */
+Context* CLG_(get_cxt)(fn_node** fn)
+{
+ Context* cxt;
+ UInt size, idx;
+ UWord hash;
+
+ CLG_ASSERT(fn != 0);
+ if (*fn == 0) return 0;
+ size = (*fn)->separate_callers+1;
+ /* a negative separate_callers setting is mapped to a positive depth */
+ if (size<=0) { size = -size+1; }
+
+ CLG_DEBUG(5, "+ get_cxt(fn '%s'): size %d\n",
+ (*fn)->name, size);
+
+ hash = cxt_hash_val(fn, size);
+
+ /* fast path: the last context seen for this function */
+ if ( ((cxt = (*fn)->last_cxt) != 0) && is_cxt(hash, fn, cxt)) {
+ CLG_DEBUG(5, "- get_cxt: %p\n", cxt);
+ return cxt;
+ }
+
+ CLG_(stat).cxt_lru_misses++;
+
+ idx = (UInt) (hash % cxts.size);
+ cxt = cxts.table[idx];
+
+ while(cxt) {
+ if (is_cxt(hash,fn,cxt)) break;
+ cxt = cxt->next;
+ }
+
+ if (!cxt)
+ cxt = new_cxt(fn);
+
+ (*fn)->last_cxt = cxt;
+
+ CLG_DEBUG(5, "- get_cxt: %p\n", cxt);
+
+ return cxt;
+}
+
+
+/**
+ * Change execution context by calling a new function from the current
+ * context. The old context and function-stack depth are always saved
+ * in the current call stack entry (even if the context stays the same).
+ * No new context is pushed when <fn> is already on top, or belongs to
+ * the same function group as the current top.
+ */
+void CLG_(push_cxt)(fn_node* fn)
+{
+ call_stack* cs = &CLG_(current_call_stack);
+ Int fn_entries;
+
+ /* save old context on stack (even if not changed at all!) */
+ CLG_ASSERT(cs->sp < cs->size);
+ CLG_ASSERT(cs->entry[cs->sp].cxt == 0);
+ cs->entry[cs->sp].cxt = CLG_(current_state).cxt;
+ cs->entry[cs->sp].fn_sp = CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom;
+
+ /* no context change: same function, or same function group */
+ if (*(CLG_(current_fn_stack).top) == fn) return;
+ if (fn && (fn->group>0) &&
+ ((*(CLG_(current_fn_stack).top))->group == fn->group)) return;
+
+ /* resizing needed ? (grow by doubling, copy old entries over) */
+ fn_entries = CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom;
+ if (fn_entries == CLG_(current_fn_stack).size-1) {
+ int new_size = CLG_(current_fn_stack).size *2;
+ fn_node** new = (fn_node**) CLG_MALLOC(new_size * sizeof(fn_node*));
+ int i;
+ for(i=0;i<CLG_(current_fn_stack).size;i++)
+ new[i] = CLG_(current_fn_stack).bottom[i];
+ VG_(free)(CLG_(current_fn_stack).bottom);
+ CLG_(current_fn_stack).top = new + fn_entries;
+ CLG_(current_fn_stack).bottom = new;
+
+ CLG_DEBUG(0, "Resize Context Stack: %d => %d (pushing '%s')\n",
+ CLG_(current_fn_stack).size, new_size,
+ fn ? fn->name : (Char*)"0x0");
+
+ CLG_(current_fn_stack).size = new_size;
+ }
+
+ if (*(CLG_(current_fn_stack).top) == 0) {
+ UInt *pactive;
+
+ /* this is first function: increment its active count */
+ CLG_ASSERT(fn != 0);
+ pactive = CLG_(get_fn_entry)(fn->number);
+ (*pactive)++;
+ }
+
+ /* push fn and switch to the matching Context */
+ CLG_(current_fn_stack).top++;
+ *(CLG_(current_fn_stack).top) = fn;
+ CLG_(current_state).cxt = CLG_(get_cxt)(CLG_(current_fn_stack).top);
+
+ CLG_DEBUG(5, " push_cxt(fn '%s'): %d\n",
+ fn ? fn->name : (Char*)"0x0",
+ CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom);
+}
+
diff --git a/callgrind/costs.c b/callgrind/costs.c
new file mode 100644
index 0000000..1fa1b61
--- /dev/null
+++ b/callgrind/costs.c
@@ -0,0 +1,79 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- ct_costs.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#include <pub_tool_mallocfree.h>
+
+/* Default number of 64bit cost entries per allocated chunk. */
+#define COSTCHUNK_SIZE 100000
+
+/* Statistics: total cost entries / chunks allocated so far. */
+UInt CLG_(costarray_entries) = 0;
+UInt CLG_(costarray_chunks) = 0;
+/* Singly linked chunk list; allocation happens in the current chunk. */
+static CostChunk* cost_chunk_base = 0;
+static CostChunk* cost_chunk_current = 0;
+
+/* Return a pointer to an array of <size> 64bit cost counters.
+ * Counters are carved out of large chunks, which are only freed all at
+ * once by CLG_(free_costarrays)().
+ * Bug fix: a request with size > COSTCHUNK_SIZE previously overran the
+ * freshly allocated chunk (the chunk was always COSTCHUNK_SIZE large);
+ * such oversized requests now get a chunk of exactly their size. */
+ULong* CLG_(get_costarray)(Int size)
+{
+ ULong* ptr;
+
+ if (!cost_chunk_current ||
+ (cost_chunk_current->size - cost_chunk_current->used < size)) {
+ /* allocate a new chunk, big enough even for oversized requests */
+ Int chunk_size = (size > COSTCHUNK_SIZE) ? size : COSTCHUNK_SIZE;
+ CostChunk* cc = (CostChunk*) CLG_MALLOC(sizeof(CostChunk) +
+ chunk_size * sizeof(ULong));
+ cc->size = chunk_size;
+ cc->used = 0;
+ cc->next = 0;
+
+ if (cost_chunk_current)
+ cost_chunk_current->next = cc;
+ cost_chunk_current = cc;
+
+ if (!cost_chunk_base) cost_chunk_base = cc;
+
+ CLG_(costarray_chunks)++;
+ }
+
+ ptr = &(cost_chunk_current->data[cost_chunk_current->used]);
+ cost_chunk_current->used += size;
+
+ CLG_(costarray_entries) += size;
+
+ return ptr;
+}
+
+/* Release all cost chunks at once and reset the chunk list. */
+void CLG_(free_costarrays)()
+{
+ CostChunk *chunk, *next;
+
+ for (chunk = cost_chunk_base; chunk; chunk = next) {
+ next = chunk->next;
+ VG_(free)(chunk);
+ }
+
+ cost_chunk_base = 0;
+ cost_chunk_current = 0;
+}
diff --git a/callgrind/costs.h b/callgrind/costs.h
new file mode 100644
index 0000000..5e5ccfd
--- /dev/null
+++ b/callgrind/costs.h
@@ -0,0 +1,35 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- ct_costs.h ---*/
+/*--- (C) 2004, Josef Weidendorfer ---*/
+/*--------------------------------------------------------------------*/
+
+#ifndef CT_COSTS
+#define CT_COSTS
+
+#include "pub_tool_basics.h"
+
+/* Namespace prefix for all Callgrind symbols. */
+#define CLG_(str) VGAPPEND(vgCallgrind_,str)
+
+/* Statistics: total cost entries / chunks allocated (see costs.c). */
+extern UInt CLG_(costarray_entries);
+extern UInt CLG_(costarray_chunks);
+
+/* Array of 64bit costs. This is separated from other structs
+ * to support a dynamic number of costs for a cost item.
+ * Chunks are allocated on demand, and deallocated at program termination.
+ */
+typedef struct _CostChunk CostChunk;
+struct _CostChunk {
+ Int size;
+ Int used;
+ /* NOTE(review): 'prev' is never maintained by costs.c -- verify
+  * before relying on it. */
+ CostChunk *next, *prev;
+ /* trailing cost data ([0]-sized trailing array, GNU C idiom) */
+ ULong data[0];
+};
+
+/* Allocate a number of 64bit cost values.
+ * Typically used from ct_events.c */
+ULong* CLG_(get_costarray)(Int size);
+void CLG_(free_costarrays)(void);
+
+
+#endif /* CT_COSTS */
diff --git a/callgrind/debug.c b/callgrind/debug.c
new file mode 100644
index 0000000..2e3ef60
--- /dev/null
+++ b/callgrind/debug.c
@@ -0,0 +1,453 @@
+/*
+ This file is part of Callgrind, a Valgrind skin for call graph
+ profiling programs.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This skin is derived from and contains lot of code from Cachegrind
+ Copyright (C) 2002 Nicholas Nethercote (njn25@cam.ac.uk)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+#include "events.h"
+
+/* If debugging mode of, dummy functions are provided (see below)
+ */
+#if CLG_ENABLE_DEBUG
+
+/*------------------------------------------------------------*/
+/*--- Debug output helpers ---*/
+/*------------------------------------------------------------*/
+
+/* Print <s> spaces (capped at 40) to indent debug output. */
+static void print_indent(int s)
+{
+   int i;
+
+   if (s > 40) s = 40;
+   for (i = 0; i < s; i++)
+      VG_(printf)(" ");
+}
+
+/* Debug-print basic block <bb>; a negative <s> means "indent by -s". */
+void CLG_(print_bb)(int s, BB* bb)
+{
+   if (s < 0)
+      print_indent(-s);
+
+   VG_(printf)("BB %p (Obj '%s')", bb_addr(bb), bb->obj->name);
+}
+
+/* Print a context as mangled name: fn[0], optionally followed by a
+ * 'N recursion marker and the 'name chain of separated callers. */
+static
+void print_mangled_cxt(Context* cxt, int rec_index)
+{
+   int pos;
+
+   if (!cxt) {
+      VG_(printf)("(none)");
+      return;
+   }
+
+   VG_(printf)("%s", cxt->fn[0]->name);
+   if (rec_index >0)
+      VG_(printf)("'%d", rec_index +1);
+   for (pos = 1; pos < cxt->size; pos++)
+      VG_(printf)("'%s", cxt->fn[pos]->name);
+}
+
+
+
+/* Debug-print context <cxt> with its recursion index and, if nonzero,
+ * the active count of its top function. Negative <s> means indent. */
+void CLG_(print_cxt)(int s, Context* cxt, int rec_index)
+{
+   UInt* pactive;
+
+   if (s < 0) {
+      s = -s;
+      print_indent(s);
+   }
+
+   if (!cxt) {
+      VG_(printf)("(no context)\n");
+      return;
+   }
+
+   pactive = CLG_(get_fn_entry)(cxt->fn[0]->number);
+   CLG_ASSERT(rec_index < cxt->fn[0]->separate_recursions);
+
+   VG_(printf)("Cxt %d", cxt->base_number + rec_index);
+   if (*pactive>0)
+      VG_(printf)(" [active=%d]", *pactive);
+   VG_(printf)(": ");
+   print_mangled_cxt(cxt, rec_index);
+   VG_(printf)("\n");
+}
+
+/* Debug-print an execution state. Negative <s> means indent by -s. */
+void CLG_(print_execstate)(int s, exec_state* es)
+{
+   if (s < 0) {
+      s = -s;
+      print_indent(s);
+   }
+
+   if (es)
+      VG_(printf)("ExecState [Sig %d, collect %s, nonskipped %p]: jmps_passed %d\n",
+                  es->sig, es->collect ? "yes" : "no",
+                  es->nonskipped, es->jmps_passed);
+   else
+      VG_(printf)("ExecState 0x0\n");
+}
+
+
+/* Debug-print a BBCC: object name, BB offset/address, then its context.
+ * <jumpaddr> is currently unused; it only selects the disabled #if 0
+ * branch below. Negative <s> means indent by -s. */
+void CLG_(print_bbcc)(int s, BBCC* bbcc, Bool jumpaddr)
+{
+ BB* bb;
+
+ if (s<0) {
+ s = -s;
+ print_indent(s);
+ }
+
+ if (!bbcc) {
+ VG_(printf)("BBCC 0x0\n");
+ return;
+ }
+
+ bb = bbcc->bb;
+ CLG_ASSERT(bb!=0);
+
+#if 0
+ if (jumpaddr)
+ VG_(printf)("%s +%p=%p, ",
+ bb->obj->name + bb->obj->last_slash_pos,
+ bb->jmp_offset, bb_jmpaddr(bb));
+ else
+#endif
+ VG_(printf)("%s +%p=%p, ",
+ bb->obj->name + bb->obj->last_slash_pos,
+ bb->offset, bb_addr(bb));
+ CLG_(print_cxt)(s+8, bbcc->cxt, bbcc->rec_index);
+}
+
+/* Debug-print an EventSet: name, size/capacity, and all event type
+ * names, with "|" marking event group boundaries (nextTop).
+ * Bug fix: event names were passed as the printf format string itself;
+ * a '%' inside a name would have been misinterpreted as a conversion.
+ * They are now printed through an explicit "%s" format. */
+void CLG_(print_eventset)(int s, EventSet* es)
+{
+ int i;
+
+ if (s<0) {
+ s = -s;
+ print_indent(s);
+ }
+
+ if (!es) {
+ VG_(printf)("(EventSet not set)\n");
+ return;
+ }
+
+ VG_(printf)("%5s (Size/Cap %d/%d): ",
+ es->name, es->size, es->capacity);
+
+ if (es->size == 0)
+ VG_(printf)("-");
+ else {
+ for(i=0; i< es->size; i++) {
+ if (i>0) {
+ VG_(printf)(" ");
+ if (es->e[i-1].nextTop == i)
+ VG_(printf)("| ");
+ }
+ VG_(printf)("%s", es->e[i].type->name);
+ }
+ }
+ VG_(printf)("\n");
+}
+
+
+/* Debug-print cost array <c> labeled by EventSet <es>, wrapping lines
+ * at ~70 columns and continuing with extra indent.
+ * Negative <s> means indent by -s. */
+void CLG_(print_cost)(int s, EventSet* es, ULong* c)
+{
+ Int i, pos;
+
+ if (s<0) {
+ s = -s;
+ print_indent(s);
+ }
+
+ if (!es) {
+ VG_(printf)("Cost (Nothing, EventSet not set)\n");
+ return;
+ }
+ if (!c) {
+ VG_(printf)("Cost (Null, EventSet %s)\n", es->name);
+ return;
+ }
+
+ if (es->size == 0) {
+ VG_(printf)("Cost (Nothing, EventSet %s with len 0)\n", es->name);
+ return;
+ }
+
+ /* track output column for line wrapping */
+ pos = s;
+ pos += VG_(printf)("Cost %s [%p]: %s %llu", es->name, c, es->e[0].type->name, c[0]);
+
+ i = 1;
+ while(i<es->size) {
+ if (pos > 70) {
+ VG_(printf)(",\n");
+ print_indent(s+5);
+ pos = s+5;
+ }
+ else
+ pos += VG_(printf)(", ");
+ pos += VG_(printf)("%s %llu", es->e[i].type->name, c[i]);
+ i++;
+ }
+ VG_(printf)("\n");
+}
+
+
+/* One-line dump of a jump cost center: "from => to [calls/Ir,Dr,Dw]". */
+void CLG_(print_short_jcc)(jCC* jcc)
+{
+   if (!jcc) {
+      VG_(printf)("[Skipped JCC]");
+      return;
+   }
+
+   VG_(printf)("%p => %p [%llu/%llu,%llu,%llu]",
+               bb_jmpaddr(jcc->from->bb),
+               bb_addr(jcc->to->bb),
+               jcc->call_counter,
+               jcc->cost ? jcc->cost[CLG_(sets).off_sim_Ir] : 0,
+               jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dr] : 0,
+               jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dw] : 0);
+}
+
+/* Multi-line debug dump of a jump cost center: source BBCC, target
+ * BBCC, call count, and full cost. Negative <s> means indent by -s. */
+void CLG_(print_jcc)(int s, jCC* jcc)
+{
+ if (s<0) {
+ s = -s;
+ print_indent(s);
+ }
+
+ if (!jcc) {
+ VG_(printf)("JCC to skipped function\n");
+ return;
+ }
+ VG_(printf)("JCC %p from ", jcc);
+ CLG_(print_bbcc)(s+9, jcc->from, True);
+ print_indent(s+4);
+ VG_(printf)("to ");
+ CLG_(print_bbcc)(s+9, jcc->to, False);
+ print_indent(s+4);
+ VG_(printf)("Calls %llu\n", jcc->call_counter);
+ print_indent(s+4);
+ CLG_(print_cost)(s+9, CLG_(sets).full, jcc->cost);
+}
+
+/* Debug-print one call stack entry at depth <sp>: stack pointer,
+ * return address, a possible non-skipped BBCC, and its JCC. */
+void CLG_(print_stackentry)(int s, int sp)
+{
+ call_entry* ce;
+
+ if (s<0) {
+ s = -s;
+ print_indent(s);
+ }
+
+ ce = CLG_(get_call_entry)(sp);
+ VG_(printf)("[%-2d] SP %p, RA %p", sp, ce->sp, ce->ret_addr);
+ if (ce->nonskipped)
+ VG_(printf)(" NonSkipped BB %p / %s",
+ bb_addr(ce->nonskipped->bb),
+ ce->nonskipped->cxt->fn[0]->name);
+ VG_(printf)("\n");
+ print_indent(s+5);
+ CLG_(print_jcc)(5,ce->jcc);
+}
+
+/* Dump the whole current call stack. Kept under #if 0 as an ad-hoc
+ * debugging helper; not referenced by live code. */
+#if 0
+static void print_call_stack()
+{
+ int c;
+
+ VG_(printf)("Call Stack:\n");
+ for(c=0;c<CLG_(current_call_stack).sp;c++)
+ CLG_(print_stackentry)(-2, c);
+}
+#endif
+
+/* Debug-print the position of a BBCC: BB address, section kind letter,
+ * context id, mangled context, object and source file (+line if known).
+ * Bug fix: bb_addr() yields a word-sized Addr, but it was printed with
+ * "%08x" (a 32bit conversion) -- a varargs mismatch on 64bit targets.
+ * Use "%p" as the other debug helpers (e.g. CLG_(print_bb)) do. */
+void CLG_(print_bbcc_fn)(BBCC* bbcc)
+{
+ obj_node* obj;
+
+ if (!bbcc) {
+ VG_(printf)("%08x", 0);
+ return;
+ }
+
+ VG_(printf)("%p/%c %d:", bb_addr(bbcc->bb),
+ (bbcc->bb->sect_kind == Vg_SectText) ? 'T' :
+ (bbcc->bb->sect_kind == Vg_SectData) ? 'D' :
+ (bbcc->bb->sect_kind == Vg_SectBSS) ? 'B' :
+ (bbcc->bb->sect_kind == Vg_SectGOT) ? 'G' :
+ (bbcc->bb->sect_kind == Vg_SectPLT) ? 'P' : 'U',
+ bbcc->cxt->base_number+bbcc->rec_index);
+ print_mangled_cxt(bbcc->cxt, bbcc->rec_index);
+
+ obj = bbcc->cxt->fn[0]->file->obj;
+ if (obj->name[0])
+ VG_(printf)(" %s", obj->name+obj->last_slash_pos);
+
+ if (VG_(strcmp)(bbcc->cxt->fn[0]->file->name, "???") !=0) {
+ VG_(printf)(" %s", bbcc->cxt->fn[0]->file->name);
+ if ((bbcc->cxt->fn[0] == bbcc->bb->fn) && (bbcc->bb->line>0))
+ VG_(printf)(":%d", bbcc->bb->line);
+ }
+}
+
+/* Debug-print a BBCC with per-instruction costs. For each instruction
+ * the remaining execution count is shown; it is decreased by the
+ * side-exit counters of conditional jumps already passed.
+ * Bug fix: <ecounter> is a ULong but was printed twice with "%d",
+ * a varargs type mismatch; use "%llu" (as done elsewhere, e.g. in
+ * CLG_(print_bbno)). */
+void CLG_(print_bbcc_cost)(int s, BBCC* bbcc)
+{
+ BB* bb;
+ Int i, cjmpNo;
+ ULong ecounter;
+
+ if (s<0) {
+ s = -s;
+ print_indent(s);
+ }
+
+ if (!bbcc) {
+ VG_(printf)("BBCC 0x0\n");
+ return;
+ }
+
+ bb = bbcc->bb;
+ CLG_ASSERT(bb!=0);
+
+ CLG_(print_bbcc)(s, bbcc, False);
+
+ ecounter = bbcc->ecounter_sum;
+
+ print_indent(s+2);
+ VG_(printf)("ECounter: sum %llu ", ecounter);
+ for(i=0; i<bb->cjmp_count; i++) {
+ VG_(printf)("[%d]=%d ",
+ bb->jmp[i].instr, bbcc->jmp[i].ecounter);
+ }
+ VG_(printf)("\n");
+
+ cjmpNo = 0;
+ for(i=0; i<bb->instr_count; i++) {
+ InstrInfo* ii = &(bb->instr[i]);
+ print_indent(s+2);
+ VG_(printf)("[%2d] IOff %2d ecnt %3llu ",
+ i, ii->instr_offset, ecounter);
+ CLG_(print_cost)(s+5, ii->eventset, bbcc->cost + ii->cost_offset);
+
+ /* update execution counter */
+ if (cjmpNo < bb->cjmp_count)
+ if (bb->jmp[cjmpNo].instr == i) {
+ ecounter -= bbcc->jmp[cjmpNo].ecounter;
+ cjmpNo++;
+ }
+ }
+}
+
+
+/* Debug-print an address with function, object, file and line info
+ * where debug information is available.
+ * Bug fix: for addr == 0 the word-sized Addr was printed with "%08x"
+ * (a 32bit conversion) -- a varargs mismatch on 64bit targets; use
+ * "%p" exactly as the non-zero paths below already do. */
+void CLG_(print_addr)(Addr addr)
+{
+ Char fl_buf[FILENAME_LEN];
+ Char fn_buf[FN_NAME_LEN];
+ const UChar* obj_name;
+ SegInfo* si;
+ int ln, i=0, opos=0;
+
+ if (addr == 0) {
+ VG_(printf)("%p", addr);
+ return;
+ }
+
+ CLG_(get_debug_info)(addr, fl_buf, fn_buf, &ln, &si);
+
+ if (VG_(strcmp)(fn_buf,"???")==0)
+ VG_(printf)("%p", addr);
+ else
+ VG_(printf)("%p %s", addr, fn_buf);
+
+ if (si) {
+ obj_name = VG_(seginfo_filename)(si);
+ if (obj_name) {
+ /* strip the directory part of the object name */
+ while(obj_name[i]) {
+ if (obj_name[i]=='/') opos = i+1;
+ i++;
+ }
+ if (obj_name[0])
+ VG_(printf)(" %s", obj_name+opos);
+ }
+ }
+
+ if (ln>0)
+ VG_(printf)(" (%s:%u)", fl_buf,ln);
+}
+
+/* Like CLG_(print_addr), but terminates the output with a newline. */
+void CLG_(print_addr_ln)(Addr addr)
+{
+ CLG_(print_addr)(addr);
+ VG_(printf)("\n");
+}
+
+/* Last BB execution count already printed, to avoid repeats. */
+static ULong bb_written = 0;
+
+/* Print the global BB execution number, but only when it changed
+ * since the last call. */
+void CLG_(print_bbno)(void)
+{
+ if (bb_written != CLG_(stat).bb_executions) {
+ bb_written = CLG_(stat).bb_executions;
+ VG_(printf)("BB# %llu\n",CLG_(stat).bb_executions);
+ }
+}
+
+/* Debug-print the current thread id, call stack depth and the mangled
+ * current context. */
+void CLG_(print_context)(void)
+{
+ BBCC* bbcc;
+
+ CLG_DEBUG(0,"In tid %d [%d] ",
+ CLG_(current_tid), CLG_(current_call_stack).sp);
+ bbcc = CLG_(current_state).bbcc;
+ print_mangled_cxt(CLG_(current_state).cxt,
+ bbcc ? bbcc->rec_index : 0);
+ VG_(printf)("\n");
+}
+
+/* Debug wrapper around VG_(malloc), logging size and call site <f>.
+ * NOTE(review): "%d" receives a UWord argument; on 64bit targets this
+ * is a varargs size mismatch -- confirm against VG_(printf) semantics. */
+void* CLG_(malloc)(UWord s, char* f)
+{
+ CLG_DEBUG(3, "Malloc(%d) in %s.\n", s, f);
+ return VG_(malloc)(s);
+}
+
+#else /* CLG_ENABLE_DEBUG */
+
+/* Debugging disabled: empty stubs so callers need no #ifdef guards. */
+void CLG_(print_bbno)(void) {}
+void CLG_(print_context)(void) {}
+void CLG_(print_jcc)(int s, jCC* jcc) {}
+void CLG_(print_bbcc)(int s, BBCC* bbcc, Bool b) {}
+void CLG_(print_bbcc_fn)(BBCC* bbcc) {}
+void CLG_(print_cost)(int s, EventSet* es, ULong* cost) {}
+void CLG_(print_bb)(int s, BB* bb) {}
+void CLG_(print_cxt)(int s, Context* cxt, int rec_index) {}
+void CLG_(print_short_jcc)(jCC* jcc) {}
+void CLG_(print_stackentry)(int s, int sp) {}
+void CLG_(print_addr)(Addr addr) {}
+void CLG_(print_addr_ln)(Addr addr) {}
+
+#endif
diff --git a/callgrind/docs/Makefile.am b/callgrind/docs/Makefile.am
new file mode 100644
index 0000000..d539a6e
--- /dev/null
+++ b/callgrind/docs/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST =
diff --git a/callgrind/dump.c b/callgrind/dump.c
new file mode 100644
index 0000000..3f13aea
--- /dev/null
+++ b/callgrind/dump.c
@@ -0,0 +1,1715 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- dump.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "config.h"
+#include "global.h"
+
+#include <pub_tool_threadstate.h>
+#include <pub_tool_libcfile.h>
+
+/*------------------------------------------------------------*/
+/*--- Support for signal handlers and multi-threading ---*/
+/*------------------------------------------------------------*/
+
+/* Dump Part Counter (number of dump parts written so far) */
+static Int out_counter = 0;
+
+/* Base name / directory for output files (set via options elsewhere). */
+static Char* dump_file_base = 0;
+static Char* base_directory = 0;
+
+/* Command buffer */
+static Char cmdbuf[BUF_LEN];
+
+/* Total reads/writes/misses sum over all dumps and threads.
+ * Updated during CC traversal at dump time.
+ */
+FullCost CLG_(total_cost) = 0;
+static FullCost dump_total_cost = 0;
+
+/* Mapping of event positions used in the dump files. */
+EventMapping* CLG_(dumpmap) = 0;
+
+/* Temporary output buffer for
+ * print_fn_pos, fprint_apos, fprint_fcost, fprint_jcc,
+ * fprint_fcc_ln, dump_run_info, dump_state_info
+ */
+static Char outbuf[FILENAME_LEN + FN_NAME_LEN + OBJ_NAME_LEN];
+
+/* Number of dump parts written so far. */
+Int CLG_(get_dump_counter)(void)
+{
+ return out_counter;
+}
+
+/* Return the configured base name for dump files.
+ * Prototype tightened from "()" (unspecified arguments) to "(void)",
+ * consistent with the sibling CLG_(get_dump_counter) above. */
+Char* CLG_(get_dump_file_base)(void)
+{
+ return dump_file_base;
+}
+
+/*------------------------------------------------------------*/
+/*--- Output file related stuff ---*/
+/*------------------------------------------------------------*/
+
+/* One shared Bool array of "already dumped" flags for string
+ * compression, partitioned into per-kind slices below (layout is set
+ * up in init_dump_array). */
+static Bool* dump_array = 0;
+static Int dump_array_size = 0;
+/* Slices into dump_array, indexed by object/file/fn/context number: */
+static Bool* obj_dumped = 0;
+static Bool* file_dumped = 0;
+static Bool* fn_dumped = 0;
+static Bool* cxt_dumped = 0;
+
+/* Clear all "already dumped" flags. */
+static
+void reset_dump_array(void)
+{
+ int i;
+
+ CLG_ASSERT(dump_array != 0);
+
+ i = dump_array_size;
+ while (i-- > 0)
+ dump_array[i] = False;
+}
+
+/* Allocate the shared flag array, sized for all objects, files,
+ * functions and contexts, and carve it into the per-kind slices. */
+static
+void init_dump_array(void)
+{
+ Int n_obj = CLG_(stat).distinct_objs;
+ Int n_file = CLG_(stat).distinct_files;
+ Int n_fn = CLG_(stat).distinct_fns;
+
+ dump_array_size = n_obj + n_file + n_fn + CLG_(stat).context_counter;
+ CLG_ASSERT(dump_array == 0);
+ dump_array = (Bool*) CLG_MALLOC(dump_array_size * sizeof(Bool));
+
+ obj_dumped = dump_array;
+ file_dumped = obj_dumped + n_obj;
+ fn_dumped = file_dumped + n_file;
+ cxt_dumped = fn_dumped + n_fn;
+
+ reset_dump_array();
+
+ CLG_DEBUG(1, " init_dump_array: size %d\n", dump_array_size);
+}
+
+/* Release the shared flag array and clear all slice pointers. */
+static __inline__
+void free_dump_array(void)
+{
+ CLG_ASSERT(dump_array != 0);
+ VG_(free)(dump_array);
+
+ dump_array = 0;
+ obj_dumped = file_dumped = fn_dumped = cxt_dumped = 0;
+}
+
+
+/* Reset a function position to "invalid": no file, function, object
+ * or context, and recursion index 0. */
+static __inline__
+void init_fpos(FnPos* p)
+{
+ p->rec_index = 0;
+ p->cxt = 0;
+ p->obj = 0;
+ p->fn = 0;
+ p->file = 0;
+}
+
+
+/* Unbuffered write variant, disabled.
+ * NOTE(review): this branch has a duplicate storage class
+ * ("static __inline__ static void") and would not compile if enabled. */
+#if 0
+static __inline__
+static void my_fwrite(Int fd, Char* buf, Int len)
+{
+ VG_(write)(fd, (void*)buf, len);
+}
+#else
+
+/* Buffered write: small writes are collected in fwrite_buf and flushed
+ * when the buffer fills, the target fd changes, or a big write occurs. */
+#define FWRITE_BUFSIZE 32000
+#define FWRITE_THROUGH 10000
+static Char fwrite_buf[FWRITE_BUFSIZE];
+static Int fwrite_pos;
+static Int fwrite_fd = -1;
+
+/* Write out any pending buffered data to the current fd. */
+static __inline__
+void fwrite_flush(void)
+{
+ if ((fwrite_fd>=0) && (fwrite_pos>0))
+ VG_(write)(fwrite_fd, (void*)fwrite_buf, fwrite_pos);
+ fwrite_pos = 0;
+}
+
+/* Buffered write of <len> bytes of <buf> to <fd>.
+ * NOTE(review): uses VG_(strncpy), which stops at a NUL byte -- this
+ * assumes buf holds text without embedded NULs within len. */
+static void my_fwrite(Int fd, Char* buf, Int len)
+{
+ if (fwrite_fd != fd) {
+ fwrite_flush();
+ fwrite_fd = fd;
+ }
+ if (len > FWRITE_THROUGH) {
+ /* big write: bypass the buffer entirely */
+ fwrite_flush();
+ VG_(write)(fd, (void*)buf, len);
+ return;
+ }
+ if (FWRITE_BUFSIZE - fwrite_pos <= len) fwrite_flush();
+ VG_(strncpy)(fwrite_buf + fwrite_pos, buf, len);
+ fwrite_pos += len;
+}
+#endif
+
+
+/* Write an object name line into <buf>; with string compression the
+ * name is replaced by "(number)" after its first occurrence. */
+static void print_obj(Char* buf, obj_node* obj)
+{
+ int n;
+
+ if (CLG_(clo).compress_strings) {
+ CLG_ASSERT(obj_dumped != 0);
+ if (obj_dumped[obj->number])
+ n = VG_(sprintf)(buf, "(%d)\n", obj->number);
+ else {
+ n = VG_(sprintf)(buf, "(%d) %s\n",
+ obj->number, obj->name);
+ }
+ }
+ else
+ n = VG_(sprintf)(buf, "%s\n", obj->name);
+
+#if 0
+ /* add mapping parameters the first time a object is dumped
+ * format: mp=0xSTART SIZE 0xOFFSET
+ * NOTE(review): disabled; also "obj_dumped[obj->number];" below is a
+ * statement without effect (missing "= True"). */
+ if (!obj_dumped[obj->number]) {
+ obj_dumped[obj->number];
+ VG_(sprintf)(buf+n, "mp=%p %p %p\n",
+ pos->obj->start, pos->obj->size, pos->obj->offset);
+ }
+#else
+ obj_dumped[obj->number] = True;
+#endif
+}
+
+/* Write a file name line into <buf>; with string compression the name
+ * is replaced by "(number)" after its first occurrence. */
+static void print_file(Char* buf, file_node* file)
+{
+ if (!CLG_(clo).compress_strings) {
+ VG_(sprintf)(buf, "%s\n", file->name);
+ return;
+ }
+
+ CLG_ASSERT(file_dumped != 0);
+ if (file_dumped[file->number])
+ VG_(sprintf)(buf, "(%d)\n", file->number);
+ else {
+ VG_(sprintf)(buf, "(%d) %s\n",
+ file->number, file->name);
+ file_dumped[file->number] = True;
+ }
+}
+
+/*
+ * Write a "tag=<name>" function line to <fd> via <buf>;
+ * tag can be "fn", "cfn", "jfn". With string compression the name is
+ * replaced by "(number)" after its first occurrence.
+ */
+static void print_fn(Int fd, Char* buf, Char* tag, fn_node* fn)
+{
+ int p = VG_(sprintf)(buf, "%s=", tag);
+
+ if (!CLG_(clo).compress_strings)
+ p += VG_(sprintf)(buf+p, "%s\n", fn->name);
+ else {
+ CLG_ASSERT(fn_dumped != 0);
+ if (fn_dumped[fn->number])
+ p += VG_(sprintf)(buf+p, "(%d)\n", fn->number);
+ else {
+ p += VG_(sprintf)(buf+p, "(%d) %s\n",
+ fn->number, fn->name);
+ fn_dumped[fn->number] = True;
+ }
+ }
+
+ my_fwrite(fd, buf, p);
+}
+
+/* Write a "tag=" line for a mangled context name.
+ * With full compression (compress_strings + compress_mangled) each
+ * context part is first emitted with its pure-context number, then the
+ * mangled name is written as a chain of "(number)" references.
+ * With plain string compression only the context number prefixes the
+ * full mangled name; without compression the plain name is written. */
+static void print_mangled_fn(Int fd, Char* buf, Char* tag,
+ Context* cxt, int rec_index)
+{
+ int p, i;
+
+ if (CLG_(clo).compress_strings && CLG_(clo).compress_mangled) {
+
+ int n;
+ Context* last;
+
+ CLG_ASSERT(cxt_dumped != 0);
+ if (cxt_dumped[cxt->base_number+rec_index]) {
+ /* already emitted: reference by number only */
+ p = VG_(sprintf)(buf, "%s=(%d)\n",
+ tag, cxt->base_number + rec_index);
+ my_fwrite(fd, buf, p);
+ return;
+ }
+
+ last = 0;
+ /* make sure that for all context parts compressed data is written */
+ for(i=cxt->size;i>0;i--) {
+ CLG_ASSERT(cxt->fn[i-1]->pure_cxt != 0);
+ n = cxt->fn[i-1]->pure_cxt->base_number;
+ if (cxt_dumped[n]) continue;
+ p = VG_(sprintf)(buf, "%s=(%d) %s\n",
+ tag, n, cxt->fn[i-1]->name);
+ my_fwrite(fd, buf, p);
+
+ cxt_dumped[n] = True;
+ last = cxt->fn[i-1]->pure_cxt;
+ }
+ /* If the last context was the context to print, we are finished */
+ if ((last == cxt) && (rec_index == 0)) return;
+
+ /* emit the mangled name as a chain of part references */
+ p = VG_(sprintf)(buf, "%s=(%d) (%d)", tag,
+ cxt->base_number + rec_index,
+ cxt->fn[0]->pure_cxt->base_number);
+ if (rec_index >0)
+ p += VG_(sprintf)(buf+p, "'%d", rec_index +1);
+ for(i=1;i<cxt->size;i++)
+ p += VG_(sprintf)(buf+p, "'(%d)",
+ cxt->fn[i]->pure_cxt->base_number);
+ p += VG_(sprintf)(buf+p, "\n");
+ my_fwrite(fd, buf, p);
+
+ cxt_dumped[cxt->base_number+rec_index] = True;
+ return;
+ }
+
+
+ p = VG_(sprintf)(buf, "%s=", tag);
+ if (CLG_(clo).compress_strings) {
+ CLG_ASSERT(cxt_dumped != 0);
+ if (cxt_dumped[cxt->base_number+rec_index]) {
+ p += VG_(sprintf)(buf+p, "(%d)\n", cxt->base_number + rec_index);
+ my_fwrite(fd, buf, p);
+ return;
+ }
+ else {
+ p += VG_(sprintf)(buf+p, "(%d) ", cxt->base_number + rec_index);
+ cxt_dumped[cxt->base_number+rec_index] = True;
+ }
+ }
+
+ /* plain mangled name: fn[0]['rec]['caller...] */
+ p += VG_(sprintf)(buf+p, "%s", cxt->fn[0]->name);
+ if (rec_index >0)
+ p += VG_(sprintf)(buf+p, "'%d", rec_index +1);
+ for(i=1;i<cxt->size;i++)
+ p += VG_(sprintf)(buf+p, "'%s", cxt->fn[i]->name);
+
+ p += VG_(sprintf)(buf+p, "\n");
+ my_fwrite(fd, buf, p);
+}
+
+
+
+/**
+ * Print the function position of the BBCC to <fd>, emitting only the
+ * parts (rec index, calling function, object, file, function) that
+ * differ from the last position <last>; <last> is updated.
+ * Returns True if anything was written.
+ */
+static Bool print_fn_pos(int fd, FnPos* last, BBCC* bbcc)
+{
+ Bool res = False;
+
+ CLG_DEBUGIF(3) {
+ CLG_DEBUG(2, "+ print_fn_pos: ");
+ CLG_(print_cxt)(16, bbcc->cxt, bbcc->rec_index);
+ }
+
+ if (!CLG_(clo).mangle_names) {
+ /* unmangled output: rec index and calling fn are separate lines */
+ if (last->rec_index != bbcc->rec_index) {
+ VG_(sprintf)(outbuf, "rec=%d\n\n", bbcc->rec_index);
+ my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+ last->rec_index = bbcc->rec_index;
+ last->cxt = 0; /* reprint context */
+ res = True;
+ }
+
+ if (last->cxt != bbcc->cxt) {
+ /* fn[1] is the caller (if the context has one) */
+ fn_node* last_from = (last->cxt && last->cxt->size>1) ?
+ last->cxt->fn[1] : 0;
+ fn_node* curr_from = (bbcc->cxt && bbcc->cxt->size>1) ?
+ bbcc->cxt->fn[1] : 0;
+ if (curr_from == 0) {
+ if (last_from != 0) {
+ /* switch back to no context */
+ VG_(sprintf)(outbuf, "frfn=(spontaneous)\n");
+ my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+ res = True;
+ }
+ }
+ else if (last_from != curr_from) {
+ print_fn(fd,outbuf,"frfn", curr_from);
+ res = True;
+ }
+ last->cxt = bbcc->cxt;
+ }
+ }
+
+ if (last->obj != bbcc->cxt->fn[0]->file->obj) {
+ VG_(sprintf)(outbuf, "ob=");
+ print_obj(outbuf+3, bbcc->cxt->fn[0]->file->obj);
+ my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+ last->obj = bbcc->cxt->fn[0]->file->obj;
+ res = True;
+ }
+
+ if (last->file != bbcc->cxt->fn[0]->file) {
+ VG_(sprintf)(outbuf, "fl=");
+ print_file(outbuf+3, bbcc->cxt->fn[0]->file);
+ my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+ last->file = bbcc->cxt->fn[0]->file;
+ res = True;
+ }
+
+ if (!CLG_(clo).mangle_names) {
+ if (last->fn != bbcc->cxt->fn[0]) {
+ print_fn(fd,outbuf, "fn", bbcc->cxt->fn[0]);
+ last->fn = bbcc->cxt->fn[0];
+ res = True;
+ }
+ }
+ else {
+ /* Print mangled name if context or rec_index changes */
+ if ((last->rec_index != bbcc->rec_index) ||
+ (last->cxt != bbcc->cxt)) {
+
+ print_mangled_fn(fd, outbuf, "fn", bbcc->cxt, bbcc->rec_index);
+ last->fn = bbcc->cxt->fn[0];
+ last->rec_index = bbcc->rec_index;
+ res = True;
+ }
+ }
+
+ last->cxt = bbcc->cxt;
+
+ CLG_DEBUG(2, "- print_fn_pos: %s\n", res ? "changed" : "");
+
+ return res;
+}
+
+/* The debug lookup cache is useful if BBCCs for the same BB are
+ * dumped directly in a row. This is a direct mapped cache.
+ */
+#define DEBUG_CACHE_SIZE 1777
+
+/* Parallel arrays forming the cache. Slot i is valid for address
+ * debug_cache_addr[i]; file/line hold the cached debug position and
+ * debug_cache_info[i] remembers whether the debug lookup succeeded. */
+static Addr       debug_cache_addr[DEBUG_CACHE_SIZE];
+static file_node* debug_cache_file[DEBUG_CACHE_SIZE];
+static int        debug_cache_line[DEBUG_CACHE_SIZE];
+static Bool       debug_cache_info[DEBUG_CACHE_SIZE];
+
+/* Invalidate every slot of the direct-mapped debug lookup cache. */
+static __inline__
+void init_debug_cache(void)
+{
+  int slot;
+
+  for (slot = 0; slot < DEBUG_CACHE_SIZE; slot++) {
+      debug_cache_info[slot] = 0;
+      debug_cache_line[slot] = 0;
+      debug_cache_file[slot] = 0;
+      debug_cache_addr[slot] = 0;
+  }
+}
+
+/* Look up the source position (file/line) of 'addr' inside bbcc's BB,
+ * consulting the direct-mapped debug cache first. On a miss, queries
+ * Valgrind's debug info and refills the cache slot; without debug info
+ * the file becomes "???" and the line 0.
+ * Returns True iff real debug info was found (possibly from the cache).
+ */
+static __inline__
+Bool get_debug_pos(BBCC* bbcc, Addr addr, AddrPos* p)
+{
+  Char file[FILENAME_LEN];
+  Bool res;
+
+  int cachepos = addr % DEBUG_CACHE_SIZE;
+
+  if (debug_cache_addr[cachepos] == addr) {
+      /* cache hit: reuse previously looked-up position */
+      p->line = debug_cache_line[cachepos];
+      p->file = debug_cache_file[cachepos];
+      res = debug_cache_info[cachepos];
+  }
+  else {
+      res = VG_(get_filename_linenum)(addr,
+				      file, FILENAME_LEN,
+				      NULL, 0, NULL, //FIXME
+				      &(p->line));
+      if (!res) {
+	  VG_(strcpy)(file, "???");
+	  p->line = 0;
+      }
+      /* intern the file name into the per-object file node table */
+      p->file = CLG_(get_file_node)(bbcc->bb->obj, file);
+
+      debug_cache_info[cachepos] = res;
+      debug_cache_addr[cachepos] = addr;
+      debug_cache_line[cachepos] = p->line;
+      debug_cache_file[cachepos] = p->file;
+  }
+
+  /* NOTE(review): despite the naming, this is the address relative to
+   * the object's load offset (static address), not to the BBCC start --
+   * confirm against bb/obj definitions. */
+  p->addr = addr - bbcc->bb->obj->offset;
+  p->bb_addr = bbcc->bb->offset;
+
+  CLG_DEBUG(3, "  get_debug_pos(%p): BB %p, fn '%s', file '%s', line %u\n",
+	    addr, bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name,
+	    p->file->name, p->line);
+
+  return res;
+}
+
+
+/* copy file position and init cost */
+static void init_apos(AddrPos* p, Addr addr, Addr bbaddr, file_node* file)
+{
+ p->addr = addr;
+ p->bb_addr = bbaddr;
+ p->file = file;
+ p->line = 0;
+}
+
+/* Duplicate an address position, field by field. */
+static void copy_apos(AddrPos* dst, AddrPos* src)
+{
+  dst->line    = src->line;
+  dst->file    = src->file;
+  dst->bb_addr = src->bb_addr;
+  dst->addr    = src->addr;
+}
+
+/* Initialize an address cost: set its file position and allocate a
+ * zeroed cost vector for the full event set. */
+static void init_fcost(AddrCost* c, Addr addr, Addr bbaddr, file_node* file)
+{
+  init_apos( &(c->p), addr, bbaddr, file);
+  /* FIXME: This is a memory leak as a AddrCost is inited multiple times */
+  c->cost = CLG_(get_eventset_cost)( CLG_(sets).full );
+  CLG_(init_cost)( CLG_(sets).full, c->cost );
+}
+
+
+/**
+ * Print a position change inside of a BB (last -> curr).
+ * Emits "fe="/"fi=" when the file differs from the last written one,
+ * and an explicit "ln=" line when dumping basic blocks.
+ * This doesn't update last to curr!
+ */
+static void fprint_apos(Int fd, AddrPos* curr, AddrPos* last, file_node* func_file)
+{
+  CLG_ASSERT(curr->file != 0);
+  CLG_DEBUG(2, "  print_apos(file '%s', line %d, bb %p, addr %p) fnFile '%s'\n",
+	    curr->file->name, curr->line, curr->bb_addr, curr->addr,
+	    func_file->name);
+
+  if (curr->file != last->file) {
+
+      /* if we switch back to orig file, use fe=... */
+      if (curr->file == func_file)
+	  VG_(sprintf)(outbuf, "fe=");
+      else
+	  VG_(sprintf)(outbuf, "fi=");
+      print_file(outbuf+3, curr->file);
+      my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+  }
+
+  if (CLG_(clo).dump_bbs) {
+      /* with BB dumping the line is written out-of-band here, since
+       * positions inside a bb= record do not carry line numbers */
+      if (curr->line != last->line) {
+	  VG_(sprintf)(outbuf, "ln=%d\n", curr->line);
+	  my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+      }
+  }
+}
+
+
+
+/**
+ * Print a position (instruction address / BB address / line, depending
+ * on the active dump_instr/dump_bb/dump_line options).
+ * With compress_pos, small differences to the last written position are
+ * emitted as "+d"/"-d", and an unchanged value as "*".
+ *
+ * This doesn't set last to curr afterwards!
+ */
+static
+void fprint_pos(Int fd, AddrPos* curr, AddrPos* last)
+{
+  if (0) //CLG_(clo).dump_bbs)
+      /* disabled: offset-inside-BB output variant */
+      VG_(sprintf)(outbuf, "%u ", curr->addr - curr->bb_addr);
+  else {
+      int p = 0;
+      if (CLG_(clo).dump_instr) {
+	  int diff = curr->addr - last->addr;
+	  if ( CLG_(clo).compress_pos && (last->addr >0) &&
+	       (diff > -100) && (diff < 100)) {
+	      if (diff >0)
+		  p = VG_(sprintf)(outbuf, "+%d ", diff);
+	      else if (diff==0)
+		  p = VG_(sprintf)(outbuf, "* ");
+	      else
+		  p = VG_(sprintf)(outbuf, "%d ", diff);
+	  }
+	  else
+	      p = VG_(sprintf)(outbuf, "%p ", curr->addr);
+      }
+
+      if (CLG_(clo).dump_bb) {
+	  int diff = curr->bb_addr - last->bb_addr;
+	  if ( CLG_(clo).compress_pos && (last->bb_addr >0) &&
+	       (diff > -100) && (diff < 100)) {
+	      if (diff >0)
+		  p += VG_(sprintf)(outbuf+p, "+%d ", diff);
+	      else if (diff==0)
+		  p += VG_(sprintf)(outbuf+p, "* ");
+	      else
+		  p += VG_(sprintf)(outbuf+p, "%d ", diff);
+	  }
+	  else
+	      p += VG_(sprintf)(outbuf+p, "%p ", curr->bb_addr);
+      }
+
+      if (CLG_(clo).dump_line) {
+	  int diff = curr->line - last->line;
+	  if ( CLG_(clo).compress_pos && (last->line >0) &&
+	       (diff > -100) && (diff < 100)) {
+
+	      if (diff >0)
+		  VG_(sprintf)(outbuf+p, "+%d ", diff);
+	      else if (diff==0)
+		  VG_(sprintf)(outbuf+p, "* ");
+	      else
+		  VG_(sprintf)(outbuf+p, "%d ", diff);
+	  }
+	  else
+	      VG_(sprintf)(outbuf+p, "%u ", curr->line);
+      }
+  }
+  my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+}
+
+
+/**
+ * Write one cost line to fd: the costs of the given event mapping,
+ * terminated by a newline.
+ */
+static
+void fprint_cost(int fd, EventMapping* es, ULong* cost)
+{
+  int len = CLG_(sprint_mappingcost)(outbuf, es, cost);
+  outbuf[len]   = '\n';
+  outbuf[len+1] = 0;
+  my_fwrite(fd, (void*)outbuf, len+1);
+}
+
+
+
+/* Write the cost of a source line; only those parts of the source
+ * position are written that changed relative to the last written
+ * position, which is then updated to the current one.
+ * The written cost is added to the dump total and zeroed.
+ */
+static void fprint_fcost(Int fd, AddrCost* c, AddrPos* last)
+{
+  CLG_DEBUGIF(3) {
+      CLG_DEBUG(2, "   print_fcost(file '%s', line %d, bb %p, addr %p):\n",
+		c->p.file->name, c->p.line, c->p.bb_addr, c->p.addr);
+      CLG_(print_cost)(-5, CLG_(sets).full, c->cost);
+  }
+
+  fprint_pos(fd, &(c->p), last);
+  copy_apos( last, &(c->p) ); /* update last to current position */
+
+  fprint_cost(fd, CLG_(dumpmap), c->cost);
+
+  /* add cost to total */
+  CLG_(add_and_zero_cost)( CLG_(sets).full, dump_total_cost, c->cost );
+}
+
+
+/* Write out the jump/call arcs from jcc (at position pos):
+ * followed conditional/boring jumps become "jcnd="/"jump=" records,
+ * real calls become "cob="/"cfi="/"cfn="/"calls=" records with cost.
+ * Resets the arc's call counter (and cost for calls) after writing.
+ */
+static void fprint_jcc(Int fd, jCC* jcc, AddrPos* curr, AddrPos* last, ULong ecounter)
+{
+    static AddrPos target;
+    file_node* file;
+    obj_node*  obj;
+
+    CLG_DEBUGIF(2) {
+      CLG_DEBUG(2, "   fprint_jcc (jkind %d)\n", jcc->jmpkind);
+      CLG_(print_jcc)(-10, jcc);
+    }
+
+    if (!get_debug_pos(jcc->to, bb_addr(jcc->to->bb), &target)) {
+	/* if we don't have debug info, don't switch to file "???" */
+	target.file = last->file;
+    }
+
+    if (jcc->from &&
+	(jcc->jmpkind == JmpCond || jcc->jmpkind == Ijk_Boring)) {
+
+	/* this is a JCC for a followed conditional or boring jump. */
+	CLG_ASSERT(CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost));
+
+	/* objects among jumps should be the same.
+	 * Otherwise this jump would have been changed to a call
+	 *  (see setup_bbcc)
+	 */
+	CLG_ASSERT(jcc->from->bb->obj == jcc->to->bb->obj);
+
+	/* only print if target position info is usefull */
+	if (!CLG_(clo).dump_instr && !CLG_(clo).dump_bb && target.line==0) {
+	    jcc->call_counter = 0;
+	    return;
+	}
+
+	/* Different files/functions are possible e.g. with longjmp's
+	 * which change the stack, and thus context
+	 */
+	if (last->file != target.file) {
+	    VG_(sprintf)(outbuf, "jfi=");
+	    print_file(outbuf+4, target.file);
+	    my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+	}
+
+	if (jcc->from->cxt != jcc->to->cxt) {
+	    if (CLG_(clo).mangle_names)
+		print_mangled_fn(fd, outbuf, "jfn",
+				 jcc->to->cxt, jcc->to->rec_index);
+	    else
+		print_fn(fd, outbuf, "jfn", jcc->to->cxt->fn[0]);
+	}
+
+	if (jcc->jmpkind == JmpCond) {
+	    /* format: jcnd=<followed>/<executions> <target> */
+	    VG_(sprintf)(outbuf, "jcnd=%llu/%llu ",
+			 jcc->call_counter, ecounter);
+	}
+	else {
+	    /* format: jump=<jump count> <target> */
+	    VG_(sprintf)(outbuf, "jump=%llu ",
+			 jcc->call_counter);
+	}
+	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+
+	fprint_pos(fd, &target, last);
+	my_fwrite(fd, "\n", 1);
+	fprint_pos(fd, curr, last);
+	my_fwrite(fd, "\n", 1);
+
+	jcc->call_counter = 0;
+	return;
+    }
+
+    CLG_ASSERT(jcc->to !=0);
+
+    file = jcc->to->cxt->fn[0]->file;
+    obj  = jcc->to->bb->obj;
+
+    /* object of called position different to object of this function?*/
+    /* NOTE(review): jcc->from is dereferenced here without a null check,
+     * although the branch above guarded on it -- confirm that call arcs
+     * always have a 'from' BBCC at this point. */
+    if (jcc->from->cxt->fn[0]->file->obj != obj) {
+	VG_(sprintf)(outbuf, "cob=");
+	print_obj(outbuf+4, obj);
+	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+    }
+
+    /* file of called position different to current file? */
+    if (last->file != file) {
+	VG_(sprintf)(outbuf, "cfi=");
+	print_file(outbuf+4, file);
+	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+    }
+
+    if (CLG_(clo).mangle_names)
+	print_mangled_fn(fd, outbuf, "cfn", jcc->to->cxt, jcc->to->rec_index);
+    else
+	print_fn(fd, outbuf, "cfn", jcc->to->cxt->fn[0]);
+
+    if (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost)) {
+	VG_(sprintf)(outbuf, "calls=%llu ",
+		     jcc->call_counter);
+	my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+
+	fprint_pos(fd, &target, last);
+	my_fwrite(fd, "\n", 1);
+	fprint_pos(fd, curr, last);
+	fprint_cost(fd, CLG_(dumpmap), jcc->cost);
+
+	CLG_(init_cost)( CLG_(sets).full, jcc->cost );
+
+	jcc->call_counter = 0;
+    }
+}
+
+
+
+/* Cost summation of functions. We use alternately ccSum[0/1], thus
+ * ccSum[currSum] for recently read lines with same line number.
+ */
+static AddrCost ccSum[2];
+static int currSum;
+
+/*
+ * Print all costs of a BBCC:
+ * - FCCs of instructions
+ * - JCCs of the unique jump of this BB
+ * returns True if something was written
+ *
+ * Costs of consecutive instructions with the same source line are
+ * accumulated in ccSum[currSum] and flushed when the position changes;
+ * the other ccSum entry is used as scratch for the new position.
+ */
+static Bool fprint_bbcc(Int fd, BBCC* bbcc, AddrPos* last)
+{
+  InstrInfo* instr_info;
+  ULong ecounter;
+  Bool something_written = False;
+  jCC* jcc;
+  AddrCost *currCost, *newCost;
+  Int jcc_count = 0, instr, i, jmp;
+  BB* bb = bbcc->bb;
+
+  CLG_ASSERT(bbcc->cxt != 0);
+  CLG_DEBUGIF(1) {
+    VG_(printf)("+ fprint_bbcc (Instr %d): ", bb->instr_count);
+    CLG_(print_bbcc)(15, bbcc, False);
+  }
+
+  CLG_ASSERT(currSum == 0 || currSum == 1);
+  currCost = &(ccSum[currSum]);
+  newCost  = &(ccSum[1-currSum]);
+
+  ecounter = bbcc->ecounter_sum;
+  jmp = 0;
+  instr_info = &(bb->instr[0]);
+  for(instr=0; instr<bb->instr_count; instr++, instr_info++) {
+
+    /* get debug info of current instruction address and dump cost
+     * if CLG_(clo).dump_bbs or file/line has changed
+     */
+    if (!get_debug_pos(bbcc, bb_addr(bb) + instr_info->instr_offset,
+		       &(newCost->p))) {
+      /* if we don't have debug info, don't switch to file "???" */
+      newCost->p.file = bbcc->cxt->fn[0]->file;
+    }
+
+    if (CLG_(clo).dump_bbs || CLG_(clo).dump_instr ||
+	(newCost->p.line != currCost->p.line) ||
+	(newCost->p.file != currCost->p.file)) {
+
+      /* position changed: flush the accumulated line cost, if any */
+      if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
+	something_written = True;
+
+	fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
+	fprint_fcost(fd, currCost, last);
+      }
+
+      /* switch buffers */
+      currSum = 1 - currSum;
+      currCost = &(ccSum[currSum]);
+      newCost  = &(ccSum[1-currSum]);
+    }
+
+    /* add line cost to current cost sum */
+    (*CLG_(cachesim).add_icost)(currCost->cost, bbcc, instr_info, ecounter);
+
+    /* print jcc's if there are: only jumps */
+    if (bb->jmp[jmp].instr == instr) {
+	jcc_count=0;
+	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from)
+	    if ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0))
+		jcc_count++;
+
+	if (jcc_count>0) {
+	    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
+		/* no need to switch buffers, as position is the same */
+		fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
+		fprint_fcost(fd, currCost, last);
+	    }
+	    get_debug_pos(bbcc, bb_addr(bb)+instr_info->instr_offset, &(currCost->p));
+	    fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
+	    something_written = True;
+	    for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
+		if ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0))
+		    fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
+	    }
+	}
+    }
+
+    /* update execution counter */
+    if (jmp < bb->cjmp_count)
+	if (bb->jmp[jmp].instr == instr) {
+	    ecounter -= bbcc->jmp[jmp].ecounter;
+	    jmp++;
+	}
+  }
+
+  /* jCCs at end? If yes, dump cumulated line info first */
+  jcc_count = 0;
+  for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
+      /* yes, if JCC only counts jmp arcs or cost >0 */
+      if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
+	   (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
+	  jcc_count++;
+  }
+
+  if ( (bbcc->skipped &&
+	!CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) ||
+       (jcc_count>0) ) {
+
+      if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
+	  /* no need to switch buffers, as position is the same */
+	  fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
+	  fprint_fcost(fd, currCost, last);
+      }
+
+      get_debug_pos(bbcc, bb_jmpaddr(bb), &(currCost->p));
+      fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
+      something_written = True;
+
+      /* first, print skipped costs for calls */
+      if (bbcc->skipped && !CLG_(is_zero_cost)( CLG_(sets).full,
+						bbcc->skipped )) {
+	  CLG_(add_and_zero_cost)( CLG_(sets).full,
+				   currCost->cost, bbcc->skipped );
+#if 0
+	  VG_(sprintf)(outbuf, "# Skipped\n");
+	  my_fwrite(fd, (void*)outbuf, VG_(strlen)(outbuf));
+#endif
+	  fprint_fcost(fd, currCost, last);
+      }
+
+      if (jcc_count > 0)
+	  for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
+	      CLG_ASSERT(jcc->jmp == jmp);
+	      if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
+		   (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
+
+		  fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
+	  }
+  }
+
+  if (CLG_(clo).dump_bbs || CLG_(clo).dump_bb) {
+      if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
+	  something_written = True;
+
+	  fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
+	  fprint_fcost(fd, currCost, last);
+      }
+      if (CLG_(clo).dump_bbs) my_fwrite(fd, (void*)"\n", 1);
+
+      /* when every cost was immediatly written, we must have done so,
+       * as this function is only called when there's cost in a BBCC
+       */
+      CLG_ASSERT(something_written);
+  }
+
+  /* counters are consumed by the dump: reset them for the next period */
+  bbcc->ecounter_sum = 0;
+  for(i=0; i<=bbcc->bb->cjmp_count; i++)
+    bbcc->jmp[i].ecounter = 0;
+  bbcc->ret_counter = 0;
+
+  CLG_DEBUG(1, "- fprint_bbcc: JCCs %d\n", jcc_count);
+
+  return something_written;
+}
+
+/* Comparison function defining the dump order of BBCCs:
+ * object, file, function, recursion index, caller context
+ * (outer functions, object first), and finally BB address offset.
+ * Returns <0, 0 or >0 like a qsort comparator.
+ *
+ * Fix: the original returned raw pointer/address differences cast to
+ * int. On 64-bit hosts such differences can truncate (two distinct
+ * pointers may compare as equal or with inconsistent sign), and
+ * subtracting unrelated pointers is undefined behavior. Explicit
+ * relational comparisons are used instead.
+ */
+static int my_cmp(BBCC** pbbcc1, BBCC** pbbcc2)
+{
+    BBCC *bbcc1 = *pbbcc1;
+    BBCC *bbcc2 = *pbbcc2;
+    Context* cxt1 = bbcc1->cxt;
+    Context* cxt2 = bbcc2->cxt;
+    int off = 1;
+
+    if (cxt1->fn[0]->file->obj != cxt2->fn[0]->file->obj)
+	return (cxt1->fn[0]->file->obj < cxt2->fn[0]->file->obj) ? -1 : 1;
+
+    if (cxt1->fn[0]->file != cxt2->fn[0]->file)
+	return (cxt1->fn[0]->file < cxt2->fn[0]->file) ? -1 : 1;
+
+    if (cxt1->fn[0] != cxt2->fn[0])
+	return (cxt1->fn[0] < cxt2->fn[0]) ? -1 : 1;
+
+    if (bbcc1->rec_index != bbcc2->rec_index)
+	return (bbcc1->rec_index < bbcc2->rec_index) ? -1 : 1;
+
+    /* compare caller chains, innermost caller first */
+    while((off < cxt1->size) && (off < cxt2->size)) {
+	fn_node* ffn1 = cxt1->fn[off];
+	fn_node* ffn2 = cxt2->fn[off];
+	if (ffn1->file->obj != ffn2->file->obj)
+	    return (ffn1->file->obj < ffn2->file->obj) ? -1 : 1;
+	if (ffn1 != ffn2)
+	    return (ffn1 < ffn2) ? -1 : 1;
+	off++;
+    }
+    /* shorter caller chain sorts first */
+    if (cxt1->size > cxt2->size) return 1;
+    else if (cxt1->size < cxt2->size) return -1;
+
+    if (bbcc1->bb->offset != bbcc2->bb->offset)
+	return (bbcc1->bb->offset < bbcc2->bb->offset) ? -1 : 1;
+    return 0;
+}
+
+
+
+
+
+/* modified version of:
+ *
+ * qsort -- qsort interface implemented by faster quicksort.
+ * J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
+ * Copyright 1993, John Wiley.
+*/
+
+/* Exchange n consecutive BBCC pointers between positions a and b. */
+static __inline__
+void swapfunc(BBCC** a, BBCC** b, int n)
+{
+  int i;
+  for (i = 0; i < n; i++) {
+      BBCC* tmp = a[i];
+      a[i] = b[i];
+      b[i] = tmp;
+  }
+}
+
+/* Exchange two BBCC pointers. */
+static __inline__
+void swap(BBCC** a, BBCC** b)
+{
+  BBCC* tmp = *a;
+  *a = *b;
+  *b = tmp;
+}
+
+#define min(x, y) ((x)<=(y) ? (x) : (y))
+
+/* Return the median of three array positions under cmp. */
+static
+BBCC** med3(BBCC **a, BBCC **b, BBCC **c, int (*cmp)(BBCC**,BBCC**))
+{
+   if (cmp(a, b) < 0) {
+      if (cmp(b, c) < 0) return b;
+      return (cmp(a, c) < 0) ? c : a;
+   }
+   if (cmp(b, c) > 0) return b;
+   return (cmp(a, c) > 0) ? c : a;
+}
+
+/* Base of the array being sorted; only used for debug output offsets. */
+static BBCC** qsort_start = 0;
+
+/* Sort n BBCC pointers at a with comparator cmp.
+ * Bentley/McIlroy 3-way quicksort: insertion sort for tiny arrays,
+ * median-of-3 (pseudomedian of 9 for big arrays) pivot selection,
+ * elements equal to the pivot collected at both ends and swapped
+ * into the middle before recursing on the two strict partitions.
+ */
+static void qsort(BBCC **a, int n, int (*cmp)(BBCC**,BBCC**))
+{
+  BBCC **pa, **pb, **pc, **pd, **pl, **pm, **pn, **pv;
+  int s, r;
+  BBCC* v;
+
+  CLG_DEBUG(8, "  qsort(%d,%d)\n", a-qsort_start, n);
+
+  if (n < 7) {	 /* Insertion sort on smallest arrays */
+    for (pm = a+1; pm < a+n; pm++)
+      for (pl = pm; pl > a && cmp(pl-1, pl) > 0; pl --)
+	swap(pl, pl-1);
+
+    CLG_DEBUGIF(8) {
+      for (pm = a; pm < a+n; pm++) {
+	VG_(printf)("   %3d BB %p, ", pm - qsort_start,
+		    bb_addr((*pm)->bb));
+	CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
+      }
+    }
+    return;
+  }
+  pm = a + n/2;    /* Small arrays, middle element */
+  if (n > 7) {
+    pl = a;
+    pn = a + (n-1);
+    if (n > 40) {    /* Big arrays, pseudomedian of 9 */
+      s = n/8;
+      pl = med3(pl, pl+s, pl+2*s, cmp);
+      pm = med3(pm-s, pm, pm+s, cmp);
+      pn = med3(pn-2*s, pn-s, pn, cmp);
+    }
+    pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
+  }
+
+
+  v = *pm;
+  pv = &v;
+  pa = pb = a;
+  pc = pd = a + (n-1);
+  for (;;) {
+    /* scan from the left: <= pivot; equals move to the start */
+    while ((pb <= pc) && ((r=cmp(pb, pv)) <= 0)) {
+      if (r==0) {
+	/* same as pivot, to start */
+	swap(pa,pb); pa++;
+      }
+      pb ++;
+    }
+    /* scan from the right: >= pivot; equals move to the end */
+    while ((pb <= pc) && ((r=cmp(pc, pv)) >= 0)) {
+      if (r==0) {
+	/* same as pivot, to end */
+	swap(pc,pd); pd--;
+      }
+      pc --;
+    }
+    if (pb > pc) { break; }
+    swap(pb, pc);
+    pb ++;
+    pc --;
+  }
+  pb--;
+  pc++;
+
+  /* put pivot from start into middle */
+  if ((s = pa-a)>0) { for(r=0;r<s;r++) swap(a+r, pb+1-s+r); }
+  /* put pivot from end into middle */
+  if ((s = a+n-1-pd)>0) { for(r=0;r<s;r++) swap(pc+r, a+n-s+r); }
+
+  CLG_DEBUGIF(8) {
+    VG_(printf)("   PV BB %p, ", bb_addr((*pv)->bb));
+    CLG_(print_cxt)(9, (*pv)->cxt, (*pv)->rec_index);
+
+    s = pb-pa+1;
+    VG_(printf)("  Lower %d - %d:\n", a-qsort_start, a+s-1-qsort_start);
+    for (r=0;r<s;r++) {
+      pm = a+r;
+      VG_(printf)("   %3d BB %p, ",
+		  pm-qsort_start,bb_addr((*pm)->bb));
+      CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
+    }
+
+    s = pd-pc+1;
+    VG_(printf)("  Upper %d - %d:\n",
+		a+n-s-qsort_start, a+n-1-qsort_start);
+    for (r=0;r<s;r++) {
+      pm = a+n-s+r;
+      VG_(printf)("   %3d BB %p, ",
+		  pm-qsort_start,bb_addr((*pm)->bb));
+      CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
+    }
+  }
+
+  /* recurse on elements strictly below / above the pivot */
+  if ((s = pb+1-pa) > 1) qsort(a,     s, cmp);
+  if ((s = pd+1-pc) > 1) qsort(a+n-s, s, cmp);
+}
+
+
+/* Helpers for prepare_dump */
+
+static Int prepare_count;   /* number of BBCCs that will be dumped */
+static BBCC** prepare_ptr;  /* insertion cursor into the dump array */
+
+
+/* Counting pass: one more BBCC to dump if it has executions or returns. */
+static void hash_addCount(BBCC* bbcc)
+{
+  Bool has_cost = (bbcc->ecounter_sum > 0) || (bbcc->ret_counter > 0);
+  if (has_cost)
+      prepare_count++;
+}
+
+/* Collection pass: append a BBCC with nonzero counters to the array. */
+static void hash_addPtr(BBCC* bbcc)
+{
+  if ((bbcc->ecounter_sum > 0) || (bbcc->ret_counter > 0))
+      *prepare_ptr++ = bbcc;
+}
+
+
+/* Counting pass over the current call stack: count BBCCs that are only
+ * "active" (inside an unfinished call) and were not already counted by
+ * hash_addCount. Also folds the cost accumulated since call entry into
+ * each active call arc. The 'ti' parameter is unused; the relevant
+ * thread is the one made current by the caller/forall_threads.
+ */
+static void cs_addCount(thread_info* ti)
+{
+  Int i;
+  BBCC* bbcc;
+
+  /* add BBCCs with active call in call stack of current thread.
+   * update cost sums for active calls
+   */
+
+  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
+    call_entry* e = &(CLG_(current_call_stack).entry[i]);
+    if (e->jcc == 0) continue;
+
+    CLG_(add_diff_cost_lz)( CLG_(sets).full, &(e->jcc->cost),
+			    e->enter_cost, CLG_(current_state).cost);
+    bbcc = e->jcc->from;
+
+    CLG_DEBUG(1, " [%2d] (tid %d), added active: %s\n",
+	      i,CLG_(current_tid),bbcc->cxt->fn[0]->name);
+
+    if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
+      /* already counted */
+      continue;
+    }
+    prepare_count++;
+  }
+}
+
+/* Collection pass over the current call stack: append the active-call
+ * BBCCs counted by cs_addCount. Must mirror cs_addCount's filtering so
+ * the array stays exactly prepare_count entries long.
+ * The 'ti' parameter is unused (current thread is set by the caller).
+ */
+static void cs_addPtr(thread_info* ti)
+{
+  Int i;
+  BBCC* bbcc;
+
+  /* add BBCCs with active call in call stack of current thread.
+   * update cost sums for active calls
+   */
+
+  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
+    call_entry* e = &(CLG_(current_call_stack).entry[i]);
+    if (e->jcc == 0) continue;
+
+    bbcc = e->jcc->from;
+
+    if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
+      /* already counted */
+      continue;
+    }
+
+    *prepare_ptr = bbcc;
+    prepare_ptr++;
+  }
+}
+
+
+/**
+ * Put all BBCCs with costs into a sorted array.
+ * The returned array ends with a null pointer.
+ * Must be freed after dumping.
+ *
+ * Works in two identical passes (count, then collect) over the BBCC
+ * hash and the active call stacks, then sorts with my_cmp.
+ */
+static
+BBCC** prepare_dump(void)
+{
+    BBCC **array;
+
+    prepare_count = 0;
+
+    /* if we do not separate among threads, this gives all */
+    /* count number of BBCCs with >0 executions */
+    CLG_(forall_bbccs)(hash_addCount);
+
+    /* even if we do not separate among threads,
+     * call stacks are separated */
+    if (CLG_(clo).separate_threads)
+	cs_addCount(0);
+    else
+	CLG_(forall_threads)(cs_addCount);
+
+    CLG_DEBUG(0, "prepare_dump: %d BBCCs\n", prepare_count);
+
+    /* allocate bbcc array, insert BBCCs and sort */
+    prepare_ptr = array =
+	(BBCC**) CLG_MALLOC((prepare_count+1) * sizeof(BBCC*));
+
+    CLG_(forall_bbccs)(hash_addPtr);
+
+    if (CLG_(clo).separate_threads)
+	cs_addPtr(0);
+    else
+	CLG_(forall_threads)(cs_addPtr);
+
+    /* both passes must have agreed on the number of entries */
+    CLG_ASSERT(array + prepare_count == prepare_ptr);
+
+    /* end mark */
+    *prepare_ptr = 0;
+
+    CLG_DEBUG(0," BBCCs inserted\n");
+
+    qsort_start = array;
+    qsort(array, prepare_count, my_cmp);
+
+    CLG_DEBUG(0," BBCCs sorted\n");
+
+    return array;
+}
+
+
+
+
+/* Write "<prefix><mapped event costs>\n" to fd. */
+static void fprint_cost_ln(int fd, Char* prefix,
+			   EventMapping* em, ULong* cost)
+{
+    int len;
+
+    len = VG_(sprintf)(outbuf, "%s", prefix);
+    len += CLG_(sprint_mappingcost)(outbuf + len, em, cost);
+    outbuf[len]   = '\n';
+    outbuf[len+1] = 0;
+    my_fwrite(fd, (void*)outbuf, len+1);
+}
+
+static ULong bbs_done = 0;   /* BB execution count at start of this dump period */
+static Char* filename = 0;   /* buffer holding the current dump file name */
+
+/* Report failure to open the dump file and terminate; does not return. */
+static
+void file_err(void)
+{
+   VG_(message)(Vg_UserMsg,
+                "Error: can not open cache simulation output file `%s'",
+                filename );
+   VG_(exit)(1);
+}
+
+/**
+ * Create a new dump file and write header.
+ *
+ * Naming: <CLG_(clo).filename_base>.<pid>[.<part>][-<tid>]
+ *         <part> is skipped for final dump (trigger==0)
+ *         <tid>  is skipped for thread 1 with CLG_(clo).separate_threads=no
+ *
+ * With combine_dumps, all parts are appended to one file and the
+ * one-time header sections are skipped on later parts.
+ *
+ * Returns the file descriptor, and -1 on error (no write permission)
+ */
+static int new_dumpfile(Char buf[BUF_LEN], int tid, Char* trigger)
+{
+    Bool appending = False;
+    int i, fd;
+    FullCost sum = 0;
+    SysRes res;
+
+    CLG_ASSERT(filename != 0);
+
+    if (!CLG_(clo).combine_dumps) {
+	i = VG_(sprintf)(filename, "%s.%d", dump_file_base, VG_(getpid)());
+
+	if (trigger)
+	    i += VG_(sprintf)(filename+i, ".%d", out_counter);
+
+	if (CLG_(clo).separate_threads)
+	    i += VG_(sprintf)(filename+i, "-%02d", tid);
+
+	res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
+    }
+    else {
+	VG_(sprintf)(filename, "%s.%d", dump_file_base, VG_(getpid)());
+	res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_APPEND, 0);
+	if (!res.isError && out_counter>1)
+	    appending = True;
+    }
+
+    if (res.isError) {
+	/* file does not exist yet (or could not be opened): create it */
+	res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
+			VKI_S_IRUSR|VKI_S_IWUSR);
+	if (res.isError) {
+	    /* If the file can not be opened for whatever reason (conflict
+	       between multiple supervised processes?), give up now. */
+	    file_err();
+	}
+    }
+    fd = (Int) res.val;
+
+    CLG_DEBUG(2, "  new_dumpfile '%s'\n", filename);
+
+    if (!appending)
+	reset_dump_array();
+
+
+    if (!appending) {
+	/* version */
+	VG_(sprintf)(buf, "version: 1\n");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+	/* creator */
+	VG_(sprintf)(buf, "creator: callgrind-" VERSION "\n");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+	/* "pid:" line */
+	VG_(sprintf)(buf, "pid: %d\n", VG_(getpid)());
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+	/* "cmd:" line */
+	VG_(strcpy)(buf, "cmd: ");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	my_fwrite(fd, (void*)cmdbuf, VG_(strlen)(cmdbuf));
+    }
+
+    VG_(sprintf)(buf, "\npart: %d\n", out_counter);
+    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+    if (CLG_(clo).separate_threads) {
+	VG_(sprintf)(buf, "thread: %d\n", tid);
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+    }
+
+    /* "desc:" lines */
+    if (!appending) {
+	my_fwrite(fd, "\n", 1);
+
+#if 0
+	/* Global options changing the tracing behaviour */
+	VG_(sprintf)(buf, "\ndesc: Option: --skip-plt=%s\n",
+		     CLG_(clo).skip_plt ? "yes" : "no");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	VG_(sprintf)(buf, "desc: Option: --collect-jumps=%s\n",
+		     CLG_(clo).collect_jumps ? "yes" : "no");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	VG_(sprintf)(buf, "desc: Option: --separate-recs=%d\n",
+		     CLG_(clo).separate_recursions);
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	VG_(sprintf)(buf, "desc: Option: --separate-callers=%d\n",
+		     CLG_(clo).separate_callers);
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+	VG_(sprintf)(buf, "desc: Option: --dump-bbs=%s\n",
+		     CLG_(clo).dump_bbs ? "yes" : "no");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	VG_(sprintf)(buf, "desc: Option: --separate-threads=%s\n",
+		     CLG_(clo).separate_threads ? "yes" : "no");
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+#endif
+
+	/* cache simulator description of the counted events */
+	(*CLG_(cachesim).getdesc)(buf);
+	my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+    }
+
+    VG_(sprintf)(buf, "\ndesc: Timerange: Basic block %llu - %llu\n",
+		 bbs_done, CLG_(stat).bb_executions);
+
+    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+    VG_(sprintf)(buf, "desc: Trigger: %s\n",
+		 trigger ? trigger : (Char*)"Program termination");
+    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+#if 0
+    /* Output function specific config
+     * FIXME */
+    for (i = 0; i < N_FNCONFIG_ENTRIES; i++) {
+	fnc = fnc_table[i];
+	while (fnc) {
+	    if (fnc->skip) {
+		VG_(sprintf)(buf, "desc: Option: --fn-skip=%s\n", fnc->name);
+		my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	    }
+	    if (fnc->dump_at_enter) {
+		VG_(sprintf)(buf, "desc: Option: --fn-dump-at-enter=%s\n",
+			     fnc->name);
+		my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	    }
+	    if (fnc->dump_at_leave) {
+		VG_(sprintf)(buf, "desc: Option: --fn-dump-at-leave=%s\n",
+			     fnc->name);
+		my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	    }
+	    if (fnc->separate_callers != CLG_(clo).separate_callers) {
+		VG_(sprintf)(buf, "desc: Option: --separate-callers%d=%s\n",
+			     fnc->separate_callers, fnc->name);
+		my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	    }
+	    if (fnc->separate_recursions != CLG_(clo).separate_recursions) {
+		VG_(sprintf)(buf, "desc: Option: --separate-recs%d=%s\n",
+			     fnc->separate_recursions, fnc->name);
+		my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+	    }
+	    fnc = fnc->next;
+	}
+    }
+#endif
+
+    /* "positions:" line */
+    VG_(sprintf)(buf, "\npositions:%s%s%s\n",
+		 CLG_(clo).dump_instr ? " instr" : "",
+		 CLG_(clo).dump_bb    ? " bb" : "",
+		 CLG_(clo).dump_line  ? " line" : "");
+    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+
+    /* "events:" line */
+    i = VG_(sprintf)(buf, "events: ");
+    CLG_(sprint_eventmapping)(buf+i, CLG_(dumpmap));
+    my_fwrite(fd, (void*)buf, VG_(strlen)(buf));
+    my_fwrite(fd, "\n", 1);
+
+    /* summary lines */
+    sum = CLG_(get_eventset_cost)( CLG_(sets).full );
+    CLG_(zero_cost)(CLG_(sets).full, sum);
+    if (CLG_(clo).separate_threads) {
+	thread_info* ti = CLG_(get_current_thread)();
+	CLG_(add_diff_cost)(CLG_(sets).full, sum, ti->lastdump_cost,
+			    ti->states.entry[0]->cost);
+    }
+    else {
+	/* This function is called once for thread 1, where
+	 * all costs are summed up when not dumping separate per thread.
+	 * But this is not true for summary: we need to add all threads.
+	 */
+	int t;
+	thread_info** thr = CLG_(get_threads)();
+	for(t=1;t<VG_N_THREADS;t++) {
+	    if (!thr[t]) continue;
+	    CLG_(add_diff_cost)(CLG_(sets).full, sum,
+				thr[t]->lastdump_cost,
+				thr[t]->states.entry[0]->cost);
+	}
+    }
+    fprint_cost_ln(fd, "summary: ", CLG_(dumpmap), sum);
+
+    /* all dumped cost will be added to total_fcc */
+    CLG_(init_cost_lz)( CLG_(sets).full, &dump_total_cost );
+
+    my_fwrite(fd, "\n\n",2);
+
+    if (VG_(clo_verbosity) > 1)
+	VG_(message)(Vg_DebugMsg, "Dump to %s", filename);
+
+    return fd;
+}
+
+
+/* Finish a dump file: write the "totals:" line, fold the dumped cost
+ * into the global total, flush buffered output and close fd.
+ * If the file was written under a hidden name (leading '.'), rename it
+ * to its final visible name (the same name without the dot).
+ */
+static void close_dumpfile(Char buf[BUF_LEN], int fd, int tid)
+{
+    if (fd <0) return;
+
+    fprint_cost_ln(fd, "totals: ", CLG_(dumpmap),
+		   dump_total_cost);
+    //fprint_fcc_ln(fd, "summary: ", &dump_total_fcc);
+    CLG_(add_cost_lz)(CLG_(sets).full,
+		      &CLG_(total_cost), dump_total_cost);
+
+    fwrite_flush();
+    VG_(close)(fd);
+
+    if (filename[0] == '.') {
+	if (-1 == VG_(rename) (filename, filename+1)) {
+	    /* Can not rename to correct file name: give out warning.
+	     * Bug fix: the message used to pass 'filename' for both
+	     * arguments (printing a doubled dot and the wrong target);
+	     * show the actual source and target names instead. */
+	    VG_(message)(Vg_DebugMsg, "Warning: Can not rename %s to %s",
+			 filename, filename+1);
+	}
+    }
+}
+
+
+/* Helper state for print_bbccs (shared with the per-thread callback) */
+
+static Int   print_fd;             /* fd of the dump file being written */
+static Char* print_trigger;        /* dump reason, 0 for program termination */
+static Char  print_buf[BUF_LEN];   /* scratch buffer for formatting */
+
+/* Dump all BBCC costs of the current thread into a fresh dump file:
+ * open the file, walk the sorted BBCC array, write function headers,
+ * optional bb= records and per-line costs, then close the file and
+ * remember the dumped cost in the thread's lastdump_cost.
+ */
+static void print_bbccs_of_thread(thread_info* ti)
+{
+  BBCC **p, **array;
+  FnPos lastFnPos;
+  AddrPos lastAPos;
+
+  CLG_DEBUG(1, "+ print_bbccs(tid %d)\n", CLG_(current_tid));
+
+  print_fd = new_dumpfile(print_buf, CLG_(current_tid), print_trigger);
+  if (print_fd <0) {
+    CLG_DEBUG(1, "- print_bbccs(tid %d): No output...\n", CLG_(current_tid));
+    return;
+  }
+
+  p = array = prepare_dump();
+  init_fpos(&lastFnPos);
+  init_apos(&lastAPos, 0, 0, 0);
+
+  if (p) while(1) {
+
+    /* on context/function change, print old cost buffer before */
+    if (lastFnPos.cxt && ((*p==0) ||
+			 (lastFnPos.cxt != (*p)->cxt) ||
+			 (lastFnPos.rec_index != (*p)->rec_index))) {
+      if (!CLG_(is_zero_cost)( CLG_(sets).full, ccSum[currSum].cost )) {
+	/* no need to switch buffers, as position is the same */
+	fprint_apos(print_fd, &(ccSum[currSum].p), &lastAPos,
+		    lastFnPos.cxt->fn[0]->file);
+	fprint_fcost(print_fd, &ccSum[currSum], &lastAPos);
+      }
+
+      if (ccSum[currSum].p.file != lastFnPos.cxt->fn[0]->file) {
+	/* switch back to file of function */
+	VG_(sprintf)(print_buf, "fe=");
+	print_file(print_buf+3, lastFnPos.cxt->fn[0]->file);
+	my_fwrite(print_fd, (void*)print_buf, VG_(strlen)(print_buf));
+      }
+      my_fwrite(print_fd, "\n", 1);
+    }
+
+    /* null pointer terminates the sorted array */
+    if (*p == 0) break;
+
+    if (print_fn_pos(print_fd, &lastFnPos, *p)) {
+
+      /* new function: reset position and both cost accumulators */
+      init_apos(&lastAPos, 0, 0, (*p)->cxt->fn[0]->file);
+      init_fcost(&ccSum[0], 0, 0, 0);
+      init_fcost(&ccSum[1], 0, 0, 0);
+      currSum = 0;
+    }
+
+    if (CLG_(clo).dump_bbs) {
+	/* FIXME: Specify Object of BB if different to object of fn */
+	int i, pos = 0;
+	ULong ecounter = (*p)->ecounter_sum;
+	pos = VG_(sprintf)(print_buf, "bb=%p ", (*p)->bb->offset);
+	for(i = 0; i<(*p)->bb->cjmp_count;i++) {
+	    pos += VG_(sprintf)(print_buf+pos, "%d %llu ",
+				(*p)->bb->jmp[i].instr,
+				ecounter);
+	    ecounter -= (*p)->jmp[i].ecounter;
+	}
+	VG_(sprintf)(print_buf+pos, "%d %llu\n",
+		     (*p)->bb->instr_count,
+		     ecounter);
+	my_fwrite(print_fd, (void*)print_buf, VG_(strlen)(print_buf));
+    }
+
+    fprint_bbcc(print_fd, *p, &lastAPos);
+
+    p++;
+  }
+
+  close_dumpfile(print_buf, print_fd, CLG_(current_tid));
+  if (array) VG_(free)(array);
+
+  /* set counters of last dump */
+  CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost,
+		   CLG_(current_state).cost );
+
+  CLG_DEBUG(1, "- print_bbccs(tid %d)\n", CLG_(current_tid));
+}
+
+
+/* Dump the BBCC costs of one or all threads, depending on options:
+ * without separate_threads, all cost lives in thread 1's tables, so
+ * switch there temporarily; otherwise dump the current or every thread.
+ */
+static void print_bbccs(Char* trigger, Bool only_current_thread)
+{
+  init_dump_array();
+  init_debug_cache();
+
+  print_fd = -1;
+  print_trigger = trigger;
+
+  if (!CLG_(clo).separate_threads) {
+    /* All BBCC/JCC costs is stored for thread 1 */
+    Int orig_tid = CLG_(current_tid);
+
+    CLG_(switch_thread)(1);
+    print_bbccs_of_thread( CLG_(get_current_thread)() );
+    CLG_(switch_thread)(orig_tid);
+  }
+  else if (only_current_thread)
+    print_bbccs_of_thread( CLG_(get_current_thread)() );
+  else
+    CLG_(forall_threads)(print_bbccs_of_thread);
+
+  free_dump_array();
+}
+
+
+/* Public entry point: dump the current profile data.
+ * 'trigger' names the dump reason (0 means program termination).
+ */
+void CLG_(dump_profile)(Char* trigger, Bool only_current_thread)
+{
+    CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
+	      trigger ? trigger : (Char*)"Prg.Term.");
+
+    if (VG_(clo_verbosity) > 1)
+	VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...",
+		     CLG_(stat).bb_executions,
+		     trigger ? trigger : (Char*)"Prg.Term.");
+
+    out_counter++;
+
+    print_bbccs(trigger, only_current_thread);
+
+
+    /* NOTE(review): the post-increment bumps the global BB counter as a
+     * side effect, so the next part's time range starts strictly after
+     * this one -- confirm this is intended and not a stray '++'. */
+    bbs_done = CLG_(stat).bb_executions++;
+
+    if (VG_(clo_verbosity) > 1)
+	VG_(message)(Vg_DebugMsg, "Dumping done.");
+}
+
+/* copy command to cmd buffer (could change) */
+static
+void init_cmdbuf(void)
+{
+ Int i,j,size = 0;
+ HChar* argv;
+
+#if VG_CORE_INTERFACE_VERSION > 8
+ if (VG_(args_the_exename))
+ size = VG_(sprintf)(cmdbuf, " %s", VG_(args_the_exename));
+
+ for(i = 0; i < VG_(args_for_client).used; i++) {
+ argv = VG_(args_for_client).strs[i];
+ if (!argv) continue;
+ if ((size>0) && (size < BUF_LEN)) cmdbuf[size++] = ' ';
+ for(j=0;argv[j]!=0;j++)
+ if (size < BUF_LEN) cmdbuf[size++] = argv[j];
+ }
+#else
+ for(i = 0; i < VG_(client_argc); i++) {
+ argv = VG_(client_argv[i]);
+ if (!argv) continue;
+ if ((size>0) && (size < BUF_LEN)) cmdbuf[size++] = ' ';
+ for(j=0;argv[j]!=0;j++)
+ if (size < BUF_LEN) cmdbuf[size++] = argv[j];
+ }
+#endif
+
+ if (size == BUF_LEN) size--;
+ cmdbuf[size] = 0;
+}
+
+/* Set up dump file naming: derive base_directory and dump_file_base
+ * from --callgrind-out-file (absolute or relative to cwd), allocate
+ * the filename buffer, and verify that the final dump file is
+ * creatable/writable (aborts via file_err otherwise).
+ * Returns the directory and filename through the out parameters.
+ */
+void CLG_(init_files)(Char** dir, Char** file)
+{
+  Int size;
+  SysRes res;
+
+  if (!CLG_(clo).filename_base)
+    CLG_(clo).filename_base = DEFAULT_DUMPNAME;
+
+  /* get base directory for dump/command/result files */
+  if (CLG_(clo).filename_base[0] == '/') {
+      /* absolute path: directory part is everything up to the last '/' */
+      int lastSlash = 0, i =1;
+      while(CLG_(clo).filename_base[i]) {
+	  for(; CLG_(clo).filename_base[i] &&
+		  CLG_(clo).filename_base[i] != '/'; i++);
+	  if (CLG_(clo).filename_base[i] != '/') break;
+	  lastSlash = i;
+	  i++;
+      }
+      base_directory = (Char*) CLG_MALLOC(i+1);
+      VG_(strncpy)(base_directory, CLG_(clo).filename_base, i);
+      base_directory[i] = 0;
+
+      dump_file_base = CLG_(clo).filename_base;
+  }
+  else {
+      size = 100;
+      base_directory = 0;
+
+      /* getcwd() fails if the buffer isn't big enough -- keep doubling size
+	 until it succeeds. */
+      while (NULL == base_directory) {
+	  base_directory = CLG_MALLOC(size);
+	  if (!VG_(getcwd)(base_directory, size)) {
+	      VG_(free)(base_directory);
+	      base_directory = 0;
+	      size *= 2;
+	  }
+      }
+
+      size = VG_(strlen)(base_directory) + VG_(strlen)(CLG_(clo).filename_base) +2;
+      dump_file_base = (Char*) CLG_MALLOC(size);
+      CLG_ASSERT(dump_file_base != 0);
+      VG_(sprintf)(dump_file_base, "%s/%s",
+		   base_directory, CLG_(clo).filename_base);
+  }
+
+  /* allocate space big enough for final filenames */
+  filename = (Char*) CLG_MALLOC(VG_(strlen)(dump_file_base)+32);
+  CLG_ASSERT(filename != 0);
+
+  /* Make sure the output base file can be written.
+   * This is used for the dump at program termination.
+   * We stop with an error here if we can not create the
+   * file: This is probably because of missing rights,
+   * and trace parts wouldn't be allowed to be written, too.
+   */
+  VG_(sprintf)(filename, "%s.%d", dump_file_base, VG_(getpid)());
+  res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
+  if (res.isError) {
+    res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
+		    VKI_S_IRUSR|VKI_S_IWUSR);
+    if (res.isError) {
+      file_err();
+    }
+  }
+  if (!res.isError) VG_(close)( (Int)res.val );
+
+  *dir  = base_directory;
+  *file = filename;
+
+  init_cmdbuf();
+}
diff --git a/callgrind/events.c b/callgrind/events.c
new file mode 100644
index 0000000..6ef8d85
--- /dev/null
+++ b/callgrind/events.c
@@ -0,0 +1,575 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- events.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#define MAX_EVENTTYPE 20
+
+static EventType eventtype[MAX_EVENTTYPE];
+static Int eventtype_count = 0;
+
+/* Register a new event type under <name>; returns its descriptor.
+ * Panics when the static event type table is exhausted. */
+EventType* CLG_(register_eventtype)(Char* name)
+{
+  EventType* et;
+
+  if (eventtype_count == MAX_EVENTTYPE) {
+    /* fixed message: this file is events.c (was "ct_events.c"),
+     * and "recomile" -> "recompile" */
+    VG_(printf)("\nMore than %d event types used!\n"
+		"Increase MAX_EVENTTYPE in events.c and recompile this tool!\n",
+		MAX_EVENTTYPE);
+    VG_(tool_panic)("Too many event types requested.");
+  }
+
+  et = &(eventtype[eventtype_count]);
+  et->id = eventtype_count;
+  /* strdup: the caller's buffer need not outlive the registration */
+  et->name = (UChar*) VG_(strdup)(name);
+  et->description = 0;
+
+  eventtype_count++;
+
+  return et;
+}
+
+
+/* Look up a registered event type by name; returns 0 if unknown */
+EventType* CLG_(get_eventtype)(Char* name)
+{
+  EventType* et  = eventtype;
+  EventType* end = eventtype + eventtype_count;
+
+  for(; et != end; et++)
+    if (VG_(strcmp)(et->name, name) == 0)
+      return et;
+  return 0;
+}
+
+/* Look up a registered event type by its id; returns 0 if out of range */
+EventType* CLG_(get_eventtype_byindex)(Int id)
+{
+  if ((id < 0) || (id >= eventtype_count)) return 0;
+  return &(eventtype[id]);
+}
+
+/* Allocate an event set able to hold up to <capacity> entries.
+ * The entry array is allocated inline (flexible array member). */
+EventSet* CLG_(get_eventset)(Char* n, Int capacity)
+{
+  EventSet* es;
+
+  es = (EventSet*) CLG_MALLOC(sizeof(EventSet)
+			      + capacity * sizeof(EventSetEntry));
+  es->name     = n;
+  es->size     = 0;
+  es->capacity = capacity;
+
+  return es;
+}
+
+/* Append one event type to a set; returns its offset, or -1 if full */
+Int CLG_(add_eventtype)(EventSet* es, EventType* t)
+{
+  Int offset = es->size;
+
+  if (offset + 1 > es->capacity) return -1;
+
+  es->size = offset + 1;
+  es->e[offset].type    = t;
+  /* an independent entry forms its own dependency group */
+  es->e[offset].nextTop = es->size;
+
+  return offset;
+}
+
+/* Append all entries of <src> to <dst>; returns the start offset in
+ * <dst>, or -1 if there is not enough room left */
+Int CLG_(add_eventset)(EventSet* dst, EventSet* src)
+{
+  Int i, offset = dst->size;
+
+  if (!src || (src->size == 0)) return offset;
+  if (src->size > dst->capacity - offset) return -1;
+
+  for(i = 0; i < src->size; i++) {
+    dst->e[offset+i].type    = src->e[i].type;
+    /* keep the dependency structure, shifted into dst */
+    dst->e[offset+i].nextTop = src->e[i].nextTop + offset;
+  }
+  dst->size += src->size;
+
+  return offset;
+}
+
+/* Append two dependent event types (with e2 always <= e1);
+ * returns the start offset, or -1 if the set is full */
+Int CLG_(add_dep_event2)(EventSet* es, EventType* e1, EventType* e2)
+{
+  Int i, offset = es->size;
+  EventType* types[2];
+
+  if (es->capacity < offset + 2) return -1;
+
+  types[0] = e1;
+  types[1] = e2;
+  es->size = offset + 2;
+  for(i = 0; i < 2; i++) {
+    es->e[offset+i].type    = types[i];
+    /* both entries belong to one group; the group top is the new size */
+    es->e[offset+i].nextTop = es->size;
+  }
+
+  return offset;
+}
+
+/* Append three dependent event types (with e3 <= e2 <= e1);
+ * returns the start offset, or -1 if the set is full */
+Int CLG_(add_dep_event3)(EventSet* es,
+			 EventType* e1, EventType* e2, EventType* e3)
+{
+  Int i, offset = es->size;
+  EventType* types[3];
+
+  if (es->capacity < offset + 3) return -1;
+
+  types[0] = e1;
+  types[1] = e2;
+  types[2] = e3;
+  es->size = offset + 3;
+  for(i = 0; i < 3; i++) {
+    es->e[offset+i].type    = types[i];
+    es->e[offset+i].nextTop = es->size;
+  }
+
+  return offset;
+}
+
+/* Append four dependent event types (with e4 <= e3 <= e2 <= e1);
+ * returns the start offset, or -1 if the set is full */
+Int CLG_(add_dep_event4)(EventSet* es,
+			 EventType* e1, EventType* e2,
+			 EventType* e3, EventType* e4)
+{
+  Int i, offset = es->size;
+  EventType* types[4];
+
+  if (es->capacity < offset + 4) return -1;
+
+  types[0] = e1;
+  types[1] = e2;
+  types[2] = e3;
+  types[3] = e4;
+  es->size = offset + 4;
+  for(i = 0; i < 4; i++) {
+    es->e[offset+i].type    = types[i];
+    es->e[offset+i].nextTop = es->size;
+  }
+
+  return offset;
+}
+
+/* Write the space separated names of all entries of <es> into <buf>.
+ * Returns the number of characters written (excluding the final 0). */
+Int CLG_(sprint_eventset)(Char* buf, EventSet* es)
+{
+  Int i, pos = 0;
+
+  for(i=0; i< es->size; i++) {
+    if (pos>0) buf[pos++] = ' ';
+    /* "%s": never use the name itself as format string -- a '%' in an
+     * event name would otherwise be interpreted as a conversion */
+    pos += VG_(sprintf)(buf + pos, "%s", es->e[i].type->name);
+  }
+  buf[pos] = 0;
+
+  return pos;
+}
+
+/* Get a fresh cost array for an event set.
+ * Sized for the full capacity, so the set may still grow afterwards. */
+ULong* CLG_(get_eventset_cost)(EventSet* es)
+{
+ return CLG_(get_costarray)(es->capacity);
+}
+
+/* Clear all counters (full capacity) of an event set.
+ * A 0 cost pointer is a no-op. */
+void CLG_(init_cost)(EventSet* es, ULong* cost)
+{
+  Int i = es->capacity;
+
+  if (!cost) return;
+
+  while(i > 0)
+    cost[--i] = 0;
+}
+
+/* Clear all counters (full capacity) of an event set,
+ * allocating the cost array first if it does not exist yet. */
+void CLG_(init_cost_lz)(EventSet* es, ULong** cost)
+{
+  Int i;
+
+  CLG_ASSERT(cost != 0);
+  if (*cost == 0)
+    *cost = CLG_(get_eventset_cost)(es);
+
+  for(i = es->capacity - 1; i >= 0; i--)
+    (*cost)[i] = 0;
+}
+
+/* Clear the used counters (size, not capacity) of an event set.
+ * A 0 cost pointer is a no-op. */
+void CLG_(zero_cost)(EventSet* es, ULong* cost)
+{
+  Int i;
+
+  if (!cost) return;
+
+  for(i = es->size - 1; i >= 0; i--)
+    cost[i] = 0;
+}
+
+/* True iff all used counters of <es> are zero.
+ * A 0 cost pointer is interpreted as all-zero cost.
+ * Exploits the nextTop invariant: if counter i is zero, all counters
+ * up to (excluding) e[i].nextTop are guaranteed zero as well. */
+Bool CLG_(is_zero_cost)(EventSet* es, ULong* cost)
+{
+ Int i = 0;
+
+ if (!cost) return True;
+
+ while(i<es->size) {
+ if (cost[i] != 0) return False;
+ /* skip the whole dependent group, known to be zero */
+ i = es->e[i].nextTop;
+ }
+ return True;
+}
+
+/* True iff cost arrays <c1> and <c2> hold the same counter values.
+ * A 0 pointer is interpreted as all-zero cost. */
+Bool CLG_(is_equal_cost)(EventSet* es, ULong* c1, ULong* c2)
+{
+ Int i = 0;
+
+ if (!c1) return CLG_(is_zero_cost)(es,c2);
+ if (!c2) return CLG_(is_zero_cost)(es,c1);
+
+ while(i<es->size) {
+ if (c1[i] != c2[i]) return False;
+ if (c1[i] == 0)
+ /* both zero: the whole dependent group is zero, skip it */
+ i = es->e[i].nextTop;
+ else
+ i++;
+ }
+ return True;
+}
+
+/* Copy the used counters of <src> into <dst>.
+ * A 0 <src> means zero cost; <dst> must be allocated. */
+void CLG_(copy_cost)(EventSet* es, ULong* dst, ULong* src)
+{
+  Int i;
+
+  if (!src) {
+    CLG_(zero_cost)(es, dst);
+    return;
+  }
+  CLG_ASSERT(dst != 0);
+
+  for(i = es->size - 1; i >= 0; i--)
+    dst[i] = src[i];
+}
+
+/* Copy <src> into *<pdst>, allocating the destination on demand
+ * ("lz" = lazy). A 0 <src> means zero cost. */
+void CLG_(copy_cost_lz)(EventSet* es, ULong** pdst, ULong* src)
+{
+  ULong* dst;
+  Int i;
+
+  CLG_ASSERT(pdst != 0);
+
+  if (!src) {
+    CLG_(zero_cost)(es, *pdst);
+    return;
+  }
+
+  dst = *pdst;
+  if (dst == 0) {
+    dst = CLG_(get_eventset_cost)(es);
+    *pdst = dst;
+  }
+
+  for(i = es->size - 1; i >= 0; i--)
+    dst[i] = src[i];
+}
+
+/* dst += src, counter-wise. A 0 <src> adds nothing; <dst> must be
+ * allocated. Whole dependency groups that are zero in <src> are
+ * skipped via the nextTop links. */
+void CLG_(add_cost)(EventSet* es, ULong* dst, ULong* src)
+{
+ Int i = 0;
+
+ if (!src) return;
+ CLG_ASSERT(dst != 0);
+
+ while(i<es->size) {
+ if (src[i] == 0)
+ i = es->e[i].nextTop;
+ else {
+ dst[i] += src[i];
+ i++;
+ }
+ }
+}
+
+/* *pdst += src, allocating *pdst on demand ("lz" = lazy).
+ * A 0 <src> adds nothing. */
+void CLG_(add_cost_lz)(EventSet* es, ULong** pdst, ULong* src)
+{
+ Int i;
+ ULong* dst;
+
+ if (!src) return;
+ CLG_ASSERT(pdst != 0);
+
+ dst = *pdst;
+ if (!dst) {
+ /* fresh destination: adding to zero is just a copy */
+ dst = *pdst = CLG_(get_eventset_cost)(es);
+ CLG_(copy_cost)(es,dst,src);
+ return;
+ }
+
+ i = 0;
+ while(i<es->size) {
+ if (src[i] == 0)
+ i = es->e[i].nextTop;
+ else {
+ dst[i] += src[i];
+ i++;
+ }
+ }
+}
+
+/* Adds src to dst and zeros src. Returns False if nothing changed. */
+Bool CLG_(add_and_zero_cost)(EventSet* es, ULong* dst, ULong* src)
+{
+  Int i = 0, j = 0;
+
+  /* check args before use: the debug output below dereferences es,
+   * so the null check must come first */
+  if (!es || !src) return False;
+
+  CLG_DEBUGIF(6) {
+    CLG_DEBUG(6, " add_and_zero_cost(%s, dst %p, src %p)\n", es->name, dst, src);
+    CLG_(print_cost)(-5, es, src);
+  }
+
+  while(i<es->size) {
+    if (src[i] == 0)
+      i = es->e[i].nextTop;
+    else {
+      dst[i] += src[i];
+      src[i] = 0;
+      i++;
+      j++;
+    }
+  }
+
+  /* j counts transferred (nonzero) entries */
+  return (j>0);
+}
+
+/* Adds src to dst and zeros src, allocating dst lazily if needed.
+ * Returns False if nothing changed. */
+Bool CLG_(add_and_zero_cost_lz)(EventSet* es, ULong** pdst, ULong* src)
+{
+  Int i;
+  ULong* dst;
+
+  if (!src) return False;
+
+  /* find the first nonzero counter; bail out if all are zero */
+  i = 0;
+  while(1) {
+    if (i >= es->size) return False;
+    if (src[i] != 0) break;
+    i = es->e[i].nextTop;
+  }
+
+  CLG_ASSERT(pdst != 0);
+  dst = *pdst;
+  if (!dst) {
+    /* fresh destination: adding to zero is copy + clear */
+    dst = *pdst = CLG_(get_eventset_cost)(es);
+    CLG_(copy_cost)(es,dst,src);
+    CLG_(zero_cost)(es,src);
+    return True;
+  }
+
+  dst[i] += src[i];
+  src[i] = 0;
+  i++;
+
+  while(i<es->size) {
+    if (src[i] == 0)
+      i = es->e[i].nextTop;
+    else {
+      dst[i] += src[i];
+      src[i] = 0;
+      /* bug fix: advance past the just-cleared entry. Without this,
+       * the next iteration sees src[i]==0 and jumps to nextTop,
+       * silently skipping possibly nonzero entries in between
+       * (compare the non-lazy variant above, which does i++). */
+      i++;
+    }
+  }
+
+  return True;
+}
+
+/* Adds the difference of <new> and <old> to <dst>, and sets <old>
+ * to <new>. Returns False if nothing changed.
+ * NOTE(review): assumes dst, old and new are all non-null, in
+ * contrast to the _lz variant below -- confirm at call sites. */
+Bool CLG_(add_diff_cost)(EventSet* es, ULong* dst, ULong* old, ULong* new)
+{
+ Int i = 0, j = 0;
+
+ while(i<es->size) {
+ if (new[i] == old[i])
+ i = es->e[i].nextTop;
+ else {
+ dst[i] += new[i] - old[i];
+ old[i] = new[i];
+ i++;
+ j++;
+ }
+ }
+
+ return (j>0);
+}
+
+/* Adds the difference of <new> and <old> to *<pdst> (allocated on
+ * demand), and sets <old> to <new>. Returns False if nothing changed. */
+Bool CLG_(add_diff_cost_lz)(EventSet* es, ULong** pdst,
+ ULong* old, ULong* new)
+{
+ Int i;
+ ULong* dst;
+
+ /* either both arrays exist or both are (zero-cost) null */
+ if (!old && !new) return False;
+ CLG_ASSERT(old && new);
+
+ /* find the first changed counter; bail out if nothing changed */
+ i = 0;
+ while(1) {
+ if (i >= es->size) return False;
+ if (old[i] != new[i]) break;
+ i = es->e[i].nextTop;
+ }
+
+ CLG_ASSERT(pdst != 0);
+ dst = *pdst;
+ if (!dst) {
+ dst = *pdst = CLG_(get_eventset_cost)(es);
+ CLG_(zero_cost)(es,dst);
+ }
+
+ dst[i] += new[i] - old[i];
+ old[i] = new[i];
+ i++;
+
+ while(i<es->size) {
+ if (new[i] == old[i])
+ i = es->e[i].nextTop;
+ else {
+ dst[i] += new[i] - old[i];
+ old[i] = new[i];
+ i++;
+ }
+ }
+
+ return True;
+}
+
+/* Print the counters of <c> space separated into <buf>.
+ * Zeros before a later nonzero entry are printed as "0"; trailing
+ * zeros are omitted entirely. Returns the number of characters
+ * written (excluding the final 0 byte). */
+Int CLG_(sprint_cost)(Char* buf, EventSet* es, ULong* c)
+{
+ Int i, pos, skipped = 0;
+
+ if (!c || es->size==0) return 0;
+
+ /* At least one entry */
+ pos = VG_(sprintf)(buf, "%llu", c[0]);
+ i = 1;
+
+ while(i<es->size) {
+ if (c[i] == 0) {
+ /* remember how many zeros we skipped; only print them
+ * if a nonzero entry follows */
+ skipped += es->e[i].nextTop - i;
+ i = es->e[i].nextTop;
+ }
+ else {
+ while(skipped>0) {
+ buf[pos++] = ' ';
+ buf[pos++] = '0';
+ skipped--;
+ }
+ buf[pos++] = ' ';
+ pos += VG_(sprintf)(buf+pos, "%llu", c[i]);
+ i++;
+ }
+ }
+
+ return pos;
+}
+
+
+/* Allocate an event mapping for (a subset of) the entries of <es>.
+ * The index array is allocated inline, sized for full capacity. */
+EventMapping* CLG_(get_eventmapping)(EventSet* es)
+{
+  EventMapping* em;
+
+  CLG_ASSERT(es != 0);
+
+  em = (EventMapping*) CLG_MALLOC(sizeof(EventMapping)
+				  + es->capacity * sizeof(Int));
+  em->set      = es;
+  em->size     = 0;
+  em->capacity = es->capacity;
+
+  return em;
+}
+
+/* Append the event named <n> to mapping <em>.
+ * Silently does nothing if <n> is not part of the underlying set. */
+void CLG_(append_event)(EventMapping* em, Char* n)
+{
+  Int i;
+
+  CLG_ASSERT(em != 0);
+
+  for(i = 0; i < em->set->size; i++) {
+    if (VG_(strcmp)(n, em->set->e[i].type->name) != 0) continue;
+
+    /* found: record the set index of this event */
+    CLG_ASSERT(em->capacity > em->size);
+    em->index[em->size++] = i;
+    return;
+  }
+}
+
+
+/* Write the space separated names of the mapped events into <buf>.
+ * Returns the number of characters written (excluding the final 0). */
+Int CLG_(sprint_eventmapping)(Char* buf, EventMapping* em)
+{
+  Int i, pos = 0;
+
+  CLG_ASSERT(em != 0);
+
+  for(i=0; i< em->size; i++) {
+    if (pos>0) buf[pos++] = ' ';
+    /* "%s": never use the name itself as format string -- a '%' in an
+     * event name would otherwise be interpreted as a conversion */
+    pos += VG_(sprintf)(buf + pos, "%s", em->set->e[em->index[i]].type->name);
+  }
+  buf[pos] = 0;
+
+  return pos;
+}
+
+/* Print the mapped counters of <c> space separated into <buf>, in
+ * mapping order. Zeros before a later nonzero entry are printed as
+ * "0"; trailing zeros are omitted. Returns the number of characters
+ * written (excluding the final 0 byte). */
+Int CLG_(sprint_mappingcost)(Char* buf, EventMapping* em, ULong* c)
+{
+ Int i, pos, skipped = 0;
+
+ if (!c || em->size==0) return 0;
+
+ /* At least one entry */
+ pos = VG_(sprintf)(buf, "%llu", c[em->index[0]]);
+ i = 1;
+
+ while(i<em->size) {
+ if (c[em->index[i]] == 0) {
+ skipped++;
+ i++;
+ }
+ else {
+ /* flush the zeros we deferred, now that a nonzero follows */
+ while(skipped>0) {
+ buf[pos++] = ' ';
+ buf[pos++] = '0';
+ skipped--;
+ }
+ buf[pos++] = ' ';
+ pos += VG_(sprintf)(buf+pos, "%llu", c[em->index[i]]);
+ i++;
+ }
+ }
+
+ return pos;
+}
diff --git a/callgrind/events.h b/callgrind/events.h
new file mode 100644
index 0000000..d2cad1e
--- /dev/null
+++ b/callgrind/events.h
@@ -0,0 +1,113 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- events.h ---*/
+/*--- (C) 2004-2005, Josef Weidendorfer ---*/
+/*--------------------------------------------------------------------*/
+
+
+/* Abstractions for 64-bit cost lists (events.h) */
+
+#ifndef CG_EVENTS
+#define CG_EVENTS
+
+#include "pub_tool_basics.h"
+
+#define CLG_(str) VGAPPEND(vgCallgrind_,str)
+
+/* An event type: a named 64bit counter kind (e.g. an instruction or
+ * cache event), registered once and identified by its id. */
+typedef struct _EventType EventType;
+struct _EventType {
+ Char* name;
+ Char* description;
+ Int id;
+};
+
+EventType* CLG_(register_eventtype)(Char*);
+EventType* CLG_(get_eventtype)(Char*);
+EventType* CLG_(get_eventtype_byindex)(Int id);
+
+/* An event set is an ordered list of event types, which comes down
+ * to some description for ordered lists of costs.
+ * Often, costs of 2 event types are related, e.g. one is always smaller
+ * than the other. This is useful to speed up arithmetics on cost lists:
+ * Each event type in the set has a <nextTop>. All indexes before are
+ * promised to hold smaller values than the current.
+ */
+typedef struct _EventSetEntry EventSetEntry;
+struct _EventSetEntry {
+ EventType* type;
+ Int nextTop;
+};
+typedef struct _EventSet EventSet;
+struct _EventSet {
+ Char* name;
+ Int size;
+ Int capacity;
+ /* entries allocated inline, directly after the header */
+ EventSetEntry e[0];
+};
+
+
+/* Some events out of an event set.
+ * Used to print out part of an EventSet, or in another order.
+ */
+typedef struct _EventMapping EventMapping;
+struct _EventMapping {
+ EventSet* set;
+ Int size;
+ Int capacity;
+ /* indexes into set->e[], allocated inline */
+ Int index[0];
+};
+
+
+/* Allocate space for an event set */
+EventSet* CLG_(get_eventset)(Char* n, Int capacity);
+/* Incorporate an event type into a set, get start offset */
+Int CLG_(add_eventtype)(EventSet* dst, EventType*);
+/* Incorporate event types into a set, with ... < second < first */
+Int CLG_(add_dep_event2)(EventSet* dst, EventType* e1, EventType* e2);
+Int CLG_(add_dep_event3)(EventSet* dst,
+ EventType* e1, EventType* e2, EventType* e3);
+Int CLG_(add_dep_event4)(EventSet* dst,
+ EventType* e1, EventType* e2, EventType* e3,
+ EventType* e4);
+/* Incorporate one event set into another, get start offset */
+Int CLG_(add_eventset)(EventSet* dst, EventSet* src);
+/* Returns number of characters written */
+Int CLG_(sprint_eventset)(Char* buf, EventSet*);
+/* Allocate cost array for an event set */
+ULong* CLG_(get_eventset_cost)(EventSet*);
+
+/* Operations on costs. A cost pointer of 0 means zero cost.
+ * Functions ending in _lz allocate costs lazy if needed
+ */
+/* Set costs according full capacity of event set to 0 */
+void CLG_(init_cost)(EventSet*,ULong*);
+/* This always allocates counter and sets them to 0 */
+void CLG_(init_cost_lz)(EventSet*,ULong**);
+/* Set costs of an event set to zero */
+void CLG_(zero_cost)(EventSet*,ULong*);
+Bool CLG_(is_zero_cost)(EventSet*,ULong*);
+Bool CLG_(is_equal_cost)(EventSet*,ULong*,ULong*);
+void CLG_(copy_cost)(EventSet*,ULong* dst, ULong* src);
+void CLG_(copy_cost_lz)(EventSet*,ULong** pdst, ULong* src);
+void CLG_(add_cost)(EventSet*,ULong* dst, ULong* src);
+void CLG_(add_cost_lz)(EventSet*,ULong** pdst, ULong* src);
+/* Adds src to dst and zeros src. Returns false if nothing changed */
+Bool CLG_(add_and_zero_cost)(EventSet*,ULong* dst, ULong* src);
+Bool CLG_(add_and_zero_cost_lz)(EventSet*,ULong** pdst, ULong* src);
+/* Adds the difference of new and old to dst, and sets old to new.
+ * Returns false if nothing changed */
+Bool CLG_(add_diff_cost)(EventSet*,ULong* dst, ULong* old, ULong* new);
+Bool CLG_(add_diff_cost_lz)(EventSet*,ULong** pdst, ULong* old, ULong* new);
+/* Returns number of characters written */
+Int CLG_(sprint_cost)(Char* buf, EventSet*, ULong*);
+
+/* Allocate space for an event mapping */
+EventMapping* CLG_(get_eventmapping)(EventSet*);
+void CLG_(append_event)(EventMapping*, Char*);
+/* Returns number of characters written */
+Int CLG_(sprint_eventmapping)(Char* buf, EventMapping*);
+/* Returns number of characters written */
+Int CLG_(sprint_mappingcost)(Char* buf, EventMapping*, ULong*);
+
+#endif /* CG_EVENTS */
diff --git a/callgrind/fn.c b/callgrind/fn.c
new file mode 100644
index 0000000..a786c50
--- /dev/null
+++ b/callgrind/fn.c
@@ -0,0 +1,616 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- ct_fn.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#define N_INITIAL_FN_ARRAY_SIZE 10071
+
+static fn_array current_fn_active;
+
+static Addr runtime_resolve_addr = 0;
+static int runtime_resolve_length = 0;
+
+/* _dl_runtime_resolve, located in the runtime linker (ld.so), needs
+ * special handling: The jump at its end into the resolved function
+ * should not be represented as a call (as usually done in callgrind
+ * with jumps), but as a return + call. Otherwise, the repeated
+ * existence of _dl_runtime_resolve in call chains will lead to huge
+ * cycles, making the profile almost worthless.
+ *
+ * If ld.so is stripped, the symbol will not appear. But as this
+ * function is handcrafted assembler, we search for it...
+ *
+ * On success, runtime_resolve_addr/runtime_resolve_length are set;
+ * they stay untouched if the code is not found.
+ */
+static void search_runtime_resolve(obj_node* obj)
+{
+ /* We do not check target address of <fixup>, therefore we have >1 ranges.
+ * We use a tuple sequence (offset,length) into the code array for this
+ */
+
+#if defined(VGA_x86)
+ /* Check ranges [0-11], [16-23] */
+ static int code_offsets[] = { 0, 12, 16, 8, 24, 0 };
+ static unsigned char code[] = {
+ /* 0*/ 0x50, 0x51, 0x52, 0x8b, 0x54, 0x24, 0x10, 0x8b,
+ /* 8*/ 0x44, 0x24, 0x0c, 0xe8, 0x70, 0x01, 0x00, 0x00,
+ /*16*/ 0x5a, 0x59, 0x87, 0x04, 0x24, 0xc2, 0x08, 0x00 };
+#else
+#if defined(VGA_ppc32)
+ static int code_offsets[] = {0, 65, 68, 64, 132, 0 };
+ static unsigned char code[] = {
+ /* 0*/ 0x94, 0x21, 0xff, 0xc0, 0x90, 0x01, 0x00, 0x0c,
+ /* 8*/ 0x90, 0x61, 0x00, 0x10, 0x90, 0x81, 0x00, 0x14,
+ /*16*/ 0x7d, 0x83, 0x63, 0x78, 0x90, 0xa1, 0x00, 0x18,
+ /*24*/ 0x7d, 0x64, 0x5b, 0x78, 0x90, 0xc1, 0x00, 0x1c,
+ /*32*/ 0x7c, 0x08, 0x02, 0xa6, 0x90, 0xe1, 0x00, 0x20,
+ /*40*/ 0x90, 0x01, 0x00, 0x30, 0x91, 0x01, 0x00, 0x24,
+ /*48*/ 0x7c, 0x00, 0x00, 0x26, 0x91, 0x21, 0x00, 0x28,
+ /*56*/ 0x91, 0x41, 0x00, 0x2c, 0x90, 0x01, 0x00, 0x08,
+ /*64*/ 0x48, 0x00, 0x02, 0x91, 0x7c, 0x69, 0x03, 0xa6, /* at 64: bl aff0 <fixup> */
+ /*72*/ 0x80, 0x01, 0x00, 0x30, 0x81, 0x41, 0x00, 0x2c,
+ /*80*/ 0x81, 0x21, 0x00, 0x28, 0x7c, 0x08, 0x03, 0xa6,
+ /*88*/ 0x81, 0x01, 0x00, 0x24, 0x80, 0x01, 0x00, 0x08,
+ /*96*/ 0x80, 0xe1, 0x00, 0x20, 0x80, 0xc1, 0x00, 0x1c,
+ /*104*/0x7c, 0x0f, 0xf1, 0x20, 0x80, 0xa1, 0x00, 0x18,
+ /*112*/0x80, 0x81, 0x00, 0x14, 0x80, 0x61, 0x00, 0x10,
+ /*120*/0x80, 0x01, 0x00, 0x0c, 0x38, 0x21, 0x00, 0x40,
+ /*128*/0x4e, 0x80, 0x04, 0x20 };
+#else
+#if defined(VGA_amd64)
+ /* x86_64 */
+ static int code_offsets[] = {0, 62, 66, 44, 110, 0 };
+ static unsigned char code[] = {
+ /* 0*/ 0x48, 0x83, 0xec, 0x38, 0x48, 0x89, 0x04, 0x24,
+ /* 8*/ 0x48, 0x89, 0x4c, 0x24, 0x08, 0x48, 0x89, 0x54, 0x24, 0x10,
+ /*18*/ 0x48, 0x89, 0x74, 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20,
+ /*28*/ 0x4c, 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, 0x30,
+ /*38*/ 0x48, 0x8b, 0x74, 0x24, 0x40, 0x49, 0x89, 0xf3,
+ /*46*/ 0x4c, 0x01, 0xde, 0x4c, 0x01, 0xde, 0x48, 0xc1, 0xe6, 0x03,
+ /*56*/ 0x48, 0x8b, 0x7c, 0x24, 0x38, 0xe8, 0xee, 0x01, 0x00, 0x00,
+ /*66*/ 0x49, 0x89, 0xc3, 0x4c, 0x8b, 0x4c, 0x24, 0x30,
+ /*74*/ 0x4c, 0x8b, 0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, 0x24, 0x20,
+ /*84*/ 0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, 0x8b, 0x54, 0x24, 0x10,
+ /*94*/ 0x48, 0x8b, 0x4c, 0x24, 0x08, 0x48, 0x8b, 0x04, 0x24,
+ /*103*/0x48, 0x83, 0xc4, 0x48, 0x41, 0xff, 0xe3 };
+#else
+ /* Unknown architecture, no check is done */
+ static int code_offsets[] = {0, 0 };
+ static unsigned char code[] = { 0 };
+#endif
+#endif
+#endif
+
+ int *range = &(code_offsets[0]), *r = 0;
+ Bool found = False;
+ Addr addr, end;
+
+ /* Only search in libraries with a given name pattern */
+ if ((VG_(strncmp)(obj->name, "/lib/ld", 7) != 0) &&
+ (VG_(strncmp)(obj->name, "/lib64/ld", 9) != 0)) return;
+
+ CLG_DEBUG(1, "search_rs: Checking %d bytes of [%x %x %x...]\n",
+ range[1], code[0], code[1], code[2]);
+
+ /* byte-wise linear scan of the object's mapping for the first range */
+ end = obj->start + obj->size - range[1];
+ addr = obj->start;
+ while(addr < end) {
+ if (VG_(memcmp)( (void*)addr, code, range[1]) == 0) {
+
+ /* first range matched: verify the remaining (offset,length) tuples */
+ r = range + 2;
+ found = True;
+ while(r[1]) {
+ CLG_DEBUG(1, " [%p] Found! Checking %d bytes of [%x %x %x...]\n",
+ addr, r[1], code[r[0]], code[r[0]+1], code[r[0]+2]);
+
+ if (VG_(memcmp)( (void*)(addr+r[0]), code+r[0], r[1]) != 0) {
+ found = False;
+ break;
+ }
+ r += 2;
+ }
+ if (found) break;
+ }
+ addr++;
+ }
+
+ if (!found || (r==0)) return;
+
+ if (VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_DebugMsg, "Code check found runtime_resolve: %s +%p=%p, length %d",
+ obj->name + obj->last_slash_pos,
+ addr - obj->start, addr, r[0]);
+
+ /* remember the match; new_obj_node only searches while this is 0 */
+ runtime_resolve_addr = addr;
+ runtime_resolve_length = r[0];
+}
+
+/*------------------------------------------------------------*/
+/*--- Object/File/Function hash entry operations ---*/
+/*------------------------------------------------------------*/
+
+/* Object hash table, fixed */
+static obj_node* obj_table[N_OBJ_ENTRIES];
+
+/* Reset the (fixed size) object hash table to all-empty chains */
+void CLG_(init_obj_table)()
+{
+  Int i = N_OBJ_ENTRIES;
+  while (i > 0)
+    obj_table[--i] = 0;
+}
+
+#define HASH_CONSTANT 256
+
+/* Simple modular string hash; result is in [0, table_size) */
+static UInt str_hash(const Char *s, UInt table_size)
+{
+  int h = 0;
+  while (*s)
+    h = (HASH_CONSTANT * h + *s++) % table_size;
+  return h;
+}
+
+
+static Char* anonymous_obj = "???";
+
+/* Create an object node for segment <si> (0 = anonymous mapping),
+ * chained before <next>. Also scans the new object once for the
+ * hand-written _dl_runtime_resolve code if not found yet. */
+static __inline__
+obj_node* new_obj_node(SegInfo* si, obj_node* next)
+{
+ Int i;
+ obj_node* new;
+
+ new = (obj_node*) CLG_MALLOC(sizeof(obj_node));
+ new->name = si ? VG_(strdup)( VG_(seginfo_filename)(si) )
+ : anonymous_obj;
+ for (i = 0; i < N_FILE_ENTRIES; i++) {
+ new->files[i] = NULL;
+ }
+ CLG_(stat).distinct_objs ++;
+ new->number = CLG_(stat).distinct_objs;
+ new->start = si ? VG_(seginfo_start)(si) : 0;
+ new->size = si ? VG_(seginfo_size)(si) : 0;
+ new->offset = si ? VG_(seginfo_sym_offset)(si) : 0;
+ new->next = next;
+
+ // not only used for debug output (see static.c)
+ /* position just after the last '/' in the object name, or 0 */
+ new->last_slash_pos = 0;
+ i = 0;
+ while(new->name[i]) {
+ if (new->name[i]=='/') new->last_slash_pos = i+1;
+ i++;
+ }
+
+ if (runtime_resolve_addr == 0) search_runtime_resolve(new);
+
+ return new;
+}
+
+/* Lookup (or create) the object node for segment <si>.
+ * An unknown segment maps to the anonymous object "???". */
+obj_node* CLG_(get_obj_node)(SegInfo* si)
+{
+  obj_node* node;
+  UInt h;
+  const UChar* obj_name;
+
+  obj_name = si ? (Char*) VG_(seginfo_filename)(si) : anonymous_obj;
+
+  /* walk the hash chain for this name */
+  h = str_hash(obj_name, N_OBJ_ENTRIES);
+  for(node = obj_table[h]; node; node = node->next)
+    if (VG_(strcmp)(obj_name, node->name) == 0)
+      return node;
+
+  /* not found: create and prepend to the chain */
+  node = new_obj_node(si, obj_table[h]);
+  obj_table[h] = node;
+
+  return node;
+}
+
+
+/* Create a file node for <filename> inside <obj>, chained before <next> */
+static __inline__
+file_node* new_file_node(Char filename[FILENAME_LEN],
+			 obj_node* obj, file_node* next)
+{
+  Int i;
+  file_node* node = (file_node*) CLG_MALLOC(sizeof(file_node));
+
+  node->name = VG_(strdup)(filename);
+  for (i = 0; i < N_FN_ENTRIES; i++)
+    node->fns[i] = NULL;
+  node->number = ++CLG_(stat).distinct_files;
+  node->obj    = obj;
+  node->next   = next;
+  return node;
+}
+
+
+/* Lookup (or create) the file node for <filename> inside an object */
+file_node* CLG_(get_file_node)(obj_node* curr_obj_node,
+			       Char filename[FILENAME_LEN])
+{
+  file_node* node;
+  UInt h = str_hash(filename, N_FILE_ENTRIES);
+
+  /* walk the per-object hash chain */
+  for(node = curr_obj_node->files[h]; node; node = node->next)
+    if (VG_(strcmp)(filename, node->name) == 0)
+      return node;
+
+  /* not found: create and prepend to the chain */
+  node = new_file_node(filename, curr_obj_node,
+		       curr_obj_node->files[h]);
+  curr_obj_node->files[h] = node;
+
+  return node;
+}
+
+/* forward decl. */
+static void resize_fn_array(void);
+
+/* Create a function node for <fnname> in <file>, chained before
+ * <next>, with all attributes set to their defaults.
+ * NOTE(review): the parameter is declared with FILENAME_LEN while
+ * callers pass FN_NAME_LEN buffers; harmless since array parameters
+ * decay to pointers, but worth unifying. */
+static __inline__
+fn_node* new_fn_node(Char fnname[FILENAME_LEN],
+		    file_node* file, fn_node* next)
+{
+ fn_node* new = (fn_node*) CLG_MALLOC(sizeof(fn_node));
+ new->name = VG_(strdup)(fnname);
+
+ CLG_(stat).distinct_fns++;
+ new->number = CLG_(stat).distinct_fns;
+ new->last_cxt = 0;
+ new->pure_cxt = 0;
+ new->file = file;
+ new->next = next;
+
+ new->dump_before = False;
+ new->dump_after = False;
+ new->zero_before = False;
+ new->toggle_collect = False;
+ new->skip = False;
+ new->pop_on_jump = False;
+ new->is_malloc = False;
+ new->is_realloc = False;
+ new->is_free = False;
+
+ new->group = 0;
+ new->separate_callers = CLG_(clo).separate_callers;
+ new->separate_recursions = CLG_(clo).separate_recursions;
+
+#if CLG_ENABLE_DEBUG
+ new->verbosity = -1;
+#endif
+
+ /* function numbers index the active count array; grow it if needed */
+ if (CLG_(stat).distinct_fns >= current_fn_active.size)
+   resize_fn_array();
+
+ return new;
+}
+
+
+/* Lookup (or create) a function node by name within a known file node. */
+static
+fn_node* get_fn_node_infile(file_node* curr_file_node,
+			    Char fnname[FN_NAME_LEN])
+{
+  fn_node* node;
+  UInt h;
+
+  CLG_ASSERT(curr_file_node != 0);
+
+  /* walk the per-file hash chain */
+  h = str_hash(fnname, N_FN_ENTRIES);
+  for(node = curr_file_node->fns[h]; node; node = node->next)
+    if (VG_(strcmp)(fnname, node->name) == 0)
+      return node;
+
+  /* not found: create and prepend to the chain */
+  node = new_fn_node(fnname, curr_file_node,
+		     curr_file_node->fns[h]);
+  curr_file_node->fns[h] = node;
+
+  return node;
+}
+
+
+/* Lookup (or create) a function node given segment, file and
+ * function name; intermediate hash nodes are created as needed. */
+static __inline__
+fn_node* get_fn_node_inseg(SegInfo* si,
+			   Char filename[FILENAME_LEN],
+			   Char fnname[FN_NAME_LEN])
+{
+  obj_node  *o = CLG_(get_obj_node)(si);
+  file_node *f = CLG_(get_file_node)(o, filename);
+
+  return get_fn_node_infile(f, fnname);
+}
+
+
+/* Fill in file name, function name (and optionally line number and
+ * segment) for <instr_addr> from debug information. Missing pieces
+ * are set to "???" resp. 0. Returns False only if neither file nor
+ * function information was found. */
+Bool CLG_(get_debug_info)(Addr instr_addr,
+ Char filename[FILENAME_LEN],
+ Char fn_name[FN_NAME_LEN], UInt* line_num,
+ SegInfo** pSegInfo)
+{
+ Bool found1, found2, result = True;
+ UInt line;
+
+ CLG_DEBUG(6, " + get_debug_info(%p)\n", instr_addr);
+
+ if (pSegInfo) {
+ *pSegInfo = VG_(find_seginfo)(instr_addr);
+
+ // for generated code in anonymous space, pSegInfo is 0
+ }
+
+ found1 = VG_(get_filename_linenum)(instr_addr,
+ filename, FILENAME_LEN,
+ NULL, 0, NULL, // FIXME: add dirnames!
+ &line);
+ found2 = VG_(get_fnname)(instr_addr,
+ fn_name, FN_NAME_LEN);
+
+ /* four cases, also counted in the statistics */
+ if (!found1 && !found2) {
+ CLG_(stat).no_debug_BBs++;
+ VG_(strcpy)(filename, "???");
+ VG_(strcpy)(fn_name, "???");
+ if (line_num) *line_num=0;
+ result = False;
+
+ } else if ( found1 && found2) {
+ CLG_(stat).full_debug_BBs++;
+ if (line_num) *line_num=line;
+
+ } else if ( found1 && !found2) {
+ CLG_(stat).file_line_debug_BBs++;
+ VG_(strcpy)(fn_name, "???");
+ if (line_num) *line_num=line;
+
+ } else /*(!found1 && found2)*/ {
+ CLG_(stat).fn_name_debug_BBs++;
+ VG_(strcpy)(filename, "???");
+ if (line_num) *line_num=0;
+ }
+
+ CLG_DEBUG(6, " - get_debug_info(%p): seg '%s', fn %s\n",
+ instr_addr,
+ !pSegInfo ? (const UChar*)"-" :
+ (*pSegInfo) ? VG_(seginfo_filename)(*pSegInfo) :
+ (const UChar*)"(None)",
+ fn_name);
+
+ return result;
+}
+
+/* for _libc_freeres_wrapper => _exit renaming */
+static BB* exit_bb = 0;
+
+
+/*
+ * Attach a function struct to a BB, derived from debug info.
+ * Idempotent per BB: the result is cached in bb->fn.
+ */
+fn_node* CLG_(get_fn_node)(BB* bb)
+{
+ Char filename[FILENAME_LEN], fnname[FN_NAME_LEN];
+ SegInfo* si;
+ UInt line_num;
+ fn_node* fn;
+
+ /* fn from debug info is idempotent for a BB */
+ if (bb->fn) return bb->fn;
+
+ CLG_DEBUG(3,"+ get_fn_node(BB %p)\n", bb_addr(bb));
+
+ /* get function/file name, line number and object of
+ * the BB according to debug information
+ */
+ CLG_(get_debug_info)(bb_addr(bb),
+ filename, fnname, &line_num, &si);
+
+ if (0 == VG_(strcmp)(fnname, "???")) {
+ int p;
+
+ /* no symbol: synthesize a name from the object offset */
+ /* Use address as found in library */
+ if (sizeof(Addr) == 4)
+ p = VG_(sprintf)(fnname, "%08p", bb->offset);
+ else
+ // 64bit address
+ p = VG_(sprintf)(fnname, "%016p", bb->offset);
+
+ VG_(sprintf)(fnname+p, "%s",
+ (bb->sect_kind == Vg_SectData) ? " [Data]" :
+ (bb->sect_kind == Vg_SectBSS) ? " [BSS]" :
+ (bb->sect_kind == Vg_SectGOT) ? " [GOT]" :
+ (bb->sect_kind == Vg_SectPLT) ? " [PLT]" : "");
+ }
+ else {
+ if (VG_(get_fnname_if_entry)(bb_addr(bb), fnname, FN_NAME_LEN))
+ bb->is_entry = 1;
+ }
+
+ /* HACK for correct _exit:
+ * _exit is redirected to VG_(__libc_freeres_wrapper) by valgrind,
+ * so we rename it back again :-)
+ */
+ if (0 == VG_(strcmp)(fnname, "vgPlain___libc_freeres_wrapper")
+ && exit_bb) {
+ CLG_(get_debug_info)(bb_addr(exit_bb),
+ filename, fnname, &line_num, &si);
+
+ CLG_DEBUG(1, "__libc_freeres_wrapper renamed to _exit\n");
+ }
+ if (0 == VG_(strcmp)(fnname, "_exit") && !exit_bb)
+ exit_bb = bb;
+
+ if (runtime_resolve_addr &&
+ (bb_addr(bb) >= runtime_resolve_addr) &&
+ (bb_addr(bb) < runtime_resolve_addr + runtime_resolve_length)) {
+ /* BB in runtime_resolve found by code check; use this name */
+ VG_(sprintf)(fnname, "_dl_runtime_resolve");
+ }
+
+ /* get fn_node struct for this function */
+ fn = get_fn_node_inseg( si, filename, fnname);
+
+ /* if this is the 1st time the function is seen,
+ * some attributes are set */
+ if (fn->pure_cxt == 0) {
+
+ /* Every function gets a "pure" context, i.e. a context with stack
+ * depth 1 only with this function. This is for compression of mangled
+ * names
+ */
+ fn_node* pure[2];
+ pure[0] = 0;
+ pure[1] = fn;
+ fn->pure_cxt = CLG_(get_cxt)(pure+1);
+
+ if (bb->sect_kind == Vg_SectPLT)
+ fn->skip = CLG_(clo).skip_plt;
+
+ if (VG_(strcmp)(fn->name, "_dl_runtime_resolve")==0) {
+ /* see the comment above search_runtime_resolve: the final jump
+ * of this function must be treated as return + call */
+ fn->pop_on_jump = True;
+
+ if (VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_DebugMsg, "Symbol match: found runtime_resolve: %s +%p=%p",
+ bb->obj->name + bb->obj->last_slash_pos,
+ bb->offset, bb_addr(bb));
+ }
+
+ fn->is_malloc = (VG_(strcmp)(fn->name, "malloc")==0);
+ fn->is_realloc = (VG_(strcmp)(fn->name, "realloc")==0);
+ fn->is_free = (VG_(strcmp)(fn->name, "free")==0);
+
+ /* apply config options from function name patterns
+ * given on command line */
+ CLG_(update_fn_config)(fn);
+ }
+
+
+ bb->fn = fn;
+ bb->line = line_num;
+
+ CLG_DEBUG(3,"- get_fn_node(BB %p): %s (in %s:%u)\n",
+ bb_addr(bb), fnname, filename, line_num);
+
+ return fn;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Active function array operations ---*/
+/*------------------------------------------------------------*/
+
+/* The active function array is a thread-specific array
+ * of UInts, mapping function numbers to the active count of
+ * functions.
+ * The active count is the number of times a function appears
+ * in the current call stack, and is used when costs for recursion
+ * levels should be separated.
+ */
+
+/* Pointer to the active count of function number <n> */
+UInt* CLG_(get_fn_entry)(Int n)
+{
+  CLG_ASSERT(n < current_fn_active.size);
+  return &(current_fn_active.array[n]);
+}
+
+/* Initialize a function activation array with all counts zero.
+ * Sized to cover every function number seen so far. */
+void CLG_(init_fn_array)(fn_array* a)
+{
+  Int i, sz;
+
+  CLG_ASSERT(a != 0);
+
+  sz = N_INITIAL_FN_ARRAY_SIZE;
+  if (sz <= CLG_(stat).distinct_fns)
+    sz = CLG_(stat).distinct_fns + 1;
+
+  a->size  = sz;
+  a->array = (UInt*) CLG_MALLOC(sz * sizeof(UInt));
+  for(i = 0; i < sz; i++)
+    a->array[i] = 0;
+}
+
+/* Make <dst> refer to the current activation counts.
+ * NOTE: shallow copy -- <dst> shares the array with
+ * current_fn_active; it is not an independent snapshot. */
+void CLG_(copy_current_fn_array)(fn_array* dst)
+{
+ CLG_ASSERT(dst != 0);
+
+ dst->size = current_fn_active.size;
+ dst->array = current_fn_active.array;
+}
+
+/* Return the (thread-global) active function array */
+fn_array* CLG_(get_current_fn_array)()
+{
+  /* fixed mis-encoded "&current_fn_active": "&curren" had been
+   * swallowed as the HTML entity for the currency sign */
+  return &current_fn_active;
+}
+
+/* Install <a> as the active function array (e.g. at a thread switch),
+ * growing it if function numbers already exceed its size. */
+void CLG_(set_current_fn_array)(fn_array* a)
+{
+  CLG_ASSERT(a != 0);
+
+  current_fn_active.array = a->array;
+  current_fn_active.size  = a->size;
+  if (CLG_(stat).distinct_fns >= current_fn_active.size)
+    resize_fn_array();
+}
+
+/* Grow the active function array by doubling until index
+ * <distinct_fns> (the highest function number) becomes valid.
+ * Existing counts are preserved; new slots start at zero. */
+static void resize_fn_array(void)
+{
+  UInt* grown;
+  Int i, newsize = current_fn_active.size;
+
+  while (newsize <= CLG_(stat).distinct_fns)
+    newsize *= 2;
+
+  CLG_DEBUG(0, "Resize fn_active_array: %d => %d\n",
+	    current_fn_active.size, newsize);
+
+  grown = (UInt*) CLG_MALLOC(newsize * sizeof(UInt));
+  for(i = 0; i < current_fn_active.size; i++)
+    grown[i] = current_fn_active.array[i];
+  for(; i < newsize; i++)
+    grown[i] = 0;
+
+  VG_(free)(current_fn_active.array);
+  current_fn_active.size  = newsize;
+  current_fn_active.array = grown;
+  CLG_(stat).fn_array_resizes++;
+}
+
+
diff --git a/callgrind/global.h b/callgrind/global.h
new file mode 100644
index 0000000..a103392
--- /dev/null
+++ b/callgrind/global.h
@@ -0,0 +1,838 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- global.h ---*/
+/*--- (C) 2004, 2005 Josef Weidendorfer ---*/
+/*--------------------------------------------------------------------*/
+
+#ifndef CLG_GLOBAL
+#define CLG_GLOBAL
+
+#include "pub_tool_basics.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcfile.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_libcproc.h"
+#include "pub_tool_machine.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_options.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_clientstate.h"
+#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
+
+#include "events.h" // defines CLG_ macro
+#include "costs.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Callgrind compile options ---*/
+/*------------------------------------------------------------*/
+
+/* Enable debug output */
+#define CLG_ENABLE_DEBUG 1
+
+/* Enable experimental features? */
+#define CLG_EXPERIMENTAL 0
+
+/* Syscall Timing in microseconds?
+ * (define to 0 if you get compile errors) */
+#define CLG_MICROSYSTIME 0
+
+/* Set to 1 if you want full sanity checks for JCC */
+#define JCC_CHECK 0
+
+
+
+/*------------------------------------------------------------*/
+/*--- Command line options ---*/
+/*------------------------------------------------------------*/
+
+#define DEFAULT_DUMPNAME "callgrind.out"
+#define DEFAULT_COMMANDNAME "callgrind.cmd"
+#define DEFAULT_RESULTNAME "callgrind.res"
+#define DEFAULT_INFONAME "/tmp/callgrind.info"
+
+typedef struct _CommandLineOptions CommandLineOptions;
+struct _CommandLineOptions {
+
+ /* Dump format options */
+ Char* filename_base; /* Base name for dumps */
+ Bool combine_dumps; /* Dump trace parts into same file? */
+ Bool compress_strings;
+ Bool compress_events;
+ Bool compress_pos;
+ Bool mangle_names;
+ Bool compress_mangled;
+ Bool dump_line;
+ Bool dump_instr;
+ Bool dump_bb;
+ Bool dump_bbs; /* Dump basic block information? */
+
+ /* Dump generation options */
+ Int dump_every_bb; /* Dump every xxx BBs. */
+
+ /* Collection options */
+ Bool separate_threads; /* Separate threads in dump? */
+ Int separate_callers; /* Separate dependent on how many callers? */
+ Int separate_recursions; /* Max level of recursions to separate */
+ Bool skip_plt; /* Skip functions in PLT section? */
+ Bool skip_direct_recursion; /* Increment direct recursions the level? */
+
+ Bool collect_atstart; /* Start in collecting state ? */
+ Bool collect_jumps; /* Collect (cond.) jumps in functions ? */
+
+ Bool collect_alloc; /* Collect size of allocated memory */
+ Bool collect_systime; /* Collect time for system calls */
+
+ /* Instrument options */
+ Bool instrument_atstart; /* Instrument at start? */
+ Bool simulate_cache; /* Call into cache simulator ? */
+
+#if CLG_ENABLE_DEBUG
+ Int verbose;
+ ULong verbose_start;
+#endif
+};
+
+/*------------------------------------------------------------*/
+/*--- Constants ---*/
+/*------------------------------------------------------------*/
+
+
+/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
+#define MAX_x86_INSTR_SIZE 16
+
+/* Minimum cache line size allowed */
+#define MIN_LINE_SIZE 16
+
+/* Size of various buffers used for storing strings */
+#define FILENAME_LEN 256
+#define FN_NAME_LEN 4096 /* for C++ code :-) */
+#define OBJ_NAME_LEN 256
+#define BUF_LEN 512
+#define COMMIFY_BUF_LEN 128
+#define RESULTS_BUF_LEN 128
+#define LINE_BUF_LEN 64
+
+
+
+/*------------------------------------------------------------*/
+/*--- Statistics ---*/
+/*------------------------------------------------------------*/
+
+typedef struct _Statistics Statistics;
+struct _Statistics {
+ ULong call_counter;
+ ULong jcnd_counter;
+ ULong jump_counter;
+ ULong rec_call_counter;
+ ULong ret_counter;
+ ULong bb_executions;
+
+ Int context_counter;
+ Int bb_retranslations;
+
+ Int distinct_objs;
+ Int distinct_files;
+ Int distinct_fns;
+ Int distinct_contexts;
+ Int distinct_bbs;
+ Int distinct_jccs;
+ Int distinct_bbccs;
+ Int distinct_instrs;
+ Int distinct_skips;
+
+ Int bb_hash_resizes;
+ Int bbcc_hash_resizes;
+ Int jcc_hash_resizes;
+ Int cxt_hash_resizes;
+ Int fn_array_resizes;
+ Int call_stack_resizes;
+ Int fn_stack_resizes;
+
+ Int full_debug_BBs;
+ Int file_line_debug_BBs;
+ Int fn_name_debug_BBs;
+ Int no_debug_BBs;
+ Int bbcc_lru_misses;
+ Int jcc_lru_misses;
+ Int cxt_lru_misses;
+ Int bbcc_clones;
+};
+
+
+/*------------------------------------------------------------*/
+/*--- Structure declarations ---*/
+/*------------------------------------------------------------*/
+
+typedef struct _Context Context;
+typedef struct _CC CC;
+typedef struct _BB BB;
+typedef struct _Skipped Skipped;
+typedef struct _BBCC BBCC;
+typedef struct _jCC jCC;
+typedef struct _fCC fCC;
+typedef struct _fn_node fn_node;
+typedef struct _file_node file_node;
+typedef struct _obj_node obj_node;
+typedef struct _fn_config fn_config;
+typedef struct _call_entry call_entry;
+typedef struct _thread_info thread_info;
+
+/* Costs of event sets. Aliases to arrays of 64-bit values */
+typedef ULong* SimCost; /* All events the simulator can produce */
+typedef ULong* UserCost;
+typedef ULong* FullCost; /* Simulator + User */
+
+
+/* JmpCall cost center
+ * for subroutine call (from->bb->jmp_addr => to->bb->addr)
+ *
+ * Each BB has at most one CALL instruction. The list of JCC from
+ * this call is a pointer to the list head (stored in BBCC), and
+ * <next_from> in the JCC struct.
+ *
+ * For fast lookup, JCCs are reachable with a hash table, keyed by
+ * the (from_bbcc,to) pair. <next_hash> is used for the JCC chain
+ * of one hash table entry.
+ *
+ * Cost <sum> holds event counts for already returned executions.
+ * <last> are the event counters at last enter of the subroutine.
+ * <sum> is updated on returning from the subroutine by
+ * adding the diff of <last> and current event counters to <sum>.
+ *
+ * After updating, <last> is set to current event counters. Thus,
+ * events are not counted twice for recursive calls (TODO: True?)
+ */
+#define JmpNone (Ijk_Boring+30)
+#define JmpCond (Ijk_Boring+31)
+
+struct _jCC {
+ Int jmpkind; /* JmpCall, JmpBoring, JmpCond */
+ jCC* next_hash; /* for hash entry chain */
+ jCC* next_from; /* next JCC from a BBCC */
+ BBCC *from, *to; /* call arc from/to this BBCC */
+ UInt jmp; /* jump no. in source */
+
+ ULong call_counter; /* no wraparound with 64 bit */
+
+ FullCost cost; /* simulator + user counters */
+};
+
+
+/*
+ * Info for one instruction of a basic block.
+ */
+typedef struct _InstrInfo InstrInfo;
+struct _InstrInfo {
+ UInt instr_offset;
+ UInt instr_size;
+ UInt data_size;
+ UInt cost_offset;
+ EventSet* eventset;
+};
+
+
+/*
+ * Info for a conditional jump in a basic block
+ */
+typedef struct _CJmpInfo CJmpInfo;
+struct _CJmpInfo {
+ UInt instr; /* instruction index in this basic block */
+ Bool skip; /* Cond.Jumps to next instruction should be ignored */
+};
+
+
+/**
+ * An instrumented basic block (BB).
+ *
+ * BBs are put into a resizable hash to allow for fast detection if a
+ * BB is to be retranslated but cost info is already available.
+ * The key for a BB is a (object, offset) tupel making it independent
+ * from possibly multiple mappings of the same ELF object.
+ *
+ * At the beginning of each instrumented BB,
+ * a call to setup_bbcc(), specifying a pointer to the
+ * according BB structure, is added.
+ *
+ * As cost of a BB has to be distinguished depending on the context,
+ * multiple cost centers for one BB (struct BBCC) exist and the according
+ * BBCC is set by setup_bbcc.
+ */
+struct _BB {
+ obj_node* obj; /* ELF object of BB */
+ OffT offset; /* offset of BB in ELF object file */
+ BB* next; /* chaining for a hash entry */
+
+ VgSectKind sect_kind; /* section of this BB, e.g. PLT */
+ UInt instr_count;
+
+ /* filled by CLG_(get_fn_node) if debug info is available */
+ fn_node* fn; /* debug info for this BB */
+ UInt line;
+ Bool is_entry; /* True if this BB is a function entry */
+
+ BBCC* bbcc_list; /* BBCCs for same BB (see next_bbcc in BBCC) */
+ BBCC* last_bbcc; /* Temporary: Cached for faster access (LRU) */
+
+ /* filled by CLG_(instrument) if not seen before */
+ UInt cjmp_count; /* number of conditional exits */
+ CJmpInfo* jmp; /* array of info for condition jumps,
+ * allocated directly after this struct */
+ Int jmpkind; /* remember jump kind of final exit */
+ Bool cjmp_inverted; /* condition of last cond.jump can be inverted by VEX */
+
+ UInt instr_len;
+ UInt cost_count;
+ InstrInfo instr[0]; /* info on instruction sizes and costs */
+};
+
+
+
+/**
+ * Function context
+ *
+ * Basic blocks are always executed in the scope of a context.
+ * A function context is a list of function nodes representing
+ * the call chain to the current context: I.e. fn[0] is the
+ * function we are currently in, fn[1] has called fn[0], and so on.
+ * Recursion levels are used for fn[0].
+ *
+ * To get a unique number for a full execution context, use
+ * rec_index = min(<fn->rec_separation>,<active>) - 1;
+ * unique_no = <number> + rec_index
+ *
+ * For each Context, recursion index and BB, there can be a BBCC.
+ */
+struct _Context {
+ UInt size; // number of function dependencies
+ UInt base_number; // for context compression & dump array
+ Context* next; // entry chaining for hash
+ UWord hash; // for faster lookup...
+ fn_node* fn[0];
+};
+
+
+/*
+ * Info for a conditional jump in a basic block
+ */
+typedef struct _JmpData JmpData;
+struct _JmpData {
+ ULong ecounter; /* number of times the BB was left at this exit */
+ jCC* jcc_list; /* JCCs for Cond.Jumps from this exit */
+};
+
+
+/*
+ * Basic Block Cost Center
+ *
+ * On demand, multiple BBCCs will be created for the same BB
+ * dependend on command line options and:
+ * - current function (it's possible that a BB is executed in the
+ * context of different functions, e.g. in manual assembler/PLT)
+ * - current thread ID
+ * - position where current function is called from
+ * - recursion level of current function
+ *
+ * The cost centres for the instructions of a basic block are
+ * stored in a contiguous array.
+ * They are distinguishable by their tag field.
+ */
+struct _BBCC {
+ BB* bb; /* BB for this cost center */
+
+ Context* cxt; /* execution context of this BBCC */
+ ThreadId tid; /* only for assertion check purpose */
+ UInt rec_index; /* Recursion index in rec->bbcc for this bbcc */
+ BBCC** rec_array; /* Variable sized array of pointers to
+ * recursion BBCCs. Shared. */
+ ULong ret_counter; /* how often returned from jccs of this bbcc;
+ * used to check if a dump for this BBCC is needed */
+
+ BBCC* next_bbcc; /* Chain of BBCCs for same BB */
+ BBCC* lru_next_bbcc; /* BBCC executed next the last time */
+
+ jCC* lru_from_jcc; /* Temporary: Cached for faster access (LRU) */
+ jCC* lru_to_jcc; /* Temporary: Cached for faster access (LRU) */
+ FullCost skipped; /* cost for skipped functions called from
+ * jmp_addr. Allocated lazy */
+
+ BBCC* next; /* entry chain in hash */
+ ULong* cost; /* start of 64bit costs for this BBCC */
+ ULong ecounter_sum; /* execution counter for first instruction of BB */
+ JmpData jmp[0];
+};
+
+
+/* the <number> of fn_node, file_node and obj_node are for compressed dumping
+ * and a index into the dump boolean table and fn_info_table
+ */
+
+struct _fn_node {
+ Char* name;
+ UInt number;
+ Context* last_cxt; /* LRU info */
+ Context* pure_cxt; /* the context with only the function itself */
+ file_node* file; /* reverse mapping for 2nd hash */
+ fn_node* next;
+
+ Bool dump_before :1;
+ Bool dump_after :1;
+ Bool zero_before :1;
+ Bool toggle_collect :1;
+ Bool skip :1;
+ Bool pop_on_jump : 1;
+
+ Bool is_malloc :1;
+ Bool is_realloc :1;
+ Bool is_free :1;
+
+ Int group;
+ Int separate_callers;
+ Int separate_recursions;
+#if CLG_ENABLE_DEBUG
+ Int verbosity; /* Stores old verbosity level while in function */
+#endif
+};
+
+/* Quite arbitrary fixed hash sizes */
+
+#define N_OBJ_ENTRIES 47
+#define N_FILE_ENTRIES 53
+#define N_FN_ENTRIES 87
+#define N_BBCC2_ENTRIES 37
+
+struct _file_node {
+ Char* name;
+ fn_node* fns[N_FN_ENTRIES];
+ UInt number;
+ obj_node* obj;
+ file_node* next;
+};
+
+/* If an object is dlopened multiple times, we hope that <name> is unique;
+ * <start> and <offset> can change with each dlopen, and <start> is
+ * zero when object is unmapped (possible at dump time).
+ */
+struct _obj_node {
+ Char* name;
+ UInt last_slash_pos;
+
+ Addr start; /* Start address of text segment mapping */
+ SizeT size; /* Length of mapping */
+ OffT offset; /* Offset between symbol address and file offset */
+
+ file_node* files[N_FILE_ENTRIES];
+ UInt number;
+ obj_node* next;
+};
+
+/* an entry in the callstack
+ *
+ * <nonskipped> is 0 if the function called is not skipped (usual case).
+ * Otherwise, it is the last non-skipped BBCC. This one gets all
+ * the calls to non-skipped functions and all costs in skipped
+ * instructions.
+ */
+struct _call_entry {
+ jCC* jcc; /* jCC for this call */
+ FullCost enter_cost; /* cost event counters at entering frame */
+ Addr sp; /* stack pointer directly after call */
+ Addr ret_addr; /* address to which to return to
+ * is 0 on a simulated call */
+ BBCC* nonskipped; /* see above */
+ Context* cxt; /* context before call */
+ Int fn_sp; /* function stack index before call */
+};
+
+
+/*
+ * Execution state of main thread or a running signal handler in
+ * a thread while interrupted by another signal handler.
+ * As there's no scheduling among running signal handlers of one thread,
+ * we only need a subset of a full thread state:
+ * - event counter
+ * - collect state
+ * - last BB, last jump kind, last nonskipped BB
+ * - callstack pointer for sanity checking and correct unwinding
+ * after exit
+ */
+typedef struct _exec_state exec_state;
+struct _exec_state {
+
+ /* the signum of the handler, 0 for main thread context
+ */
+ Int sig;
+
+ /* the old call stack pointer at entering the signal handler */
+ Int orig_sp;
+
+ FullCost cost;
+ Bool collect;
+ Context* cxt;
+
+ Int jmps_passed; /* number of conditional jumps passed in last BB */
+ BBCC* bbcc; /* last BB executed */
+ BBCC* nonskipped;
+
+ Int call_stack_bottom; /* Index into fn_stack */
+};
+
+/* Global state structures */
+typedef struct _bb_hash bb_hash;
+struct _bb_hash {
+ UInt size, entries; /* number of buckets / entries stored */
+ BB** table; /* bucket array; chained via BB::next */
+};
+
+typedef struct _cxt_hash cxt_hash;
+struct _cxt_hash {
+ UInt size, entries; /* number of buckets / entries stored */
+ Context** table; /* bucket array; chained via Context::next */
+};
+
+/* Thread specific state structures, i.e. parts of a thread state.
+ * There are variables for the current state of each part,
+ * on which a thread state is copied at thread switch.
+ */
+typedef struct _bbcc_hash bbcc_hash;
+struct _bbcc_hash {
+ UInt size, entries; /* number of buckets / entries stored */
+ BBCC** table; /* bucket array; chained via BBCC::next */
+};
+
+typedef struct _jcc_hash jcc_hash;
+struct _jcc_hash {
+ UInt size, entries; /* number of buckets / entries stored */
+ jCC** table; /* bucket array; chained via jCC::next_hash */
+ jCC* spontaneous; /* list of JCCs with unknown caller (from == 0) */
+};
+
+typedef struct _fn_array fn_array;
+struct _fn_array {
+ UInt size; /* length of <array> */
+ UInt* array; /* active counts, indexed by function number */
+};
+
+typedef struct _call_stack call_stack;
+struct _call_stack {
+ UInt size; /* allocated capacity of <entry> */
+ Int sp; /* stack pointer: index into <entry> */
+ call_entry* entry;
+};
+
+typedef struct _fn_stack fn_stack;
+struct _fn_stack {
+ UInt size; /* allocated number of fn_node* slots */
+ fn_node **bottom, **top; /* bottom/top of the stack within the slot array */
+};
+
+/* The maximum number of simultaneous running signal handlers per thread.
+ * This is the number of execution states storable in a thread.
+ */
+#define MAX_SIGHANDLERS 10
+
+typedef struct _exec_stack exec_stack;
+struct _exec_stack {
+ Int sp; /* > 0 if a handler is running */
+ exec_state* entry[MAX_SIGHANDLERS];
+};
+
+/* Thread State
+ *
+ * This structure stores thread specific info while a thread is *not*
+ * running. See function switch_thread() for save/restore on thread switch.
+ *
+ * If --separate-threads=no, BBCCs and JCCs can be shared by all threads, i.e.
+ * only structures of thread 1 are used.
+ * This involves variables fn_info_table, bbcc_table and jcc_table.
+ */
+struct _thread_info {
+
+ /* state */
+ fn_stack fns; /* function stack */
+ call_stack calls; /* context call arc stack */
+ exec_stack states; /* execution states interrupted by signals */
+
+ /* dump statistics */
+ FullCost lastdump_cost; /* Cost at last dump */
+ FullCost sighandler_cost;
+
+ /* thread specific data structure containers */
+ fn_array fn_active;
+ jcc_hash jccs;
+ bbcc_hash bbccs;
+};
+
+/* Structs used for dumping */
+
+/* Address position inside of a BBCC:
+ * This includes
+ * - the address offset from the BB start address
+ * - file/line from debug info for that address (can change inside a BB)
+ */
+typedef struct _AddrPos AddrPos;
+struct _AddrPos {
+ Addr addr;
+ Addr bb_addr;
+ file_node* file;
+ UInt line;
+};
+
+/* a simulator cost entity that can be written out in one line */
+typedef struct _AddrCost AddrCost;
+struct _AddrCost {
+ AddrPos p;
+ SimCost cost;
+};
+
+/* A function in an execution context */
+typedef struct _FnPos FnPos;
+struct _FnPos {
+ file_node* file;
+ fn_node* fn;
+ obj_node* obj;
+ Context* cxt;
+ int rec_index;
+ UInt line;
+};
+
+/*------------------------------------------------------------*/
+/*--- Cache simulator interface ---*/
+/*------------------------------------------------------------*/
+
+struct cachesim_if
+{
+ void (*print_opts)(void);
+ Bool (*parse_opt)(Char* arg);
+ void (*post_clo_init)(void);
+ void (*clear)(void);
+ void (*getdesc)(Char* buf);
+ void (*printstat)(void);
+ void (*add_icost)(SimCost, BBCC*, InstrInfo*, ULong);
+ void (*after_bbsetup)(void);
+ void (*finish)(void);
+
+ void (*log_1I0D)(InstrInfo*) VG_REGPARM(1);
+
+ void (*log_1I1Dr)(InstrInfo*, Addr) VG_REGPARM(2);
+ void (*log_1I1Dw)(InstrInfo*, Addr) VG_REGPARM(2);
+ void (*log_1I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3);
+
+ void (*log_0I1Dr)(InstrInfo*, Addr) VG_REGPARM(2);
+ void (*log_0I1Dw)(InstrInfo*, Addr) VG_REGPARM(2);
+ void (*log_0I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3);
+
+ // function names of helpers (for debugging generated code)
+ Char *log_1I0D_name;
+ Char *log_1I1Dr_name, *log_1I1Dw_name, *log_1I2D_name;
+ Char *log_0I1Dr_name, *log_0I1Dw_name, *log_0I2D_name;
+};
+
+
+/*------------------------------------------------------------*/
+/*--- Functions ---*/
+/*------------------------------------------------------------*/
+
+/* from clo.c */
+
+void CLG_(set_clo_defaults)(void);
+void CLG_(update_fn_config)(fn_node*);
+Bool CLG_(process_cmd_line_option)(Char*);
+void CLG_(print_usage)(void);
+void CLG_(print_debug_usage)(void);
+
+/* from sim.c */
+struct event_sets {
+ EventSet *use, *Ir, *Dr, *Dw;
+ EventSet *D0, *D1r, *D1w, *D2;
+ EventSet *sim;
+ EventSet *full; /* sim plus user events */
+
+ /* offsets into eventsets */
+ Int off_sim_Ir, off_sim_Dr, off_sim_Dw;
+ Int off_full_Ir, off_full_Dr, off_full_Dw;
+ Int off_full_user, off_full_alloc, off_full_systime;
+};
+
+extern struct event_sets CLG_(sets);
+extern struct cachesim_if CLG_(cachesim);
+
+void CLG_(init_eventsets)(Int user);
+
+/* from main.c */
+Bool CLG_(get_debug_info)(Addr, Char filename[FILENAME_LEN],
+ Char fn_name[FN_NAME_LEN], UInt*, SegInfo**);
+void CLG_(collectBlockInfo)(IRBB* bbIn, UInt*, UInt*, Bool*);
+void CLG_(set_instrument_state)(Char*,Bool);
+void CLG_(dump_profile)(Char* trigger,Bool only_current_thread);
+void CLG_(zero_all_cost)(Bool only_current_thread);
+Int CLG_(get_dump_counter)(void);
+void CLG_(fini)(Int exitcode);
+
+/* from command.c */
+void CLG_(init_command)(Char* dir, Char* dumps);
+void CLG_(check_command)(void);
+void CLG_(finish_command)(void);
+
+/* from bb.c */
+void CLG_(init_bb_hash)(void);
+bb_hash* CLG_(get_bb_hash)(void);
+BB* CLG_(get_bb)(Addr addr, IRBB* bb_in, Bool *seen_before);
+void CLG_(delete_bb)(Addr addr);
+
+static __inline__ Addr bb_addr(BB* bb) /* address of BB start: file offset + object mapping offset */
+ { return bb->offset + bb->obj->offset; }
+static __inline__ Addr bb_jmpaddr(BB* bb) /* address of the BB's last instruction (its final exit) */
+ { return bb->instr[bb->instr_count-1].instr_offset + bb->offset + bb->obj->offset; }
+
+/* from fn.c */
+void CLG_(init_fn_array)(fn_array*);
+void CLG_(copy_current_fn_array)(fn_array* dst);
+fn_array* CLG_(get_current_fn_array)(void);
+void CLG_(set_current_fn_array)(fn_array*);
+UInt* CLG_(get_fn_entry)(Int n);
+
+void CLG_(init_obj_table)(void);
+obj_node* CLG_(get_obj_node)(SegInfo* si);
+file_node* CLG_(get_file_node)(obj_node*, Char* filename);
+fn_node* CLG_(get_fn_node)(BB* bb);
+
+/* from bbcc.c */
+void CLG_(init_bbcc_hash)(bbcc_hash* bbccs);
+void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst);
+bbcc_hash* CLG_(get_current_bbcc_hash)(void);
+void CLG_(set_current_bbcc_hash)(bbcc_hash*);
+void CLG_(forall_bbccs)(void (*func)(BBCC*));
+void CLG_(zero_bbcc)(BBCC* bbcc);
+BBCC* CLG_(get_bbcc)(BB* bb);
+BBCC* CLG_(clone_bbcc)(BBCC* orig, Context* cxt, Int rec_index);
+void CLG_(setup_bbcc)(BB* bb) VG_REGPARM(1);
+
+
+/* from jumps.c */
+void CLG_(init_jcc_hash)(jcc_hash*);
+void CLG_(copy_current_jcc_hash)(jcc_hash* dst);
+jcc_hash* CLG_(get_current_jcc_hash)(void);
+void CLG_(set_current_jcc_hash)(jcc_hash*);
+jCC* CLG_(get_jcc)(BBCC* from, UInt, BBCC* to);
+
+/* from callstack.c */
+void CLG_(init_call_stack)(call_stack*);
+void CLG_(copy_current_call_stack)(call_stack* dst);
+void CLG_(set_current_call_stack)(call_stack*);
+call_entry* CLG_(get_call_entry)(Int n);
+
+void CLG_(push_call_stack)(BBCC* from, UInt jmp, BBCC* to, Addr sp, Bool skip);
+void CLG_(pop_call_stack)(void);
+void CLG_(unwind_call_stack)(Addr sp, Int);
+
+/* from context.c */
+void CLG_(init_fn_stack)(fn_stack*);
+void CLG_(copy_current_fn_stack)(fn_stack*);
+fn_stack* CLG_(get_current_fn_stack)(void);
+void CLG_(set_current_fn_stack)(fn_stack*);
+
+void CLG_(init_cxt_table)(void);
+cxt_hash* CLG_(get_cxt_hash)(void);
+Context* CLG_(get_cxt)(fn_node** fn);
+void CLG_(push_cxt)(fn_node* fn);
+
+/* from threads.c */
+void CLG_(init_threads)(void);
+thread_info** CLG_(get_threads)(void);
+thread_info* CLG_(get_current_thread)(void);
+void CLG_(switch_thread)(ThreadId tid);
+void CLG_(forall_threads)(void (*func)(thread_info*));
+void CLG_(run_thread)(ThreadId tid);
+
+void CLG_(init_exec_state)(exec_state* es);
+void CLG_(init_exec_stack)(exec_stack*);
+void CLG_(copy_current_exec_stack)(exec_stack*);
+void CLG_(set_current_exec_stack)(exec_stack*);
+void CLG_(pre_signal)(ThreadId tid, Int sigNum, Bool alt_stack);
+void CLG_(post_signal)(ThreadId tid, Int sigNum);
+void CLG_(run_post_signal_on_call_stack_bottom)(void);
+
+/* from dump.c */
+extern FullCost CLG_(total_cost);
+void CLG_(init_files)(Char** dir, Char** file);
+Char* CLG_(get_dump_file_base)(void);
+
+
+/*------------------------------------------------------------*/
+/*--- Exported global variables ---*/
+/*------------------------------------------------------------*/
+
+extern CommandLineOptions CLG_(clo);
+extern Statistics CLG_(stat);
+extern EventMapping* CLG_(dumpmap);
+
+/* Function active counter array, indexed by function number */
+extern UInt* CLG_(fn_active_array);
+extern Bool CLG_(instrument_state);
+
+extern call_stack CLG_(current_call_stack);
+extern fn_stack CLG_(current_fn_stack);
+extern exec_state CLG_(current_state);
+extern ThreadId CLG_(current_tid);
+
+
+/*------------------------------------------------------------*/
+/*--- Debug output ---*/
+/*------------------------------------------------------------*/
+
+#if CLG_ENABLE_DEBUG
+
+#define CLG_DEBUGIF(x) \
+ if ( (CLG_(clo).verbose >x) && \
+ (CLG_(stat).bb_executions >= CLG_(clo).verbose_start))
+
+#define CLG_DEBUG(x,format,args...) \
+ CLG_DEBUGIF(x) { \
+ CLG_(print_bbno)(); \
+ VG_(printf)(format,##args); \
+ }
+
+#define CLG_ASSERT(cond) \
+ if (!(cond)) { \
+ CLG_(print_context)(); \
+ CLG_(print_bbno)(); \
+ tl_assert(cond); \
+ }
+
+#else
+#define CLG_DEBUGIF(x) if (0)
+#define CLG_DEBUG(x...) {}
+#define CLG_ASSERT(cond) tl_assert(cond);
+#endif
+
+/* from debug.c */
+void CLG_(print_bbno)(void);
+void CLG_(print_context)(void);
+void CLG_(print_jcc)(int s, jCC* jcc);
+void CLG_(print_bbcc)(int s, BBCC* bbcc, Bool);
+void CLG_(print_bbcc_fn)(BBCC* bbcc);
+void CLG_(print_execstate)(int s, exec_state* es);
+void CLG_(print_eventset)(int s, EventSet* es);
+void CLG_(print_cost)(int s, EventSet*, ULong* cost);
+void CLG_(print_bb)(int s, BB* bb);
+void CLG_(print_bbcc_cost)(int s, BBCC*);
+void CLG_(print_cxt)(int s, Context* cxt, int rec_index);
+void CLG_(print_short_jcc)(jCC* jcc);
+void CLG_(print_stackentry)(int s, int sp);
+void CLG_(print_addr)(Addr addr);
+void CLG_(print_addr_ln)(Addr addr);
+
+void* CLG_(malloc)(UWord s, char* f);
+void* CLG_(free)(void* p, char* f);
+#if 0
+#define CLG_MALLOC(x) CLG_(malloc)(x,__FUNCTION__)
+#define CLG_FREE(p) CLG_(free)(p,__FUNCTION__)
+#else
+#define CLG_MALLOC(x) VG_(malloc)(x)
+#define CLG_FREE(p) VG_(free)(p)
+#endif
+
+#endif /* CLG_GLOBAL */
diff --git a/callgrind/jumps.c b/callgrind/jumps.c
new file mode 100644
index 0000000..2a6a09a
--- /dev/null
+++ b/callgrind/jumps.c
@@ -0,0 +1,233 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- jumps.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+/*------------------------------------------------------------*/
+/*--- Jump Cost Center (JCC) operations, including Calls ---*/
+/*------------------------------------------------------------*/
+
+/* Initial number of buckets in the JCC hash table */
+#define N_JCC_INITIAL_ENTRIES 4437
+
+/* JCC hash of the currently running thread */
+jcc_hash current_jccs;
+
+void CLG_(init_jcc_hash)(jcc_hash* jccs) /* set up an empty JCC hash table */
+{
+ Int i;
+
+ CLG_ASSERT(jccs != 0);
+
+ jccs->size = N_JCC_INITIAL_ENTRIES;
+ jccs->entries = 0;
+ jccs->table = (jCC**) CLG_MALLOC(jccs->size * sizeof(jCC*));
+ jccs->spontaneous = 0; /* no calls with unknown caller seen yet */
+
+ for (i = 0; i < jccs->size; i++) /* all buckets start out empty */
+ jccs->table[i] = 0;
+}
+
+
+/* Save the running thread's JCC hash into <dst>.
+ * Shallow copy: <dst> shares the bucket table with current_jccs. */
+void CLG_(copy_current_jcc_hash)(jcc_hash* dst)
+{
+ CLG_ASSERT(dst != 0);
+
+ /* struct assignment copies size, entries, table and spontaneous */
+ *dst = current_jccs;
+}
+
+/* Install <h> as the JCC hash of the running thread.
+ * Only the container is copied; the bucket table itself is shared. */
+void CLG_(set_current_jcc_hash)(jcc_hash* h)
+{
+ CLG_ASSERT(h != 0);
+
+ /* struct assignment copies size, entries, table and spontaneous */
+ current_jccs = *h;
+}
+
+/* Hash a (caller BBCC, jump number, callee BBCC) triple into a bucket index. */
+__inline__
+static UInt jcc_hash_idx(BBCC* from, UInt jmp, BBCC* to, UInt size)
+{
+ UWord mix;
+
+ mix = (UWord)from;
+ mix += 7 * (UWord)to;
+ mix += 13 * (UWord)jmp;
+ return ((UInt)mix) % size;
+}
+
+/* Grow the JCC hash table (roughly doubled) and rehash every entry.
+ * On allocation failure the old table is silently kept. */
+static void resize_jcc_table(void)
+{
+ Int i, new_size, conflicts1 = 0, conflicts2 = 0;
+ jCC** new_table;
+ UInt new_idx;
+ jCC *curr_jcc, *next_jcc;
+
+ new_size = 2* current_jccs.size +3;
+ new_table = (jCC**) CLG_MALLOC(new_size * sizeof(jCC*));
+
+ if (!new_table) return; /* keep the old, smaller table */
+
+ for (i = 0; i < new_size; i++)
+ new_table[i] = NULL;
+
+ /* move each chained entry into its bucket under the new size */
+ for (i = 0; i < current_jccs.size; i++) {
+ if (current_jccs.table[i] == NULL) continue;
+
+ curr_jcc = current_jccs.table[i];
+ while (NULL != curr_jcc) {
+ next_jcc = curr_jcc->next_hash; /* save: prepend below rewrites next_hash */
+
+ new_idx = jcc_hash_idx(curr_jcc->from, curr_jcc->jmp,
+ curr_jcc->to, new_size);
+
+ curr_jcc->next_hash = new_table[new_idx];
+ new_table[new_idx] = curr_jcc;
+ if (curr_jcc->next_hash) {
+ conflicts1++; /* bucket already had >=1 entry */
+ if (curr_jcc->next_hash->next_hash)
+ conflicts2++; /* ...or even >=2 entries */
+ }
+
+ curr_jcc = next_jcc;
+ }
+ }
+
+ VG_(free)(current_jccs.table);
+
+
+ CLG_DEBUG(0, "Resize JCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
+ current_jccs.size, new_size,
+ current_jccs.entries, conflicts1, conflicts2);
+
+ current_jccs.size = new_size;
+ current_jccs.table = new_table;
+ CLG_(stat).jcc_hash_resizes++;
+}
+
+
+
+/* new jCC structure: a call was done to a BB of a BBCC
+ * for a spontaneous call, from is 0 (i.e. caller unknown)
+ */
+static jCC* new_jcc(BBCC* from, UInt jmp, BBCC* to)
+{
+ jCC* new;
+ UInt new_idx;
+
+ /* check fill degree of jcc hash table and resize if needed (>80%) */
+ current_jccs.entries++;
+ if (10 * current_jccs.entries / current_jccs.size > 8)
+ resize_jcc_table();
+
+ new = (jCC*) CLG_MALLOC(sizeof(jCC)); /* NOTE(review): result unchecked -- presumably CLG_MALLOC aborts on OOM */
+
+ new->from = from;
+ new->jmp = jmp;
+ new->to = to;
+ new->jmpkind = Ijk_Call;
+ new->call_counter = 0;
+ new->cost = 0; /* cost is allocated lazily */
+
+ /* insert into JCC chain of calling BBCC.
+ * This list is only used at dumping time */
+
+ if (from) {
+ new->next_from = from->jmp[jmp].jcc_list; /* prepend to the caller's per-exit list */
+ from->jmp[jmp].jcc_list = new;
+ }
+ else {
+ new->next_from = current_jccs.spontaneous; /* caller unknown: track in spontaneous list */
+ current_jccs.spontaneous = new;
+ }
+
+ /* insert into JCC hash table */
+ new_idx = jcc_hash_idx(from, jmp, to, current_jccs.size);
+ new->next_hash = current_jccs.table[new_idx]; /* prepend to bucket chain */
+ current_jccs.table[new_idx] = new;
+
+ CLG_(stat).distinct_jccs++;
+
+ CLG_DEBUGIF(3) {
+ VG_(printf)(" new_jcc (now %d): %p\n",
+ CLG_(stat).distinct_jccs, new);
+ }
+
+ return new;
+}
+
+
+/* get the jCC for a call arc (BBCC->BBCC); created on first use */
+jCC* CLG_(get_jcc)(BBCC* from, UInt jmp, BBCC* to)
+{
+ jCC* jcc;
+ UInt idx;
+
+ CLG_DEBUG(5, "+ get_jcc(bbcc %p/%d => bbcc %p)\n",
+ from, jmp, to);
+
+ /* first check last recently used JCC */
+ jcc = to->lru_to_jcc;
+ if (jcc && (jcc->from == from) && (jcc->jmp == jmp)) {
+ CLG_ASSERT(to == jcc->to);
+ CLG_DEBUG(5,"- get_jcc: [LRU to] jcc %p\n", jcc);
+ return jcc;
+ }
+
+ jcc = from->lru_from_jcc; /* second LRU slot, cached on the caller side */
+ if (jcc && (jcc->to == to) && (jcc->jmp == jmp)) {
+ CLG_ASSERT(from == jcc->from);
+ CLG_DEBUG(5, "- get_jcc: [LRU from] jcc %p\n", jcc);
+ return jcc;
+ }
+
+ CLG_(stat).jcc_lru_misses++;
+
+ /* full lookup: walk the hash bucket chain */
+ idx = jcc_hash_idx(from, jmp, to, current_jccs.size);
+ jcc = current_jccs.table[idx];
+
+ while(jcc) {
+ if ((jcc->from == from) &&
+ (jcc->jmp == jmp) &&
+ (jcc->to == to)) break;
+ jcc = jcc->next_hash;
+ }
+
+ if (!jcc)
+ jcc = new_jcc(from, jmp, to); /* first time this call arc is seen */
+
+ /* set LRU */
+ from->lru_from_jcc = jcc;
+ to->lru_to_jcc = jcc;
+
+ CLG_DEBUG(5, "- get_jcc(bbcc %p => bbcc %p)\n",
+ from, to);
+
+ return jcc;
+}
+
diff --git a/callgrind/main.c b/callgrind/main.c
new file mode 100644
index 0000000..dd19b3b
--- /dev/null
+++ b/callgrind/main.c
@@ -0,0 +1,1086 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call graph
+ profiling programs.
+
+ Copyright (C) 2002-2005, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This skin is derived from and contains code from Cachegrind
+ Copyright (C) 2002-2005 Nicholas Nethercote (njn25@cam.ac.uk)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "config.h"
+#include "callgrind.h"
+#include "global.h"
+
+#include <pub_tool_threadstate.h>
+
+/*------------------------------------------------------------*/
+/*--- Global variables ---*/
+/*------------------------------------------------------------*/
+
+/* for all threads */
+CommandLineOptions CLG_(clo);
+Statistics CLG_(stat);
+Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
+
+/* thread and signal handler specific */
+exec_state CLG_(current_state);
+
+
+/*------------------------------------------------------------*/
+/*--- Statistics ---*/
+/*------------------------------------------------------------*/
+
+/* Reset every profiling/statistics counter in *s to zero.
+ * Called once at post-CLO-init time; the groups below mirror the
+ * field groups of the Statistics struct (event counters, distinct
+ * object counts, resize counts, debug-info hit counts, LRU misses). */
+static void CLG_(init_statistics)(Statistics* s)
+{
+  /* dynamic event counters */
+  s->call_counter        = 0;
+  s->jcnd_counter        = 0;
+  s->jump_counter        = 0;
+  s->rec_call_counter    = 0;
+  s->ret_counter         = 0;
+  s->bb_executions       = 0;
+
+  s->context_counter     = 0;
+  s->bb_retranslations   = 0;
+
+  /* distinct-entity counters */
+  s->distinct_objs       = 0;
+  s->distinct_files      = 0;
+  s->distinct_fns        = 0;
+  s->distinct_contexts   = 0;
+  s->distinct_bbs        = 0;
+  s->distinct_bbccs      = 0;
+  s->distinct_instrs     = 0;
+  s->distinct_skips      = 0;
+
+  /* container-resize counters */
+  s->bb_hash_resizes     = 0;
+  s->bbcc_hash_resizes   = 0;
+  s->jcc_hash_resizes    = 0;
+  s->cxt_hash_resizes    = 0;
+  s->fn_array_resizes    = 0;
+  s->call_stack_resizes  = 0;
+  s->fn_stack_resizes    = 0;
+
+  /* debug-info availability + LRU statistics */
+  s->full_debug_BBs      = 0;
+  s->file_line_debug_BBs = 0;
+  s->fn_name_debug_BBs   = 0;
+  s->no_debug_BBs        = 0;
+  s->bbcc_lru_misses     = 0;
+  s->jcc_lru_misses      = 0;
+  s->cxt_lru_misses      = 0;
+  s->bbcc_clones         = 0;
+}
+
+
+
+
+/*------------------------------------------------------------*/
+/*--- Cache simulation instrumentation phase ---*/
+/*------------------------------------------------------------*/
+
+
+/* True iff a load and a store address expression are the same IR atom,
+ * i.e. the instruction is a read-modify-write on one location. */
+static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+{
+  // I'm assuming that for 'modify' instructions, that Vex always makes
+  // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp
+  // expressions, or both Const expressions.
+  CLG_ASSERT(isIRAtom(loadAddrExpr));
+  CLG_ASSERT(isIRAtom(storeAddrExpr));
+  return eqIRAtom(loadAddrExpr, storeAddrExpr);
+}
+
+/* Insert a call to the cache-simulator logging helper that matches the
+ * memory behaviour of the current instruction (no access / load /
+ * store / modify / load+store), and return the EventSet describing the
+ * costs that helper will account.
+ *
+ * 'instrIssued' is True when the instruction-fetch part was already
+ * logged by an earlier simcall for the same instruction (this happens
+ * when a conditional exit splits the instruction); in that case the
+ * 0I-variants of the helpers are used.
+ * Returns 0 if the configured simulator needs no instrumentation for
+ * this access type (helper address unset). */
+static
+EventSet* insert_simcall(IRBB* bbOut, InstrInfo* ii, UInt dataSize,
+                         Bool instrIssued,
+                         IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+{
+    HChar*    helperName;
+    void*     helperAddr;
+    Int       argc;
+    EventSet* es;
+    IRExpr   *arg1, *arg2 = 0, *arg3 = 0, **argv;
+    IRDirty* di;
+
+    /* Check type of original instruction regarding memory access,
+     * and collect info to be able to generate fitting helper call
+     */
+    if (!loadAddrExpr && !storeAddrExpr) {
+	// no load/store
+	CLG_ASSERT(0 == dataSize);
+	if (instrIssued) {
+	    helperName = 0;
+	    helperAddr = 0;
+	}
+	else {
+	    helperName = CLG_(cachesim).log_1I0D_name;
+	    helperAddr = CLG_(cachesim).log_1I0D;
+	}
+	argc = 1;
+	es = CLG_(sets).D0;
+
+    } else if (loadAddrExpr && !storeAddrExpr) {
+	// load
+	CLG_ASSERT( isIRAtom(loadAddrExpr) );
+	if (instrIssued) {
+	    helperName = CLG_(cachesim).log_0I1Dr_name;
+	    helperAddr = CLG_(cachesim).log_0I1Dr;
+	}
+	else {
+	    helperName = CLG_(cachesim).log_1I1Dr_name;
+	    helperAddr = CLG_(cachesim).log_1I1Dr;
+	}
+	argc = 2;
+	arg2 = loadAddrExpr;
+	es = CLG_(sets).D1r;
+
+    } else if (!loadAddrExpr && storeAddrExpr) {
+	// store
+	CLG_ASSERT( isIRAtom(storeAddrExpr) );
+	if (instrIssued) {
+	    helperName = CLG_(cachesim).log_0I1Dw_name;
+	    helperAddr = CLG_(cachesim).log_0I1Dw;
+	}
+	else {
+	    helperName = CLG_(cachesim).log_1I1Dw_name;
+	    helperAddr = CLG_(cachesim).log_1I1Dw;
+	}
+	argc = 2;
+	arg2 = storeAddrExpr;
+	es = CLG_(sets).D1w;
+
+    } else {
+	CLG_ASSERT( loadAddrExpr && storeAddrExpr );
+	CLG_ASSERT( isIRAtom(loadAddrExpr) );
+	CLG_ASSERT( isIRAtom(storeAddrExpr) );
+
+	if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) {
+	    /* modify: suppose write access, as this is
+	     * more resource consuming (as in callgrind for VG2)
+	     * Cachegrind does a read here (!)
+	     * DISCUSS: Best way depends on simulation model?
+	     */
+	    if (instrIssued) {
+		helperName = CLG_(cachesim).log_0I1Dw_name;
+		helperAddr = CLG_(cachesim).log_0I1Dw;
+	    }
+	    else {
+		helperName = CLG_(cachesim).log_1I1Dw_name;
+		helperAddr = CLG_(cachesim).log_1I1Dw;
+	    }
+	    argc = 2;
+	    arg2 = storeAddrExpr;
+	    es = CLG_(sets).D1w;
+
+	} else {
+	    // load/store
+	    if (instrIssued) {
+		helperName = CLG_(cachesim).log_0I2D_name;
+		helperAddr = CLG_(cachesim).log_0I2D;
+	    }
+	    else {
+		helperName = CLG_(cachesim).log_1I2D_name;
+		helperAddr = CLG_(cachesim).log_1I2D;
+	    }
+	    argc = 3;
+	    arg2 = loadAddrExpr;
+	    arg3 = storeAddrExpr;
+	    es = CLG_(sets).D2;
+	}
+    }
+
+    /* helper could be unset depending on the simulator used */
+    if (helperAddr == 0) return 0;
+
+    /* Setup 1st arg: InstrInfo */
+    arg1 = mkIRExpr_HWord( (HWord)ii );
+
+    // Add call to the instrumentation function
+    if (argc == 1)
+	argv = mkIRExprVec_1(arg1);
+    else if (argc == 2)
+	argv = mkIRExprVec_2(arg1, arg2);
+    else if (argc == 3)
+	argv = mkIRExprVec_3(arg1, arg2, arg3);
+    else
+	VG_(tool_panic)("argc... not 1 or 2 or 3?");
+
+    di = unsafeIRDirty_0_N( argc, helperName, helperAddr, argv);
+    addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
+
+    return es;
+}
+
+
+/* Instrumentation before a conditional jump or at the end
+ * of each original instruction.
+ * Fills the InstrInfo struct if not seen before; on a re-translation
+ * (bb_seen_before) the recorded values are asserted unchanged instead.
+ * '*cost_offset' accumulates the running offset into the BB's cost
+ * array and is advanced by the size of the returned event set.
+ */
+static
+void endOfInstr(IRBB* bbOut, InstrInfo* ii, Bool bb_seen_before,
+		UInt instr_offset, UInt instrLen, UInt dataSize,
+		UInt* cost_offset, Bool instrIssued,
+		IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+{
+   IRType    wordTy;
+   EventSet* es;
+
+   // Stay sane ...
+   CLG_ASSERT(sizeof(HWord) == sizeof(void*));
+   if (sizeof(HWord) == 4) {
+      wordTy = Ity_I32;
+   } else
+   if (sizeof(HWord) == 8) {
+      wordTy = Ity_I64;
+   } else {
+      VG_(tool_panic)("endOfInstr: strange word size");
+   }
+
+   if (loadAddrExpr)
+      CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr));
+   if (storeAddrExpr)
+      CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr));
+
+   // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be
+   // done inaccurately, but they're very rare and this avoids errors from
+   // hitting more than two cache lines in the simulation.
+   if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE;
+
+   /* returns 0 if simulator needs no instrumentation */
+   es = insert_simcall(bbOut, ii, dataSize, instrIssued,
+                       loadAddrExpr, storeAddrExpr);
+
+   if (bb_seen_before) {
+      /* re-translation: recorded InstrInfo must be identical */
+      CLG_ASSERT(ii->instr_offset == instr_offset);
+      CLG_ASSERT(ii->instr_size == instrLen);
+      CLG_ASSERT(ii->data_size == dataSize);
+      CLG_ASSERT(ii->cost_offset == *cost_offset);
+      CLG_ASSERT(ii->eventset == es);
+   }
+   else {
+      ii->instr_offset = instr_offset;
+      ii->instr_size = instrLen;
+      ii->data_size = dataSize;
+      ii->cost_offset = *cost_offset;
+      ii->eventset = es;
+
+      CLG_(stat).distinct_instrs++;
+   }
+
+   *cost_offset += es ? es->size : 0;
+
+   CLG_DEBUG(5, "  Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n",
+	     instr_offset, instrLen, dataSize,
+	     es ? es->name : (Char*)"(no Instr)",
+	     es ? es->size : 0);
+}
+
+#if defined(VG_BIGENDIAN)
+# define CLGEndness Iend_BE
+#elif defined(VG_LITTLEENDIAN)
+# define CLGEndness Iend_LE
+#else
+# error "Unknown endianness"
+#endif
+
+/* Convert a 32- or 64-bit IR constant into a host Addr.
+ * Panics if the constant's tag does not match the host word size. */
+static
+Addr IRConst2Addr(IRConst* con)
+{
+    Addr addr;
+
+    if (sizeof(Addr) == 4) {
+	CLG_ASSERT( con->tag == Ico_U32 );
+	addr = con->Ico.U32;
+    }
+    else if (sizeof(Addr) == 8) {
+	CLG_ASSERT( con->tag == Ico_U64 );
+	addr = con->Ico.U64;
+    }
+    else
+	VG_(tool_panic)("Callgrind: invalid Addr type");
+
+    return addr;
+}
+
+/* First pass over a BB to instrument, counting instructions and jumps
+ * This is needed for the size of the BB struct to allocate
+ *
+ * Also detects whether VEX inverted the last conditional jump of the
+ * block (its Exit targets the instruction immediately following the
+ * block); the result is returned in *cjmp_inverted.
+ *
+ * Called from CLG_(get_bb)
+ */
+void CLG_(collectBlockInfo)(IRBB* bbIn,
+			    /*INOUT*/ UInt* instrs,
+			    /*INOUT*/ UInt* cjmps,
+			    /*INOUT*/ Bool* cjmp_inverted)
+{
+    Int i;
+    IRStmt* st;
+    Addr instrAddr =0, jumpDst;
+    UInt instrLen = 0;
+    Bool toNextInstr = False;
+
+    // Ist_Exit has to be ignored in preamble code, before first IMark:
+    // preamble code is added by VEX for self modifying code, and has
+    // nothing to do with client code
+    Bool inPreamble = True;
+
+    if (!bbIn) return;
+
+    for (i = 0; i < bbIn->stmts_used; i++) {
+	st = bbIn->stmts[i];
+	if (Ist_IMark == st->tag) {
+	    inPreamble = False;
+
+	    instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
+	    instrLen  = st->Ist.IMark.len;
+
+	    (*instrs)++;
+	    toNextInstr = False;
+	}
+	if (inPreamble) continue;
+	if (Ist_Exit == st->tag) {
+	    jumpDst = IRConst2Addr(st->Ist.Exit.dst);
+	    toNextInstr =  (jumpDst == instrAddr + instrLen);
+
+	    (*cjmps)++;
+	}
+    }
+
+    /* if the last instruction of the BB conditionally jumps to the next
+     * instruction (= first instruction of next BB in memory), the jump
+     * condition was inverted by VEX.
+     */
+    *cjmp_inverted = toNextInstr;
+}
+
+/* Inspect one flat IR statement of the original instruction and collect
+ * its memory-access properties: the instruction address/length (from
+ * IMark), at most one load and one store address expression, and the
+ * access size. Dirty helpers that touch memory are folded in as
+ * loads/stores as well. Panics on an unhandled statement tag. */
+static
+void collectStatementInfo(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st,
+			  Addr* instrAddr, UInt* instrLen,
+			  IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
+			  UInt* dataSize, IRType hWordTy)
+{
+   CLG_ASSERT(isFlatIRStmt(st));
+
+   switch (st->tag) {
+   case Ist_NoOp:
+      break;
+
+   case Ist_AbiHint:
+      /* ABI hints aren't interesting.  Ignore. */
+      break;
+
+   case Ist_IMark:
+      /* st->Ist.IMark.addr is a 64-bit int.  ULong_to_Ptr casts this
+         to the host's native pointer type; if that is 32 bits then it
+         discards the upper 32 bits.  If we are cachegrinding on a
+         32-bit host then we are also ensured that the guest word size
+         is 32 bits, due to the assertion in cg_instrument that the
+         host and guest word sizes must be the same.  Hence
+         st->Ist.IMark.addr will have been derived from a 32-bit guest
+         code address and truncation of it is safe.  I believe this
+         assignment should be correct for both 32- and 64-bit
+         machines. */
+      *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
+      *instrLen =        st->Ist.IMark.len;
+      break;
+
+   case Ist_Tmp: {
+      IRExpr* data = st->Ist.Tmp.data;
+      if (data->tag == Iex_Load) {
+         IRExpr* aexpr = data->Iex.Load.addr;
+         CLG_ASSERT( isIRAtom(aexpr) );
+         // Note also, endianness info is ignored.  I guess that's not
+         // interesting.
+         // XXX: repe cmpsb does two loads... the first one is ignored here!
+         //tl_assert( NULL == *loadAddrExpr );          // XXX: ???
+         *loadAddrExpr = aexpr;
+         *dataSize = sizeofIRType(data->Iex.Load.ty);
+      }
+      break;
+   }
+
+   case Ist_Store: {
+      IRExpr* data  = st->Ist.Store.data;
+      IRExpr* aexpr = st->Ist.Store.addr;
+      CLG_ASSERT( isIRAtom(aexpr) );
+      if ( NULL == *storeAddrExpr ) {
+          /* this is a kludge: ignore all except the first store from
+             an instruction. */
+          *storeAddrExpr = aexpr;
+          *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
+      }
+      break;
+   }
+
+   case Ist_Dirty: {
+      IRDirty* d = st->Ist.Dirty.details;
+      if (d->mFx != Ifx_None) {
+         /* This dirty helper accesses memory.  Collect the
+            details. */
+         CLG_ASSERT(d->mAddr != NULL);
+         CLG_ASSERT(d->mSize != 0);
+         *dataSize = d->mSize;
+         if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
+            *loadAddrExpr = d->mAddr;
+         if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
+            *storeAddrExpr = d->mAddr;
+      } else {
+         CLG_ASSERT(d->mAddr == NULL);
+         CLG_ASSERT(d->mSize == 0);
+      }
+      break;
+   }
+
+   case Ist_Put:
+   case Ist_PutI:
+   case Ist_MFence:
+   case Ist_Exit:
+       break;
+
+   default:
+      VG_(printf)("\n");
+      ppIRStmt(st);
+      VG_(printf)("\n");
+      VG_(tool_panic)("Callgrind: unhandled IRStmt");
+   }
+}
+
+/* Emit an IR store of the 32-bit constant 'val' to the fixed address
+ * 'addr' (used to update CLG_(current_state).jmps_passed at runtime).
+ * The address constant is sized per the host word type. */
+static
+void addConstMemStoreStmt( IRBB* bbOut, UWord addr, UInt val, IRType hWordTy)
+{
+    addStmtToIRBB( bbOut,
+		   IRStmt_Store(CLGEndness,
+				IRExpr_Const(hWordTy == Ity_I32 ?
+					     IRConst_U32( addr ) :
+					     IRConst_U64( addr )),
+				IRExpr_Const(IRConst_U32(val)) ));
+}
+
+/* Main instrumentation entry point: translate one guest basic block.
+ *
+ * For every original instruction a simulator helper call is inserted
+ * (via endOfInstr/insert_simcall), with an extra call before each
+ * conditional exit so side exits are accounted correctly. A single
+ * setup_bbcc call at the block head attaches costs to the right BBCC,
+ * and constant stores keep CLG_(current_state).jmps_passed in sync
+ * with how many conditional jumps were passed.
+ * Returns bbIn unchanged when instrumentation is switched off. */
+static
+IRBB* CLG_(instrument)( VgCallbackClosure* closure,
+			IRBB* bbIn,
+			VexGuestLayout* layout,
+			VexGuestExtents* vge,
+			IRType gWordTy, IRType hWordTy )
+{
+   Int      i;
+   IRBB*    bbOut;
+   IRStmt*  st, *stnext;
+   Addr     instrAddr, origAddr;
+   UInt     instrLen = 0, dataSize;
+   UInt     instrCount, costOffset;
+   IRExpr  *loadAddrExpr, *storeAddrExpr;
+
+   BB*         bb;
+
+   IRDirty* di;
+   IRExpr  *arg1, **argv;
+
+   Bool        bb_seen_before     = False;
+   UInt        cJumps = 0, cJumpsCorrected;
+   Bool        beforeIBoundary, instrIssued;
+
+   if (gWordTy != hWordTy) {
+      /* We don't currently support this case. */
+      VG_(tool_panic)("host/guest word size mismatch");
+   }
+
+   // No instrumentation if it is switched off
+   if (! CLG_(instrument_state)) {
+       CLG_DEBUG(5, "instrument(BB %p) [Instrumentation OFF]\n",
+		 (Addr)closure->readdr);
+       return bbIn;
+   }
+
+   CLG_DEBUG(3, "+ instrument(BB %p)\n", (Addr)closure->readdr);
+
+   /* Set up BB for instrumented IR */
+   bbOut           = emptyIRBB();
+   bbOut->tyenv    = dopyIRTypeEnv(bbIn->tyenv);
+   bbOut->next     = dopyIRExpr(bbIn->next);
+   bbOut->jumpkind = bbIn->jumpkind;
+
+   // Copy verbatim any IR preamble preceding the first IMark
+   i = 0;
+   while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
+      addStmtToIRBB( bbOut, bbIn->stmts[i] );
+      i++;
+   }
+
+   // Get the first statement, and origAddr from it
+   CLG_ASSERT(bbIn->stmts_used > 0);
+   st = bbIn->stmts[i];
+   CLG_ASSERT(Ist_IMark == st->tag);
+   instrAddr = origAddr = (Addr)st->Ist.IMark.addr;
+   CLG_ASSERT(origAddr == st->Ist.IMark.addr);  // XXX: check no overflow
+
+   /* Get BB (creating if necessary).
+    * JS: The hash table is keyed with orig_addr_noredir -- important!
+    * JW: Why? If it is because of different chasing of the redirection,
+    *     this is not needed, as chasing is switched off in callgrind
+    */
+   bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before);
+   //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before);
+
+   /*
+    * Precondition:
+    * - jmps_passed has number of cond.jumps passed in last executed BB
+    * - current_bbcc has a pointer to the BBCC of the last executed BB
+    *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
+    *     current_bbcc->bb->jmp_addr
+    *   gives the address of the jump source.
+    *
+    * The BBCC setup does 2 things:
+    * - trace call:
+    *   * Unwind own call stack, i.e sync our ESP with real ESP
+    *     This is for ESP manipulation (longjmps, C++ exec handling) and RET
+    *   * For CALLs or JMPs crossing objects, record call arg +
+    *     push are on own call stack
+    *
+    * - prepare for cache log functions:
+    *   Set current_bbcc to BBCC that gets the costs for this BB execution
+    *   attached
+    */
+
+   // helper call to setup_bbcc, with pointer to basic block info struct as argument
+   arg1 = mkIRExpr_HWord( (HWord)bb );
+   argv = mkIRExprVec_1(arg1);
+   di = unsafeIRDirty_0_N( 1, "setup_bbcc", & CLG_(setup_bbcc), argv);
+   addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
+
+   instrCount = 0;
+   costOffset = 0;
+
+   // loop for each host instruction (starting from 'i')
+   do {
+
+      // We should be at an IMark statement
+      CLG_ASSERT(Ist_IMark == st->tag);
+
+      // Reset stuff for this original instruction
+      loadAddrExpr = storeAddrExpr = NULL;
+      instrIssued = False;
+      dataSize = 0;
+
+      // Process all the statements for this original instruction (ie. until
+      // the next IMark statement, or the end of the block)
+      do {
+	  i++;
+	  stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL );
+	  beforeIBoundary = !stnext || (Ist_IMark == stnext->tag);
+	  collectStatementInfo(bbIn->tyenv, bbOut, st, &instrAddr, &instrLen,
+			       &loadAddrExpr, &storeAddrExpr, &dataSize, hWordTy);
+
+	  // instrument a simulator call before conditional jumps
+	  if (st->tag == Ist_Exit) {
+	      // Nb: instrLen will be zero if Vex failed to decode it.
+	      // Also Client requests can appear to be very large (eg. 18
+	      // bytes on x86) because they are really multiple instructions.
+	      CLG_ASSERT( 0 == instrLen ||
+			  bbIn->jumpkind == Ijk_ClientReq ||
+			  (instrLen >= VG_MIN_INSTR_SZB &&
+			   instrLen <= VG_MAX_INSTR_SZB) );
+
+	      // Add instrumentation before this statement
+	      endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
+			 instrAddr - origAddr, instrLen, dataSize, &costOffset,
+			 instrIssued, loadAddrExpr, storeAddrExpr);
+
+	      // prepare for a possible further simcall in same host instr
+	      loadAddrExpr = storeAddrExpr = NULL;
+	      instrIssued = True;
+
+	      if (!bb_seen_before) {
+		  bb->jmp[cJumps].instr = instrCount;
+		  bb->jmp[cJumps].skip = False;
+	      }
+
+	      /* Update global variable jmps_passed (this is before the jump!)
+	       * A correction is needed if VEX inverted the last jump condition
+	       */
+	      cJumpsCorrected = cJumps;
+	      if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++;
+	      addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
+				    cJumpsCorrected, hWordTy);
+
+	      cJumps++;
+	  }
+
+	  addStmtToIRBB( bbOut, st );
+	  st = stnext;
+      }
+      while (!beforeIBoundary);
+
+      // Add instrumentation for this original instruction.
+      if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr !=0))
+	  endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
+		     instrAddr - origAddr, instrLen, dataSize, &costOffset,
+		     instrIssued, loadAddrExpr, storeAddrExpr);
+
+      instrCount++;
+   }
+   while (st);
+
+   /* Always update global variable jmps_passed (at end of BB)
+    * A correction is needed if VEX inverted the last jump condition
+    */
+   cJumpsCorrected = cJumps;
+   if (bb->cjmp_inverted) cJumpsCorrected--;
+   addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
+			 cJumpsCorrected, hWordTy);
+
+   /* This stores the instr of the call/ret at BB end */
+   bb->jmp[cJumps].instr = instrCount-1;
+
+   CLG_ASSERT(bb->cjmp_count == cJumps);
+   CLG_ASSERT(bb->instr_count == instrCount);
+
+   instrAddr += instrLen;
+   if (bb_seen_before) {
+      /* re-translation: recorded block geometry must be identical */
+      CLG_ASSERT(bb->instr_len == instrAddr - origAddr);
+      CLG_ASSERT(bb->cost_count == costOffset);
+      CLG_ASSERT(bb->jmpkind == bbIn->jumpkind);
+   }
+   else {
+      bb->instr_len = instrAddr - origAddr;
+      bb->cost_count = costOffset;
+      bb->jmpkind = bbIn->jumpkind;
+   }
+
+   CLG_DEBUG(3, "- instrument(BB %p): byteLen %u, CJumps %u, CostLen %u\n",
+	     origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count);
+   if (cJumps>0) {
+       CLG_DEBUG(3, "                     [ ");
+       for (i=0;i<cJumps;i++)
+	   CLG_DEBUG(3, "%d ", bb->jmp[i].instr);
+       CLG_DEBUG(3, "], last inverted: %s \n", bb->cjmp_inverted ? "yes":"no");
+   }
+
+  return bbOut;
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Discarding BB info ---*/
+/*--------------------------------------------------------------------*/
+
+// Called when a translation is removed from the translation cache for
+// any reason at all: to free up space, because the guest code was
+// unmapped or modified, or for any arbitrary reason.
+// Drops the corresponding BB record from callgrind's hash table.
+static
+void clg_discard_basic_block_info ( Addr64 orig_addr64, VexGuestExtents vge )
+{
+    Addr orig_addr = (Addr)orig_addr64;
+
+    tl_assert(vge.n_used > 0);
+
+   if (0)
+      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
+                   (void*)(Addr)orig_addr,
+                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
+
+   // Get BB info, remove from table, free BB info.  Simple!  Note that we
+   // use orig_addr, not the first instruction address in vge.
+   CLG_(delete_bb)(orig_addr);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- CLG_(fini)() and related function ---*/
+/*------------------------------------------------------------*/
+
+
+
+/* Zero the accumulated costs of one thread: reset the enter-costs of
+ * all active call-stack entries and all BBCC counters, and remember
+ * the current cost as the thread's last-dump baseline.
+ * NOTE(review): the call stack walked here is the global
+ * CLG_(current_call_stack), not something derived from 't'; this
+ * presumably assumes the callback runs with 't' as current thread —
+ * confirm against CLG_(forall_threads). */
+static void zero_thread_cost(thread_info* t)
+{
+  Int i;
+
+  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
+    if (!CLG_(current_call_stack).entry[i].jcc) continue;
+
+    /* reset call counters to current for active calls */
+    CLG_(copy_cost)( CLG_(sets).full,
+		    CLG_(current_call_stack).entry[i].enter_cost,
+		    CLG_(current_state).cost );
+  }
+
+  CLG_(forall_bbccs)(CLG_(zero_bbcc));
+
+  /* set counter for last dump */
+  CLG_(copy_cost)( CLG_(sets).full,
+		  t->lastdump_cost, CLG_(current_state).cost );
+}
+
+/* Zero cost counters for the current thread only, or for all threads. */
+void CLG_(zero_all_cost)(Bool only_current_thread)
+{
+  if (VG_(clo_verbosity) > 1)
+    VG_(message)(Vg_DebugMsg, "  Zeroing costs...");
+
+  if (only_current_thread)
+    zero_thread_cost(CLG_(get_current_thread)());
+  else
+    CLG_(forall_threads)(zero_thread_cost);
+
+  if (VG_(clo_verbosity) > 1)
+    VG_(message)(Vg_DebugMsg, "  ...done");
+}
+
+/* Pop everything off a thread's signal-handler and call stacks, so all
+ * pending costs get summed up. ('t' is unused; the per-thread state is
+ * accessed via the current-thread globals — forall_threads callback
+ * signature.) */
+static
+void unwind_thread(thread_info* t)
+{
+  /* unwind signal handlers */
+  while(CLG_(current_state).sig !=0)
+    CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
+
+  /* unwind regular call stack */
+  while(CLG_(current_call_stack).sp>0)
+    CLG_(pop_call_stack)();
+}
+
+/* Oops, this can go wrong... (not part of the public tool API) */
+extern void VG_(discard_translations) ( Addr64 start, ULong range );
+
+/* Switch instrumentation on or off. Discards all existing translations
+ * (so blocks get re-translated with/without instrumentation), unwinds
+ * all thread call stacks and clears the cache simulator state.
+ * 'reason' is only used for logging. No-op if already in 'state'. */
+void CLG_(set_instrument_state)(Char* reason, Bool state)
+{
+  if (CLG_(instrument_state) == state) {
+    CLG_DEBUG(2, "%s: instrumentation already %s\n",
+	     reason, state ? "ON" : "OFF");
+    return;
+  }
+  CLG_(instrument_state) = state;
+  CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
+	   reason, state ? "ON" : "OFF");
+
+  VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);
+
+  /* reset internal state: call stacks, simulator */
+  CLG_(forall_threads)(unwind_thread);
+  (*CLG_(cachesim).clear)();
+  if (0)
+    CLG_(forall_threads)(zero_thread_cost);
+
+  if (!state)
+    CLG_(init_exec_state)( &CLG_(current_state) );
+
+  if (VG_(clo_verbosity) > 1)
+    VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
+		 reason, state ? "ON" : "OFF");
+}
+
+
+/* Handle callgrind client requests ('C','T' namespace): dump, zero,
+ * toggle collection, start/stop instrumentation. Returns False for
+ * requests not addressed to this tool. *ret is always set to 0 for
+ * handled requests (the value is meaningless to callers). */
+static
+Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
+{
+   if (!VG_IS_TOOL_USERREQ('C','T',args[0]))
+      return False;
+
+   switch(args[0]) {
+   case VG_USERREQ__DUMP_STATS:
+      CLG_(dump_profile)("Client Request", True);
+      *ret = 0;                 /* meaningless */
+      break;
+
+   case VG_USERREQ__DUMP_STATS_AT:
+     {
+       /* args[1] is a user-supplied dump position marker */
+       Char buf[512];
+       VG_(sprintf)(buf,"Client Request: %d", args[1]);
+       CLG_(dump_profile)(buf, True);
+       *ret = 0;                 /* meaningless */
+     }
+     break;
+
+   case VG_USERREQ__ZERO_STATS:
+     CLG_(zero_all_cost)(True);
+      *ret = 0;                 /* meaningless */
+      break;
+
+   case VG_USERREQ__TOGGLE_COLLECT:
+     CLG_(current_state).collect = !CLG_(current_state).collect;
+     CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
+	      CLG_(current_state).collect ? "ON" : "OFF");
+     *ret = 0;                 /* meaningless */
+     break;
+
+   case VG_USERREQ__START_INSTRUMENTATION:
+     CLG_(set_instrument_state)("Client Request", True);
+     *ret = 0;                 /* meaningless */
+     break;
+
+   case VG_USERREQ__STOP_INSTRUMENTATION:
+     CLG_(set_instrument_state)("Client Request", False);
+     *ret = 0;                 /* meaningless */
+     break;
+
+   default:
+      return False;
+   }
+
+   return True;
+}
+
+
+/* Syscall Timing */
+
+/* struct timeval syscalltime[VG_N_THREADS]; */
+#if CLG_MICROSYSTIME
+#include <sys/time.h>
+#include <sys/syscall.h>
+extern Int VG_(do_syscall) ( UInt, ... );
+
+ULong syscalltime[VG_N_THREADS];
+#else
+UInt syscalltime[VG_N_THREADS];
+#endif
+
+/* Pre-syscall hook: when --collect-systime is active, record the
+ * current time for this thread (microseconds via gettimeofday if
+ * CLG_MICROSYSTIME, milliseconds otherwise). */
+static
+void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno)
+{
+  if (CLG_(clo).collect_systime) {
+#if CLG_MICROSYSTIME
+    struct vki_timeval tv_now;
+    VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
+    syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
+#else
+    syscalltime[tid] = VG_(read_millisecond_timer)();
+#endif
+  }
+}
+
+/* Post-syscall hook: when --collect-systime is active, compute the
+ * time spent in the syscall since pre_syscalltime() and account it:
+ * one syscall count at offset 'o' and the elapsed time at 'o+1', both
+ * in the current state cost and in the (lazily allocated) 'skipped'
+ * cost array of the current BBCC. */
+static
+void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, SysRes res)
+{
+  if (CLG_(clo).collect_systime) {
+    Int o = CLG_(sets).off_full_systime;
+#if CLG_MICROSYSTIME
+    struct vki_timeval tv_now;
+    ULong diff;
+
+    VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
+    diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
+#else
+    UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
+#endif
+
+    /* BUGFIX: format was "%ull", which prints the value as a plain
+     * unsigned int followed by the literal characters "ll". Use "%llu"
+     * with an explicit ULong cast so both the UInt and ULong variants
+     * of 'diff' are printed correctly. */
+    CLG_DEBUG(0,"  Time (Off %d) for Syscall %d: %llu\n", o, syscallno,
+	      (ULong)diff);
+
+    if (o<0) return;
+
+    CLG_(current_state).cost[o] ++;
+    CLG_(current_state).cost[o+1] += diff;
+    /* allocate the BBCC's skipped-cost array on first use */
+    if (!CLG_(current_state).bbcc->skipped)
+      CLG_(init_cost_lz)(CLG_(sets).full,
+			 &(CLG_(current_state).bbcc->skipped));
+    CLG_(current_state).bbcc->skipped[o] ++;
+    CLG_(current_state).bbcc->skipped[o+1] += diff;
+  }
+}
+
+/* Tool shutdown: finish the cache simulation, unwind all thread call
+ * stacks so pending costs are summed, write the final profile dump,
+ * shut down the command channel, and (at verbosity > 1) print internal
+ * hash-table / counter statistics followed by the collected totals. */
+static
+void finish(void)
+{
+  char buf[RESULTS_BUF_LEN];
+
+  CLG_DEBUG(0, "finish()\n");
+
+  (*CLG_(cachesim).finish)();
+
+  /* pop all remaining items from CallStack for correct sum
+   */
+  CLG_(forall_threads)(unwind_thread);
+
+  CLG_(dump_profile)(0, False);
+
+  CLG_(finish_command)();
+
+  if (VG_(clo_verbosity) == 0) return;
+
+  /* Hash table stats */
+  if (VG_(clo_verbosity) > 1) {
+    int BB_lookups =
+      CLG_(stat).full_debug_BBs +
+      CLG_(stat).fn_name_debug_BBs +
+      CLG_(stat).file_line_debug_BBs +
+      CLG_(stat).no_debug_BBs;
+
+    VG_(message)(Vg_DebugMsg, "");
+    VG_(message)(Vg_DebugMsg, "Distinct objects: %d",
+		 CLG_(stat).distinct_objs);
+    VG_(message)(Vg_DebugMsg, "Distinct files:   %d",
+		 CLG_(stat).distinct_files);
+    VG_(message)(Vg_DebugMsg, "Distinct fns:     %d",
+		 CLG_(stat).distinct_fns);
+    VG_(message)(Vg_DebugMsg, "Distinct contexts:%d",
+		 CLG_(stat).distinct_contexts);
+    VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d",
+		 CLG_(stat).distinct_bbs);
+    VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)",
+		 CLG_(costarray_entries), CLG_(costarray_chunks));
+    VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d",
+		 CLG_(stat).distinct_bbccs);
+    VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d",
+		 CLG_(stat).distinct_jccs);
+    VG_(message)(Vg_DebugMsg, "Distinct skips:   %d",
+		 CLG_(stat).distinct_skips);
+    VG_(message)(Vg_DebugMsg, "BB lookups:       %d",
+		 BB_lookups);
+    if (BB_lookups>0) {
+      /* percentages of BBs by amount of debug info available */
+      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)",
+		   CLG_(stat).full_debug_BBs    * 100 / BB_lookups,
+		   CLG_(stat).full_debug_BBs);
+      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
+		   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
+		   CLG_(stat).file_line_debug_BBs);
+      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)",
+		   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
+		   CLG_(stat).fn_name_debug_BBs);
+      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)",
+		   CLG_(stat).no_debug_BBs      * 100 / BB_lookups,
+		   CLG_(stat).no_debug_BBs);
+    }
+    VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d",
+		 CLG_(stat).bbcc_clones);
+    VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d",
+		 CLG_(stat).bb_retranslations);
+    VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d",
+		 CLG_(stat).distinct_instrs);
+    VG_(message)(Vg_DebugMsg, "");
+
+    VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d",
+		 CLG_(stat).cxt_lru_misses);
+    VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d",
+		 CLG_(stat).bbcc_lru_misses);
+    VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d",
+		 CLG_(stat).jcc_lru_misses);
+    VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu",
+		 CLG_(stat).bb_executions);
+    VG_(message)(Vg_DebugMsg, "Calls:             %llu",
+		 CLG_(stat).call_counter);
+    VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu",
+		 CLG_(stat).jcnd_counter);
+    VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu",
+		 CLG_(stat).jump_counter);
+    VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu",
+		 CLG_(stat).rec_call_counter);
+    VG_(message)(Vg_DebugMsg, "Returns:           %llu",
+		 CLG_(stat).ret_counter);
+
+    VG_(message)(Vg_DebugMsg, "");
+  }
+
+  CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
+  VG_(message)(Vg_UserMsg, "Events    : %s", buf);
+  CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
+  VG_(message)(Vg_UserMsg, "Collected : %s", buf);
+  VG_(message)(Vg_UserMsg, "");
+
+  //  if (CLG_(clo).simulate_cache)
+  (*CLG_(cachesim).printstat)();
+}
+
+
+/* Valgrind tool-exit hook; 'exitcode' is unused. */
+void CLG_(fini)(Int exitcode)
+{
+  finish();
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- Setup ---*/
+/*--------------------------------------------------------------------*/
+
+/* Initialization after command-line options are parsed: disable VEX
+ * loop unrolling and block chasing (callgrind needs 1:1 BB mapping),
+ * pick a default dump position, set up output files / command channel,
+ * the cache simulator, event sets, statistics and all hash tables,
+ * then start thread 1 and apply --instr-atstart. */
+static
+void CLG_(post_clo_init)(void)
+{
+   Char *dir = 0, *fname = 0;
+
+   VG_(clo_vex_control).iropt_unroll_thresh = 0;
+   VG_(clo_vex_control).guest_chase_thresh = 0;
+
+   CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
+   CLG_DEBUG(1, "  call sep. : %d\n", CLG_(clo).separate_callers);
+   CLG_DEBUG(1, "  rec. sep. : %d\n", CLG_(clo).separate_recursions);
+
+   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
+       VG_(message)(Vg_UserMsg, "Using source line as position.");
+       CLG_(clo).dump_line = True;
+   }
+
+   CLG_(init_files)(&dir,&fname);
+   CLG_(init_command)(dir,fname);
+
+   (*CLG_(cachesim).post_clo_init)();
+
+   CLG_(init_eventsets)(0);
+   CLG_(init_statistics)(& CLG_(stat));
+   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
+
+   /* initialize hash tables */
+   CLG_(init_obj_table)();
+   CLG_(init_cxt_table)();
+   CLG_(init_bb_hash)();
+
+   CLG_(init_threads)();
+   CLG_(run_thread)(1);
+
+   CLG_(instrument_state) = CLG_(clo).instrument_atstart;
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "For interactive control, run 'callgrind_control -h'.");
+}
+
+/* Tool registration: declare details, core callbacks (instrumentation,
+ * fini), needs (BB discards, CLO handling, client requests, syscall
+ * wrappers), thread/signal trackers, and default option values. */
+static
+void CLG_(pre_clo_init)(void)
+{
+    VG_(details_name)            ("Callgrind");
+    VG_(details_version)         (VERSION);
+    VG_(details_description)     ("a call-graph generating cache profiler");
+    VG_(details_copyright_author)("Copyright (C) 2002-2006, and GNU GPL'd, "
+				  "by J.Weidendorfer et al.");
+    VG_(details_bug_reports_to)  ("Josef.Weidendorfer@gmx.de");
+    VG_(details_avg_translation_sizeB) ( 155 );
+
+    VG_(basic_tool_funcs)        (CLG_(post_clo_init),
+                                  CLG_(instrument),
+                                  CLG_(fini));
+
+    VG_(needs_basic_block_discards)(clg_discard_basic_block_info);
+
+
+    VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
+				    CLG_(print_usage),
+				    CLG_(print_debug_usage));
+
+    VG_(needs_client_requests)(CLG_(handle_client_request));
+    VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
+			       CLG_(post_syscalltime));
+
+    VG_(track_thread_run)         ( & CLG_(run_thread) );
+    VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
+    VG_(track_post_deliver_signal)( & CLG_(post_signal) );
+
+    CLG_(set_clo_defaults)();
+}
+
+VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
+
+/*--------------------------------------------------------------------*/
+/*--- end main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/callgrind/sim.c b/callgrind/sim.c
new file mode 100644
index 0000000..e61eb69
--- /dev/null
+++ b/callgrind/sim.c
@@ -0,0 +1,2162 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Cache simulation. ---*/
+/*--- sim.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind.
+ (c) 2003-2005, Josef Weidendorfer
+
+ Parts are Copyright (C) 2002 Nicholas Nethercote
+ njn25@cam.ac.uk
+
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+
+/* Notes:
+ - simulates a write-allocate cache
+ - (block --> set) hash function uses simple bit selection
+ - handling of references straddling two cache blocks:
+ - counts as only one cache access (not two)
+ - both blocks hit --> one hit
+ - one block hits, the other misses --> one miss
+ - both blocks miss --> one miss (not two)
+*/
+
+/* Cache configuration */
+#include "cg_arch.h"
+
+/* additional structures for cache use info, separated
+ * according usage frequency:
+ * - line_loaded : pointer to cost center of instruction
+ * which loaded the line into cache.
+ * Needed to increment counters when line is evicted.
+ * - line_use : updated on every access
+ */
typedef struct {
   UInt count;  /* accesses to this line since it was loaded */
   UInt mask;   /* bytes-touched bitmask; e.g. for 64Byte line size 1bit/2Byte */
} line_use;

typedef struct {
   Addr memline, iaddr;  /* cached memory line / loading instruction address */
   line_use* dep_use; /* point to higher-level cacheblock for this memline */
   ULong* use_base;   /* cost array charged when this line is evicted */
} line_loaded;

/* Cache state */
typedef struct {
   char*        name;
   int          size;                   /* bytes */
   int          assoc;
   int          line_size;              /* bytes */
   Bool         sectored;  /* prefetch nearside cacheline on read */
   int          sets;                   /* number of sets */
   int          sets_min_1;             /* sets - 1, used as set-index mask */
   int          assoc_bits;             /* log2(assoc) */
   int          line_size_bits;         /* log2(line_size) */
   int          tag_shift;              /* address bits below the tag */
   UWord        tag_mask;               /* mask of the tag bits in an address */
   char         desc_line[128];         /* human-readable config string */
   UWord*       tags;                   /* sets*assoc entries, MRU-first per set */

   /* for cache use: only allocated when cache use collection is on */
   int          line_size_mask;         /* line_size - 1 */
   int*         line_start_mask;        /* byte offset -> touched-bytes mask */
   int*         line_end_mask;
   line_loaded* loaded;
   line_use*    use;
} cache_t2;

/*
 * States of flat caches in our model.
 * We use a 2-level hierarchy, 
 */
static cache_t2 I1, D1, L2;
+
/* Lower bits of cache tags are used as flags for a cache line */
#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
#define CACHELINE_DIRTY    1


/* Cache simulator Options */
static Bool clo_simulate_writeback = False;  /* L2 write-back model */
static Bool clo_simulate_hwpref = False;     /* HW prefetch emulation */
static Bool clo_simulate_sectors = False;    /* sectored cache lines */
static Bool clo_collect_cacheuse = False;    /* per-line utilization stats */

/* Following global vars are setup before by
 *  setup_bbcc()/cachesim_after_bbsetup():
 *
 * - Addr   bb_base     (instruction start address of original BB)
 * - ULong* cost_base   (start of cost array for BB)
 * - BBCC*  nonskipped  (only != 0 when in a function not skipped)
 */

/* Offset to events in event set, used in log_* functions */
static Int off_D0_Ir;
static Int off_D1r_Ir;
static Int off_D1r_Dr;
static Int off_D1w_Ir;
static Int off_D1w_Dw;
static Int off_D2_Ir;
static Int off_D2_Dr;
static Int off_D2_Dw;

static Addr   bb_base;       /* start address of current BB */
static ULong* cost_base;     /* cost array of current BB */
static InstrInfo* current_ii;  /* instruction currently being simulated */

/* Cache use offsets */
/* FIXME: The offsets are only correct because all eventsets get
 * the "Use" set added first !
 */
static Int off_I1_AcCost  = 0;
static Int off_I1_SpLoss  = 1;
static Int off_D1_AcCost  = 0;
static Int off_D1_SpLoss  = 1;
static Int off_L2_AcCost  = 2;
static Int off_L2_SpLoss  = 3;

/* Cache access types: Write doubles as the dirty flag OR'ed into a tag */
typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;

/* Result of a reference into a flat cache */
typedef enum { Hit  = 0, Miss, MissDirty } CacheResult;

/* Result of a reference into a hierarchical cache model */
typedef enum {
    L1_Hit, 
    L2_Hit, 
    MemAccess,
    WriteBackMemAccess } CacheModelResult;

/* Signature of a simulator backend; selected at init from the options */
typedef CacheModelResult (*simcall_type)(Addr, UChar);

static struct {
    simcall_type I1_Read;
    simcall_type D1_Read;
    simcall_type D1_Write;
} simulator;
+
+/*------------------------------------------------------------*/
+/*--- Cache Simulator Initialization ---*/
+/*------------------------------------------------------------*/
+
+static void cachesim_clearcache(cache_t2* c)
+{
+ Int i;
+
+ for (i = 0; i < c->sets * c->assoc; i++)
+ c->tags[i] = 0;
+ if (c->use) {
+ for (i = 0; i < c->sets * c->assoc; i++) {
+ c->loaded[i].memline = 0;
+ c->loaded[i].use_base = 0;
+ c->loaded[i].dep_use = 0;
+ c->loaded[i].iaddr = 0;
+ c->use[i].mask = 0;
+ c->use[i].count = 0;
+ c->tags[i] = i % c->assoc; /* init lower bits as pointer */
+ }
+ }
+}
+
+static void cacheuse_initcache(cache_t2* c);
+
+/* By this point, the size/assoc/line_size has been checked. */
+static void cachesim_initcache(cache_t config, cache_t2* c)
+{
+ c->size = config.size;
+ c->assoc = config.assoc;
+ c->line_size = config.line_size;
+ c->sectored = False; // FIXME
+
+ c->sets = (c->size / c->line_size) / c->assoc;
+ c->sets_min_1 = c->sets - 1;
+ c->assoc_bits = VG_(log2)(c->assoc);
+ c->line_size_bits = VG_(log2)(c->line_size);
+ c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
+ c->tag_mask = ~((1<<c->tag_shift)-1);
+
+ /* Can bits in tag entries be used for flags?
+ * Should be always true as MIN_LINE_SIZE >= 16 */
+ CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);
+
+ if (c->assoc == 1) {
+ VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
+ c->size, c->line_size,
+ c->sectored ? ", sectored":"");
+ } else {
+ VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
+ c->size, c->line_size, c->assoc,
+ c->sectored ? ", sectored":"");
+ }
+
+ c->tags = (UWord*) CLG_MALLOC(sizeof(UWord) * c->sets * c->assoc);
+ if (clo_collect_cacheuse)
+ cacheuse_initcache(c);
+ else
+ c->use = 0;
+ cachesim_clearcache(c);
+}
+
+
+#if 0
+static void print_cache(cache_t2* c)
+{
+ UInt set, way, i;
+
+ /* Note initialisation and update of 'i'. */
+ for (i = 0, set = 0; set < c->sets; set++) {
+ for (way = 0; way < c->assoc; way++, i++) {
+ VG_(printf)("%8x ", c->tags[i]);
+ }
+ VG_(printf)("\n");
+ }
+}
+#endif
+
+
+/*------------------------------------------------------------*/
+/*--- Write Through Cache Simulation ---*/
+/*------------------------------------------------------------*/
+
+/*
+ * Simple model: L1 & L2 Write Through
+ * Does not distinguish among read and write references
+ *
+ * Simulator functions:
+ * CacheModelResult cachesim_I1_ref(Addr a, UChar size)
+ * CacheModelResult cachesim_D1_ref(Addr a, UChar size)
+ */
+
+static __inline__
+CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
+{
+ int i, j;
+ UWord *set;
+
+ /* Shifting is a bit faster than multiplying */
+ set = &(c->tags[set_no << c->assoc_bits]);
+
+ /* This loop is unrolled for just the first case, which is the most */
+ /* common. We can't unroll any further because it would screw up */
+ /* if we have a direct-mapped (1-way) cache. */
+ if (tag == set[0])
+ return Hit;
+
+ /* If the tag is one other than the MRU, move it into the MRU spot */
+ /* and shuffle the rest down. */
+ for (i = 1; i < c->assoc; i++) {
+ if (tag == set[i]) {
+ for (j = i; j > 0; j--) {
+ set[j] = set[j - 1];
+ }
+ set[0] = tag;
+ return Hit;
+ }
+ }
+
+ /* A miss; install this tag as MRU, shuffle rest down. */
+ for (j = c->assoc - 1; j > 0; j--) {
+ set[j] = set[j - 1];
+ }
+ set[0] = tag;
+
+ return Miss;
+}
+
+static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
+{
+ UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
+ UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
+ UWord tag = a >> c->tag_shift;
+
+ /* Access entirely within line. */
+ if (set1 == set2)
+ return cachesim_setref(c, set1, tag);
+
+ /* Access straddles two lines. */
+ /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
+ else if (((set1 + 1) & (c->sets-1)) == set2) {
+
+ /* the call updates cache structures as side effect */
+ CacheResult res1 = cachesim_setref(c, set1, tag);
+ CacheResult res2 = cachesim_setref(c, set2, tag);
+ return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
+
+ } else {
+ VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2);
+ VG_(tool_panic)("item straddles more than two cache sets");
+ }
+ return Hit;
+}
+
+static
+CacheModelResult cachesim_I1_ref(Addr a, UChar size)
+{
+ if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+ if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
+ return MemAccess;
+}
+
+static
+CacheModelResult cachesim_D1_ref(Addr a, UChar size)
+{
+ if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+ if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
+ return MemAccess;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Write Back Cache Simulation ---*/
+/*------------------------------------------------------------*/
+
+/*
+ * More complex model: L1 Write-through, L2 Write-back
+ * This needs to distinguish among read and write references.
+ *
+ * Simulator functions:
+ * CacheModelResult cachesim_I1_Read(Addr a, UChar size)
+ * CacheModelResult cachesim_D1_Read(Addr a, UChar size)
+ * CacheModelResult cachesim_D1_Write(Addr a, UChar size)
+ */
+
+/*
+ * With write-back, result can be a miss evicting a dirty line
+ * The dirty state of a cache line is stored in Bit0 of the tag for
+ * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
+ * type (Read/Write), the line gets dirty on a write.
+ */
static __inline__
CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
{
   int i, j;
   UWord *set, tmp_tag;

   /* Shifting is a bit faster than multiplying */
   set = &(c->tags[set_no << c->assoc_bits]);

   /* This loop is unrolled for just the first case, which is the most */
   /* common.  We can't unroll any further because it would screw up   */
   /* if we have a direct-mapped (1-way) cache.                        */
   /* Note: the tag entries carry the dirty bit in bit 0, so the       */
   /* comparison masks it out; `ref` is Write==CACHELINE_DIRTY on a    */
   /* write and 0 on a read, so OR'ing `ref` sets the dirty flag.      */
   if (tag == (set[0] & ~CACHELINE_DIRTY)) {
      set[0] |= ref;
      return Hit;
   }
   /* If the tag is one other than the MRU, move it into the MRU spot  */
   /* and shuffle the rest down.                                       */
   for (i = 1; i < c->assoc; i++) {
      if (tag == (set[i] & ~CACHELINE_DIRTY)) {
         tmp_tag = set[i] | ref; // update dirty flag
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tmp_tag;
         return Hit;
      }
   }

   /* A miss;  install this tag as MRU, shuffle rest down. */
   /* tmp_tag keeps the evicted entry so its dirty bit decides the result */
   tmp_tag = set[c->assoc - 1];
   for (j = c->assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag | ref;

   /* evicting a dirty line implies a write back to memory */
   return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
}
+
+
+static __inline__
+CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
+{
+ UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
+ UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
+ UWord tag = a & c->tag_mask;
+
+ /* Access entirely within line. */
+ if (set1 == set2)
+ return cachesim_setref_wb(c, ref, set1, tag);
+
+ /* Access straddles two lines. */
+ /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
+ else if (((set1 + 1) & (c->sets-1)) == set2) {
+
+ /* the call updates cache structures as side effect */
+ CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
+ CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag);
+
+ if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
+ return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
+
+ } else {
+ VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2);
+ VG_(tool_panic)("item straddles more than two cache sets");
+ }
+ return Hit;
+}
+
+
+static
+CacheModelResult cachesim_I1_Read(Addr a, UChar size)
+{
+ if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+ switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+ case Hit: return L2_Hit;
+ case Miss: return MemAccess;
+ default: break;
+ }
+ return WriteBackMemAccess;
+}
+
+static
+CacheModelResult cachesim_D1_Read(Addr a, UChar size)
+{
+ if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+ switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+ case Hit: return L2_Hit;
+ case Miss: return MemAccess;
+ default: break;
+ }
+ return WriteBackMemAccess;
+}
+
+static
+CacheModelResult cachesim_D1_Write(Addr a, UChar size)
+{
+ if ( cachesim_ref( &D1, a, size) == Hit ) {
+ /* Even for a L1 hit, the write-trough L1 passes
+ * the write to the L2 to make the L2 line dirty.
+ * But this causes no latency, so return the hit.
+ */
+ cachesim_ref_wb( &L2, Write, a, size);
+ return L1_Hit;
+ }
+ switch( cachesim_ref_wb( &L2, Write, a, size) ) {
+ case Hit: return L2_Hit;
+ case Miss: return MemAccess;
+ default: break;
+ }
+ return WriteBackMemAccess;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Hardware Prefetch Simulation ---*/
+/*------------------------------------------------------------*/
+
/* statistics: number of lines prefetched in upward/downward streams */
static ULong prefetch_up = 0;
static ULong prefetch_down = 0;

#define PF_STREAMS  8    /* max streams tracked simultaneously */
#define PF_PAGEBITS 12   /* one stream per 4k page */

static UInt pf_lastblock[PF_STREAMS];  /* last L2 block touched per stream */
static Int  pf_seqblocks[PF_STREAMS];  /* run length: >0 upward, <0 downward */
+
+static
+void prefetch_clear(void)
+{
+ int i;
+ for(i=0;i<PF_STREAMS;i++)
+ pf_lastblock[i] = pf_seqblocks[i] = 0;
+}
+
+/*
+ * HW Prefetch emulation
+ * Start prefetching when detecting sequential access to 3 memory blocks.
+ * One stream can be detected per 4k page.
+ */
static __inline__
void prefetch_L2_doref(Addr a, UChar size)
{
   /* Note: `size` is unused; only the start address drives detection. */
   UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;   /* page selects stream */
   UInt block = ( a >> L2.line_size_bits);          /* L2 block number */

   if (block != pf_lastblock[stream]) {
      if (pf_seqblocks[stream] == 0) {
         /* no direction yet: an adjacent block starts an up/down run */
         if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
         else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
      }
      else if (pf_seqblocks[stream] >0) {
         if (pf_lastblock[stream] +1 == block) {
            pf_seqblocks[stream]++;
            if (pf_seqblocks[stream] >= 2) {
               /* upward run confirmed: prefetch 5 lines ahead into L2 */
               prefetch_up++;
               cachesim_ref(&L2, a + 5 * L2.line_size,1);
            }
         }
         else pf_seqblocks[stream] = 0;   /* run broken */
      }
      else if (pf_seqblocks[stream] <0) {
         if (pf_lastblock[stream] -1 == block) {
            pf_seqblocks[stream]--;
            if (pf_seqblocks[stream] <= -2) {
               /* downward run confirmed: prefetch 5 lines behind */
               prefetch_down++;
               cachesim_ref(&L2, a - 5 * L2.line_size,1);
            }
         }
         else pf_seqblocks[stream] = 0;   /* run broken */
      }
      pf_lastblock[stream] = block;
   }
}
+
+/* simple model with hardware prefetch */
+
+static
+CacheModelResult prefetch_I1_ref(Addr a, UChar size)
+{
+ if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+ prefetch_L2_doref(a,size);
+ if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
+ return MemAccess;
+}
+
+static
+CacheModelResult prefetch_D1_ref(Addr a, UChar size)
+{
+ if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+ prefetch_L2_doref(a,size);
+ if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
+ return MemAccess;
+}
+
+
+/* complex model with hardware prefetch */
+
+static
+CacheModelResult prefetch_I1_Read(Addr a, UChar size)
+{
+ if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
+ prefetch_L2_doref(a,size);
+ switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+ case Hit: return L2_Hit;
+ case Miss: return MemAccess;
+ default: break;
+ }
+ return WriteBackMemAccess;
+}
+
+static
+CacheModelResult prefetch_D1_Read(Addr a, UChar size)
+{
+ if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
+ prefetch_L2_doref(a,size);
+ switch( cachesim_ref_wb( &L2, Read, a, size) ) {
+ case Hit: return L2_Hit;
+ case Miss: return MemAccess;
+ default: break;
+ }
+ return WriteBackMemAccess;
+}
+
+static
+CacheModelResult prefetch_D1_Write(Addr a, UChar size)
+{
+ prefetch_L2_doref(a,size);
+ if ( cachesim_ref( &D1, a, size) == Hit ) {
+ /* Even for a L1 hit, the write-trough L1 passes
+ * the write to the L2 to make the L2 line dirty.
+ * But this causes no latency, so return the hit.
+ */
+ cachesim_ref_wb( &L2, Write, a, size);
+ return L1_Hit;
+ }
+ switch( cachesim_ref_wb( &L2, Write, a, size) ) {
+ case Hit: return L2_Hit;
+ case Miss: return MemAccess;
+ default: break;
+ }
+ return WriteBackMemAccess;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Cache Simulation with use metric collection ---*/
+/*------------------------------------------------------------*/
+
+/* can not be combined with write-back or prefetch */
+
/* Allocate and build the use-tracking state of a cache: per-line
 * counters plus two lookup tables mapping a byte offset within a
 * line to a 32-bit "bytes touched" mask. */
static
void cacheuse_initcache(cache_t2* c)
{
   int i;
   unsigned int start_mask, start_val;
   unsigned int end_mask, end_val;

   c->use    = CLG_MALLOC(sizeof(line_use) * c->sets * c->assoc);
   c->loaded = CLG_MALLOC(sizeof(line_loaded) * c->sets * c->assoc);
   c->line_start_mask = CLG_MALLOC(sizeof(int) * c->line_size);
   c->line_end_mask = CLG_MALLOC(sizeof(int) * c->line_size);


   c->line_size_mask = c->line_size-1;

   /* Meaning of line_start_mask/line_end_mask
    * Example: for a given cache line, you get an access starting at
    * byte offset 5, length 4, byte 5 - 8 was touched. For a cache
    * line size of 32, you have 1 bit per byte in the mask:
    *
    *   bit31   bit8 bit5  bit 0
    *       |      |  |    |
    *       11..111111100000   line_start_mask[5]
    *       00..000111111111   line_end_mask[(5+4)-1]
    *
    *  use_mask |= line_start_mask[5] && line_end_mask[8]
    *
    */
   start_val = end_val = ~0;
   if (c->line_size < 32) {
      /* fewer than 32 bytes per line: several mask bits per byte */
      int bits_per_byte = 32/c->line_size;
      start_mask = (1<<bits_per_byte)-1;
      end_mask   = start_mask << (32-bits_per_byte);
      for(i=0;i<c->line_size;i++) {
         c->line_start_mask[i] = start_val;
         start_val  = start_val & ~start_mask;
         start_mask = start_mask << bits_per_byte;

         c->line_end_mask[c->line_size-i-1] = end_val;
         end_val  = end_val & ~end_mask;
         end_mask = end_mask >> bits_per_byte;
      }
   }
   else {
      /* 32 bytes or more per line: each mask bit covers several bytes */
      int bytes_per_bit = c->line_size/32;
      start_mask = 1;
      end_mask   = 1 << 31;
      for(i=0;i<c->line_size;i++) {
         c->line_start_mask[i] = start_val;
         c->line_end_mask[c->line_size-i-1] = end_val;
         if ( ((i+1)%bytes_per_bit) == 0) {
            start_val   &= ~start_mask;
            end_val     &= ~end_mask;
            start_mask <<= 1;
            end_mask   >>= 1;
         }
      }
   }

   CLG_DEBUG(6, "Config %s:\n", c->desc_line);
   for(i=0;i<c->line_size;i++) {
      CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
                i, c->line_start_mask[i], c->line_end_mask[i]);
   }

   /* We use lower tag bits as offset pointers to cache use info.
    * I.e. some cache parameters don't work.
    */
   if (c->tag_shift < c->assoc_bits) {
      VG_(message)(Vg_DebugMsg,
                   "error: Use associativity < %d for cache use statistics!",
                   (1<<c->tag_shift) );
      VG_(tool_panic)("Unsupported cache configuration");
   }
}
+
+/* FIXME: A little tricky */
+#if 0
+
+static __inline__
+void cacheuse_update_hit(cache_t2* c, UInt high_idx, UInt low_idx, UInt use_mask)
+{
+ int idx = (high_idx << c->assoc_bits) | low_idx;
+
+ c->use[idx].count ++;
+ c->use[idx].mask |= use_mask;
+
+ CLG_DEBUG(6," Hit [idx %d] (line %p from %p): %x => %08x, count %d\n",
+ idx, c->loaded[idx].memline, c->loaded[idx].iaddr,
+ use_mask, c->use[idx].mask, c->use[idx].count);
+}
+
+/* only used for I1, D1 */
+
+static __inline__
+CacheResult cacheuse_setref(cache_t2* c, UInt set_no, UWord tag)
+{
+ int i, j, idx;
+ UWord *set, tmp_tag;
+ UInt use_mask;
+
+ /* Shifting is a bit faster than multiplying */
+ set = &(c->tags[set_no << c->assoc_bits]);
+ use_mask =
+ c->line_start_mask[a & c->line_size_mask] &
+ c->line_end_mask[(a+size-1) & c->line_size_mask];
+
+ /* This loop is unrolled for just the first case, which is the most */
+ /* common. We can't unroll any further because it would screw up */
+ /* if we have a direct-mapped (1-way) cache. */
+ if (tag == (set[0] & c->tag_mask)) {
+ cacheuse_update(c, set_no, set[0] & ~c->tag_mask, use_mask);
+ return L1_Hit;
+ }
+
+ /* If the tag is one other than the MRU, move it into the MRU spot */
+ /* and shuffle the rest down. */
+ for (i = 1; i < c->assoc; i++) {
+ if (tag == (set[i] & c->tag_mask)) {
+ tmp_tag = set[i];
+ for (j = i; j > 0; j--) {
+ set[j] = set[j - 1];
+ }
+ set[0] = tmp_tag;
+
+ cacheuse_update(c, set_no, tmp_tag & ~c->tag_mask, use_mask);
+ return L1_Hit;
+ }
+ }
+
+ /* A miss; install this tag as MRU, shuffle rest down. */
+ tmp_tag = set[L.assoc - 1] & ~c->tag_mask;
+ for (j = c->assoc - 1; j > 0; j--) {
+ set[j] = set[j - 1];
+ }
+ set[0] = tag | tmp_tag;
+
+ cacheuse_L2_miss(c, (set_no << c->assoc_bits) | tmp_tag,
+ use_mask, a & ~c->line_size_mask);
+
+ return Miss;
+}
+
+
+static CacheResult cacheuse_ref(cache_t2* c, Addr a, UChar size)
+{
+ UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
+ UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
+ UWord tag = a >> c->tag_shift;
+
+ /* Access entirely within line. */
+ if (set1 == set2)
+ return cacheuse_setref(c, set1, tag);
+
+ /* Access straddles two lines. */
+ /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
+ else if (((set1 + 1) & (c->sets-1)) == set2) {
+
+ /* the call updates cache structures as side effect */
+ CacheResult res1 = cacheuse_isMiss(c, set1, tag);
+ CacheResult res2 = cacheuse_isMiss(c, set2, tag);
+ return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
+
+ } else {
+ VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2);
+ VG_(tool_panic)("item straddles more than two cache sets");
+ }
+ return Hit;
+}
+#endif
+
+
/* for I1/D1 caches */
/* Generates cacheuse_I1_doRead / cacheuse_D1_doRead: an L1 lookup
 * that additionally maintains per-line use counters. The lower tag
 * bits of each way hold that way's index into the use/loaded arrays.
 * On an L1 miss, update_<L>_use flushes the evicted line's counters
 * and performs the L2 lookup.
 *
 * NOTE(review): for a straddling access, `tag` is not recomputed for
 * the second line (block2). When set2 wraps around to set 0,
 * (a+size-1) lies in the next tag "page", so the comparisons there
 * use a stale tag — TODO: confirm and compute a tag2 from (a+size-1),
 * as the non-cacheuse simulators should.
 */
#define CACHEUSE(L)                                                         \
                                                                            \
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size)         \
{                                                                           \
   register UInt set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);  \
   register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);  \
   register UWord tag  = a & L.tag_mask;                                    \
   int i, j, idx;                                                           \
   UWord *set, tmp_tag;                                                     \
   UInt use_mask;                                                           \
                                                                            \
   CLG_DEBUG(6,"%s.Acc(Addr %p, size %d): Sets [%d/%d]\n",                  \
            L.name, a, size, set1, set2);                                   \
                                                                            \
   /* First case: word entirely within line. */                             \
   if (set1 == set2) {                                                      \
                                                                            \
      /* Shifting is a bit faster than multiplying */                       \
      set = &(L.tags[set1 << L.assoc_bits]);                                \
      use_mask = L.line_start_mask[a & L.line_size_mask] &                  \
                 L.line_end_mask[(a+size-1) & L.line_size_mask];            \
                                                                            \
      /* This loop is unrolled for just the first case, which is the most */\
      /* common.  We can't unroll any further because it would screw up   */\
      /* if we have a direct-mapped (1-way) cache.                        */\
      if (tag == (set[0] & L.tag_mask)) {                                   \
         idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask);             \
         L.use[idx].count ++;                                               \
         L.use[idx].mask |= use_mask;                                       \
         CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);            \
         return L1_Hit;                                                     \
      }                                                                     \
      /* If the tag is one other than the MRU, move it into the MRU spot */ \
      /* and shuffle the rest down.                                      */ \
      for (i = 1; i < L.assoc; i++) {                                       \
         if (tag == (set[i] & L.tag_mask)) {                                \
            tmp_tag = set[i];                                               \
            for (j = i; j > 0; j--) {                                       \
               set[j] = set[j - 1];                                         \
            }                                                               \
            set[0] = tmp_tag;                                               \
            idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask);         \
            L.use[idx].count ++;                                            \
            L.use[idx].mask |= use_mask;                                    \
            CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,  \
                      use_mask, L.use[idx].mask, L.use[idx].count);         \
            return L1_Hit;                                                  \
         }                                                                  \
      }                                                                     \
                                                                            \
      /* A miss;  install this tag as MRU, shuffle rest down. */            \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                             \
      for (j = L.assoc - 1; j > 0; j--) {                                   \
         set[j] = set[j - 1];                                               \
      }                                                                     \
      set[0] = tag | tmp_tag;                                               \
      idx = (set1 << L.assoc_bits) | tmp_tag;                               \
      return update_##L##_use(&L, idx,                                      \
                              use_mask, a &~ L.line_size_mask);             \
                                                                            \
   /* Second case: word straddles two lines. */                             \
   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
   } else if (((set1 + 1) & (L.sets-1)) == set2) {                          \
      Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */           \
      set = &(L.tags[set1 << L.assoc_bits]);                                \
      use_mask = L.line_start_mask[a & L.line_size_mask];                   \
      if (tag == (set[0] & L.tag_mask)) {                                   \
         idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask);             \
         L.use[idx].count ++;                                               \
         L.use[idx].mask |= use_mask;                                       \
         CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);            \
         goto block2;                                                       \
      }                                                                     \
      for (i = 1; i < L.assoc; i++) {                                       \
         if (tag == (set[i] & L.tag_mask)) {                                \
            tmp_tag = set[i];                                               \
            for (j = i; j > 0; j--) {                                       \
               set[j] = set[j - 1];                                         \
            }                                                               \
            set[0] = tmp_tag;                                               \
            idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask);         \
            L.use[idx].count ++;                                            \
            L.use[idx].mask |= use_mask;                                    \
            CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,  \
                      use_mask, L.use[idx].mask, L.use[idx].count);         \
            goto block2;                                                    \
         }                                                                  \
      }                                                                     \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                             \
      for (j = L.assoc - 1; j > 0; j--) {                                   \
         set[j] = set[j - 1];                                               \
      }                                                                     \
      set[0] = tag | tmp_tag;                                               \
      idx = (set1 << L.assoc_bits) | tmp_tag;                               \
      miss1 = update_##L##_use(&L, idx,                                     \
                               use_mask, a &~ L.line_size_mask);            \
block2:                                                                     \
      set = &(L.tags[set2 << L.assoc_bits]);                                \
      use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask];            \
      if (tag == (set[0] & L.tag_mask)) {                                   \
         idx = (set2 << L.assoc_bits) | (set[0] & ~L.tag_mask);             \
         L.use[idx].count ++;                                               \
         L.use[idx].mask |= use_mask;                                       \
         CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);            \
         return miss1;                                                      \
      }                                                                     \
      for (i = 1; i < L.assoc; i++) {                                       \
         if (tag == (set[i] & L.tag_mask)) {                                \
            tmp_tag = set[i];                                               \
            for (j = i; j > 0; j--) {                                       \
               set[j] = set[j - 1];                                         \
            }                                                               \
            set[0] = tmp_tag;                                               \
            idx = (set2 << L.assoc_bits) | (tmp_tag & ~L.tag_mask);         \
            L.use[idx].count ++;                                            \
            L.use[idx].mask |= use_mask;                                    \
            CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,  \
                      use_mask, L.use[idx].mask, L.use[idx].count);         \
            return miss1;                                                   \
         }                                                                  \
      }                                                                     \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                             \
      for (j = L.assoc - 1; j > 0; j--) {                                   \
         set[j] = set[j - 1];                                               \
      }                                                                     \
      set[0] = tag | tmp_tag;                                               \
      idx = (set2 << L.assoc_bits) | tmp_tag;                               \
      miss2 = update_##L##_use(&L, idx,                                     \
                               use_mask, (a+size-1) &~ L.line_size_mask);   \
      return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit;     \
                                                                            \
   } else {                                                                 \
      VG_(printf)("addr: %p size: %u sets: %d %d", a, size, set1, set2);    \
      VG_(tool_panic)("item straddles more than two cache sets");           \
   }                                                                        \
   return 0;                                                                \
}
+
+
/* Population count (number of set bits) via the parallel "magic
 * binary numbers" reduction, see
 * http://graphics.stanford.edu/~seander/bithacks.html
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
   unsigned int c = bits;   /* running per-field sums */

   c = ((c >>  1) & 0x55555555u) + (c & 0x55555555u);  /* pairs       */
   c = ((c >>  2) & 0x33333333u) + (c & 0x33333333u);  /* nibbles     */
   c = ((c >>  4) & 0x0F0F0F0Fu) + (c & 0x0F0F0F0Fu);  /* bytes       */
   c = ((c >>  8) & 0x00FF00FFu) + (c & 0x00FF00FFu);  /* half words  */
   c = ((c >> 16) & 0x0000FFFFu) + (c & 0x0000FFFFu);  /* full word   */
   return c;
}
+
/* Flush the use info of L2 line `idx` (on replacement, or at exit
 * with memline == 0): charge access-cost and space-loss counters to
 * the cost center that loaded the line, then re-init the tracking
 * state for the new `memline`. */
static void update_L2_use(int idx, Addr memline)
{
  line_loaded* loaded = &(L2.loaded[idx]);
  line_use* use = &(L2.use[idx]);
  /* number of bytes never touched while the line was cached */
  int i = ((32 - countBits(use->mask)) * L2.line_size)>>5;

  CLG_DEBUG(2, " L2.miss [%d]: at %p accessing memline %p\n",
           idx, bb_base + current_ii->instr_offset, memline);
  if (use->count>0) {
    CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %p from %p]\n",
             use->count, i, use->mask, loaded->memline, loaded->iaddr);
    CLG_DEBUG(2, "   collect: %d, use_base %p\n",
             CLG_(current_state).collect, loaded->use_base);

    if (CLG_(current_state).collect && loaded->use_base) {
      /* AcCost: inverse-usage heuristic; SpLoss: untouched bytes */
      (loaded->use_base)[off_L2_AcCost] += 1000 / use->count;
      (loaded->use_base)[off_L2_SpLoss] += i;
    }
  }

  use->count = 0;
  use->mask  = 0;

  loaded->memline = memline;
  loaded->iaddr   = bb_base + current_ii->instr_offset;
  /* in skipped functions, costs go to the call site's "skipped" array */
  loaded->use_base = (CLG_(current_state).nonskipped) ?
    CLG_(current_state).nonskipped->skipped :
    cost_base + current_ii->cost_offset;
}
+
/* L2 lookup for a memory line just loaded into an L1; links the L1's
 * loaded-info (`l1_loaded->dep_use`) to the L2 line's use counters so
 * L1 usage can be propagated on eviction. */
static
CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded)
{
   UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1);
   UWord* set = &(L2.tags[setNo << L2.assoc_bits]);
   UWord tag  = memline & L2.tag_mask;

   int i, j, idx;
   UWord tmp_tag;

   CLG_DEBUG(6,"L2.Acc(Memline %p): Set %d\n", memline, setNo);

   /* lower tag bits of each way hold its index into the use arrays */
   if (tag == (set[0] & L2.tag_mask)) {
      idx = (setNo << L2.assoc_bits) | (set[0] & ~L2.tag_mask);
      l1_loaded->dep_use = &(L2.use[idx]);

      CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): => %08x, count %d\n",
                 idx, L2.loaded[idx].memline,  L2.loaded[idx].iaddr,
                 L2.use[idx].mask, L2.use[idx].count);
      return L2_Hit;
   }
   for (i = 1; i < L2.assoc; i++) {
      if (tag == (set[i] & L2.tag_mask)) {
         /* non-MRU hit: promote to MRU position */
         tmp_tag = set[i];
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tmp_tag;
         idx = (setNo << L2.assoc_bits) | (tmp_tag & ~L2.tag_mask);
         l1_loaded->dep_use = &(L2.use[idx]);

         CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): => %08x, count %d\n",
                    i, idx, L2.loaded[idx].memline,  L2.loaded[idx].iaddr,
                    L2.use[idx].mask, L2.use[idx].count);
         return L2_Hit;
      }
   }

   /* A miss;  install this tag as MRU, shuffle rest down. */
   /* keep the evicted way's use-info index bits in the new tag entry */
   tmp_tag = set[L2.assoc - 1] & ~L2.tag_mask;
   for (j = L2.assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag | tmp_tag;
   idx = (setNo << L2.assoc_bits) | tmp_tag;
   l1_loaded->dep_use = &(L2.use[idx]);

   /* flush use info of the evicted line, re-init for memline */
   update_L2_use(idx, memline);

   return MemAccess;
}
+
+
+
+
/* Generates update_I1_use / update_D1_use: called when L1 line `idx`
 * is replaced (or finalized with memline == 0). Flushes the old
 * line's use counters into its loading cost center, propagates the
 * use mask/count to the depending L2 line, re-inits tracking for the
 * new memline and performs the L2 lookup. */
#define UPDATE_USE(L)                                                \
                                                                     \
static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
                               UInt mask, Addr memline)              \
{                                                                    \
  line_loaded* loaded = &(cache->loaded[idx]);                       \
  line_use* use = &(cache->use[idx]);                                \
  int c  = ((32 - countBits(use->mask)) * cache->line_size)>>5;      \
                                                                     \
  CLG_DEBUG(2, " %s.miss [%d]: at %p accessing memline %p (mask %08x)\n", \
           cache->name, idx, bb_base + current_ii->instr_offset, memline, mask); \
  if (use->count>0) {                                                \
    CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %p from %p]\n",\
             use->count, c, use->mask, loaded->memline, loaded->iaddr);     \
    CLG_DEBUG(2, "   collect: %d, use_base %p\n",                    \
             CLG_(current_state).collect, loaded->use_base);         \
                                                                     \
    if (CLG_(current_state).collect && loaded->use_base) {           \
      (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count;     \
      (loaded->use_base)[off_##L##_SpLoss] += c;                     \
                                                                     \
      /* FIXME (?): L1/L2 line sizes must be equal ! */              \
      loaded->dep_use->mask |= use->mask;                            \
      loaded->dep_use->count += use->count;                          \
    }                                                                \
  }                                                                  \
                                                                     \
  use->count = 1;                                                    \
  use->mask  = mask;                                                 \
  loaded->memline = memline;                                         \
  loaded->iaddr   = bb_base + current_ii->instr_offset;              \
  loaded->use_base = (CLG_(current_state).nonskipped) ?              \
    CLG_(current_state).nonskipped->skipped :                        \
    cost_base + current_ii->cost_offset;                             \
                                                                     \
  if (memline == 0) return L2_Hit;                                   \
  return cacheuse_L2_access(memline, loaded);                        \
}

UPDATE_USE(I1);
UPDATE_USE(D1);

CACHEUSE(I1);
CACHEUSE(D1);
+
+
+static
+void cacheuse_finish(void)
+{
+ int i;
+ InstrInfo ii = { 0,0,0,0,0 };
+
+ if (!CLG_(current_state).collect) return;
+
+ bb_base = 0;
+ current_ii = ⅈ
+ cost_base = 0;
+
+ /* update usage counters */
+ if (I1.use)
+ for (i = 0; i < I1.sets * I1.assoc; i++)
+ if (I1.loaded[i].use_base)
+ update_I1_use( &I1, i, 0,0);
+
+ if (D1.use)
+ for (i = 0; i < D1.sets * D1.assoc; i++)
+ if (D1.loaded[i].use_base)
+ update_D1_use( &D1, i, 0,0);
+
+ if (L2.use)
+ for (i = 0; i < L2.sets * L2.assoc; i++)
+ if (L2.loaded[i].use_base)
+ update_L2_use(i, 0);
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Helper functions called by instrumented code ---*/
+/*------------------------------------------------------------*/
+
+
+/* Fold one simulated access result into two cost arrays (c1: the cost
+ * slot being charged, c2: the running per-thread cost).  Slot layout is
+ * hardcoded: [0] accesses, [1] L1 misses, [2] L2 misses, [3] L2
+ * write-backs.  The switch falls through on purpose so that a worse
+ * outcome also increments every cheaper counter. */
+static __inline__
+void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
+{
+   switch(r) {
+      case WriteBackMemAccess:
+         if (clo_simulate_writeback) {
+            c1[3]++;
+            c2[3]++;
+         }
+         // fall through
+
+      case MemAccess:
+         c1[2]++;
+         c2[2]++;
+         // fall through
+
+      case L2_Hit:
+         c1[1]++;
+         c2[1]++;
+         // fall through
+
+      default:                  /* L1 hit: only the access counter */
+         c1[0]++;
+         c2[0]++;
+   }
+}
+
+
+/* Handler called from instrumented code for an instruction without data
+ * access: simulate the I1 fetch and, when collecting, charge the result
+ * to the instruction's cost slot (or to the nonskipped function's cost
+ * center when inside a skipped function) and to the thread cost. */
+VG_REGPARM(1)
+static void log_1I0D(InstrInfo* ii)
+{
+   CacheModelResult IrRes;
+
+   current_ii = ii;
+   IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
+
+   CLG_DEBUG(6, "log_1I0D: Ir=%p/%u => Ir %d\n",
+             bb_base + ii->instr_offset, ii->instr_size, IrRes);
+
+   if (CLG_(current_state).collect) {
+      ULong* cost_Ir;
+
+      if (CLG_(current_state).nonskipped)
+         cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
+      else
+         cost_Ir = cost_base + ii->cost_offset + off_D0_Ir;
+
+      inc_costs(IrRes, cost_Ir,
+                CLG_(current_state).cost + CLG_(sets).off_full_Ir );
+   }
+}
+
+
+/* Handler for an instruction doing one data read: simulate the I1 fetch
+ * and the D1 read, then charge both results to the appropriate cost
+ * slots and the thread cost. */
+
+VG_REGPARM(2)
+static void log_1I1Dr(InstrInfo* ii, Addr data)
+{
+   CacheModelResult IrRes, DrRes;
+
+   current_ii = ii;
+   IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
+   DrRes = (*simulator.D1_Read)(data, ii->data_size);
+
+   CLG_DEBUG(6, "log_1I1Dr: Ir=%p/%u, Dr=%p/%u => Ir %d, Dr %d\n",
+             bb_base + ii->instr_offset, ii->instr_size,
+             data, ii->data_size, IrRes, DrRes);
+
+   if (CLG_(current_state).collect) {
+      ULong *cost_Ir, *cost_Dr;
+
+      if (CLG_(current_state).nonskipped) {
+         cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
+         cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
+      }
+      else {
+         cost_Ir = cost_base + ii->cost_offset + off_D1r_Ir;
+         cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr;
+      }
+
+      inc_costs(IrRes, cost_Ir,
+                CLG_(current_state).cost + CLG_(sets).off_full_Ir );
+      inc_costs(DrRes, cost_Dr,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dr );
+   }
+}
+
+
+/* Handler for a data read when the instruction fetch itself has already
+ * been accounted elsewhere (no I1 simulation here). */
+VG_REGPARM(2)
+static void log_0I1Dr(InstrInfo* ii, Addr data)
+{
+   CacheModelResult DrRes;
+
+   current_ii = ii;
+   DrRes = (*simulator.D1_Read)(data, ii->data_size);
+
+   CLG_DEBUG(6, "log_0I1Dr: Dr=%p/%u => Dr %d\n",
+             data, ii->data_size, DrRes);
+
+   if (CLG_(current_state).collect) {
+      ULong *cost_Dr;
+
+      if (CLG_(current_state).nonskipped) {
+         cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
+      }
+      else {
+         cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr;
+      }
+
+      inc_costs(DrRes, cost_Dr,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dr );
+   }
+}
+
+
+/* Handler for an instruction doing one data write: simulate the I1
+ * fetch and the D1 write, then charge both results. */
+
+VG_REGPARM(2)
+static void log_1I1Dw(InstrInfo* ii, Addr data)
+{
+   CacheModelResult IrRes, DwRes;
+
+   current_ii = ii;
+   IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
+   DwRes = (*simulator.D1_Write)(data, ii->data_size);
+
+   CLG_DEBUG(6, "log_1I1Dw: Ir=%p/%u, Dw=%p/%u => Ir %d, Dw %d\n",
+             bb_base + ii->instr_offset, ii->instr_size,
+             data, ii->data_size, IrRes, DwRes);
+
+   if (CLG_(current_state).collect) {
+      ULong *cost_Ir, *cost_Dw;
+
+      if (CLG_(current_state).nonskipped) {
+         /* use off_full_* like the sibling handlers; this is the same
+          * value as off_sim_* (see CLG_(init_eventsets)), but keeps the
+          * handlers consistent */
+         cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
+         cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
+      }
+      else {
+         cost_Ir = cost_base + ii->cost_offset + off_D1w_Ir;
+         cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw;
+      }
+
+      inc_costs(IrRes, cost_Ir,
+                CLG_(current_state).cost + CLG_(sets).off_full_Ir );
+      inc_costs(DwRes, cost_Dw,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dw );
+   }
+}
+
+/* Handler for a data write when the instruction fetch has already been
+ * accounted elsewhere (no I1 simulation here). */
+VG_REGPARM(2)
+static void log_0I1Dw(InstrInfo* ii, Addr data)
+{
+   CacheModelResult DwRes;
+
+   current_ii = ii;
+   DwRes = (*simulator.D1_Write)(data, ii->data_size);
+
+   CLG_DEBUG(6, "log_0I1Dw: Dw=%p/%u => Dw %d\n",
+             data, ii->data_size, DwRes);
+
+   if (CLG_(current_state).collect) {
+      ULong *cost_Dw;
+
+      if (CLG_(current_state).nonskipped) {
+         cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
+      }
+      else {
+         cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw;
+      }
+
+      inc_costs(DwRes, cost_Dw,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dw );
+   }
+}
+
+/* Handler for an instruction doing a read and a write access: simulate
+ * the I1 fetch, the D1 read and the D1 write, then charge all three. */
+
+VG_REGPARM(3)
+static void log_1I2D(InstrInfo* ii, Addr data1, Addr data2)
+{
+   CacheModelResult IrRes, DrRes, DwRes;
+
+   current_ii = ii;
+   IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
+   DrRes = (*simulator.D1_Read)(data1, ii->data_size);
+   DwRes = (*simulator.D1_Write)(data2, ii->data_size);
+
+   CLG_DEBUG(6,
+             "log_1I2D: Ir=%p/%u, Dr=%p/%u, Dw=%p/%u => Ir %d, Dr %d, Dw %d\n",
+             bb_base + ii->instr_offset, ii->instr_size,
+             data1, ii->data_size, data2, ii->data_size, IrRes, DrRes, DwRes);
+
+   if (CLG_(current_state).collect) {
+      ULong *cost_Ir, *cost_Dr, *cost_Dw;
+
+      if (CLG_(current_state).nonskipped) {
+         /* use off_full_* like the sibling handlers; same value as
+          * off_sim_* (see CLG_(init_eventsets)), but consistent */
+         cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
+         cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
+         cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
+      }
+      else {
+         cost_Ir = cost_base + ii->cost_offset + off_D2_Ir;
+         cost_Dr = cost_base + ii->cost_offset + off_D2_Dr;
+         cost_Dw = cost_base + ii->cost_offset + off_D2_Dw;
+      }
+
+      inc_costs(IrRes, cost_Ir,
+                CLG_(current_state).cost + CLG_(sets).off_full_Ir );
+      inc_costs(DrRes, cost_Dr,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dr );
+      inc_costs(DwRes, cost_Dw,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dw );
+   }
+}
+
+/* Handler for a read plus a write access when the instruction fetch has
+ * already been accounted elsewhere. */
+VG_REGPARM(3)
+static void log_0I2D(InstrInfo* ii, Addr data1, Addr data2)
+{
+   CacheModelResult DrRes, DwRes;
+
+   current_ii = ii;
+   DrRes = (*simulator.D1_Read)(data1, ii->data_size);
+   DwRes = (*simulator.D1_Write)(data2, ii->data_size);
+
+   /* fixed: message said "log_0D2D" */
+   CLG_DEBUG(6,
+             "log_0I2D: Dr=%p/%u, Dw=%p/%u => Dr %d, Dw %d\n",
+             data1, ii->data_size, data2, ii->data_size, DrRes, DwRes);
+
+   if (CLG_(current_state).collect) {
+      ULong *cost_Dr, *cost_Dw;
+
+      if (CLG_(current_state).nonskipped) {
+         /* use off_full_* like the sibling handlers; same value as
+          * off_sim_* (see CLG_(init_eventsets)), but consistent */
+         cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
+         cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
+      }
+      else {
+         cost_Dr = cost_base + ii->cost_offset + off_D2_Dr;
+         cost_Dw = cost_base + ii->cost_offset + off_D2_Dw;
+      }
+
+      inc_costs(DrRes, cost_Dr,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dr );
+      inc_costs(DwRes, cost_Dw,
+                CLG_(current_state).cost + CLG_(sets).off_full_Dw );
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Cache configuration ---*/
+/*------------------------------------------------------------*/
+
+/* Sentinel marking a cache configuration as "not given on the command
+ * line"; all three fields stay -1 until --I1/--D1/--L2 fills them in. */
+#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
+
+static cache_t clo_I1_cache = UNDEFINED_CACHE;
+static cache_t clo_D1_cache = UNDEFINED_CACHE;
+static cache_t clo_L2_cache = UNDEFINED_CACHE;
+
+
+/* Checks cache config is ok; makes it so if not. */
+static
+void check_cache(cache_t* cache, Char *name)
+{
+ /* First check they're all powers of two */
+ if (-1 == VG_(log2)(cache->size)) {
+ VG_(message)(Vg_UserMsg,
+ "error: %s size of %dB not a power of two; aborting.",
+ name, cache->size);
+ VG_(exit)(1);
+ }
+
+ if (-1 == VG_(log2)(cache->assoc)) {
+ VG_(message)(Vg_UserMsg,
+ "error: %s associativity of %d not a power of two; aborting.",
+ name, cache->assoc);
+ VG_(exit)(1);
+ }
+
+ if (-1 == VG_(log2)(cache->line_size)) {
+ VG_(message)(Vg_UserMsg,
+ "error: %s line size of %dB not a power of two; aborting.",
+ name, cache->line_size);
+ VG_(exit)(1);
+ }
+
+ // Then check line size >= 16 -- any smaller and a single instruction could
+ // straddle three cache lines, which breaks a simulation assertion and is
+ // stupid anyway.
+ if (cache->line_size < MIN_LINE_SIZE) {
+ VG_(message)(Vg_UserMsg,
+ "error: %s line size of %dB too small; aborting.",
+ name, cache->line_size);
+ VG_(exit)(1);
+ }
+
+ /* Then check cache size > line size (causes seg faults if not). */
+ if (cache->size <= cache->line_size) {
+ VG_(message)(Vg_UserMsg,
+ "error: %s cache size of %dB <= line size of %dB; aborting.",
+ name, cache->size, cache->line_size);
+ VG_(exit)(1);
+ }
+
+ /* Then check assoc <= (size / line size) (seg faults otherwise). */
+ if (cache->assoc > (cache->size / cache->line_size)) {
+ VG_(message)(Vg_UserMsg,
+ "warning: %s associativity > (size / line size); aborting.", name);
+ VG_(exit)(1);
+ }
+}
+
+/* Determine the cache geometries to simulate: start from auto-detection
+ * (when the architecture supports it), override with any --I1/--D1/--L2
+ * command line settings, then validate the result. */
+static
+void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
+
+   Int n_clos = 0;
+
+   // Count how many were defined on the command line.
+   if (DEFINED(clo_I1_cache)) { n_clos++; }
+   if (DEFINED(clo_D1_cache)) { n_clos++; }
+   if (DEFINED(clo_L2_cache)) { n_clos++; }
+
+   // Set the cache config (using auto-detection, if supported by the
+   // architecture)
+   VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
+
+   // Then replace with any defined on the command line.
+   if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
+   if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
+   if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
+
+   // Then check values and fix if not acceptable.
+   check_cache(I1c, "I1");
+   check_cache(D1c, "D1");
+   check_cache(L2c, "L2");
+
+   if (VG_(clo_verbosity) > 1) {
+      VG_(message)(Vg_UserMsg, "Cache configuration used:");
+      VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
+                   I1c->size, I1c->assoc, I1c->line_size);
+      VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
+                   D1c->size, D1c->assoc, D1c->line_size);
+      VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
+                   L2c->size, L2c->assoc, L2c->line_size);
+   }
+#undef DEFINED   /* fixed: was "#undef CMD_LINE_DEFINED", so DEFINED leaked */
+}
+
+
+/* Initialize and clear simulator state after command line processing:
+ * install the log_* handlers and pick the simulator functions matching
+ * the chosen option combination (cache use / hw prefetch / write-back).
+ * Note the option precedence encoded by the early returns below:
+ * cacheuse overrides hwpref, which overrides plain write-back. */
+static void cachesim_post_clo_init(void)
+{
+   /* Cache configurations. */
+   cache_t I1c, D1c, L2c;
+
+   /* Initialize access handlers: without cache simulation all log
+    * handlers stay unset, so no simulation calls are instrumented. */
+   if (!CLG_(clo).simulate_cache) {
+      CLG_(cachesim).log_1I0D = 0;
+      CLG_(cachesim).log_1I0D_name = "(no function)";
+
+      CLG_(cachesim).log_1I1Dr = 0;
+      CLG_(cachesim).log_1I1Dw = 0;
+      CLG_(cachesim).log_1I2D = 0;
+      CLG_(cachesim).log_1I1Dr_name = "(no function)";
+      CLG_(cachesim).log_1I1Dw_name = "(no function)";
+      CLG_(cachesim).log_1I2D_name = "(no function)";
+
+      CLG_(cachesim).log_0I1Dr = 0;
+      CLG_(cachesim).log_0I1Dw = 0;
+      CLG_(cachesim).log_0I2D = 0;
+      CLG_(cachesim).log_0I1Dr_name = "(no function)";
+      CLG_(cachesim).log_0I1Dw_name = "(no function)";
+      CLG_(cachesim).log_0I2D_name = "(no function)";
+      return;
+   }
+
+   /* Configuration of caches only needed with real cache simulation */
+   configure_caches(&I1c, &D1c, &L2c);
+
+   I1.name = "I1";
+   D1.name = "D1";
+   L2.name = "L2";
+
+   cachesim_initcache(I1c, &I1);
+   cachesim_initcache(D1c, &D1);
+   cachesim_initcache(L2c, &L2);
+
+   /* the other cache simulators use the standard helpers
+    * with dispatching via simulator struct */
+
+   CLG_(cachesim).log_1I0D = log_1I0D;
+   CLG_(cachesim).log_1I0D_name = "log_1I0D";
+
+   CLG_(cachesim).log_1I1Dr = log_1I1Dr;
+   CLG_(cachesim).log_1I1Dw = log_1I1Dw;
+   CLG_(cachesim).log_1I2D = log_1I2D;
+   CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
+   CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
+   CLG_(cachesim).log_1I2D_name = "log_1I2D";
+
+   CLG_(cachesim).log_0I1Dr = log_0I1Dr;
+   CLG_(cachesim).log_0I1Dw = log_0I1Dw;
+   CLG_(cachesim).log_0I2D = log_0I2D;
+   CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
+   CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
+   CLG_(cachesim).log_0I2D_name = "log_0I2D";
+
+   if (clo_collect_cacheuse) {
+
+      /* Output warning for not supported option combinations */
+      if (clo_simulate_hwpref) {
+         VG_(message)(Vg_DebugMsg,
+                      "warning: prefetch simulation can not be used with cache usage");
+         clo_simulate_hwpref = False;
+      }
+
+      if (clo_simulate_writeback) {
+         VG_(message)(Vg_DebugMsg,
+                      "warning: write-back simulation can not be used with cache usage");
+         clo_simulate_writeback = False;
+      }
+
+      /* NOTE(review): D1_Write is set to the *read* handler here --
+       * looks intentional for usage tracking (no separate write path),
+       * but worth confirming. */
+      simulator.I1_Read = cacheuse_I1_doRead;
+      simulator.D1_Read = cacheuse_D1_doRead;
+      simulator.D1_Write = cacheuse_D1_doRead;
+      return;
+   }
+
+   if (clo_simulate_hwpref) {
+      prefetch_clear();
+
+      if (clo_simulate_writeback) {
+         simulator.I1_Read = prefetch_I1_Read;
+         simulator.D1_Read = prefetch_D1_Read;
+         simulator.D1_Write = prefetch_D1_Write;
+      }
+      else {
+         simulator.I1_Read = prefetch_I1_ref;
+         simulator.D1_Read = prefetch_D1_ref;
+         simulator.D1_Write = prefetch_D1_ref;
+      }
+
+      return;
+   }
+
+   if (clo_simulate_writeback) {
+      simulator.I1_Read = cachesim_I1_Read;
+      simulator.D1_Read = cachesim_D1_Read;
+      simulator.D1_Write = cachesim_D1_Write;
+   }
+   else {
+      simulator.I1_Read = cachesim_I1_ref;
+      simulator.D1_Read = cachesim_D1_ref;
+      simulator.D1_Write = cachesim_D1_ref;
+   }
+}
+
+
+/* Clear simulator state (cache contents and prefetch state).
+ * Has to be initialized before (see cachesim_post_clo_init). */
+static
+void cachesim_clear(void)
+{
+   cachesim_clearcache(&I1);
+   cachesim_clearcache(&D1);
+   cachesim_clearcache(&L2);
+
+   prefetch_clear();
+}
+
+
+/* Write the "desc:" dump header lines describing the three simulated
+ * caches into buf.  NOTE(review): no buffer size is passed -- callers
+ * must supply a buffer large enough for three description lines. */
+static void cachesim_getdesc(Char* buf)
+{
+   Int p;
+   p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line);
+   p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line);
+   VG_(sprintf)(buf+p, "desc: L2 cache: %s\n", L2.desc_line);
+}
+
+/* Print the command line options understood by this cache simulator
+ * (called for --help output). */
+static
+void cachesim_print_opts(void)
+{
+   VG_(printf)(
+"\n   cache simulator options:\n"
+" --simulate-cache=no|yes Do cache simulation [no]\n"
+" --simulate-wb=no|yes Count write-back events [no]\n"
+" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"
+#if CLG_EXPERIMENTAL
+" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
+#endif
+" --cacheuse=no|yes Collect cache block use [no]\n"
+" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
+" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
+" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
+   );
+}
+
+/* Parse the value of a --I1/--D1/--L2 option of the form
+ * "<size>,<assoc>,<line_size>" into *cache.  opt_len is the length of
+ * the option prefix (e.g. 5 for "--I1=").  On malformed input we hand
+ * over to VG_(bad_option); the strdup'ed copy is not freed on that path
+ * -- presumably VG_(bad_option) terminates, TODO confirm. */
+static void parse_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   int i1, i2, i3;
+   int i;
+   char *opt = VG_(strdup)(orig_opt);
+
+   i = i1 = opt_len;
+
+   /* Option looks like "--I1=65536,2,64".
+    * Find commas, replace with NULs to make three independent
+    * strings, then extract numbers. Yuck. */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i2 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i3 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if ('\0' != opt[i]) goto bad;
+
+   cache->size      = (Int)VG_(atoll)(opt + i1);
+   cache->assoc     = (Int)VG_(atoll)(opt + i2);
+   cache->line_size = (Int)VG_(atoll)(opt + i3);
+
+   VG_(free)(opt);
+
+   return;
+
+  bad:
+   VG_(bad_option)(orig_opt);
+}
+
+/* Check for command line option for cache configuration.
+ * Return False if unknown and not handled.
+ *
+ * Called from CLG_(process_cmd_line_option)() in clo.c
+ */
+static Bool cachesim_parse_opt(Char* arg)
+{
+   if (0 == VG_(strcmp)(arg, "--simulate-wb=yes"))
+      clo_simulate_writeback = True;
+   else if (0 == VG_(strcmp)(arg, "--simulate-wb=no"))
+      clo_simulate_writeback = False;
+
+   else if (0 == VG_(strcmp)(arg, "--simulate-hwpref=yes"))
+      clo_simulate_hwpref = True;
+   else if (0 == VG_(strcmp)(arg, "--simulate-hwpref=no"))
+      clo_simulate_hwpref = False;
+
+   else if (0 == VG_(strcmp)(arg, "--simulate-sectors=yes"))
+      clo_simulate_sectors = True;
+   else if (0 == VG_(strcmp)(arg, "--simulate-sectors=no"))
+      clo_simulate_sectors = False;
+
+   else if (0 == VG_(strcmp)(arg, "--cacheuse=yes")) {
+      clo_collect_cacheuse = True;
+      /* Use counters only make sense with fine dumping */
+      CLG_(clo).dump_instr = True;
+   }
+   else if (0 == VG_(strcmp)(arg, "--cacheuse=no"))
+      clo_collect_cacheuse = False;
+
+   /* 5 is length of "--I1=" */
+   else if (0 == VG_(strncmp)(arg, "--I1=", 5))
+      parse_opt(&clo_I1_cache, arg, 5);
+   else if (0 == VG_(strncmp)(arg, "--D1=", 5))
+      parse_opt(&clo_D1_cache, arg, 5);
+   else if (0 == VG_(strncmp)(arg, "--L2=", 5))
+      parse_opt(&clo_L2_cache, arg, 5);
+   else
+      return False;
+
+   return True;
+}
+
+/* Format n with thousands separators ("1,234,567"), right justified in
+ * a field field_width characters wide, into buf.  Returns the length of
+ * the commified number itself (commas included, padding excluded);
+ * callers reuse that as the column width for subsequent rows.
+ * Works in place: the plain decimal string is shifted right while
+ * commas are inserted every third digit, walking from the end. */
+static
+Int commify(ULong n, int field_width, char* buf)
+{
+   int len, n_commas, i, j, new_len, space;
+
+   VG_(sprintf)(buf, "%llu", n);
+   len = VG_(strlen)(buf);
+   n_commas = (len - 1) / 3;
+   new_len = len + n_commas;
+   space = field_width - new_len;
+
+   /* Allow for printing a number in a field_width smaller than it's size */
+   if (space < 0) space = 0;
+
+   /* Make j = -1 because we copy the '\0' before doing the numbers in groups
+    * of three. */
+   for (j = -1, i = len ; i >= 0; i--) {
+      buf[i + n_commas + space] = buf[i];
+
+      if ((i>0) && (3 == ++j)) {
+         j = 0;
+         n_commas--;
+         buf[i + n_commas + space] = ',';
+      }
+   }
+   /* Right justify in field. */
+   for (i = 0; i < space; i++) buf[i] = ' ';
+   return new_len;
+}
+
+/* Format n/ex as a percentage (integer part n/ex, fractional part
+ * n%ex; callers pass ex = 10 or 100), right justified in a field of
+ * field_width characters.
+ * Fix: zero-pad the two fractional digits for ex == 100 -- previously
+ * e.g. n=105, ex=100 printed "1.5%" instead of the correct "1.05%". */
+static
+void percentify(Int n, Int ex, Int field_width, char buf[])
+{
+   int i, len, space;
+
+   if (ex == 100)
+      VG_(sprintf)(buf, "%d.%02d%%", n / ex, n % ex);
+   else
+      VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
+   len = VG_(strlen)(buf);
+   space = field_width - len;
+   if (space < 0) space = 0; /* Allow for v. small field_width */
+   i = len;
+
+   /* Right justify in field */
+   for ( ; i >= 0; i--) buf[i + space] = buf[i];
+   for (i = 0; i < space; i++) buf[i] = ' ';
+}
+
+/* Print the end-of-run summary: I/D/L2 reference and miss counts plus
+ * miss rates.  Column widths are derived from the first number printed
+ * in each column (the return value of commify). */
+static
+void cachesim_printstat(void)
+{
+   FullCost total = CLG_(total_cost), D_total = 0;
+   ULong L2_total_m, L2_total_mr, L2_total_mw,
+         L2_total, L2_total_r, L2_total_w;
+   char buf1[RESULTS_BUF_LEN],
+        buf2[RESULTS_BUF_LEN],
+        buf3[RESULTS_BUF_LEN];
+   Int l1, l2, l3;
+   Int p;
+
+   if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {
+      VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu",
+                   prefetch_up);
+      VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu",
+                   prefetch_down);
+      VG_(message)(Vg_DebugMsg, "");
+   }
+
+   /* I cache results. Use the I_refs value to determine the first column
+    * width. */
+   l1 = commify(total[CLG_(sets).off_full_Ir], 0, buf1);
+   VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
+
+   if (!CLG_(clo).simulate_cache) return;
+
+   commify(total[CLG_(sets).off_full_Ir +1], l1, buf1);
+   VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
+
+   commify(total[CLG_(sets).off_full_Ir +2], l1, buf1);
+   VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
+
+   p = 100;
+
+   /* avoid division by zero below; total counters are not reused */
+   if (0 == total[CLG_(sets).off_full_Ir])
+      total[CLG_(sets).off_full_Ir] = 1;
+
+   percentify(total[CLG_(sets).off_full_Ir+1] * 100 * p /
+              total[CLG_(sets).off_full_Ir], p, l1+1, buf1);
+   VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
+
+   percentify(total[CLG_(sets).off_full_Ir+2] * 100 * p /
+              total[CLG_(sets).off_full_Ir], p, l1+1, buf1);
+   VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
+   VG_(message)(Vg_UserMsg, "");
+
+   /* D cache results.
+      Use the D_refs.rd and D_refs.wr values to determine the
+    * width of columns 2 & 3. */
+
+   D_total = CLG_(get_eventset_cost)( CLG_(sets).full );
+   CLG_(init_cost)( CLG_(sets).full, D_total);
+   CLG_(copy_cost)( CLG_(sets).Dr, D_total, total + CLG_(sets).off_full_Dr );
+   CLG_(add_cost) ( CLG_(sets).Dw, D_total, total + CLG_(sets).off_full_Dw );
+
+   commify( D_total[0], l1, buf1);
+   l2 = commify(total[CLG_(sets).off_full_Dr], 0, buf2);
+   l3 = commify(total[CLG_(sets).off_full_Dw], 0, buf3);
+   VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
+                buf1, buf2, buf3);
+
+   commify( D_total[1], l1, buf1);
+   commify(total[CLG_(sets).off_full_Dr+1], l2, buf2);
+   commify(total[CLG_(sets).off_full_Dw+1], l3, buf3);
+   VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
+                buf1, buf2, buf3);
+
+   commify( D_total[2], l1, buf1);
+   commify(total[CLG_(sets).off_full_Dr+2], l2, buf2);
+   commify(total[CLG_(sets).off_full_Dw+2], l3, buf3);
+   VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
+                buf1, buf2, buf3);
+
+   p = 10;
+
+   if (0 == D_total[0]) D_total[0] = 1;
+   if (0 == total[CLG_(sets).off_full_Dr]) total[CLG_(sets).off_full_Dr] = 1;
+   if (0 == total[CLG_(sets).off_full_Dw]) total[CLG_(sets).off_full_Dw] = 1;
+
+   percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1);
+   percentify(total[CLG_(sets).off_full_Dr+1] * 100 * p /
+              total[CLG_(sets).off_full_Dr], p, l2+1, buf2);
+   percentify(total[CLG_(sets).off_full_Dw+1] * 100 * p /
+              total[CLG_(sets).off_full_Dw], p, l3+1, buf3);
+   VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
+
+   percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1);
+   percentify(total[CLG_(sets).off_full_Dr+2] * 100 * p /
+              total[CLG_(sets).off_full_Dr], p, l2+1, buf2);
+   percentify(total[CLG_(sets).off_full_Dw+2] * 100 * p /
+              total[CLG_(sets).off_full_Dw], p, l3+1, buf3);
+   VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
+   VG_(message)(Vg_UserMsg, "");
+
+
+
+   /* L2 overall results */
+
+   L2_total =
+      total[CLG_(sets).off_full_Dr +1] +
+      total[CLG_(sets).off_full_Dw +1] +
+      total[CLG_(sets).off_full_Ir +1];
+   L2_total_r =
+      total[CLG_(sets).off_full_Dr +1] +
+      total[CLG_(sets).off_full_Ir +1];
+   L2_total_w = total[CLG_(sets).off_full_Dw +1];
+   commify(L2_total, l1, buf1);
+   commify(L2_total_r, l2, buf2);
+   commify(L2_total_w, l3, buf3);
+   VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
+                buf1, buf2, buf3);
+
+   L2_total_m =
+      total[CLG_(sets).off_full_Dr +2] +
+      total[CLG_(sets).off_full_Dw +2] +
+      total[CLG_(sets).off_full_Ir +2];
+   L2_total_mr =
+      total[CLG_(sets).off_full_Dr +2] +
+      total[CLG_(sets).off_full_Ir +2];
+   L2_total_mw = total[CLG_(sets).off_full_Dw +2];
+   commify(L2_total_m, l1, buf1);
+   commify(L2_total_mr, l2, buf2);
+   commify(L2_total_mw, l3, buf3);
+   VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
+                buf1, buf2, buf3);
+
+   percentify(L2_total_m * 100 * p /
+              (total[CLG_(sets).off_full_Ir] + D_total[0]), p, l1+1, buf1);
+   percentify(L2_total_mr * 100 * p /
+              (total[CLG_(sets).off_full_Ir] + total[CLG_(sets).off_full_Dr]),
+              p, l2+1, buf2);
+   percentify(L2_total_mw * 100 * p /
+              total[CLG_(sets).off_full_Dw], p, l3+1, buf3);
+   VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )",
+                buf1, buf2,buf3);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup for Event set. ---*/
+/*------------------------------------------------------------*/
+
+/* Global table of event sets and cost offsets; filled in by
+ * CLG_(init_eventsets)() below and read by the log handlers above. */
+struct event_sets CLG_(sets);
+
+/* Build the event sets and cost offsets used for cost accounting,
+ * depending on the active options (cache simulation, write-back
+ * counting, cache use tracking), plus optional user events.
+ * max_user: number of extra user event slots to reserve in "full"
+ * (grown here for alloc/systime collection). */
+void CLG_(init_eventsets)(Int max_user)
+{
+   EventType * e1, *e2, *e3, *e4;
+   EventSet *Ir, *Dr, *Dw;
+   EventSet *D0, *D1r, *D1w, *D2;
+   EventSet *sim, *full;
+   EventSet *use;
+   int sizeOfUseIr;
+
+   /* the "use" set is empty unless --cacheuse=yes is given */
+   use = CLG_(get_eventset)("Use", 4);
+   if (clo_collect_cacheuse) {
+      /* if TUse is 0, there was never a load, and no loss, too */
+      e1 = CLG_(register_eventtype)("AcCost1");
+      CLG_(add_eventtype)(use, e1);
+      e1 = CLG_(register_eventtype)("SpLoss1");
+      CLG_(add_eventtype)(use, e1);
+      e1 = CLG_(register_eventtype)("AcCost2");
+      CLG_(add_eventtype)(use, e1);
+      e1 = CLG_(register_eventtype)("SpLoss2");
+      CLG_(add_eventtype)(use, e1);
+   }
+
+   /* per-access sets: 3 events each with cache simulation (ref, L1
+    * miss, L2 miss), 4 with write-back counting, 1 otherwise */
+   Ir = CLG_(get_eventset)("Ir", 4);
+   Dr = CLG_(get_eventset)("Dr", 4);
+   Dw = CLG_(get_eventset)("Dw", 4);
+   if (CLG_(clo).simulate_cache) {
+      e1 = CLG_(register_eventtype)("Ir");
+      e2 = CLG_(register_eventtype)("I1mr");
+      e3 = CLG_(register_eventtype)("I2mr");
+      if (clo_simulate_writeback) {
+         e4 = CLG_(register_eventtype)("I2dmr");
+         CLG_(add_dep_event4)(Ir, e1,e2,e3,e4);
+      }
+      else
+         CLG_(add_dep_event3)(Ir, e1,e2,e3);
+
+      e1 = CLG_(register_eventtype)("Dr");
+      e2 = CLG_(register_eventtype)("D1mr");
+      e3 = CLG_(register_eventtype)("D2mr");
+      if (clo_simulate_writeback) {
+         e4 = CLG_(register_eventtype)("D2dmr");
+         CLG_(add_dep_event4)(Dr, e1,e2,e3,e4);
+      }
+      else
+         CLG_(add_dep_event3)(Dr, e1,e2,e3);
+
+      e1 = CLG_(register_eventtype)("Dw");
+      e2 = CLG_(register_eventtype)("D1mw");
+      e3 = CLG_(register_eventtype)("D2mw");
+      if (clo_simulate_writeback) {
+         e4 = CLG_(register_eventtype)("D2dmw");
+         CLG_(add_dep_event4)(Dw, e1,e2,e3,e4);
+      }
+      else
+         CLG_(add_dep_event3)(Dw, e1,e2,e3);
+
+   }
+   else {
+      e1 = CLG_(register_eventtype)("Ir");
+      CLG_(add_eventtype)(Ir, e1);
+   }
+
+   /* combined sets for the four instruction kinds; the "use" part is
+    * always first (the log handlers and add_and_zero_Dx rely on it) */
+   sizeOfUseIr = use->size + Ir->size;
+   D0 = CLG_(get_eventset)("D0", sizeOfUseIr);
+   CLG_(add_eventset)(D0, use);
+   off_D0_Ir = CLG_(add_eventset)(D0, Ir);
+
+   D1r = CLG_(get_eventset)("D1r", sizeOfUseIr + Dr->size);
+   CLG_(add_eventset)(D1r, use);
+   off_D1r_Ir = CLG_(add_eventset)(D1r, Ir);
+   off_D1r_Dr = CLG_(add_eventset)(D1r, Dr);
+
+   D1w = CLG_(get_eventset)("D1w", sizeOfUseIr + Dw->size);
+   CLG_(add_eventset)(D1w, use);
+   off_D1w_Ir = CLG_(add_eventset)(D1w, Ir);
+   off_D1w_Dw = CLG_(add_eventset)(D1w, Dw);
+
+   D2 = CLG_(get_eventset)("D2", sizeOfUseIr + Dr->size + Dw->size);
+   CLG_(add_eventset)(D2, use);
+   off_D2_Ir = CLG_(add_eventset)(D2, Ir);
+   off_D2_Dr = CLG_(add_eventset)(D2, Dr);
+   off_D2_Dw = CLG_(add_eventset)(D2, Dw);
+
+   sim = CLG_(get_eventset)("sim", sizeOfUseIr + Dr->size + Dw->size);
+   CLG_(add_eventset)(sim, use);
+   CLG_(sets).off_sim_Ir = CLG_(add_eventset)(sim, Ir);
+   CLG_(sets).off_sim_Dr = CLG_(add_eventset)(sim, Dr);
+   CLG_(sets).off_sim_Dw = CLG_(add_eventset)(sim, Dw);
+
+   if (CLG_(clo).collect_alloc) max_user += 2;
+   if (CLG_(clo).collect_systime) max_user += 2;
+
+   /* "full" = sim + user events; the sim offsets are identical in
+    * full, so off_full_* can alias off_sim_* */
+   full = CLG_(get_eventset)("full", sim->size + max_user);
+   CLG_(add_eventset)(full, sim);
+   CLG_(sets).off_full_Ir = CLG_(sets).off_sim_Ir;
+   CLG_(sets).off_full_Dr = CLG_(sets).off_sim_Dr;
+   CLG_(sets).off_full_Dw = CLG_(sets).off_sim_Dw;
+
+   CLG_(sets).use = use;
+   CLG_(sets).Ir = Ir;
+   CLG_(sets).Dr = Dr;
+   CLG_(sets).Dw = Dw;
+
+   CLG_(sets).D0 = D0;
+   CLG_(sets).D1r = D1r;
+   CLG_(sets).D1w = D1w;
+   CLG_(sets).D2 = D2;
+
+   CLG_(sets).sim = sim;
+   CLG_(sets).full = full;
+
+   if (CLG_(clo).collect_alloc) {
+      e1 = CLG_(register_eventtype)("allocCount");
+      e2 = CLG_(register_eventtype)("allocSize");
+      CLG_(sets).off_full_user = CLG_(add_dep_event2)(full, e1,e2);
+   }
+
+   if (CLG_(clo).collect_systime) {
+      e1 = CLG_(register_eventtype)("sysCount");
+      e2 = CLG_(register_eventtype)("sysTime");
+      CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2);
+   }
+
+   CLG_DEBUGIF(1) {
+      CLG_DEBUG(1, "EventSets:\n");
+      CLG_(print_eventset)(-2, use);
+      CLG_(print_eventset)(-2, Ir);
+      CLG_(print_eventset)(-2, Dr);
+      CLG_(print_eventset)(-2, Dw);
+      CLG_(print_eventset)(-2, sim);
+      CLG_(print_eventset)(-2, full);
+   }
+
+   /* Not-existing events are silently ignored */
+   CLG_(dumpmap) = CLG_(get_eventmapping)(full);
+   CLG_(append_event)(CLG_(dumpmap), "Ir");
+   CLG_(append_event)(CLG_(dumpmap), "Dr");
+   CLG_(append_event)(CLG_(dumpmap), "Dw");
+   CLG_(append_event)(CLG_(dumpmap), "I1mr");
+   CLG_(append_event)(CLG_(dumpmap), "D1mr");
+   CLG_(append_event)(CLG_(dumpmap), "D1mw");
+   CLG_(append_event)(CLG_(dumpmap), "I2mr");
+   CLG_(append_event)(CLG_(dumpmap), "D2mr");
+   CLG_(append_event)(CLG_(dumpmap), "D2mw");
+   CLG_(append_event)(CLG_(dumpmap), "I2dmr");
+   CLG_(append_event)(CLG_(dumpmap), "D2dmr");
+   CLG_(append_event)(CLG_(dumpmap), "D2dmw");
+   CLG_(append_event)(CLG_(dumpmap), "AcCost1");
+   CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
+   CLG_(append_event)(CLG_(dumpmap), "AcCost2");
+   CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
+   CLG_(append_event)(CLG_(dumpmap), "allocCount");
+   CLG_(append_event)(CLG_(dumpmap), "allocSize");
+   CLG_(append_event)(CLG_(dumpmap), "sysCount");
+   CLG_(append_event)(CLG_(dumpmap), "sysTime");
+
+}
+
+
+
+/* Move (add, then reset) the costs in one instruction's cost slot into
+ * dst, dispatching on which event set (D0/D1r/D1w/D2) the slot uses so
+ * the right per-access sub-costs land at the right sim offsets. */
+static
+void add_and_zero_Dx(EventSet* es, SimCost dst, ULong* cost)
+{
+   /* if eventset use is defined, it is always first (hardcoded!) */
+   CLG_(add_and_zero_cost)( CLG_(sets).use, dst, cost);
+
+   /* FIXME: This is hardcoded... */
+   if (es == CLG_(sets).D0) {
+      CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
+                               cost + off_D0_Ir);
+   }
+   else if (es == CLG_(sets).D1r) {
+      CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
+                               cost + off_D1r_Ir);
+      CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr,
+                               cost + off_D1r_Dr);
+   }
+   else if (es == CLG_(sets).D1w) {
+      CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
+                               cost + off_D1w_Ir);
+      CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw,
+                               cost + off_D1w_Dw);
+   }
+   else {
+      CLG_ASSERT(es == CLG_(sets).D2);
+      CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
+                               cost + off_D2_Ir);
+      CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr,
+                               cost + off_D2_Dr);
+      CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw,
+                               cost + off_D2_Dw);
+   }
+}
+
+/* this is called at dump time for every instruction executed:
+ * accumulate the instruction's costs into "cost".  Without cache
+ * simulation only the execution count is added. */
+static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
+                               InstrInfo* ii, ULong exe_count)
+{
+   if (!CLG_(clo).simulate_cache)
+      cost[CLG_(sets).off_sim_Ir] += exe_count;
+   else {
+
+#if 0
+/* There is always a trivial case where exe_count and Ir can be
+ * slightly different because ecounter is updated when executing
+ * the next BB. E.g. for last BB executed, or when toggling collection
+ */
+      /* FIXME: Hardcoded that each eventset has Ir as first */
+      if ((bbcc->cost + ii->cost_offset)[0] != exe_count) {
+         VG_(printf)("==> Ir %llu, exe %llu\n",
+                     (bbcc->cost + ii->cost_offset)[0], exe_count);
+         CLG_(print_bbcc_cost)(-2, bbcc);
+         //CLG_ASSERT((bbcc->cost + ii->cost_offset)[0] == exe_count);
+      }
+#endif
+
+      add_and_zero_Dx(ii->eventset, cost,
+                      bbcc->cost + ii->cost_offset);
+   }
+}
+
+/* Called after the cost center of the current BB has been set up:
+ * cache the code base address and cost array that the log_* handlers
+ * use while this BB executes. */
+static
+void cachesim_after_bbsetup(void)
+{
+   BBCC* bbcc = CLG_(current_state).bbcc;
+
+   if (CLG_(clo).simulate_cache) {
+      BB* bb = bbcc->bb;
+
+      /* only needed if log_* functions are called */
+      bb_base = bb->obj->offset + bb->offset;
+      cost_base = bbcc->cost;
+   }
+}
+
+/* Called once at the end of the run; flushes cache usage data if
+ * --cacheuse=yes was active. */
+static
+void cachesim_finish(void)
+{
+   if (clo_collect_cacheuse)
+      cacheuse_finish();
+}
+
+/*------------------------------------------------------------*/
+/*--- The simulator defined in this file ---*/
+/*------------------------------------------------------------*/
+
+/* The simulator interface exported by this file; the log handler slots
+ * stay null until cachesim_post_clo_init() installs them (when cache
+ * simulation is enabled). */
+struct cachesim_if CLG_(cachesim) = {
+   .print_opts    = cachesim_print_opts,
+   .parse_opt     = cachesim_parse_opt,
+   .post_clo_init = cachesim_post_clo_init,
+   .clear         = cachesim_clear,
+   .getdesc       = cachesim_getdesc,
+   .printstat     = cachesim_printstat,
+   .add_icost     = cachesim_add_icost,
+   .after_bbsetup = cachesim_after_bbsetup,
+   .finish        = cachesim_finish,
+
+   /* these will be set by cachesim_post_clo_init */
+   .log_1I0D      = 0,
+
+   .log_1I1Dr     = 0,
+   .log_1I1Dw     = 0,
+   .log_1I2D      = 0,
+
+   .log_0I1Dr     = 0,
+   .log_0I1Dw     = 0,
+   .log_0I2D      = 0,
+
+   .log_1I0D_name = "(no function)",
+
+   .log_1I1Dr_name = "(no function)",
+   .log_1I1Dw_name = "(no function)",
+   .log_1I2D_name = "(no function)",
+
+   .log_0I1Dr_name = "(no function)",
+   .log_0I1Dw_name = "(no function)",
+   .log_0I2D_name = "(no function)"
+};
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                  sim.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/callgrind/tests/Makefile.am b/callgrind/tests/Makefile.am
new file mode 100644
index 0000000..bc7d201
--- /dev/null
+++ b/callgrind/tests/Makefile.am
@@ -0,0 +1,14 @@
+# For AM_FLAG_M3264_PRI
+include $(top_srcdir)/Makefile.flags.am
+
+SUBDIRS = .
+DIST_SUBDIRS = .
+
+noinst_SCRIPTS =
+
+EXTRA_DIST =
+
+check_PROGRAMS =
+
+AM_CPPFLAGS = -I$(top_srcdir)/include
+AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI)
diff --git a/callgrind/threads.c b/callgrind/threads.c
new file mode 100644
index 0000000..eda9d0c
--- /dev/null
+++ b/callgrind/threads.c
@@ -0,0 +1,456 @@
+/*--------------------------------------------------------------------*/
+/*--- Callgrind ---*/
+/*--- ct_threads.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Callgrind, a Valgrind tool for call tracing.
+
+ Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "global.h"
+
+#include <pub_tool_threadstate.h>
+
+/* forward decls */
+static exec_state* exec_state_save(void);
+static exec_state* exec_state_restore(void);
+static exec_state* push_exec_state(int);
+static exec_state* top_exec_state(void);
+
+static exec_stack current_states;
+
+
+/*------------------------------------------------------------*/
+/*--- Support for multi-threading ---*/
+/*------------------------------------------------------------*/
+
+
+/*
+ * For Valgrind, MT is cooperative (no preempting in our code),
+ * so we don't need locks...
+ *
+ * Per-thread data:
+ * - BBCCs
+ * - call stack
+ * - call hash
+ * - event counters: last, current
+ *
+ * Even when ignoring MT, we need these functions to set up some
+ * data structures for the process (= Thread 1).
+ */
+
+/* current running thread */
+ThreadId CLG_(current_tid);
+
+static thread_info* thread[VG_N_THREADS];
+
+/* Return the global per-thread info table, indexed by ThreadId. */
+thread_info** CLG_(get_threads)()
+{
+ return thread;
+}
+
+/* Return the thread_info of the currently running thread
+ * (slot CLG_(current_tid) of the thread table). */
+thread_info* CLG_(get_current_thread)()
+{
+ return thread[CLG_(current_tid)];
+}
+
+/* Reset the thread table. thread_info structs are allocated lazily
+ * on first switch to a thread (see new_thread/switch_thread), so all
+ * slots start out empty and no thread is marked current. */
+void CLG_(init_threads)()
+{
+ Int i;
+ for(i=0;i<VG_N_THREADS;i++)
+ thread[i] = 0;
+ CLG_(current_tid) = VG_INVALID_THREADID;
+}
+
+/* switches through all threads and calls func */
+void CLG_(forall_threads)(void (*func)(thread_info*))
+{
+ Int t, orig_tid = CLG_(current_tid);
+
+ for(t=1;t<VG_N_THREADS;t++) {
+ if (!thread[t]) continue;
+ CLG_(switch_thread)(t);
+ (*func)(thread[t]);
+ }
+ CLG_(switch_thread)(orig_tid);
+}
+
+
+/* Allocate and initialize the thread_info for a newly seen thread:
+ * its exec/call/fn stacks, its event counters (last-dump and
+ * signal-handler cost accumulators), and its BBCC/JCC containers.
+ * NOTE(review): CLG_MALLOC result is not checked here -- presumably
+ * it aborts on OOM; confirm against its definition. */
+static
+thread_info* new_thread(void)
+{
+ thread_info* t;
+
+ t = (thread_info*) CLG_MALLOC(sizeof(thread_info));
+
+ /* init state */
+ CLG_(init_exec_stack)( &(t->states) );
+ CLG_(init_call_stack)( &(t->calls) );
+ CLG_(init_fn_stack) ( &(t->fns) );
+ /* t->states.entry[0]->cxt = CLG_(get_cxt)(t->fns.bottom); */
+
+ /* event counters */
+ t->lastdump_cost = CLG_(get_eventset_cost)( CLG_(sets).full );
+ t->sighandler_cost = CLG_(get_eventset_cost)( CLG_(sets).full );
+ CLG_(init_cost)( CLG_(sets).full, t->lastdump_cost );
+ CLG_(init_cost)( CLG_(sets).full, t->sighandler_cost );
+
+ /* init data containers */
+ CLG_(init_fn_array)( &(t->fn_active) );
+ CLG_(init_bbcc_hash)( &(t->bbccs) );
+ CLG_(init_jcc_hash)( &(t->jccs) );
+
+ return t;
+}
+
+
+/* Switch the global "current" state to thread tid.
+ * Saves all per-thread state (exec stack, call stack, fn stack,
+ * active-fn array, BBCC/JCC hashes) into the outgoing thread's
+ * thread_info, then loads the incoming thread's state, creating its
+ * thread_info lazily on first use. No-op if tid is already current. */
+void CLG_(switch_thread)(ThreadId tid)
+{
+ if (tid == CLG_(current_tid)) return;
+
+ CLG_DEBUG(0, ">> thread %d (was %d)\n", tid, CLG_(current_tid));
+
+ if (CLG_(current_tid) != VG_INVALID_THREADID) {
+ /* save thread state */
+ thread_info* t = thread[CLG_(current_tid)];
+
+ CLG_ASSERT(t != 0);
+
+ /* current context (including signal handler contexts) */
+ exec_state_save();
+ CLG_(copy_current_exec_stack)( &(t->states) );
+ CLG_(copy_current_call_stack)( &(t->calls) );
+ CLG_(copy_current_fn_stack) ( &(t->fns) );
+
+ CLG_(copy_current_fn_array) ( &(t->fn_active) );
+ /* If we cumulate costs of threads, use TID 1 for all jccs/bccs */
+ if (!CLG_(clo).separate_threads) t = thread[1];
+ CLG_(copy_current_bbcc_hash)( &(t->bbccs) );
+ CLG_(copy_current_jcc_hash) ( &(t->jccs) );
+ }
+
+ CLG_(current_tid) = tid;
+ CLG_ASSERT(tid < VG_N_THREADS);
+
+ if (tid != VG_INVALID_THREADID) {
+ thread_info* t;
+
+ /* load thread state */
+
+ if (thread[tid] == 0) thread[tid] = new_thread();
+ t = thread[tid];
+
+ /* current context (including signal handler contexts) */
+ CLG_(set_current_exec_stack)( &(t->states) );
+ exec_state_restore();
+ CLG_(set_current_call_stack)( &(t->calls) );
+ CLG_(set_current_fn_stack) ( &(t->fns) );
+
+ CLG_(set_current_fn_array) ( &(t->fn_active) );
+ /* If we cumulate costs of threads, use TID 1 for all jccs/bccs */
+ if (!CLG_(clo).separate_threads) t = thread[1];
+ CLG_(set_current_bbcc_hash) ( &(t->bbccs) );
+ CLG_(set_current_jcc_hash) ( &(t->jccs) );
+ }
+}
+
+
+/* Called when thread tid is about to run: first triggers a periodic
+ * profile dump if --dump-every-bb is set and enough basic blocks
+ * have executed since the last dump, then polls for external
+ * commands, and finally switches per-thread state to tid.
+ * Note: bbs_done/buf are static, i.e. shared across all threads. */
+void CLG_(run_thread)(ThreadId tid)
+{
+ /* check for dumps needed */
+ static ULong bbs_done = 0;
+ static Char buf[512];
+
+ if (CLG_(clo).dump_every_bb >0) {
+ if (CLG_(stat).bb_executions - bbs_done > CLG_(clo).dump_every_bb) {
+ /* the trigger reason string ends up in the dump header */
+ VG_(sprintf)(buf, "--dump-every-bb=%d", CLG_(clo).dump_every_bb);
+ CLG_(dump_profile)(buf, False);
+ bbs_done = CLG_(stat).bb_executions;
+ }
+ }
+
+ CLG_(check_command)();
+
+ /* now check for thread switch */
+ CLG_(switch_thread)(tid);
+}
+
+/* Entry of a signal handler in thread tid: save the interrupted
+ * execution state, reset the current state as if a spontaneous call
+ * happened, and push a fresh exec_state (with its own zeroed cost
+ * counters) for the handler. The handler's call-stack bottom is
+ * recorded so post_signal can unwind back exactly to it. */
+void CLG_(pre_signal)(ThreadId tid, Int sigNum, Bool alt_stack)
+{
+ exec_state *es;
+
+ CLG_DEBUG(0, ">> pre_signal(TID %d, sig %d, alt_st %s)\n",
+ tid, sigNum, alt_stack ? "yes":"no");
+
+ /* switch to the thread the handler runs in */
+ CLG_(run_thread)(tid);
+
+ /* save current execution state */
+ exec_state_save();
+
+ /* setup current state for a spontaneous call */
+ CLG_(init_exec_state)( &CLG_(current_state) );
+ CLG_(push_cxt)(0);
+
+ /* setup new cxtinfo struct for this signal handler */
+ es = push_exec_state(sigNum);
+ CLG_(init_cost)( CLG_(sets).full, es->cost);
+ CLG_(current_state).cost = es->cost;
+ es->call_stack_bottom = CLG_(current_call_stack).sp;
+
+ CLG_(current_state).sig = sigNum;
+}
+
+/* Run post-signal if the stackpointer for call stack is at
+ * the bottom in current exec state (e.g. a signal handler)
+ *
+ * Called from CLG_(pop_call_stack)
+ */
+/* Only valid while inside a signal handler (current sig > 0). */
+void CLG_(run_post_signal_on_call_stack_bottom)()
+{
+ exec_state* es = top_exec_state();
+ CLG_ASSERT(es != 0);
+ CLG_ASSERT(CLG_(current_state).sig >0);
+
+ /* handler's own frames fully popped => handler is returning */
+ if (CLG_(current_call_stack).sp == es->call_stack_bottom)
+ CLG_(post_signal)( CLG_(current_tid), CLG_(current_state).sig );
+}
+
+/* Leave the signal handler for sigNum in thread tid: unwind any
+ * remaining handler call-stack frames, undo the active count of the
+ * function interrupted-into, roll the handler's cost into the
+ * thread's sighandler_cost accumulator, then pop the handler's
+ * exec_state and restore the interrupted execution state. */
+void CLG_(post_signal)(ThreadId tid, Int sigNum)
+{
+ exec_state* es;
+ UInt fn_number, *pactive;
+
+ CLG_DEBUG(0, ">> post_signal(TID %d, sig %d)\n",
+ tid, sigNum);
+
+ CLG_ASSERT(tid == CLG_(current_tid));
+ CLG_ASSERT(sigNum == CLG_(current_state).sig);
+
+ /* Unwind call stack of this signal handler.
+ * This should only be needed at finalisation time
+ */
+ es = top_exec_state();
+ CLG_ASSERT(es != 0);
+ while(CLG_(current_call_stack).sp > es->call_stack_bottom)
+ CLG_(pop_call_stack)();
+
+ if (CLG_(current_state).cxt) {
+ /* correct active counts */
+ fn_number = CLG_(current_state).cxt->fn[0]->number;
+ pactive = CLG_(get_fn_entry)(fn_number);
+ (*pactive)--;
+ CLG_DEBUG(0, " set active count of %s back to %d\n",
+ CLG_(current_state).cxt->fn[0]->name, *pactive);
+ }
+
+ if (CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) {
+ /* set fn_stack_top back.
+ * top can point to 0 if nothing was executed in the signal handler;
+ * this is possible at end on unwinding handlers.
+ */
+ if (*(CLG_(current_fn_stack).top) != 0) {
+ CLG_(current_fn_stack).top--;
+ CLG_ASSERT(*(CLG_(current_fn_stack).top) == 0);
+ }
+ if (CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom)
+ CLG_(current_fn_stack).top--;
+ }
+
+ /* sum up costs */
+ CLG_ASSERT(CLG_(current_state).cost == es->cost);
+ CLG_(add_and_zero_cost)( CLG_(sets).full,
+ thread[CLG_(current_tid)]->sighandler_cost,
+ CLG_(current_state).cost );
+
+ /* restore previous context */
+ es->sig = -1;
+ current_states.sp--;
+ es = top_exec_state();
+ CLG_(current_state).sig = es->sig;
+ exec_state_restore();
+
+ /* There is no way to reliably get the thread ID we are switching to
+ * after this handler returns. So we sync with actual TID at start of
+ * CLG_(setup_bb)(), which should be the next for callgrind.
+ */
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Execution states in a thread & signal handlers ---*/
+/*------------------------------------------------------------*/
+
+/* Each thread can be interrupted by a signal handler, and they
+ * themselves again. But as there's no scheduling among handlers
+ * of the same thread, we don't need additional stacks.
+ * So storing execution contexts and
+ * adding separators in the call stack (needed so that normal and handler
+ * functions are not intermixed in contexts) should be enough.
+ */
+
+/* not initialized: call_stack_bottom, sig */
+void CLG_(init_exec_state)(exec_state* es)
+{
+ es->collect = CLG_(clo).collect_atstart;
+ es->cxt = 0;
+ es->jmps_passed = 0;
+ es->bbcc = 0;
+ es->nonskipped = 0;
+}
+
+
+/* Allocate a fresh exec_state for signal sigNum (0 = not a signal
+ * handler context), including its own zeroed cost vector. */
+static exec_state* new_exec_state(Int sigNum)
+{
+ exec_state* es;
+ es = (exec_state*) CLG_MALLOC(sizeof(exec_state));
+
+ /* allocate real cost space: needed as incremented by
+ * simulation functions */
+ es->cost = CLG_(get_eventset_cost)(CLG_(sets).full);
+ CLG_(init_cost)( CLG_(sets).full, es->cost );
+
+ CLG_(init_exec_state)(es);
+ es->sig = sigNum;
+ es->call_stack_bottom = 0;
+
+ return es;
+}
+
+/* Initialize an exec stack: slot 0 gets a real exec_state (sig 0,
+ * i.e. normal execution); handler slots are allocated lazily by
+ * push_exec_state when a signal actually arrives. */
+void CLG_(init_exec_stack)(exec_stack* es)
+{
+ Int i;
+
+ /* The first element is for the main thread */
+ es->entry[0] = new_exec_state(0);
+ for(i=1;i<MAX_SIGHANDLERS;i++)
+ es->entry[i] = 0;
+ es->sp = 0;
+}
+
+/* Save the global exec stack into dst (shallow copy: only the
+ * entry pointers and sp are copied, not the exec_states). */
+void CLG_(copy_current_exec_stack)(exec_stack* dst)
+{
+ Int i;
+
+ dst->sp = current_states.sp;
+ for(i=0;i<MAX_SIGHANDLERS;i++)
+ dst->entry[i] = current_states.entry[i];
+}
+
+/* Load the global exec stack from dst (shallow copy; inverse of
+ * CLG_(copy_current_exec_stack)). */
+void CLG_(set_current_exec_stack)(exec_stack* dst)
+{
+ Int i;
+
+ current_states.sp = dst->sp;
+ for(i=0;i<MAX_SIGHANDLERS;i++)
+ current_states.entry[i] = dst->entry[i];
+}
+
+
+/* Get top context info struct of current thread */
+static
+exec_state* top_exec_state(void)
+{
+ Int sp = current_states.sp;
+ exec_state* es;
+
+ CLG_ASSERT((sp >= 0) && (sp < MAX_SIGHANDLERS));
+ es = current_states.entry[sp];
+ CLG_ASSERT(es != 0);
+ return es;
+}
+
+/* Allocates a free context info structure for a new entered
+ * signal handler, putting it on the context stack.
+ * Returns a pointer to the structure.
+ */
+static exec_state* push_exec_state(int sigNum)
+{
+ Int sp;
+ exec_state* es;
+
+ current_states.sp++;
+ sp = current_states.sp;
+
+ CLG_ASSERT((sigNum > 0) && (sigNum <= _VKI_NSIG));
+ CLG_ASSERT((sp > 0) && (sp < MAX_SIGHANDLERS));
+ es = current_states.entry[sp];
+ if (!es) {
+ es = new_exec_state(sigNum);
+ current_states.entry[sp] = es;
+ }
+ else
+ es->sig = sigNum;
+
+ return es;
+}
+
+/* Save current context to top cxtinfo struct */
+static
+exec_state* exec_state_save(void)
+{
+ exec_state* es = top_exec_state();
+
+ es->cxt = CLG_(current_state).cxt;
+ es->collect = CLG_(current_state).collect;
+ es->jmps_passed = CLG_(current_state).jmps_passed;
+ es->bbcc = CLG_(current_state).bbcc;
+ es->nonskipped = CLG_(current_state).nonskipped;
+
+ CLG_DEBUGIF(1) {
+ CLG_DEBUG(1, " cxtinfo_save(sig %d): collect %s, jmps_passed %d\n",
+ es->sig, es->collect ? "Yes": "No", es->jmps_passed);
+ CLG_(print_bbcc)(-9, es->bbcc, False);
+ CLG_(print_cost)(-9, CLG_(sets).full, es->cost);
+ }
+
+ /* signal number does not need to be saved */
+ CLG_ASSERT(CLG_(current_state).sig == es->sig);
+
+ return es;
+}
+
+/* Restore CLG_(current_state) from the top exec_state (inverse of
+ * exec_state_save; unlike save, this also re-points cost and sig). */
+static
+exec_state* exec_state_restore(void)
+{
+ exec_state* es = top_exec_state();
+
+ CLG_(current_state).cxt = es->cxt;
+ CLG_(current_state).collect = es->collect;
+ CLG_(current_state).jmps_passed = es->jmps_passed;
+ CLG_(current_state).bbcc = es->bbcc;
+ CLG_(current_state).nonskipped = es->nonskipped;
+ CLG_(current_state).cost = es->cost;
+ CLG_(current_state).sig = es->sig;
+
+ CLG_DEBUGIF(1) {
+ CLG_DEBUG(1, " exec_state_restore(sig %d): collect %s, jmps_passed %d\n",
+ es->sig, es->collect ? "Yes": "No", es->jmps_passed);
+ CLG_(print_bbcc)(-9, es->bbcc, False);
+ CLG_(print_cxt)(-9, es->cxt, 0);
+ CLG_(print_cost)(-9, CLG_(sets).full, es->cost);
+ }
+
+ return es;
+}
+