Callgrind: use jmpkind from VEX for side exits.

To detect calls and returns, Callgrind's heuristic
starts from the jump kind that VEX reports for
a control flow changing instruction. However, for
side exits, it always assumed a (conditional) jump,
which holds true for x86 but not, e.g., for ARM.

This fixes Callgrind to use the jump kind found
by VEX for all exits, which should help make
Callgrind work on ARM. It also moves the check
whether a boring jump is actually a fall-through
to instrumentation time. This changes (fixes) the
result for indirect jumps to the next instruction,
which should not be classified as fall-through
(though this case is probably very rare).

This patch introduces a dedicated enum for jump kinds
in Callgrind. This is less confusing than misusing
the VEX jump kind type, as Callgrind wants
to distinguish BB fall-throughs from real jumps
(both of which are Ijk_Boring in VEX).
Also, setup_bbcc now stores separately whether the
jump is conditional or not.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12269 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c
index 1f76342..374ed2a 100644
--- a/callgrind/bbcc.c
+++ b/callgrind/bbcc.c
@@ -555,7 +555,9 @@
   Addr sp;
   BB* last_bb;
   ThreadId tid;
-  Int jmpkind, passed = 0, csp;
+  ClgJumpKind jmpkind;
+  Bool isConditionalJump;
+  Int passed = 0, csp;
   Bool ret_without_call = False;
   Int popcount_on_return = 1;
 
@@ -581,16 +583,8 @@
   if (last_bb) {
       passed = CLG_(current_state).jmps_passed;
       CLG_ASSERT(passed <= last_bb->cjmp_count);
-      if (passed == last_bb->cjmp_count) {
-	  jmpkind = last_bb->jmpkind;
-
-	  /* VEX always gives a Boring jump kind also when passed trough */
-	  if ((jmpkind == Ijk_Boring) &&
-	      (last_bb->offset + last_bb->instr_len == bb->offset))
-	      jmpkind = JmpNone;
-      }
-      else
-	  jmpkind = JmpCond;
+      jmpkind = last_bb->jmp[passed].jmpkind;
+      isConditionalJump = (passed < last_bb->cjmp_count);
 
       /* if we are in a function which is skipped in the call graph, we
        * do not increment the exe counter to produce cost (if simulation off),
@@ -612,7 +606,8 @@
       }
   }
   else {
-      jmpkind = JmpNone;
+      jmpkind = jk_None;
+      isConditionalJump = False;
   }
 
   /* Manipulate JmpKind if needed, only using BB specific info */
@@ -620,7 +615,7 @@
   csp = CLG_(current_call_stack).sp;
 
   /* A return not matching the top call in our callstack is a jump */
-  if ( (jmpkind == Ijk_Ret) && (csp >0)) {
+  if ( (jmpkind == jk_Return) && (csp >0)) {
       Int csp_up = csp-1;      
       call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);
 
@@ -650,14 +645,14 @@
 	  }
       }
       if (popcount_on_return == 0) {
-	  jmpkind = Ijk_Boring;
+	  jmpkind = jk_Jump;
 	  ret_without_call = True;
       }
   }
 
   /* Should this jump be converted to call or pop/call ? */
-  if (( jmpkind != Ijk_Ret) &&
-      ( jmpkind != Ijk_Call) && last_bb) {
+  if (( jmpkind != jk_Return) &&
+      ( jmpkind != jk_Call) && last_bb) {
 
     /* We simulate a JMP/Cont to be a CALL if
      * - jump is in another ELF object or section kind
@@ -701,30 +696,32 @@
 	    }
 	}
 
-	jmpkind = Ijk_Call;
+	jmpkind = jk_Call;
 	call_emulation = True;
     }
   }
 
-  if (jmpkind == Ijk_Call)
+  if (jmpkind == jk_Call)
     skip = CLG_(get_fn_node)(bb)->skip;
 
   CLG_DEBUGIF(1) {
-      if (jmpkind == JmpCond)
-	  VG_(printf)("Conditional");
-      else if (jmpkind == JmpNone)
-	  VG_(printf)("None");
-      else
-	  ppIRJumpKind( jmpkind );
-
-      VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
-		  last_bb ? bb_jmpaddr(last_bb) : 0,
-		  bb_addr(bb), sp);
+    if (isConditionalJump)
+      VG_(printf)("Cond-");
+    switch(jmpkind) {
+    case jk_None:   VG_(printf)("Fall-through"); break;
+    case jk_Jump:   VG_(printf)("Jump"); break;
+    case jk_Call:   VG_(printf)("Call"); break;
+    case jk_Return: VG_(printf)("Return"); break;
+    default:        tl_assert(0);
+    }
+    VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
+		last_bb ? bb_jmpaddr(last_bb) : 0,
+		bb_addr(bb), sp);
   }
 
   /* Handle CALL/RET and update context to get correct BBCC */
   
-  if (jmpkind == Ijk_Ret) {
+  if (jmpkind == jk_Return) {
     
     if ((csp == 0) || 
 	((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
@@ -745,10 +742,10 @@
     Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
     if (unwind_count > 0) {
       /* if unwinding was done, this actually is a return */
-      jmpkind = Ijk_Ret;
+      jmpkind = jk_Return;
     }
     
-    if (jmpkind == Ijk_Call) {
+    if (jmpkind == jk_Call) {
       delayed_push = True;
 
       csp = CLG_(current_call_stack).sp;
@@ -848,8 +845,7 @@
 			 bbcc, sp, skip);
   }
 
-  if (CLG_(clo).collect_jumps &&
-      ((jmpkind == JmpCond) || (jmpkind == Ijk_Boring))) {
+  if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {
     
     /* Handle conditional jumps followed, i.e. trace arcs
      * This uses JCC structures, too */
@@ -857,15 +853,15 @@
     jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
     CLG_ASSERT(jcc != 0);
     // Change from default, and check if already changed
-    if (jcc->jmpkind == Ijk_Call)
-      jcc->jmpkind = jmpkind;
+    if (jcc->jmpkind == jk_Call)
+      jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
     else {
 	// FIXME: Why can this fail?
 	// CLG_ASSERT(jcc->jmpkind == jmpkind);
     }
     
     jcc->call_counter++;
-    if (jmpkind == JmpCond)
+    if (isConditionalJump)
       CLG_(stat).jcnd_counter++;
     else
       CLG_(stat).jump_counter++;
diff --git a/callgrind/callstack.c b/callgrind/callstack.c
index 59dc117..092acb2 100644
--- a/callgrind/callstack.c
+++ b/callgrind/callstack.c
@@ -235,8 +235,14 @@
 
     /* return address is only is useful with a real call;
      * used to detect RET w/o CALL */
-    ret_addr = (from->bb->jmpkind == Ijk_Call) ?
-	bb_addr(from->bb) + from->bb->instr_len : 0;
+    if (from->bb->jmp[jmp].jmpkind == jk_Call) {
+      UInt instr = from->bb->jmp[jmp].instr;
+      ret_addr = bb_addr(from->bb) +
+	from->bb->instr[instr].instr_offset +
+	from->bb->instr[instr].instr_size;
+    }
+    else
+      ret_addr = 0;
 
     /* put jcc on call stack */
     current_entry->jcc = jcc;
diff --git a/callgrind/dump.c b/callgrind/dump.c
index 7db9ec5..9995b4a 100644
--- a/callgrind/dump.c
+++ b/callgrind/dump.c
@@ -669,7 +669,7 @@
 	target.file = last->file;
     }
 
-    if ((jcc->jmpkind == JmpCond) || (jcc->jmpkind == Ijk_Boring)) {
+    if ((jcc->jmpkind == jk_CondJump) || (jcc->jmpkind == jk_Jump)) {
 	    
       /* this is a JCC for a followed conditional or boring jump. */
       CLG_ASSERT(CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost));
@@ -703,7 +703,7 @@
 		print_fn(fd, outbuf, "jfn", jcc->to->cxt->fn[0]);
 	}
 	    
-	if (jcc->jmpkind == JmpCond) {
+	if (jcc->jmpkind == jk_CondJump) {
 	    /* format: jcnd=<followed>/<executions> <target> */
 	    VG_(sprintf)(outbuf, "jcnd=%llu/%llu ",
 			 jcc->call_counter, ecounter);
@@ -834,7 +834,7 @@
     if (bb->jmp[jmp].instr == instr) {
 	jcc_count=0;
 	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from)
-	    if (((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
+	    if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
 		(!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
 	      jcc_count++;
 
@@ -848,7 +848,7 @@
 	    fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
 	    something_written = True;
 	    for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
-		if (((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
+		if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
 		    (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
 		    fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
 	    }
@@ -867,7 +867,7 @@
   jcc_count = 0;
   for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
       /* yes, if JCC only counts jmp arcs or cost >0 */
-      if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
+      if ( ((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
 	   (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
 	  jcc_count++;
   }
@@ -901,7 +901,7 @@
     if (jcc_count > 0)
 	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
 	    CLG_ASSERT(jcc->jmp == jmp);
-	    if ( ((jcc->jmpkind != Ijk_Call) && (jcc->call_counter >0)) ||
+	    if ( ((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
 		 (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
 	  
 		fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
diff --git a/callgrind/global.h b/callgrind/global.h
index 5f06917..63f6e10 100644
--- a/callgrind/global.h
+++ b/callgrind/global.h
@@ -229,6 +229,18 @@
 typedef ULong* FullCost; /* Simulator + User */
 
 
+/* The types of control flow changes that can happen between
+ * execution of two BBs in a thread.
+ */
+typedef enum {
+  jk_None = 0,   /* no explicit change by a guest instruction */
+  jk_Jump,       /* regular jump */
+  jk_Call,
+  jk_Return,
+  jk_CondJump    /* conditional jump taken (only used as jCC type) */
+} ClgJumpKind;
+
+
 /* JmpCall cost center
  * for subroutine call (from->bb->jmp_addr => to->bb->addr)
  *
@@ -248,11 +260,9 @@
  * After updating, <last> is set to current event counters. Thus,
  * events are not counted twice for recursive calls (TODO: True?)
  */
-#define JmpNone (Ijk_Boring+30)
-#define JmpCond (Ijk_Boring+31)
 
 struct _jCC {
-  Int  jmpkind;     /* JmpCall, JmpBoring, JmpCond */
+  ClgJumpKind jmpkind; /* jk_Call, jk_Jump, jk_CondJump */
   jCC* next_hash;   /* for hash entry chain */
   jCC* next_from;   /* next JCC from a BBCC */
   BBCC *from, *to;  /* call arc from/to this BBCC */
@@ -276,13 +286,14 @@
 };
 
 
+
 /*
- * Info for a conditional jump in a basic block
+ * Info for a side exit in a BB
  */
 typedef struct _CJmpInfo CJmpInfo;
 struct _CJmpInfo {
-    UInt instr; /* instruction index in this basic block */
-    Bool skip;   /* Cond.Jumps to next instruction should be ignored */
+  UInt instr;          /* instruction index for BB.instr array */
+  ClgJumpKind jmpkind; /* jump kind when leaving BB at this side exit */
 };
 
 
@@ -319,11 +330,10 @@
   BBCC*      last_bbcc;  /* Temporary: Cached for faster access (LRU) */
 
   /* filled by CLG_(instrument) if not seen before */
-  UInt       cjmp_count;  /* number of conditional exits */
+  UInt       cjmp_count;  /* number of side exits */
   CJmpInfo*  jmp;         /* array of info for condition jumps,
 			   * allocated directly after this struct */
-  Int        jmpkind;    /* remember jump kind of final exit */
-  Bool       cjmp_inverted; /* condition of last cond.jump can be inverted by VEX */
+  Bool       cjmp_inverted; /* is last side exit actually fall through? */
 
   UInt       instr_len;
   UInt       cost_count;
@@ -357,12 +367,12 @@
 
 
 /*
- * Info for a conditional jump in a basic block
+ * Cost info for a side exit from a BB
  */
 typedef struct _JmpData JmpData;
 struct _JmpData {
     ULong ecounter; /* number of times the BB was left at this exit */
-    jCC*  jcc_list;  /* JCCs for Cond.Jumps from this exit */
+    jCC*  jcc_list; /* JCCs used for this exit */
 };
 
 
diff --git a/callgrind/jumps.c b/callgrind/jumps.c
index f40665b..c623811 100644
--- a/callgrind/jumps.c
+++ b/callgrind/jumps.c
@@ -152,7 +152,7 @@
    jcc->from      = from;
    jcc->jmp       = jmp;
    jcc->to        = to;
-   jcc->jmpkind   = Ijk_Call;
+   jcc->jmpkind   = jk_Call;
    jcc->call_counter = 0;
    jcc->cost = 0;
 
diff --git a/callgrind/main.c b/callgrind/main.c
index 22bff9e..62ca8c9 100644
--- a/callgrind/main.c
+++ b/callgrind/main.c
@@ -1146,8 +1146,20 @@
 
 	    CLG_ASSERT(clgs.ii_index>0);
 	    if (!clgs.seen_before) {
-		clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
-		clgs.bb->jmp[cJumps].skip = False;
+	      ClgJumpKind jk;
+
+	      if      (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
+	      else if (st->Ist.Exit.jk == Ijk_Ret)  jk = jk_Return;
+	      else {
+		if (IRConst2Addr(st->Ist.Exit.dst) ==
+		    origAddr + curr_inode->instr_offset + curr_inode->instr_size)
+		  jk = jk_None;
+		else
+		  jk = jk_Jump;
+	      }
+
+	      clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
+	      clgs.bb->jmp[cJumps].jmpkind = jk;
 	    }
 
 	    /* Update global variable jmps_passed before the jump
@@ -1212,18 +1224,45 @@
    CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
    CLG_ASSERT(clgs.bb->instr_count = clgs.ii_index);
 
-   /* This stores the instr of the call/ret at BB end */
-   clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
+   /* Info for final exit from BB */
+   {
+     ClgJumpKind jk;
+
+     if      (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
+     else if (sbIn->jumpkind == Ijk_Ret)  jk = jk_Return;
+     else {
+       jk = jk_Jump;
+       if ((sbIn->next->tag == Iex_Const) &&
+	   (IRConst2Addr(sbIn->next->Iex.Const.con) ==
+	    origAddr + clgs.instr_offset))
+	 jk = jk_None;
+     }
+     clgs.bb->jmp[cJumps].jmpkind = jk;
+     /* Instruction index of the call/ret at BB end
+      * (it is wrong for fall-through, but does not matter) */
+     clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
+   }
+
+   /* swap information of last exit with final exit if inverted */
+   if (clgs.bb->cjmp_inverted) {
+     ClgJumpKind jk;
+     UInt instr;
+
+     jk = clgs.bb->jmp[cJumps].jmpkind;
+     clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
+     clgs.bb->jmp[cJumps-1].jmpkind = jk;
+     instr = clgs.bb->jmp[cJumps].instr;
+     clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
+     clgs.bb->jmp[cJumps-1].instr = instr;
+   }
 
    if (clgs.seen_before) {
        CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
        CLG_ASSERT(clgs.bb->instr_len = clgs.instr_offset);
-       CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
    }
    else {
        clgs.bb->cost_count = update_cost_offsets(&clgs);
        clgs.bb->instr_len = clgs.instr_offset;
-       clgs.bb->jmpkind = sbIn->jumpkind;
    }
 
    CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",