Add exp-bbv to the tool-suite.  I'm seeing a couple of amd64-linux test
failures, but they can be fixed up in-repo.  This resolves bug 198395.

git-svn-id: svn:// a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/exp-bbv/bbv_main.c b/exp-bbv/bbv_main.c
new file mode 100644
index 0000000..b5db191
--- /dev/null
+++ b/exp-bbv/bbv_main.c
@@ -0,0 +1,633 @@
+//--- BBV: a SimPoint basic block vector generator      bbv_main.c ---*/
+/*
+   This file is part of BBV, a Valgrind tool for generating SimPoint
+   basic block vectors.
+   Copyright (C) 2006-2009 Vince Weaver
+      vince _at_
+   pcfile code is Copyright (C) 2006-2009 Oriol Prat
+      oriol.prat _at _
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+   The GNU General Public License is contained in the file COPYING.
+*/
+#include "pub_tool_basics.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_options.h"    /* command line options */
+#include "pub_tool_vki.h"        /* vki_stat */
+#include "pub_tool_libcbase.h"   /* VG_(strlen) */
+#include "pub_tool_libcfile.h"   /* VG_(write) */
+#include "pub_tool_libcprint.h"  /* VG_(printf) */
+#include "pub_tool_libcassert.h" /* VG_(exit) */
+#include "pub_tool_mallocfree.h" /* plain_free */
+#include "pub_tool_machine.h"    /* VG_(fnptr_to_fnentry) */
+#include "pub_tool_debuginfo.h"  /* VG_(get_fnname) */
+#include "pub_tool_oset.h"       /* ordered set stuff */
+   /* Instruction special cases: flag bits that get_inst_type() ORs
+      into its return value */
+#define REP_INSTRUCTION   0x1
+   /* Interval variables: interval_size is the number of counted
+      instructions per output interval (settable via --interval-size) */
+#define DEFAULT_GRAIN_SIZE 100000000  /* 100 million by default */
+static Int interval_size=DEFAULT_GRAIN_SIZE;
+   /* Filenames: the clo_* strings are the templates given on the
+      command line (or these defaults); pc_out_file and bb_out_file
+      hold the results of VG_(expand_file_name) on those templates */
+static UChar *clo_bb_out_file="bb.out.%p";
+static UChar *clo_pc_out_file="pc.out.%p";
+static UChar *pc_out_file=NULL;
+static UChar *bb_out_file=NULL;
+   /* Output parameters */
+static Bool instr_count_only=False;   /* set by --instr-count-only: skip the per-interval vectors */
+static Bool generate_pc_file=False;   /* set when --pc-out-file is given: dump the PC file at exit */
+   /* Write buffer: shared scratch space used to format output text */
+static UChar buf[1024];
+   /* Global values */
+static OSet* instr_info_table;  /* table that holds the basic block info */
+static Int block_num=1;         /* global next block number */
+static Int current_thread=0;    /* index into bbv_thread, set by bbv_thread_called() */
+static Int allocated_threads=1; /* number of entries allocated in bbv_thread */
+struct thread_info *bbv_thread=NULL;  /* per-thread state array, grown by allocate_new_thread() */
+   /* Per-thread variables: one struct per slot of the bbv_thread array */
+struct thread_info {
+   ULong dyn_instr;         /* Instructions counted so far in the current interval */
+   ULong total_instr;       /* Total retired instruction count   */
+   Addr last_rep_addr;      /* Address passed to the most recent per_instruction_BBV_rep() call */
+   ULong rep_count;         /* per_instruction_BBV_rep() calls since the last close_out_reps() */
+   ULong global_rep_count;  /* Sum of all rep_count values folded in by close_out_reps() */
+   ULong unique_rep_count;  /* Number of times close_out_reps() has run */
+   ULong fldcw_count;       /* fldcw count */
+   Int bbtrace_fd;          /* Output file descriptor; initialised to -1 until the file is opened */
+struct BB_info {                 /* One node of instr_info_table per translated block address */
+   Addr       BB_addr;           /* used as key, must be first           */
+   Int        n_instrs;          /* instructions in the basic block (bbv_instrument stores 1) */
+   Int        block_num;         /* unique block identifier              */
+   Int        *inst_counter;     /* per-thread array: instructions counted for this block in
+                                    the current interval; an entry is zeroed after
+                                    handle_overflow() writes it out       */
+   Bool       is_entry;          /* is this block a function entry point */
+   UChar      fn_name[FUNCTION_NAME_LENGTH];  /* Function block is in    */
+   /* Dump the optional PC file, which contains basic block number to */
+   /*   instruction address and function name mappings.               */
+   /* One line is written per entry of instr_info_table, in the form  */
+   /*   F:<block number>:<block address in hex>:<function name>       */
+   /* The file name is the expansion of the --pc-out-file template.   */
+   /* Exits the tool if the file cannot be created.                   */
+static void dumpPcFile(void)
+{
+   struct BB_info   *bb_elem;
+   Int              pctrace_fd;
+   SysRes           sres;
+
+   pc_out_file =
+          VG_(expand_file_name)("--pc-out-file", clo_pc_out_file);
+
+   sres = VG_(open)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
+                              VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
+   if (sr_isError(sres)) {
+      VG_UMSG("Error: cannot create pc file %s\n", pc_out_file);
+      VG_(exit)(1);
+   }
+   pctrace_fd = sr_Res(sres);
+
+      /* Loop through the table, printing the number, address, */
+      /*    and function name for each basic block             */
+   VG_(OSetGen_ResetIter)(instr_info_table);
+   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
+      VG_(write)(pctrace_fd,"F",1);
+         /* Print the address at full word width: casting it to Int */
+         /* and using %x dropped the upper 32 bits on 64-bit targets */
+      VG_(sprintf)( buf,":%d:%lx:%s\n",
+                       bb_elem->block_num,
+                       (UWord)bb_elem->BB_addr,
+                       bb_elem->fn_name);
+      VG_(write)(pctrace_fd, (void*)buf, VG_(strlen)(buf));
+   }
+
+   VG_(close)(pctrace_fd);
+}
+   /* Create (or truncate) the basic block vector output file for the */
+   /*   given thread and return its file descriptor.                  */
+   /*    thread_num = thread whose file is wanted; thread 1 uses      */
+   /*                 bb_out_file as is, any other thread gets        */
+   /*                 ".<thread_num>" appended to it                  */
+   /* Exits the tool if the file cannot be created.                   */
+static Int open_tracefile(Int thread_num)
+{
+   SysRes  sres;
+   UChar   temp_string[2048];
+
+      /* For thread 1, don't append any thread number  */
+      /* This lets the single-thread case not have any */
+      /* extra values appended to the file name.       */
+      /* VG_(snprintf) bounds the write to the buffer  */
+      /* and always NUL-terminates it; the previous    */
+      /* strncpy/sprintf pair guaranteed neither.      */
+   if (thread_num==1) {
+      VG_(snprintf)(temp_string, sizeof(temp_string), "%s", bb_out_file);
+   }
+   else {
+      VG_(snprintf)(temp_string, sizeof(temp_string), "%s.%d",
+                    bb_out_file, thread_num);
+   }
+
+   sres = VG_(open)(temp_string, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
+                              VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
+   if (sr_isError(sres)) {
+      VG_UMSG("Error: cannot create bb file %s\n",temp_string);
+      VG_(exit)(1);
+   }
+
+   return sr_Res(sres);
+}
+   /* Called after the current thread's counters have been bumped.    */
+   /* Once dyn_instr has passed interval_size, emit one "T" line      */
+   /* listing every block with a non-zero count for this thread       */
+   /* (unless only instruction counts were requested), zero those     */
+   /* counts, and take one interval's worth off dyn_instr.            */
+static void handle_overflow(void)
+{
+   struct thread_info *cur = &bbv_thread[current_thread];
+   struct BB_info     *bb;
+
+      /* Interval not yet exceeded: nothing to do */
+   if (cur->dyn_instr <= interval_size) {
+      return;
+   }
+
+   if (!instr_count_only) {
+
+         /* Open the output file lazily, on first use */
+      if (cur->bbtrace_fd < 0) {
+         cur->bbtrace_fd = open_tracefile(current_thread);
+      }
+
+         /* Start a new interval record in the bb.out file */
+      VG_(write)(cur->bbtrace_fd, "T", 1);
+
+      VG_(OSetGen_ResetIter)(instr_info_table);
+      for (bb = VG_(OSetGen_Next)(instr_info_table);
+           bb != NULL;
+           bb = VG_(OSetGen_Next)(instr_info_table)) {
+
+         Int *slot = &bb->inst_counter[current_thread];
+
+            /* Blocks not executed in this interval are left out */
+         if (*slot == 0) {
+            continue;
+         }
+
+         VG_(sprintf)( buf, ":%d:%d   ", bb->block_num, *slot);
+         VG_(write)(cur->bbtrace_fd, (void*)buf, VG_(strlen)(buf));
+         *slot = 0;
+      }
+
+      VG_(write)(cur->bbtrace_fd, "\n", 1);
+   }
+
+   cur->dyn_instr -= interval_size;
+}
+   /* Fold the current thread's pending rep_count into its totals: */
+   /* add it to global_rep_count, count one more unique rep, and   */
+   /* reset rep_count to zero.                                     */
+static void close_out_reps(void)
+{
+   struct thread_info *cur = &bbv_thread[current_thread];
+
+   cur->global_rep_count += cur->rep_count;
+   cur->unique_rep_count += 1;
+   cur->rep_count = 0;
+}
+   /* Helper invoked for every ordinary instruction.                  */
+   /*    bbInfo = table entry of the block being executed (non-NULL)  */
+   /* Counts one instruction, or two when a rep sequence was still    */
+   /* pending and has to be closed out first.                         */
+static VG_REGPARM(1) void per_instruction_BBV(struct BB_info *bbInfo)
+{
+   struct thread_info *cur;
+   Int                retired;
+
+   tl_assert(bbInfo);
+
+   cur     = &bbv_thread[current_thread];
+   retired = 1;
+
+      /* A rep finished but its count was never folded in */
+   if (cur->rep_count != 0) {
+      retired = 2;
+      close_out_reps();
+   }
+
+   bbInfo->inst_counter[current_thread] += retired;
+   cur->total_instr += retired;
+   cur->dyn_instr   += retired;
+
+   handle_overflow();
+}
+   /* Helper invoked for instructions carrying a rep prefix.          */
+   /*    addr = address of the rep-prefixed instruction               */
+   /* Every call bumps rep_count.  When the address differs from the  */
+   /* previous rep instruction and a count is still pending, that     */
+   /* earlier rep is closed out and counted as one instruction.       */
+static VG_REGPARM(1) void per_instruction_BBV_rep(Addr addr)
+{
+   struct thread_info *cur = &bbv_thread[current_thread];
+
+      /* Back-to-back rep instructions at different addresses */
+   if (cur->last_rep_addr != addr) {
+      if (cur->rep_count != 0) {
+         close_out_reps();
+         cur->total_instr += 1;
+         cur->dyn_instr   += 1;
+      }
+      cur->last_rep_addr = addr;
+   }
+
+   cur->rep_count += 1;
+}
+   /* Helper invoked for fldcw instructions.                          */
+   /*    bbInfo = table entry of the block being executed (non-NULL)  */
+   /* Same accounting as per_instruction_BBV, plus one more fldcw     */
+   /* recorded for the current thread.                                */
+static VG_REGPARM(1) void per_instruction_BBV_fldcw(struct BB_info *bbInfo)
+{
+   struct thread_info *cur;
+   Int                retired;
+
+   tl_assert(bbInfo);
+
+   cur     = &bbv_thread[current_thread];
+   retired = 1;
+
+      /* A rep finished but its count was never folded in */
+   if (cur->rep_count != 0) {
+      retired = 2;
+      close_out_reps();
+   }
+
+      /* Keep the separate tally of fldcw instructions */
+   cur->fldcw_count += 1;
+
+   bbInfo->inst_counter[current_thread] += retired;
+   cur->total_instr += retired;
+   cur->dyn_instr   += retired;
+
+   handle_overflow();
+}
+   /* Check if the instruction pointed to is one that needs */
+   /*   special handling.  If so, set a bit in the return   */
+   /*   value indicating what type.
+          len  = length in bytes of the instruction
+          addr = address of the instruction's first byte; the
+                 bytes are read directly through this address
+        Returns a mask of REP_INSTRUCTION / FLDCW_INSTRUCTION
+        bits, or 0 if neither applies.                      */
+static Int get_inst_type(Int len, Addr addr)
+   int result=0;
+#if defined(VGA_x86) || defined(VGA_amd64)
+   unsigned char *inst_pointer;
+   unsigned char inst_byte;
+   int i,possible_rep;
+   /* rep prefixed instructions are counted as one instruction on */
+   /*     x86 processors and must be handled as a special case    */
+   /* Also, the rep prefix is re-used as part of the opcode for   */
+   /*     SSE instructions.  So we need to specifically check for */
+   /*     the following: movs, cmps, scas, lods, stos, ins, outs  */
+   inst_pointer=(unsigned char *)addr;
+   i=0;
+   inst_byte=0;
+   possible_rep=0;
+   while (i<len) {   /* skip leading prefix bytes; stop at the first other byte */
+      inst_byte=*inst_pointer;
+      if ( (inst_byte == 0x67) ||            /* address-size override prefix */
+           (inst_byte == 0x66) ||            /* operand-size override prefix */
+           (inst_byte == 0x48) ) {           /* REX.W (64-bit operand) prefix */
+      } else if ( (inst_byte == 0xf2) ||     /* repne/repnz prefix */
+                  (inst_byte == 0xf3) ) {    /* rep/repe prefix    */
+         possible_rep=1;
+      } else {
+         break;                              /* other byte, exit */
+      }
+      i++;
+      inst_pointer++;
+   }
+   if ( possible_rep &&   /* inst_byte is now the byte the scan stopped on */
+        ( ( (inst_byte >= 0xa4) &&     /* movs,cmps,scas */
+            (inst_byte <= 0xaf) ) ||   /* lods,stos      */
+          ( (inst_byte >= 0x6c) &&
+            (inst_byte <= 0x6f) ) ) ) {  /* ins,outs       */
+      result|=REP_INSTRUCTION;
+   }
+   /* fldcw instructions are double-counted by the hardware       */
+   /*     performance counters on pentium 4 processors so it is   */
+   /*     useful to have that count when doing validation work.   */
+   inst_pointer=(unsigned char *)addr;   /* restart from the first byte: no prefixes are skipped here */
+   if (len>1) {
+         /* FLDCW detection */
+         /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */
+      if ((*inst_pointer==0xd9) &&
+          (*(inst_pointer+1)<0xb0) &&  /* rejects register-form encodings such as fldz
+                                          (0xd9 0xee), which also carry /5 in bits 5..3 */
+          ( (*(inst_pointer+1) & 0x38) == 0x28)) {
+         result|=FLDCW_INSTRUCTION;
+      }
+   }
+   return result;
+   /* Our instrumentation function                                    */
+   /*    closure = callback closure (unused)                          */
+   /*    sbIn = super block to translate                              */
+   /*    layout = guest layout (unused)                               */
+   /*    vge = guest extents (unused)                                 */
+   /*    gWordTy = size of guest word                                 */
+   /*    hWordTy = size of host word                                  */
+   /* Returns a new super block holding every statement of sbIn plus  */
+   /* one dirty helper call in front of each instruction mark.  The   */
+   /* helper is chosen per instruction by get_inst_type().            */
+static IRSB* bbv_instrument ( VgCallbackClosure* closure,
+                             IRSB* sbIn, VexGuestLayout* layout,
+                             VexGuestExtents* vge,
+                             IRType gWordTy, IRType hWordTy )
+{
+   Int      i,n_instrs=1;
+   IRSB     *sbOut;
+   IRStmt   *st;
+   struct BB_info  *bbInfo;
+      /* Host-word sized, to match the Addr key stored at the start of */
+      /* struct BB_info: the table is created without a comparison     */
+      /* function, so the lookup key must have the same width as the   */
+      /* stored key (an Addr64 key only happened to work where its     */
+      /* leading bytes were the low word).                             */
+   Addr     origAddr,ourAddr;
+   IRDirty  *di;
+   IRExpr   **argv, *arg1;
+   Int      regparms,opcode_type;
+
+      /* We don't handle a host/guest word size mismatch */
+   if (gWordTy != hWordTy) {
+      VG_(tool_panic)("host/guest word size mismatch");
+   }
+
+      /* Set up SB */
+   sbOut = deepCopyIRSBExceptStmts(sbIn);
+
+      /* Copy verbatim any IR preamble preceding the first IMark */
+   i = 0;
+   while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) {
+      addStmtToIRSB( sbOut, sbIn->stmts[i] );
+      i++;
+   }
+
+      /* Get the first statement.  The index must still be inside the */
+      /* block: checking only stmts_used > 0 would let stmts[i] be    */
+      /* read past the end when the block contains no IMark at all.   */
+   tl_assert(i < sbIn->stmts_used);
+   st = sbIn->stmts[i];
+
+      /* double check we are at a Mark statement */
+   tl_assert(Ist_IMark == st->tag);
+
+   origAddr=(Addr)st->Ist.IMark.addr;
+
+      /* Get the BB_info */
+   bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr);
+
+   if (bbInfo==NULL) {
+
+         /* BB never translated before (at this address, at least;          */
+         /* could have been unloaded and then reloaded elsewhere in memory) */
+
+         /* allocate and initialize a new basic block structure */
+      bbInfo=VG_(OSetGen_AllocNode)(instr_info_table, sizeof(struct BB_info));
+      bbInfo->BB_addr = origAddr;
+      bbInfo->n_instrs = n_instrs;
+         /* one zeroed counter per currently allocated thread */
+      bbInfo->inst_counter=VG_(calloc)("bbv_instrument",
+                                       allocated_threads,
+                                       sizeof(Int));
+
+         /* assign a unique block number */
+      bbInfo->block_num=block_num;
+      block_num++;
+
+         /* get function name and entry point information */
+      VG_(get_fnname)(origAddr,bbInfo->fn_name,FUNCTION_NAME_LENGTH);
+      bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, bbInfo->fn_name,
+                                                FUNCTION_NAME_LENGTH);
+
+         /* insert structure into table */
+      VG_(OSetGen_Insert)( instr_info_table, bbInfo );
+   }
+
+      /* Iterate through the basic block, putting the original   */
+      /* instructions in place, plus putting a call to one of    */
+      /* the per_instruction_BBV* helpers for each original      */
+      /* instruction                                             */
+
+      /* This is less efficient than only instrumenting the BB   */
+      /* But it gives proper results given the fact that         */
+      /* valgrind uses superblocks (not basic blocks) by default */
+
+   while(i < sbIn->stmts_used) {
+      st=sbIn->stmts[i];
+
+      if (st->tag == Ist_IMark) {
+
+         ourAddr = (Addr)st->Ist.IMark.addr;
+
+         opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr);
+
+         regparms=1;
+            /* default argument: the block's table entry */
+         arg1= mkIRExpr_HWord( (HWord)bbInfo);
+         argv= mkIRExprVec_1(arg1);
+
+         if (opcode_type&REP_INSTRUCTION) {
+               /* the rep helper takes the instruction address instead */
+            arg1= mkIRExpr_HWord(ourAddr);
+            argv= mkIRExprVec_1(arg1);
+            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_rep",
+                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_rep ),
+                                argv);
+         }
+         else if (opcode_type&FLDCW_INSTRUCTION) {
+            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_fldcw",
+                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_fldcw ),
+                                argv);
+         }
+         else {
+            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV",
+                                VG_(fnptr_to_fnentry)( &per_instruction_BBV ),
+                                argv);
+         }
+
+            /* Insert our call */
+         addStmtToIRSB( sbOut,  IRStmt_Dirty(di));
+      }
+
+         /* Insert the original instruction */
+      addStmtToIRSB( sbOut, st );
+
+      i++;
+   }
+
+   return sbOut;
+}
+static struct thread_info *allocate_new_thread(struct thread_info *old,
+                                     Int old_number, Int new_number)   /* Grow the per-thread
+      state so that it holds new_number entries.
+         old        = existing array (may be NULL on the first call)
+         old_number = number of entries already initialised in old
+         new_number = number of entries wanted
+      Entries old_number..new_number-1 are initialised, and the
+      inst_counter array of every block in instr_info_table is grown
+      and zero-filled to match.  Returns the reallocated array.      */
+   struct thread_info *temp;
+   struct BB_info   *bb_elem;
+   Int i;
+   temp=VG_(realloc)("bbv_main.c allocate_threads",
+                     old,
+                     new_number*sizeof(struct thread_info));
+      /* init the new thread */
+      /* We loop in case the new thread is not contiguous */
+   for(i=old_number;i<new_number;i++) {
+      temp[i].last_rep_addr=0;
+      temp[i].dyn_instr=0;
+      temp[i].total_instr=0;
+      temp[i].global_rep_count=0;
+      temp[i].unique_rep_count=0;
+      temp[i].rep_count=0;
+      temp[i].fldcw_count=0;
+      temp[i].bbtrace_fd=-1;   /* no output file opened yet */
+   }
+      /* expand the inst_counter on all allocated basic blocks */
+   VG_(OSetGen_ResetIter)(instr_info_table);
+   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
+      bb_elem->inst_counter =
+                    VG_(realloc)("bbv_main.c inst_counter",
+                                 bb_elem->inst_counter,
+                                 new_number*sizeof(Int));
+      for(i=old_number;i<new_number;i++) {   /* zero only the newly added slots */
+         bb_elem->inst_counter[i]=0;
+      }
+   }
+   return temp;
+   /* Callback registered with VG_(track_start_client_code).          */
+   /*    tid = id of the thread being run                             */
+   /*    nDisp = unused                                               */
+   /* Grows the per-thread state when tid is beyond what has been     */
+   /* allocated, then records tid as the current thread.              */
+static void bbv_thread_called ( ThreadId tid, ULong nDisp )
+{
+   if (tid >= allocated_threads) {
+      Int wanted = tid+1;
+
+      bbv_thread = allocate_new_thread(bbv_thread, allocated_threads, wanted);
+      allocated_threads = wanted;
+   }
+
+   current_thread = tid;
+}
+/*--- Setup                                                        ---*/
+static void bbv_post_clo_init(void)   /* Registered through VG_(basic_tool_funcs)
+      in bbv_pre_clo_init().  Expands the --bb-out-file template into
+      bb_out_file and sets the VEX guest chase threshold to zero.   */
+   bb_out_file =
+          VG_(expand_file_name)("--bb-out-file", clo_bb_out_file);
+      /* Try a closer approximation of basic blocks  */
+      /* This is the same as the command line option */
+      /* --vex-guest-chase-thresh=0                  */
+   VG_(clo_vex_control).guest_chase_thresh = 0;
+   /* Parse the command line options
+         arg = one command-line argument
+      Returns True if arg matched one of this tool's options (the
+      matching variable is updated), False otherwise.            */
+static Bool bbv_process_cmd_line_option(Char* arg)
+   if VG_INT_CLO       (arg, "--interval-size",    interval_size) {}
+   else if VG_STR_CLO  (arg, "--bb-out-file",      clo_bb_out_file) {}
+   else if VG_STR_CLO  (arg, "--pc-out-file",      clo_pc_out_file) {
+      generate_pc_file = True;   /* naming a PC file also turns on its generation */
+   }
+   else if VG_XACT_CLO (arg, "--instr-count-only", instr_count_only, True) {}
+   else {
+      return False;
+   }
+   return True;
+static void bbv_print_usage(void)   /* Print one help line for each option
+      accepted by bbv_process_cmd_line_option()                     */
+   VG_(printf) ("   --bb-out-file=<file>  filename for basic block vector info\n");
+   VG_(printf) ("   --pc-out-file=<file>  filename for basic block addresses and function names\n");
+   VG_(printf) ("   --interval-size=<num> interval size\n");
+   VG_(printf) ("   --instr-count-only    only print total instruction count\n");
+static void bbv_print_debug_usage(void)   /* The tool has no debugging options to list */
+   VG_(printf)("    (none)\n");
+static void bbv_fini(Int exitcode)   /* Registered through VG_(basic_tool_funcs).
+         exitcode = not used here
+      Dumps the PC file if one was requested, then for every thread
+      that counted at least one instruction formats a summary, shows
+      it to the user, appends it to that thread's bb output file and
+      closes the file.                                              */
+   Int i;
+   if (generate_pc_file) {
+      dumpPcFile();
+   }
+   for(i=0;i<allocated_threads;i++) {
+      if (bbv_thread[i].total_instr!=0) {   /* skip thread slots that never ran */
+         VG_(sprintf)(buf,"\n\n"
+                          "# Thread %d\n"
+                          "#   Total intervals: %d (Interval Size %d)\n"
+                          "#   Total instructions: %lld\n"
+                          "#   Total reps: %lld\n"
+                          "#   Unique reps: %lld\n"
+                          "#   Total fldcw instructions: %lld\n\n",
+                i,
+                (Int)(bbv_thread[i].total_instr/(ULong)interval_size),   /* whole intervals only */
+                interval_size,
+                bbv_thread[i].total_instr,
+                bbv_thread[i].global_rep_count,
+                bbv_thread[i].unique_rep_count,
+                bbv_thread[i].fldcw_count);
+            /* Print results to display */
+         VG_UMSG("%s", buf);
+            /* open the output file if it hasn't already */
+         if (bbv_thread[i].bbtrace_fd < 0) {
+            bbv_thread[i].bbtrace_fd=open_tracefile(i);
+         }
+            /* Also print to results file */
+         VG_(write)(bbv_thread[i].bbtrace_fd,(void*)buf,VG_(strlen)(buf));
+         VG_(close)(bbv_thread[i].bbtrace_fd);
+      }
+   }
+static void bbv_pre_clo_init(void)   /* Tool set-up: fills in the tool
+      details, registers the init/instrument/fini functions, the
+      command-line handlers and the thread-tracking callback, then
+      creates the basic block table and the initial per-thread state */
+   VG_(details_name)            ("exp-bbv");
+   VG_(details_version)         (NULL);
+   VG_(details_description)     ("a SimPoint basic block vector generator");
+   VG_(details_copyright_author)(
+      "Copyright (C) 2006-2009 Vince Weaver");
+   VG_(details_bug_reports_to)  (VG_BUGS_TO);
+   VG_(basic_tool_funcs)          (bbv_post_clo_init,
+                                   bbv_instrument,
+                                   bbv_fini);
+   VG_(needs_command_line_options)(bbv_process_cmd_line_option,
+                                   bbv_print_usage,
+                                   bbv_print_debug_usage);
+   VG_(track_start_client_code)( bbv_thread_called );   /* keeps current_thread up to date */
+   instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0,
+                                          NULL,   /* no comparison function supplied */
+                                          VG_(malloc), "bbv.1", VG_(free));
+   bbv_thread=allocate_new_thread(bbv_thread,0,allocated_threads);   /* initial thread slots */
+/*--- end                                                          ---*/