New files:
  - vg_cachesim.c
  - vg_cachesim_{I1,D1,L2}.c
  - vg_annotate.in
  - vg_cachegen.in

Changes to existing files:

  - valgrind/valgrind.in, added option:

        --cachesim=no|yes       [no]

  - Makefile/Makefile.am:
        * added vg_cachesim.c to valgrind_so_SOURCES var
        * added vg_cachesim_I1.c, vg_cachesim_D1.c, vg_cachesim_L2.c to
          noinst_HEADERS var
        * added vg_annotate, vg_cachegen to 'bin_SCRIPTS' var, and added empty
          targets for them

  - vg_main.c:
        * added two offsets for cache sim functions (put in positions 17a,17b)
        * added option handling (detection of --cachesim=yes, which turns off
          --instrument)
        * added calls to cachesim initialisation/finalisation functions

  - vg_mylibc: added some system call wrappers (for chmod, open_write, etc) for
    file writing

  - vg_symtab2.c:
        * allow it to read symbols if either of --instrument or --cachesim is
          used
        * made vg_symtab2.c:vg_what_{line,fn}_is_this extern, renaming them as
          VG_(what_{line,fn}_is_this) (and added them to vg_include.h)
        * completely rewrote the read loop in vg_read_lib_symbols, fixing
          several bugs.  Much better now, although probably not perfect.  It's
          also relatively fragile -- I'm using the "die immediately if anything
          unexpected happens" approach.

  - vg_to_ucode.c:
        * in VG_(disBB), patching the x86 instruction size into the extra4b
          field of JMP instructions at the end of basic blocks if
          --cachesim=yes.
          Shifted things around to do this;  also had to fiddle around with
          single-step stuff to get this to work, by not sticking extra JMPs on
          the end of the single-instruction block if there was already one
          there (to avoid breaking an assertion in vg_cachesim.c).  Did a
          similar thing to avoid an extra JMP on huge basic blocks that are
          split.

  - vg_translate.c:
        * if --cachesim=yes call the cachesim instrumentation phase
        * made some functions extern and renamed:
                allocCodeBlock() --> VG_(allocCodeBlock)()
                freeCodeBlock()  --> VG_(freeCodeBlock)()
                copyUInstr()     --> VG_(copyUInstr)()
          (added to vg_include.h too)

  - vg_include.h:
        * declared cachesim offsets
        * declared exports of vg_cachesim.c
        * added four new profiling events (increasing VGP_M_CCS to 24 -- I kept
          the spare ones)
        * added comment about UInstr.extra4b field being used for instr size in
          JMPs for cache simulation

  - docs/manual.html:
        * Added --cachesim option to section 2.5.
        * Added cache profiling stuff as section 7.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@168 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/vg_main.c b/vg_main.c
index 5f7fe59..ee875e2 100644
--- a/vg_main.c
+++ b/vg_main.c
@@ -103,7 +103,8 @@
 Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET;
 Int VGOFF_(fpu_write_check) = INVALID_OFFSET;
 Int VGOFF_(fpu_read_check) = INVALID_OFFSET;
-
+Int VGOFF_(cachesim_log_non_mem_instr) = INVALID_OFFSET;
+Int VGOFF_(cachesim_log_mem_instr)     = INVALID_OFFSET;
 
 /* This is the actual defn of baseblock. */
 UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
@@ -165,6 +166,13 @@
    /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
    /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
 
+   /* 17a */ 
+   VGOFF_(cachesim_log_non_mem_instr)  
+      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_non_mem_instr) );
+   /* 17b */ 
+   VGOFF_(cachesim_log_mem_instr)  
+      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_mem_instr) );
+
    /* 18  */ 
    VGOFF_(helper_value_check4_fail) 
       = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) );
@@ -419,6 +427,7 @@
 Bool   VG_(clo_optimise);
 Bool   VG_(clo_instrument);
 Bool   VG_(clo_cleanup);
+Bool   VG_(clo_cachesim);
 Int    VG_(clo_smc_check);
 Bool   VG_(clo_trace_syscalls);
 Bool   VG_(clo_trace_signals);
@@ -745,6 +754,11 @@
       else if (STREQ(argv[i], "--cleanup=no"))
          VG_(clo_cleanup) = False;
 
+      else if (STREQ(argv[i], "--cachesim=yes"))
+         VG_(clo_cachesim) = True;     
+      else if (STREQ(argv[i], "--cachesim=no"))
+         VG_(clo_cachesim) = False;
+
       else if (STREQ(argv[i], "--smc-check=none"))
          VG_(clo_smc_check) = VG_CLO_SMC_NONE;
       else if (STREQ(argv[i], "--smc-check=some"))
@@ -821,6 +835,11 @@
 
    VG_(clo_logfile_fd) = eventually_logfile_fd;
 
+   /* Don't do memory checking if simulating the cache. */
+   if (VG_(clo_cachesim)) {
+       VG_(clo_instrument) = False;
+   }
+
    if (VG_(clo_verbosity > 0))
       VG_(message)(Vg_UserMsg, 
                    "valgrind-%s, a memory error detector for x86 GNU/Linux.",
@@ -978,7 +997,7 @@
       attach GDB in another shell. */
    /* {extern unsigned int sleep(unsigned int seconds); sleep(10);} */
 
-   if (VG_(clo_instrument)) {
+   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
       VGP_PUSHCC(VgpInitAudit);
       VGM_(init_memory_audit)();
       VGP_POPCC;
@@ -1012,6 +1031,9 @@
       VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 );
    }
 
+   if (VG_(clo_cachesim)) 
+      VG_(init_cachesim)();
+
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
 
@@ -1039,6 +1061,9 @@
    }
    VG_(running_on_simd_CPU) = False;
 
+   if (VG_(clo_cachesim))
+      VG_(show_cachesim_results)(VG_(client_argc), VG_(client_argv));
+
    VG_(do_sanity_checks)( 1 /* root thread */, 
                           True /*include expensive checks*/ );