New files:
  - vg_cachesim.c
  - vg_cachesim_{I1,D1,L2}.c
  - vg_annotate.in
  - vg_cachegen.in

Changes to existing files:

  - valgrind/valgrind.in, added option:

        --cachesim=no|yes       [no]

  - Makefile/Makefile.am:
        * added vg_cachesim.c to valgrind_so_SOURCES var
        * added vg_cachesim_I1.c, vg_cachesim_D1.c, vg_cachesim_L2.c to
          noinst_HEADERS var
        * added vg_annotate, vg_cachegen to 'bin_SCRIPTS' var, and added empty
          targets for them

  - vg_main.c:
        * added two offsets for cache sim functions (put in positions 17a,17b)
        * added option handling (detection of --cachesim=yes, which turns off
          --instrument)
        * added calls to cachesim initialisation/finalisation functions

  - vg_mylibc.c: added some system call wrappers (for chmod, open_write, etc)
    for file writing

  - vg_symtab2.c:
        * allow it to read symbols if either of --instrument or --cachesim is
          used
        * made vg_symtab2.c:vg_what_{line,fn}_is_this extern, renaming them
          as VG_(what_{line,fn}_is_this) (and added them to vg_include.h)
        * completely rewrote the read loop in vg_read_lib_symbols, fixing
          several bugs.  Much better now, although probably not perfect.  It's
          also relatively fragile -- I'm using the "die immediately if anything
          unexpected happens" approach.

  - vg_to_ucode.c:
        * in VG_(disBB), patch the x86 instruction size into the extra4b field
          of JMP instructions at the end of basic blocks if --cachesim=yes.
          Shifted things around to do this;  also had to fiddle around with
          single-step stuff to get this to work, by not sticking extra JMPs on
          the end of the single-instruction block if there was already one
          there (to avoid breaking an assertion in vg_cachesim.c).  Did a
          similar thing to avoid an extra JMP on huge basic blocks that are
          split.

  - vg_translate.c:
        * if --cachesim=yes call the cachesim instrumentation phase
        * made some functions extern and renamed:
                allocCodeBlock() --> VG_(allocCodeBlock)()
                freeCodeBlock()  --> VG_(freeCodeBlock)()
                copyUInstr()     --> VG_(copyUInstr)()
          (added to vg_include.h too)

  - vg_include.h:
        * declared cachesim offsets
        * declared exports of vg_cachesim.c
        * added four new profiling events (increasing VGP_M_CCS to 24 -- I kept
          the spare ones)
        * added comment about UInstr.extra4b field being used for instr size in
          JMPs for cache simulation

  - docs/manual.html:
        * Added --cachesim option to section 2.5.
        * Added cache profiling stuff as section 7.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@168 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index b51ef05..08a1ab2 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -234,6 +234,8 @@
 extern Bool  VG_(clo_instrument);
 /* DEBUG: clean up instrumented code?  default: YES */
 extern Bool  VG_(clo_cleanup);
+/* Cache simulation instrumentation?  default: NO */
+extern Bool  VG_(clo_cachesim);
 /* SMC write checks?  default: SOME (1,2,4 byte movs to mem) */
 extern Int   VG_(clo_smc_check);
 /* DEBUG: print system calls?  default: NO */
@@ -287,7 +289,7 @@
 
 #define VGP_M_STACK 10
 
-#define VGP_M_CCS 20  /* == the # of elems in VGP_LIST */
+#define VGP_M_CCS 24  /* == the # of elems in VGP_LIST */
 #define VGP_LIST \
    VGP_PAIR(VgpRun=0,      "running"),                \
    VGP_PAIR(VgpMalloc,     "low-lev malloc/free"),    \
@@ -307,6 +309,10 @@
    VGP_PAIR(VgpAddToT,     "add-to-transtab"),        \
    VGP_PAIR(VgpSARP,       "set-addr-range-perms"),   \
    VGP_PAIR(VgpSyscall,    "syscall wrapper"),        \
+   VGP_PAIR(VgpCacheInstrument, "cache instrument"),  \
+   VGP_PAIR(VgpCacheGetBBCC,"cache get BBCC"),        \
+   VGP_PAIR(VgpCacheSimulate, "cache simulate"),      \
+   VGP_PAIR(VgpCacheDump,  "cache stats dump"),       \
    VGP_PAIR(VgpSpare1,     "spare 1"),                \
    VGP_PAIR(VgpSpare2,     "spare 2")
 
@@ -718,8 +724,10 @@
                                Int line, Char* fn )
             __attribute__ ((__noreturn__));
 
-/* Reading files. */
+/* Reading and writing files. */
 extern Int  VG_(open_read) ( Char* pathname );
+extern Int  VG_(open_write)       ( Char* pathname );
+extern Int  VG_(create_and_write) ( Char* pathname );
 extern void VG_(close)     ( Int fd );
 extern Int  VG_(read)      ( Int fd, void* buf, Int count);
 extern Int  VG_(write)     ( Int fd, void* buf, Int count);
@@ -955,7 +963,9 @@
       UChar   tag3:4;     /* third  operand tag */
       UChar   extra4b:4;  /* Spare field, used by WIDEN for src
                              -size, and by LEA2 for scale 
-                             (1,2,4 or 8) */
+                             (1,2,4 or 8), and by unconditional JMPs for
+                             orig x86 instr size if --cachesim=yes */
+
 
       /* word 5 */
       UChar   cond;            /* condition, for jumps */
@@ -1044,6 +1054,10 @@
 extern void  VG_(ppUInstr)        ( Int instrNo, UInstr* u );
 extern void  VG_(ppUCodeBlock)    ( UCodeBlock* cb, Char* title );
 
+extern UCodeBlock* VG_(allocCodeBlock) ( void );
+extern void  VG_(freeCodeBlock)        ( UCodeBlock* cb );
+extern void  VG_(copyUInstr)                ( UCodeBlock* cb, UInstr* instr );
+
 extern Char* VG_(nameCondcode)    ( Condcode cond );
 extern Bool  VG_(saneUInstr)      ( Bool beforeRA, UInstr* u );
 extern Bool  VG_(saneUCodeBlock)  ( UCodeBlock* cb );
@@ -1184,6 +1198,11 @@
                                      ( Addr a,
                                        Char* obj_buf, Int n_obj_buf,
                                        Char* fun_buf, Int n_fun_buf );
+extern Bool VG_(what_line_is_this) ( Addr a,
+                                     UChar* filename, Int n_filename,
+                                     UInt* lineno );
+extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
+                                     Char* fn_name, Int n_fn_name);
 
 extern void VG_(symtab_notify_munmap) ( Addr start, UInt length );
 
@@ -1590,6 +1609,20 @@
 extern void VG_(signalreturn_bogusRA)( void );
 extern void VG_(pthreadreturn_bogusRA)( void );
 
+/* ---------------------------------------------------------------------
+   Exports of vg_cachesim.c
+   ------------------------------------------------------------------ */
+
+extern UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr);
+
+typedef struct  _iCC  iCC;
+typedef struct _idCC idCC;
+
+extern void VG_(init_cachesim)        ( void );
+extern void VG_(show_cachesim_results)( Int client_argc, Char** client_argv );
+
+extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
+extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
@@ -1724,7 +1757,8 @@
 extern Int VGOFF_(fpu_write_check);       /* :: Addr -> Int -> void */
 extern Int VGOFF_(fpu_read_check);        /* :: Addr -> Int -> void */
 
-
+extern Int VGOFF_(cachesim_log_non_mem_instr);
+extern Int VGOFF_(cachesim_log_mem_instr);
 
 #endif /* ndef __VG_INCLUDE_H */