New files:
  - vg_cachesim.c
  - vg_cachesim_{I1,D1,L2}.c
  - vg_annotate.in
  - vg_cachegen.in

Changes to existing files:

  - valgrind/valgrind.in, added option:

        --cachesim=no|yes       [no]

  - Makefile/Makefile.am:
        * added vg_cachesim.c to valgrind_so_SOURCES var
        * added vg_cachesim_I1.c, vg_cachesim_D1.c, vg_cachesim_L2.c to
          noinst_HEADERS var
        * added vg_annotate, vg_cachegen to 'bin_SCRIPTS' var, and added empty
          targets for them

  - vg_main.c:
        * added two offsets for cache sim functions (put in positions 17a,17b)
        * added option handling (detection of --cachesim=yes, which turns off
          --instrument);
        * added calls to cachesim initialisation/finalisation functions

  - vg_mylibc.c: added some system call wrappers (for chmod, open_write, etc) for
    file writing

  - vg_symtab2.c:
        * allow it to read symbols if either of --instrument or --cachesim is
          used
        * made vg_symtab2.c:vg_what_{line,fn}_is_this extern, renaming it as
          VG_(what_line_is_this) (and added to vg_include.h)
        * completely rewrote the read loop in vg_read_lib_symbols, fixing
          several bugs.  Much better now, although probably not perfect.  It's
          also relatively fragile -- I'm using the "die immediately if anything
          unexpected happens" approach.

  - vg_to_ucode.c:
        * in VG_(disBB), patching in x86 instruction size into extra4b field of
          JMP instructions at the end of basic blocks if --cachesim=yes.
          Shifted things around to do this;  also had to fiddle around with
          single-step stuff to get this to work, by not sticking extra JMPs on
          the end of the single-instruction block if there was already one
          there (to avoid breaking an assertion in vg_cachesim.c).  Did a
          similar thing to avoid an extra JMP on huge basic blocks that are
          split.

  - vg_translate.c:
        * if --cachesim=yes call the cachesim instrumentation phase
        * made some functions extern and renamed:
                allocCodeBlock() --> VG_(allocCodeBlock)()
                freeCodeBlock()  --> VG_(freeCodeBlock)()
                copyUInstr()     --> VG_(copyUInstr)()
          (added to vg_include.h too)

  - vg_include.h: declared
        * cachesim offsets
        * exports of vg_cachesim.c
        * added four new profiling events (increasing VGP_M_CCS to 24 -- I kept
          the spare ones)
        * added comment about UInstr.extra4b field being used for instr size in
          JMPs for cache simulation

  - docs/manual.html:
        * Added --cachesim option to section 2.5.
        * Added cache profiling stuff as section 7.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@168 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/vg_cachesim_I1.c b/vg_cachesim_I1.c
new file mode 100644
index 0000000..32b89b5
--- /dev/null
+++ b/vg_cachesim_I1.c
@@ -0,0 +1,91 @@
+/*  I1 cache simulator, generated by vg_cachegen.
+ *     total size    = 65536 bytes
+ *     line size     = 64 bytes
+ *     associativity = 2-way associative
+ *
+ *  65536 / (64 * 2) gives 512 sets, so an address splits into a 6-bit
+ *  line offset, a 9-bit set index and the remaining high bits as the tag.
+ *
+ *  This file should be #include-d into vg_cachesim.c
+ */
+
+/* Description line for this cache configuration. */
+static char I1_desc_line[] = 
+    "desc: I1 cache:         65536 B, 64 B, 2-way associative\n";
+
+/* Tag store, indexed [set][way].  Way 0 holds the most recently used tag
+ * of a set, way 1 the least recently used one. */
+static UInt I1_tags[512][2];
+
+/* Reset every way of every set to tag 0. */
+static void cachesim_I1_initcache(void)
+{
+   UInt set, way;
+   for (set = 0; set < 512; set++)
+      for (way = 0; way < 2; way++)
+         I1_tags[set][way] = 0;
+}
+
+/* Look up `tag` in `set` and make it the most recently used entry,
+ * displacing the least recently used tag if it was not already present.
+ * Returns True on a miss, False on a hit. */
+static __inline__
+Bool cachesim_I1_touch(UInt set, UInt tag)
+{
+   Bool is_miss;
+
+   if (tag == I1_tags[set][0])
+      return False;
+
+   /* Hit in way 1 or a miss: in both cases the old most recently used
+    * tag drops to way 1 and `tag` takes way 0. */
+   is_miss = (tag != I1_tags[set][1]);
+   I1_tags[set][1] = I1_tags[set][0];
+   I1_tags[set][0] = tag;
+   return is_miss;
+}
+
+/* Simulate one reference of `size` bytes starting at address `a`.
+ * The reference may lie within one cache line or straddle two lines in
+ * adjacent sets.  If any line touched misses in I1, *m1 is incremented
+ * once and the reference is passed on to cachesim_L2_doref() with m2.
+ * A reference whose first and last bytes fall in non-adjacent sets is
+ * reported and causes a panic. */
+static __inline__
+void cachesim_I1_doref(Addr a, UChar size, ULong* m1, ULong *m2)
+{
+   /* Address of the last byte referenced.  Using a + size here would make
+    * a reference that ends exactly on a line boundary touch the next line
+    * as well.  A zero size is treated as a reference to the byte at `a`. */
+   Addr last = (size > 0) ? a + size - 1 : a;
+
+   register UInt set1 = (a    >> 6) & (512-1);
+   register UInt set2 = (last >> 6) & (512-1);
+   register UInt tag1 = a    >> (6 + 9);
+   /* The second line needs its own tag: it differs from tag1 when the
+    * reference wraps from the last set (511) round to set 0. */
+   register UInt tag2 = last >> (6 + 9);
+   Bool is_I1_miss = False;
+
+   if (set1 == set2) {
+      is_I1_miss = cachesim_I1_touch(set1, tag1);
+
+   } else if ((set1 + 1) % 512 == set2) {
+      /* Touch both lines unconditionally so that both sets have their
+       * LRU state updated; the reference counts as one miss if either
+       * line misses. */
+      Bool miss1 = cachesim_I1_touch(set1, tag1);
+      Bool miss2 = cachesim_I1_touch(set2, tag2);
+      is_I1_miss = miss1 || miss2;
+
+   } else {
+      VG_(printf)("\nERROR: Data item 0x%x of size %u bytes is in two non-adjacent\n", a, size);
+      VG_(printf)("sets %u and %u.\n", set1, set2);
+      VG_(panic)("I1 cache set mismatch");
+   }
+
+   if (is_I1_miss) {
+      (*m1)++;
+      cachesim_L2_doref(a, size, m2);
+   }
+}