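Fix obscure bug in cache simulation, found by Ulrich Drepper: when an access spans two sets, the second set (indexed from a+size-1) was searched and updated with the tag of the first byte (a) instead of the tag of the last byte (a+size-1). Also remove the disabled print_cache() debug helper and drop the 'register' qualifiers.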



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@6365 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_sim.c b/cachegrind/cg_sim.c
index 90cae41..cf5b7ba 100644
--- a/cachegrind/cg_sim.c
+++ b/cachegrind/cg_sim.c
@@ -80,21 +80,6 @@
       c->tags[i] = 0;
 }
 
-#if 0
-static void print_cache(cache_t2* c)
-{
-   UInt set, way, i;
-
-   /* Note initialisation and update of 'i'. */
-   for (i = 0, set = 0; set < c->sets; set++) {
-      for (way = 0; way < c->assoc; way++, i++) {
-         VG_(printf)("%16lx ", c->tags[i]);
-      }
-      VG_(printf)("\n");
-   }
-}
-#endif 
-
 /* This is done as a macro rather than by passing in the cache_t2 as an 
  * arg because it slows things down by a small amount (3-5%) due to all 
  * that extra indirection. */
@@ -114,9 +99,10 @@
 static __inline__                                                           \
 void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)         \
 {                                                                           \
-   register UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1); \
-   register UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
-   register UWord tag  = a >> L.tag_shift;                                  \
+   UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
+   UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
+   UWord tag  = a >> L.tag_shift;                                           \
+   UWord tag2;                                                              \
    Int i, j;                                                                \
    Bool is_miss = False;                                                    \
    UWord* set;                                                              \
@@ -176,22 +162,23 @@
       is_miss = True;                                                       \
 block2:                                                                     \
       set = &(L.tags[set2 << L.assoc_bits]);                                \
-      if (tag == set[0]) {                                                  \
+      tag2 = (a+size-1) >> L.tag_shift;                                     \
+      if (tag2 == set[0]) {                                                 \
          goto miss_treatment;                                               \
       }                                                                     \
       for (i = 1; i < L.assoc; i++) {                                       \
-         if (tag == set[i]) {                                               \
+         if (tag2 == set[i]) {                                              \
             for (j = i; j > 0; j--) {                                       \
                set[j] = set[j - 1];                                         \
             }                                                               \
-            set[0] = tag;                                                   \
+            set[0] = tag2;                                                  \
             goto miss_treatment;                                            \
          }                                                                  \
       }                                                                     \
       for (j = L.assoc - 1; j > 0; j--) {                                   \
          set[j] = set[j - 1];                                               \
       }                                                                     \
-      set[0] = tag;                                                         \
+      set[0] = tag2;                                                        \
       is_miss = True;                                                       \
 miss_treatment:                                                             \
       if (is_miss) { MISS_TREATMENT; }                                      \