Cache simulator now handles basic block discards correctly.  When
VG_(cachesim_notify_discard) is called, the cost centre array for the basic
block is removed from the table, its counts are aggregated into a single
"discard" cost centre, and the array is then free'd.

The aggregate discard cost centre is given the filename:function_name
"(discarded):(discarded)".  Mentioned this in the manual.

Only tested with tests/discard.c.  Seems to work well for that case though :)


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@385 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 130e7eb..20a693c 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -132,6 +132,11 @@
    initCC(&cc->D);
 }
 
+#define ADD_CC_TO(CC_type, cc, total)           \
+   total.a  += ((CC_type*)BBCC_ptr)->cc.a;      \
+   total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;     \
+   total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
+          
 /* If 1, address of each instruction is printed as a comment after its counts
  * in cachegrind.out */
 #define PRINT_INSTR_ADDRS 0
@@ -223,6 +228,10 @@
 
 static Int  BB_retranslations   = 0;
 
+static CC Ir_discards;
+static CC Dr_discards;
+static CC Dw_discards;
+
 static void init_BBCC_table()
 {
    Int i;
@@ -315,11 +324,11 @@
  * cost centre.  Also sets BB_seen_before by reference. 
  */ 
 static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb, 
-                                 Bool *BB_seen_before)
+                                 Bool remove, Bool *BB_seen_before)
 {
    file_node *curr_file_node;
    fn_node   *curr_fn_node;
-   BBCC      *curr_BBCC;
+   BBCC     **prev_BBCC_next_ptr, *curr_BBCC;
    Char       filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
    UInt       filename_hash, fnname_hash, BBCC_hash;
    Int        dummy_line_num;
@@ -352,11 +361,16 @@
    }
 
    BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
+   prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
    curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
    while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
+      prev_BBCC_next_ptr = &(curr_BBCC->next);
       curr_BBCC = curr_BBCC->next;
    }
    if (curr_BBCC == NULL) {
+
+      vg_assert(False == remove);
+
       curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC = 
          new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
       *BB_seen_before = False;
@@ -369,7 +383,15 @@
             "BB retranslation, retrieving from BBCC table");
       }
       *BB_seen_before = True;
-      BB_retranslations++;
+
+      if (True == remove) {
+          // Remove curr_BBCC from chain;  it will be used and free'd by the
+          // caller.
+          *prev_BBCC_next_ptr = curr_BBCC->next;
+
+      } else {
+          BB_retranslations++;
+      }
    }
    VGP_POPCC;
    return curr_BBCC;
@@ -471,7 +493,7 @@
    /* Get BBCC (creating if necessary -- requires a counting pass over the BB
     * if it's the first time it's been seen), and point to start of the 
     * BBCC array.  */
-   BBCC_node = get_BBCC(orig_addr, cb_in, &BB_seen_before);
+   BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
    cb = VG_(allocCodeBlock)();
@@ -708,6 +730,10 @@
    initCC(&Dr_total);
    initCC(&Dw_total);
    
+   initCC(&Ir_discards);
+   initCC(&Dr_discards);
+   initCC(&Dw_discards);
+
    cachesim_I1_initcache();
    cachesim_D1_initcache();
    cachesim_L2_initcache();
@@ -768,11 +794,6 @@
       Addr instr_addr;
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
-#define ADD_CC_TO(CC_type, cc, total)           \
-   total.a  += ((CC_type*)BBCC_ptr)->cc.a;      \
-   total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;     \
-   total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
-          
          case INSTR_CC:
             instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
             sprint_iCC(buf, (iCC*)BBCC_ptr);
@@ -797,8 +818,6 @@
             BBCC_ptr += sizeof(idCC);
             break;
 
-#undef ADD_CC_TO
-
          default:
             VG_(panic)("Unknown CC type in fprint_BBCC()\n");
             break;
@@ -907,6 +926,32 @@
       }
    }
 
+   /* Print stats from any discarded basic blocks */
+   if (0 != Ir_discards.a) {
+
+      VG_(sprintf)(buf, "fl=(discarded)\n");
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(buf, "fn=(discarded)\n");
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+      /* Use 0 as line number */
+      VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+                   Ir_discards.a, Ir_discards.m1, Ir_discards.m2, 
+                   Dr_discards.a, Dr_discards.m1, Dr_discards.m2, 
+                   Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+      Ir_total.a  += Ir_discards.a;
+      Ir_total.m1 += Ir_discards.m1;
+      Ir_total.m2 += Ir_discards.m2;
+      Dr_total.a  += Dr_discards.a;
+      Dr_total.m1 += Dr_discards.m1;
+      Dr_total.m2 += Dr_discards.m2;
+      Dw_total.a  += Dw_discards.a;
+      Dw_total.m1 += Dw_discards.m1;
+      Dw_total.m2 += Dw_discards.m2;
+   }
+
    /* Summary stats must come after rest of table, since we calculate them
     * during traversal.  */ 
    VG_(sprintf)(buf, "summary: "
@@ -1091,10 +1136,60 @@
 }
 
 
+/* Called when a translation is invalidated due to self-modifying code or
+ * unloading of a shared object.
+ *
+ * Finds the BBCC in the table, removes it, adds the counts to the discard
+ * counters, and then frees the BBCC. */
 void VG_(cachesim_notify_discard) ( TTEntry* tte )
 {
-  VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
-               tte->orig_addr, (Int)tte->orig_size);
+   BBCC *BBCC_node;
+   Addr BBCC_ptr0, BBCC_ptr;
+   Bool BB_seen_before;
+    
+   VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
+                tte->orig_addr, (Int)tte->orig_size);
+
+   /* 2nd arg won't be used since BB should have been seen before (assertions
+    * ensure this). */
+   BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
+   BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
+
+   vg_assert(True == BB_seen_before);
+
+   while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
+
+      /* We pretend the CC is an iCC for getting the tag.  This is ok
+       * because both CC types have tag as their first byte.  Once we know
+       * the type, we can cast and act appropriately. */
+
+      switch ( ((iCC*)BBCC_ptr)->tag ) {
+
+         case INSTR_CC:
+            ADD_CC_TO(iCC, I, Ir_discards);
+            BBCC_ptr += sizeof(iCC);
+            break;
+
+         case READ_CC:
+         case  MOD_CC:
+            ADD_CC_TO(idCC, I, Ir_discards);
+            ADD_CC_TO(idCC, D, Dr_discards);
+            BBCC_ptr += sizeof(idCC);
+            break;
+
+         case WRITE_CC:
+            ADD_CC_TO(idCC, I, Ir_discards);
+            ADD_CC_TO(idCC, D, Dw_discards);
+            BBCC_ptr += sizeof(idCC);
+            break;
+
+         default:
+            VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
+            break;
+      }
+   }
+
+   VG_(free)(VG_AR_PRIVATE, BBCC_node);
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html
index 3daf152..3b797df 100644
--- a/cachegrind/docs/manual.html
+++ b/cachegrind/docs/manual.html
@@ -1197,8 +1197,7 @@
     fresh memory, and just call this occasionally to discard large
     chunks of old code all at once.
     <p>
-    Warning: minimally tested.  Also, doesn't interact well with the
-    cache simulator.
+    Warning: minimally tested, especially for the cache simulator.
 </ul>
 <p>
 
@@ -2313,7 +2312,9 @@
 <code>???</code> is used if the the file name and/or function name
 could not be determined from debugging information. If most of the
 entries have the form <code>???:???</code> the program probably wasn't
-compiled with <code>-g</code>.  <p>
+compiled with <code>-g</code>.  If any code was invalidated (either due to
+self-modifying code or unloading of shared objects) its counts are aggregated
+into a single cost centre written as <code>(discarded):(discarded)</code>.<p>
 
 It is worth noting that functions will come from three types of source files:
 <ol>
diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html
index 3daf152..3b797df 100644
--- a/coregrind/docs/manual.html
+++ b/coregrind/docs/manual.html
@@ -1197,8 +1197,7 @@
     fresh memory, and just call this occasionally to discard large
     chunks of old code all at once.
     <p>
-    Warning: minimally tested.  Also, doesn't interact well with the
-    cache simulator.
+    Warning: minimally tested, especially for the cache simulator.
 </ul>
 <p>
 
@@ -2313,7 +2312,9 @@
 <code>???</code> is used if the the file name and/or function name
 could not be determined from debugging information. If most of the
 entries have the form <code>???:???</code> the program probably wasn't
-compiled with <code>-g</code>.  <p>
+compiled with <code>-g</code>.  If any code was invalidated (either due to
+self-modifying code or unloading of shared objects) its counts are aggregated
+into a single cost centre written as <code>(discarded):(discarded)</code>.<p>
 
 It is worth noting that functions will come from three types of source files:
 <ol>
diff --git a/docs/manual.html b/docs/manual.html
index 3daf152..3b797df 100644
--- a/docs/manual.html
+++ b/docs/manual.html
@@ -1197,8 +1197,7 @@
     fresh memory, and just call this occasionally to discard large
     chunks of old code all at once.
     <p>
-    Warning: minimally tested.  Also, doesn't interact well with the
-    cache simulator.
+    Warning: minimally tested, especially for the cache simulator.
 </ul>
 <p>
 
@@ -2313,7 +2312,9 @@
 <code>???</code> is used if the the file name and/or function name
 could not be determined from debugging information. If most of the
 entries have the form <code>???:???</code> the program probably wasn't
-compiled with <code>-g</code>.  <p>
+compiled with <code>-g</code>.  If any code was invalidated (either due to
+self-modifying code or unloading of shared objects) its counts are aggregated
+into a single cost centre written as <code>(discarded):(discarded)</code>.<p>
 
 It is worth noting that functions will come from three types of source files:
 <ol>
diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html
index 3daf152..3b797df 100644
--- a/memcheck/docs/manual.html
+++ b/memcheck/docs/manual.html
@@ -1197,8 +1197,7 @@
     fresh memory, and just call this occasionally to discard large
     chunks of old code all at once.
     <p>
-    Warning: minimally tested.  Also, doesn't interact well with the
-    cache simulator.
+    Warning: minimally tested, especially for the cache simulator.
 </ul>
 <p>
 
@@ -2313,7 +2312,9 @@
 <code>???</code> is used if the the file name and/or function name
 could not be determined from debugging information. If most of the
 entries have the form <code>???:???</code> the program probably wasn't
-compiled with <code>-g</code>.  <p>
+compiled with <code>-g</code>.  If any code was invalidated (either due to
+self-modifying code or unloading of shared objects) its counts are aggregated
+into a single cost centre written as <code>(discarded):(discarded)</code>.<p>
 
 It is worth noting that functions will come from three types of source files:
 <ol>
diff --git a/vg_cachesim.c b/vg_cachesim.c
index 130e7eb..20a693c 100644
--- a/vg_cachesim.c
+++ b/vg_cachesim.c
@@ -132,6 +132,11 @@
    initCC(&cc->D);
 }
 
+#define ADD_CC_TO(CC_type, cc, total)           \
+   total.a  += ((CC_type*)BBCC_ptr)->cc.a;      \
+   total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;     \
+   total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
+          
 /* If 1, address of each instruction is printed as a comment after its counts
  * in cachegrind.out */
 #define PRINT_INSTR_ADDRS 0
@@ -223,6 +228,10 @@
 
 static Int  BB_retranslations   = 0;
 
+static CC Ir_discards;
+static CC Dr_discards;
+static CC Dw_discards;
+
 static void init_BBCC_table()
 {
    Int i;
@@ -315,11 +324,11 @@
  * cost centre.  Also sets BB_seen_before by reference. 
  */ 
 static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb, 
-                                 Bool *BB_seen_before)
+                                 Bool remove, Bool *BB_seen_before)
 {
    file_node *curr_file_node;
    fn_node   *curr_fn_node;
-   BBCC      *curr_BBCC;
+   BBCC     **prev_BBCC_next_ptr, *curr_BBCC;
    Char       filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
    UInt       filename_hash, fnname_hash, BBCC_hash;
    Int        dummy_line_num;
@@ -352,11 +361,16 @@
    }
 
    BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
+   prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
    curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
    while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
+      prev_BBCC_next_ptr = &(curr_BBCC->next);
       curr_BBCC = curr_BBCC->next;
    }
    if (curr_BBCC == NULL) {
+
+      vg_assert(False == remove);
+
       curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC = 
          new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
       *BB_seen_before = False;
@@ -369,7 +383,15 @@
             "BB retranslation, retrieving from BBCC table");
       }
       *BB_seen_before = True;
-      BB_retranslations++;
+
+      if (True == remove) {
+          // Remove curr_BBCC from chain;  it will be used and free'd by the
+          // caller.
+          *prev_BBCC_next_ptr = curr_BBCC->next;
+
+      } else {
+          BB_retranslations++;
+      }
    }
    VGP_POPCC;
    return curr_BBCC;
@@ -471,7 +493,7 @@
    /* Get BBCC (creating if necessary -- requires a counting pass over the BB
     * if it's the first time it's been seen), and point to start of the 
     * BBCC array.  */
-   BBCC_node = get_BBCC(orig_addr, cb_in, &BB_seen_before);
+   BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
    cb = VG_(allocCodeBlock)();
@@ -708,6 +730,10 @@
    initCC(&Dr_total);
    initCC(&Dw_total);
    
+   initCC(&Ir_discards);
+   initCC(&Dr_discards);
+   initCC(&Dw_discards);
+
    cachesim_I1_initcache();
    cachesim_D1_initcache();
    cachesim_L2_initcache();
@@ -768,11 +794,6 @@
       Addr instr_addr;
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
-#define ADD_CC_TO(CC_type, cc, total)           \
-   total.a  += ((CC_type*)BBCC_ptr)->cc.a;      \
-   total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;     \
-   total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
-          
          case INSTR_CC:
             instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
             sprint_iCC(buf, (iCC*)BBCC_ptr);
@@ -797,8 +818,6 @@
             BBCC_ptr += sizeof(idCC);
             break;
 
-#undef ADD_CC_TO
-
          default:
             VG_(panic)("Unknown CC type in fprint_BBCC()\n");
             break;
@@ -907,6 +926,32 @@
       }
    }
 
+   /* Print stats from any discarded basic blocks */
+   if (0 != Ir_discards.a) {
+
+      VG_(sprintf)(buf, "fl=(discarded)\n");
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(buf, "fn=(discarded)\n");
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+      /* Use 0 as line number */
+      VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+                   Ir_discards.a, Ir_discards.m1, Ir_discards.m2, 
+                   Dr_discards.a, Dr_discards.m1, Dr_discards.m2, 
+                   Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+
+      Ir_total.a  += Ir_discards.a;
+      Ir_total.m1 += Ir_discards.m1;
+      Ir_total.m2 += Ir_discards.m2;
+      Dr_total.a  += Dr_discards.a;
+      Dr_total.m1 += Dr_discards.m1;
+      Dr_total.m2 += Dr_discards.m2;
+      Dw_total.a  += Dw_discards.a;
+      Dw_total.m1 += Dw_discards.m1;
+      Dw_total.m2 += Dw_discards.m2;
+   }
+
    /* Summary stats must come after rest of table, since we calculate them
     * during traversal.  */ 
    VG_(sprintf)(buf, "summary: "
@@ -1091,10 +1136,60 @@
 }
 
 
+/* Called when a translation is invalidated due to self-modifying code or
+ * unloading of a shared object.
+ *
+ * Finds the BBCC in the table, removes it, adds the counts to the discard
+ * counters, and then frees the BBCC. */
 void VG_(cachesim_notify_discard) ( TTEntry* tte )
 {
-  VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
-               tte->orig_addr, (Int)tte->orig_size);
+   BBCC *BBCC_node;
+   Addr BBCC_ptr0, BBCC_ptr;
+   Bool BB_seen_before;
+    
+   VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
+                tte->orig_addr, (Int)tte->orig_size);
+
+   /* 2nd arg won't be used since BB should have been seen before (assertions
+    * ensure this). */
+   BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
+   BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
+
+   vg_assert(True == BB_seen_before);
+
+   while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
+
+      /* We pretend the CC is an iCC for getting the tag.  This is ok
+       * because both CC types have tag as their first byte.  Once we know
+       * the type, we can cast and act appropriately. */
+
+      switch ( ((iCC*)BBCC_ptr)->tag ) {
+
+         case INSTR_CC:
+            ADD_CC_TO(iCC, I, Ir_discards);
+            BBCC_ptr += sizeof(iCC);
+            break;
+
+         case READ_CC:
+         case  MOD_CC:
+            ADD_CC_TO(idCC, I, Ir_discards);
+            ADD_CC_TO(idCC, D, Dr_discards);
+            BBCC_ptr += sizeof(idCC);
+            break;
+
+         case WRITE_CC:
+            ADD_CC_TO(idCC, I, Ir_discards);
+            ADD_CC_TO(idCC, D, Dw_discards);
+            BBCC_ptr += sizeof(idCC);
+            break;
+
+         default:
+            VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
+            break;
+      }
+   }
+
+   VG_(free)(VG_AR_PRIVATE, BBCC_node);
 }
 
 /*--------------------------------------------------------------------*/