Rollup changes for Helgrind:

* Add new client request VALGRIND_HG_CLEAN_MEMORY_HEAPBLOCK.  This is
  like VALGRIND_HG_CLEAN_MEMORY but doesn't take an address range.
  Instead it takes a single argument which is supposed to be a pointer
  to the start of, or anywhere within, a heap allocated block.
  Helgrind then finds the block and paints it as belonging to the
  calling thread.  This is needed for correctly describing the
  behaviour of threadsafe reference counting when applied to classes
  involving inheritance of release methods or involving multiple
  inheritance.

* Add statistics counters for all basic VTS operations (tick, join,
  cmpLEQ, cmp_structural).

* Rewrite VTS__cmp_structural to be much faster.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11123 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c
index 622d802..004ce69 100644
--- a/helgrind/libhb_core.c
+++ b/helgrind/libhb_core.c
@@ -341,6 +341,16 @@
 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
 static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
+static UWord stats__vts__tick            = 0; // # calls to VTS__tick
+static UWord stats__vts__join            = 0; // # calls to VTS__join
+static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
+static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
+static UWord stats__vts__cmp_structural_slow = 0; // # calls to VTS__cmp_structural w/ slow case
+static UWord stats__vts__indexat_slow    = 0; // # calls to VTS__indexAt_SLOW
+static UWord stats__vts_set__fadoa       = 0; // # calls to vts_set__find_and_dealloc__or_add
+static UWord stats__vts_set__fadoa_d     = 0; // # calls to vts_set__find_and_dealloc__or_add
+                                              // that lead to a deallocation
+
 
 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
    return a & ~(N_SECMAP_ARANGE - 1);
@@ -1660,6 +1670,9 @@
    ScalarTS  tmp;
    VTS*      res;
    Word      i, n; 
+
+   stats__vts__tick++;
+
    tl_assert(me);
    tl_assert(is_sane_VTS(vts));
    //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
@@ -1730,6 +1743,8 @@
    Thr*     thr;
    VTS*     res;
 
+   stats__vts__join++;
+
    tl_assert(a && a->ts);
    tl_assert(b && b->ts);
    useda = VG_(sizeXA)( a->ts );
@@ -1821,6 +1836,8 @@
    Word  ia, ib, useda, usedb;
    ULong tyma, tymb;
 
+   stats__vts__cmpLEQ++;
+
    tl_assert(a && a->ts);
    tl_assert(b && b->ts);
    useda = VG_(sizeXA)( a->ts );
@@ -1903,37 +1920,59 @@
 
 /* Compute an arbitrary structural (total) ordering on the two args,
    based on their VCs, so they can be looked up in a table, tree, etc.
-   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)
+   Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
+   performance critical so there is some effort expended to make it sa
+   fast as possible.
 */
 Word VTS__cmp_structural ( VTS* a, VTS* b )
 {
    /* We just need to generate an arbitrary total ordering based on
       a->ts and b->ts.  Preferably do it in a way which comes across likely
       differences relatively quickly. */
-   Word     i, useda, usedb;
-   ScalarTS *tmpa, *tmpb;
+   Word     i;
+   Word     useda = 0,    usedb = 0;
+   ScalarTS *ctsa = NULL, *ctsb = NULL;
 
-   tl_assert(a && a->ts);
-   tl_assert(b && b->ts);
-   useda = VG_(sizeXA)( a->ts );
-   usedb = VG_(sizeXA)( b->ts );
+   stats__vts__cmp_structural++;
+
+   tl_assert(a);
+   tl_assert(b);
+
+   VG_(getContentsXA_UNSAFE)( a->ts, (void**)&ctsa, &useda );
+   VG_(getContentsXA_UNSAFE)( b->ts, (void**)&ctsb, &usedb );
+
+   if (LIKELY(useda == usedb)) {
+      ScalarTS *tmpa = NULL, *tmpb = NULL;
+      stats__vts__cmp_structural_slow++;
+      /* Same length vectors.  Find the first difference, if any, as
+         fast as possible. */
+      for (i = 0; i < useda; i++) {
+         tmpa = &ctsa[i];
+         tmpb = &ctsb[i];
+         if (LIKELY(tmpa->tym == tmpb->tym && tmpa->thr == tmpb->thr))
+            continue;
+         else
+            break;
+      }
+      if (UNLIKELY(i == useda)) {
+         /* They're identical. */
+         return 0;
+      } else {
+         tl_assert(i >= 0 && i < useda);
+         if (tmpa->tym < tmpb->tym) return -1;
+         if (tmpa->tym > tmpb->tym) return 1;
+         if (tmpa->thr < tmpb->thr) return -1;
+         if (tmpa->thr > tmpb->thr) return 1;
+         /* we just established them as non-identical, hence: */
+      }
+      /*NOTREACHED*/
+      tl_assert(0);
+   }
 
    if (useda < usedb) return -1;
    if (useda > usedb) return 1;
-
-   /* Same length vectors, so let's step through them together. */
-   tl_assert(useda == usedb);
-   for (i = 0; i < useda; i++) {
-      tmpa = VG_(indexXA)( a->ts, i );
-      tmpb = VG_(indexXA)( b->ts, i );
-      if (tmpa->tym < tmpb->tym) return -1;
-      if (tmpa->tym > tmpb->tym) return 1;
-      if (tmpa->thr < tmpb->thr) return -1;
-      if (tmpa->thr > tmpb->thr) return 1;
-   }
-
-   /* They're identical. */
-   return 0;
+   /*NOTREACHED*/
+   tl_assert(0);
 }
 
 
@@ -1972,6 +2011,7 @@
 */
 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
    UWord i, n;
+   stats__vts__indexat_slow++;
    tl_assert(vts && vts->ts);
    n = VG_(sizeXA)( vts->ts );
    for (i = 0; i < n; i++) {
@@ -2029,12 +2069,14 @@
 static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
 {
    UWord keyW, valW;
+   stats__vts_set__fadoa++;
    /* lookup cand (by value) */
    if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
       /* found it */
       tl_assert(valW == 0);
       /* if this fails, cand (by ref) was already present (!) */
       tl_assert(keyW != (UWord)cand);
+      stats__vts_set__fadoa_d++;
       VTS__delete(cand);
       return (VTS*)keyW;
    } else {
@@ -5469,9 +5511,9 @@
 
       VG_(printf)("%s","\n");
 
-      VG_(printf)("   libhb: %'13llu msmcread  (%'llu changed)\n",
+      VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
                   stats__msmcread, stats__msmcread_change);
-      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu changed)\n",
+      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                   stats__msmcwrite, stats__msmcwrite_change);
       VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                   stats__cmpLEQ_queries, stats__cmpLEQ_misses);
@@ -5479,6 +5521,16 @@
                   stats__join2_queries, stats__join2_misses);
 
       VG_(printf)("%s","\n");
+      VG_(printf)( "   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
+                   stats__vts__tick, stats__vts__join,  stats__vts__cmpLEQ );
+      VG_(printf)( "   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
+                   stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
+      VG_(printf)( "   libhb: VTSset: find_and_dealloc__or_add %'lu (%'lu deallocd)\n",
+                   stats__vts_set__fadoa, stats__vts_set__fadoa_d );
+      VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
+                   stats__vts__indexat_slow );
+
+      VG_(printf)("%s","\n");
       VG_(printf)(
          "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
          VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)