Merge in r5435 from COMPVBITS.  Also added a note to
docs/internals/performance.txt about it.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5438 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_execontext.c b/coregrind/m_execontext.c
index c5b5c93..2b99e92 100644
--- a/coregrind/m_execontext.c
+++ b/coregrind/m_execontext.c
@@ -46,7 +46,8 @@
 
 struct _ExeContext {
    struct _ExeContext * next;
-   /* Variable-length array.  The size is VG_(clo_backtrace_size); at
+   UInt   n_ips;
+   /* Variable-length array.  The size is 'n_ips'; at
       least 1, at most VG_DEEPEST_BACKTRACE.  [0] is the current IP,
       [1] is its caller, [2] is the caller of [1], etc. */
    Addr ips[0];
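
For reference: the ips[0] trailer is a zero-length array (a GCC
extension; C99 spells it "Addr ips[];"), so each ExeContext is carved
out of a single allocation sized for its own trace.  A minimal
standalone sketch of the layout, assuming plain malloc and
hypothetical names rather than VG_(arena_malloc) and the real types:

   #include <stdlib.h>
   #include <string.h>

   typedef unsigned long Addr;
   typedef unsigned int  UInt;

   typedef struct _Ctx {
      struct _Ctx* next;
      UInt         n_ips;
      Addr         ips[];   /* C99 flexible array member */
   } Ctx;

   static Ctx* new_ctx ( const Addr* ips, UInt n_ips )
   {
      /* One allocation covers the header plus the trailing array. */
      Ctx* c = malloc( sizeof(Ctx) + n_ips * sizeof(Addr) );
      if (c == NULL) return NULL;
      c->next  = NULL;
      c->n_ips = n_ips;
      memcpy( c->ips, ips, n_ips * sizeof(Addr) );
      return c;
   }
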
@@ -126,38 +127,42 @@
 /* Print an ExeContext. */
 void VG_(pp_ExeContext) ( ExeContext* ec )
 {
-   VG_(pp_StackTrace)( ec->ips, VG_(clo_backtrace_size) );
+   VG_(pp_StackTrace)( ec->ips, ec->n_ips );
 }
 
 
 /* Compare two ExeContexts, comparing all callers. */
 Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
 {
+   Int i;
+
    if (e1 == NULL || e2 == NULL) 
       return False;
+
+   // Must be at least one address in each trace.
+   vg_assert(e1->n_ips >= 1 && e2->n_ips >= 1);
+
    switch (res) {
    case Vg_LowRes:
       /* Just compare the top two callers. */
       ec_cmp2s++;
-      if (e1->ips[0] != e2->ips[0]) return False;
-
-      if (VG_(clo_backtrace_size) < 2) return True;
-      if (e1->ips[1] != e2->ips[1]) return False;
+      for (i = 0; i < 2; i++) {
+         if ( (e1->n_ips <= i) &&  (e2->n_ips <= i)) return True;
+         if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False;
+         if (!(e1->n_ips <= i) &&  (e2->n_ips <= i)) return False;
+         if (e1->ips[i] != e2->ips[i])               return False;
+      }
       return True;
 
    case Vg_MedRes:
       /* Just compare the top four callers. */
       ec_cmp4s++;
-      if (e1->ips[0] != e2->ips[0]) return False;
-
-      if (VG_(clo_backtrace_size) < 2) return True;
-      if (e1->ips[1] != e2->ips[1]) return False;
-
-      if (VG_(clo_backtrace_size) < 3) return True;
-      if (e1->ips[2] != e2->ips[2]) return False;
-
-      if (VG_(clo_backtrace_size) < 4) return True;
-      if (e1->ips[3] != e2->ips[3]) return False;
+      for (i = 0; i < 4; i++) {
+         if ( (e1->n_ips <= i) &&  (e2->n_ips <= i)) return True;
+         if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False;
+         if (!(e1->n_ips <= i) &&  (e2->n_ips <= i)) return False;
+         if (e1->ips[i] != e2->ips[i])               return False;
+      }
       return True;
 
    case Vg_HighRes:
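
The three length tests per loop iteration above reduce to one rule:
within the window, two traces match only if they run out at the same
index and agree on every entry before that.  An equivalent, more
compact formulation (a sketch in the file's own types, not the
committed code):

   /* Compare the top 'depth' entries of two traces that may have
      different lengths.  Vg_LowRes uses depth 2, Vg_MedRes depth 4. */
   static Bool eq_top_n ( const ExeContext* e1, const ExeContext* e2,
                          UInt depth )
   {
      UInt i;
      for (i = 0; i < depth; i++) {
         Bool done1 = e1->n_ips <= i, done2 = e2->n_ips <= i;
         if (done1 || done2)
            return done1 == done2;   /* match iff both ran out here */
         if (e1->ips[i] != e2->ips[i])
            return False;
      }
      return True;
   }
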
@@ -188,18 +193,20 @@
    UWord       hash;
    ExeContext* new_ec;
    ExeContext* list;
+   UInt        n_ips;
 
    init_ExeContext_storage();
-   vg_assert(VG_(clo_backtrace_size) >= 1 
-             && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);
+   vg_assert(VG_(clo_backtrace_size) >= 1 &&
+             VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);
 
-   VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+   n_ips = VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+   vg_assert(n_ips >= 1);
 
    /* Now figure out if we've seen this one before.  First hash it so
       as to determine the list number. */
 
    hash = 0;
-   for (i = 0; i < VG_(clo_backtrace_size); i++) {
+   for (i = 0; i < n_ips; i++) {
       hash ^= ips[i];
       hash = (hash << 29) | (hash >> 3);
    }
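
For the record: on a 32-bit host, (hash << 29) | (hash >> 3) is
exactly a rotate-right-by-3 of the word, while on a 64-bit UWord the
two shifted copies overlap in the middle bits, so it acts as a cheaper
(but still serviceable) mixing step rather than a true rotate.  A
standalone sketch of the fold, reusing the typedefs from the sketch
above; N_CHAINS is a hypothetical stand-in for the real table size,
which this hunk does not show:

   typedef unsigned long UWord;
   #define N_CHAINS 4999   /* hypothetical; a prime spreads chains well */

   static UWord hash_ips ( const Addr* ips, UInt n_ips )
   {
      UWord hash = 0;
      UInt  i;
      for (i = 0; i < n_ips; i++) {
         hash ^= ips[i];
         hash = (hash << 29) | (hash >> 3);
      }
      return hash % N_CHAINS;   /* the chain ("list") number */
   }
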
@@ -215,7 +222,8 @@
       if (list == NULL) break;
       ec_searchcmps++;
-      same = True;
-      for (i = 0; i < VG_(clo_backtrace_size); i++) {
+      // Traces of different lengths can never match.
+      same = (list->n_ips == n_ips);
+      for (i = 0; same && i < n_ips; i++) {
          if (list->ips[i] != ips[i]) {
             same = False;
             break; 
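
Taken together this is classic hash-consing: identical traces are
stored once and shared.  Note that once traces can differ in length,
the chain comparison needs a length check before comparing entries, as
in the loop above.  A sketch of the whole lookup-or-insert step,
reusing the hypothetical helpers from the earlier sketches:

   static Ctx* chains[N_CHAINS];

   static Ctx* intern_trace ( const Addr* ips, UInt n_ips )
   {
      UWord h = hash_ips( ips, n_ips );
      Ctx*  c;
      for (c = chains[h]; c != NULL; c = c->next) {
         if (c->n_ips == n_ips
             && 0 == memcmp( c->ips, ips, n_ips * sizeof(Addr) ))
            return c;                /* seen before: share it */
      }
      c = new_ctx( ips, n_ips );     /* first sighting: store it */
      c->next   = chains[h];
      chains[h] = c;
      return c;
   }
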
@@ -234,13 +242,14 @@
    ec_totstored++;
 
    new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, 
-                               sizeof(struct _ExeContext *) 
-                               + VG_(clo_backtrace_size) * sizeof(Addr) );
+                               sizeof(struct _ExeContext) 
+                               + n_ips * sizeof(Addr) );
 
-   for (i = 0; i < VG_(clo_backtrace_size); i++)
+   for (i = 0; i < n_ips; i++)
       new_ec->ips[i] = ips[i];
 
-   new_ec->next = ec_list[hash];
+   new_ec->n_ips = n_ips;
+   new_ec->next  = ec_list[hash];
    ec_list[hash] = new_ec;
 
    return new_ec;
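
One detail worth spelling out in the allocation fix above: the old
struct was just { next, ips[0] }, so the old expression
sizeof(struct _ExeContext *) -- a pointer size -- happened to coincide
with sizeof(struct _ExeContext).  With the new n_ips field the two
differ, and the pointer-sized expression would under-allocate, so the
corrected sizeof is a real fix, not a cosmetic one.
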
diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c
index 21bc0e9..a9ad219 100644
--- a/coregrind/m_stacktrace.c
+++ b/coregrind/m_stacktrace.c
@@ -65,12 +65,9 @@
    vg_assert(sizeof(Addr) == sizeof(void*));
 
    /* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
-      putting zeroes in when the trail goes cold, which we guess to be
+      stopping when the trail goes cold, which we guess to be
       when FP is not a reasonable stack location. */
 
-   for (i = 0; i < n_ips; i++)
-      ips[i] = 0;
-
    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
    // current page, at least.  Dunno if it helps.
    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
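
The strategy the comment describes -- follow the frame-pointer chain
and stop when FP stops looking like a stack address -- together with
the new behaviour of reporting how many IPs were captured rather than
zero-padding the tail, can be sketched as follows.  Names and bounds
are hypothetical, and it assumes the classic frame layout where [FP]
holds the saved FP and [FP + wordsize] the return address; it is not
the body of VG_(get_StackTrace):

   static UInt walk_fp_chain ( Addr ip, Addr fp, Addr fp_min, Addr fp_max,
                               /*OUT*/ Addr* ips, UInt max_n_ips )
   {
      UInt n = 0;
      ips[n++] = ip;                  /* [0] is the current IP */
      while (n < max_n_ips) {
         /* Trail goes cold: FP is not a reasonable stack location. */
         if (fp < fp_min || fp > fp_max || (fp % sizeof(Addr)) != 0)
            break;
         ips[n++] = ((Addr*)fp)[1];   /* saved return address */
         fp       = ((Addr*)fp)[0];   /* caller's frame pointer */
      }
      return n;                       /* always >= 1 */
   }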