Scalability fix for Helgrind: reduce the size of ScalarTS (scalar
timestamps) from 16 to 8 bytes. This halves the size of vector
timestamps and reduces the amount of memory needed to run programs
that have many threads and/or many synchronisation events.
The tradeoff is that Helgrind must abort the run if the program
creates more than 2^20 (1.0e+6) threads or performs more than 2^44
(1.76e+13) synchronisation events. Neither of these seem like a
significant limitation in practice. It's easy to argue that a limit
of 2^44 synch events would take at a minimum, several CPU months on a
very fast machine.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11570 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c
index 62726d7..ca5cfa2 100644
--- a/helgrind/libhb_core.c
+++ b/helgrind/libhb_core.c
@@ -1524,11 +1524,82 @@
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
-#ifndef __HB_VTS_H
-#define __HB_VTS_H
-/* VtsIDs can't exceed 30 bits, since they have to be packed into the
- lowest 30 bits of an SVal. */
+/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
+ being compact stand-ins for Thr*'s. Use these functions to map
+ between them. */
+static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
+static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
+
+
+/* Scalar Timestamp. We have to store a lot of these, so there is
+ some effort to make them as small as possible. Logically they are
+ a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
+ We pack it into 64 bits by representing the Thr* using a ThrID, a
+ small integer (20 bits), and a 44 bit integer for the timestamp
+ number. The 44/20 split is arbitary, but has the effect that
+ Helgrind can only handle programs that create 2^20 or fewer threads
+ over their entire lifetime, and have no more than 2^44 timestamp
+ ticks (synchronisation operations on the same thread).
+
+ This doesn't seem like much of a limitation. 2^44 ticks is
+ 1.76e+13, and if each tick (optimistically) takes the machine 1000
+ cycles to process, then the minimum time to process that many ticks
+ at a clock rate of 5 GHz is 40.72 days. And that's doing nothing
+ but VTS ticks, which isn't realistic.
+
+ NB1: SCALARTS_N_THRBITS must be 32 or lower, so they fit in a ThrID
+ (== a UInt).
+
+ NB2: thrid values are issued upwards from 1024, and values less
+ than that aren't valid. This isn't per se necessary (any order
+ will do, so long as they are unique), but it does help ensure they
+ are less likely to get confused with the various other kinds of
+ small-integer thread ids drifting around (eg, TId).
+
+ NB3: this probably also relies on the fact that Thr's are never
+ deallocated -- they exist forever. Hence the 1-1 mapping from
+ Thr's to thrid values (set up in Thr__new) persists forever.
+*/
+#define SCALARTS_N_THRBITS 20 /* valid range: 13 to 32 inclusive */
+#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
+typedef
+ struct {
+ ThrID thrid : SCALARTS_N_THRBITS;
+ ULong tym : SCALARTS_N_TYMBITS;
+ }
+ ScalarTS;
+
+__attribute__((noreturn))
+static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
+{
+ if (due_to_nThrs) {
+ HChar* s =
+ "\n"
+ "Helgrind: cannot continue, run aborted: too many threads.\n"
+ "Sorry. Helgrind can only handle programs that create\n"
+ "%'llu or fewer threads over their entire lifetime.\n"
+ "\n";
+ VG_(umsg)(s, (1ULL << SCALARTS_N_THRBITS) - 1024);
+ } else {
+ HChar* s =
+ "\n"
+ "Helgrind: cannot continue, run aborted: too many\n"
+ "synchronisation events. Sorry. Helgrind can only handle\n"
+ "programs which perform %'llu or fewer\n"
+ "inter-thread synchronisation events (locks, unlocks, etc).\n"
+ "\n";
+ VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
+ }
+ VG_(exit)(1);
+ /*NOTREACHED*/
+ tl_assert(0); /*wtf?!*/
+}
+
+
+/* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
+ bits, since they have to be packed into the lowest 30 bits of an
+ SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF
@@ -1570,13 +1641,14 @@
LT, GT, UN), in fact we only need LEQ, and so we may as well
hardwire that fact.
- Returns NULL iff LEQ(A,B), or non-NULL if not. In the latter case,
- the returned Thr* indicates the discovered point for which they are
- not. There may be more than one such point, but we only care about
- seeing one of them, not all of them. This rather strange
- convention is used because sometimes we want to know the actual
- index at which they first differ. */
-static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );
+ Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
+ invald ThrID). In the latter case, the returned ThrID indicates
+ the discovered point for which they are not. There may be more
+ than one such point, but we only care about seeing one of them, not
+ all of them. This rather strange convention is used because
+ sometimes we want to know the actual index at which they first
+ differ. */
+static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
/* Compute an arbitrary structural (total) ordering on the two args,
based on their VCs, so they can be looked up in a table, tree, etc.
@@ -1589,20 +1661,9 @@
/* Debugging only. Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
-#endif /* ! __HB_VTS_H */
-
/*--------------- to do with Vector Timestamps ---------------*/
-/* Scalar Timestamp */
-typedef
- struct {
- Thr* thr;
- ULong tym;
- }
- ScalarTS;
-
-
static Bool is_sane_VTS ( VTS* vts )
{
UWord i, n;
@@ -1614,7 +1675,7 @@
for (i = 0; i < n-1; i++) {
st1 = VG_(indexXA)( vts->ts, i );
st2 = VG_(indexXA)( vts->ts, i+1 );
- if (st1->thr >= st2->thr)
+ if (st1->thrid >= st2->thrid)
return False;
if (st1->tym == 0 || st2->tym == 0)
return False;
@@ -1624,7 +1685,11 @@
}
-/* Create a new, empty VTS.
+/* Create a new, empty VTS. The argument is a hint for the expected
+ number of elements that will eventually be added to the array;
+ doesn't matter (from a correctness perspective) if it's wrong.
+ (Important from a performance perspective though.) Pass zero to
+ mean "no hint".
*/
VTS* VTS__new ( Word nElemsHint )
{
@@ -1679,8 +1744,8 @@
tl_assert(thr);
tl_assert(tym >= 1);
vts = VTS__new(1);
- st.thr = thr;
- st.tym = tym;
+ st.thrid = Thr__to_ThrID(thr);
+ st.tym = tym;
VG_(addToXA)( vts->ts, &st );
return vts;
}
@@ -1695,10 +1760,12 @@
ScalarTS tmp;
VTS* res;
Word i, n, hintsize;
+ ThrID me_thrid;
stats__vts__tick++;
tl_assert(me);
+ me_thrid = Thr__to_ThrID(me);
tl_assert(is_sane_VTS(vts));
//if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
// (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
@@ -1709,8 +1776,8 @@
/* main loop doesn't handle zero-entry case correctly, so
special-case it. */
if (n == 0) {
- tmp.thr = me;
- tmp.tym = 1;
+ tmp.thrid = me_thrid;
+ tmp.tym = 1;
VG_(addToXA)( res->ts, &tmp );
tl_assert(is_sane_VTS(res));
return res;
@@ -1718,32 +1785,36 @@
for (i = 0; i < n; i++) {
here = VG_(indexXA)( vts->ts, i );
- if (me < here->thr) {
+ if (me_thrid < here->thrid) {
/* We just went past 'me', without seeing it. */
- tmp.thr = me;
- tmp.tym = 1;
+ tmp.thrid = me_thrid;
+ tmp.tym = 1;
VG_(addToXA)( res->ts, &tmp );
tmp = *here;
VG_(addToXA)( res->ts, &tmp );
i++;
break;
}
- else if (me == here->thr) {
+ else if (me_thrid == here->thrid) {
tmp = *here;
+ if (UNLIKELY(tmp.tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
+ /* We're hosed. We have to stop. */
+ scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
+ }
tmp.tym++;
VG_(addToXA)( res->ts, &tmp );
i++;
break;
}
- else /* me > here->thr */ {
+ else /* me_thrid > here->thrid */ {
tmp = *here;
VG_(addToXA)( res->ts, &tmp );
}
}
tl_assert(i >= 0 && i <= n);
- if (i == n && here && here->thr < me) {
- tmp.thr = me;
- tmp.tym = 1;
+ if (i == n && here && here->thrid < me_thrid) {
+ tmp.thrid = me_thrid;
+ tmp.tym = 1;
VG_(addToXA)( res->ts, &tmp );
} else {
for (/*keepgoing*/; i < n; i++) {
@@ -1766,7 +1837,7 @@
{
Word ia, ib, useda, usedb, hintsize;
ULong tyma, tymb, tymMax;
- Thr* thr;
+ ThrID thrid;
VTS* res;
stats__vts__join++;
@@ -1782,8 +1853,8 @@
while (1) {
- /* This logic is to enumerate triples (thr, tyma, tymb) drawn
- from a and b in order, where thr is the next Thr*
+ /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
+ from a and b in order, where thrid is the next ThrID
occurring in either a or b, and tyma/b are the relevant
scalar timestamps, taking into account implicit zeroes. */
tl_assert(ia >= 0 && ia <= useda);
@@ -1796,41 +1867,41 @@
} else if (ia == useda && ib != usedb) {
/* a empty, use up b */
ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
- thr = tmpb->thr;
- tyma = 0;
- tymb = tmpb->tym;
+ thrid = tmpb->thrid;
+ tyma = 0;
+ tymb = tmpb->tym;
ib++;
} else if (ia != useda && ib == usedb) {
/* b empty, use up a */
ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
- thr = tmpa->thr;
- tyma = tmpa->tym;
- tymb = 0;
+ thrid = tmpa->thrid;
+ tyma = tmpa->tym;
+ tymb = 0;
ia++;
} else {
- /* both not empty; extract lowest-Thr*'d triple */
+ /* both not empty; extract lowest-ThrID'd triple */
ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
- if (tmpa->thr < tmpb->thr) {
- /* a has the lowest unconsidered Thr* */
- thr = tmpa->thr;
- tyma = tmpa->tym;
- tymb = 0;
+ if (tmpa->thrid < tmpb->thrid) {
+ /* a has the lowest unconsidered ThrID */
+ thrid = tmpa->thrid;
+ tyma = tmpa->tym;
+ tymb = 0;
ia++;
- } else if (tmpa->thr > tmpb->thr) {
- /* b has the lowest unconsidered Thr* */
- thr = tmpb->thr;
- tyma = 0;
- tymb = tmpb->tym;
+ } else if (tmpa->thrid > tmpb->thrid) {
+ /* b has the lowest unconsidered ThrID */
+ thrid = tmpb->thrid;
+ tyma = 0;
+ tymb = tmpb->tym;
ib++;
} else {
- /* they both next mention the same Thr* */
- tl_assert(tmpa->thr == tmpb->thr);
- thr = tmpa->thr; /* == tmpb->thr */
- tyma = tmpa->tym;
- tymb = tmpb->tym;
+ /* they both next mention the same ThrID */
+ tl_assert(tmpa->thrid == tmpb->thrid);
+ thrid = tmpa->thrid; /* == tmpb->thrid */
+ tyma = tmpa->tym;
+ tymb = tmpb->tym;
ia++;
ib++;
}
@@ -1841,8 +1912,8 @@
tymMax = tyma > tymb ? tyma : tymb;
if (tymMax > 0) {
ScalarTS st;
- st.thr = thr;
- st.tym = tymMax;
+ st.thrid = thrid;
+ st.tym = tymMax;
VG_(addToXA)( res->ts, &st );
}
@@ -1854,11 +1925,12 @@
}
-/* Determine if 'a' <= 'b', in the partial ordering. Returns NULL if
- they are, or the first Thr* for which they are not. This rather
- strange convention is used because sometimes we want to know the
- actual index at which they first differ. */
-static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
+/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
+ they are, or the first ThrID for which they are not (no valid ThrID
+ has the value zero). This rather strange convention is used
+ because sometimes we want to know the actual index at which they
+ first differ. */
+static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
Word ia, ib, useda, usedb;
ULong tyma, tymb;
@@ -1877,7 +1949,7 @@
/* This logic is to enumerate doubles (tyma, tymb) drawn
from a and b in order, and tyma/b are the relevant
scalar timestamps, taking into account implicit zeroes. */
- Thr* thr;
+ ThrID thrid;
tl_assert(ia >= 0 && ia <= useda);
tl_assert(ib >= 0 && ib <= usedb);
@@ -1889,43 +1961,43 @@
} else if (ia == useda && ib != usedb) {
/* a empty, use up b */
ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
- tyma = 0;
- tymb = tmpb->tym;
- thr = tmpb->thr;
+ tyma = 0;
+ tymb = tmpb->tym;
+ thrid = tmpb->thrid;
ib++;
} else if (ia != useda && ib == usedb) {
/* b empty, use up a */
ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
- tyma = tmpa->tym;
- thr = tmpa->thr;
- tymb = 0;
+ tyma = tmpa->tym;
+ thrid = tmpa->thrid;
+ tymb = 0;
ia++;
} else {
- /* both not empty; extract lowest-Thr*'d triple */
+ /* both not empty; extract lowest-ThrID'd triple */
ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
- if (tmpa->thr < tmpb->thr) {
- /* a has the lowest unconsidered Thr* */
- tyma = tmpa->tym;
- thr = tmpa->thr;
- tymb = 0;
+ if (tmpa->thrid < tmpb->thrid) {
+ /* a has the lowest unconsidered ThrID */
+ tyma = tmpa->tym;
+ thrid = tmpa->thrid;
+ tymb = 0;
ia++;
}
else
- if (tmpa->thr > tmpb->thr) {
- /* b has the lowest unconsidered Thr* */
- tyma = 0;
- tymb = tmpb->tym;
- thr = tmpb->thr;
+ if (tmpa->thrid > tmpb->thrid) {
+ /* b has the lowest unconsidered ThrID */
+ tyma = 0;
+ tymb = tmpb->tym;
+ thrid = tmpb->thrid;
ib++;
} else {
- /* they both next mention the same Thr* */
- tl_assert(tmpa->thr == tmpb->thr);
- tyma = tmpa->tym;
- thr = tmpa->thr;
- tymb = tmpb->tym;
+ /* they both next mention the same ThrID */
+ tl_assert(tmpa->thrid == tmpb->thrid);
+ tyma = tmpa->tym;
+ thrid = tmpa->thrid;
+ tymb = tmpb->tym;
ia++;
ib++;
}
@@ -1936,12 +2008,12 @@
if (tyma > tymb) {
/* not LEQ at this index. Quit, since the answer is
determined already. */
- tl_assert(thr);
- return thr;
+ tl_assert(thrid >= 1024);
+ return thrid;
}
}
- return NULL; /* all points are LEQ */
+ return 0; /* all points are LEQ => return an invalid ThrID */
}
@@ -1976,7 +2048,8 @@
for (i = 0; i < useda; i++) {
tmpa = &ctsa[i];
tmpb = &ctsb[i];
- if (LIKELY(tmpa->tym == tmpb->tym && tmpa->thr == tmpb->thr))
+ if (LIKELY(tmpa->tym == tmpb->tym
+ && tmpa->thrid == tmpb->thrid))
continue;
else
break;
@@ -1988,8 +2061,8 @@
tl_assert(i >= 0 && i < useda);
if (tmpa->tym < tmpb->tym) return -1;
if (tmpa->tym > tmpb->tym) return 1;
- if (tmpa->thr < tmpb->thr) return -1;
- if (tmpa->thr > tmpb->thr) return 1;
+ if (tmpa->thrid < tmpb->thrid) return -1;
+ if (tmpa->thrid > tmpb->thrid) return 1;
/* we just established them as non-identical, hence: */
}
/*NOTREACHED*/
@@ -2019,8 +2092,8 @@
tl_assert(avail >= 40);
st = VG_(indexXA)( vts->ts, i );
VG_(memset)(unit, 0, sizeof(unit));
- VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
- st->thr, st->tym);
+ VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
+ st->thrid, (ULong)st->tym);
if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
VG_(strcat)(buf, " ...]");
buf[nBuf-1] = 0;
@@ -2038,12 +2111,13 @@
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
UWord i, n;
+ ThrID idx_thrid = Thr__to_ThrID(idx);
stats__vts__indexat_slow++;
tl_assert(vts && vts->ts);
n = VG_(sizeXA)( vts->ts );
for (i = 0; i < n; i++) {
ScalarTS* st = VG_(indexXA)( vts->ts, i );
- if (st->thr == idx)
+ if (st->thrid == idx_thrid)
return st->tym;
}
return 0;
@@ -2457,7 +2531,7 @@
////--
v1 = VtsID__to_VTS(vi1);
v2 = VtsID__to_VTS(vi2);
- leq = VTS__cmpLEQ( v1, v2 ) == NULL;
+ leq = VTS__cmpLEQ( v1, v2 ) == 0;
////++
cmpLEQ_cache[hash].vi1 = vi1;
cmpLEQ_cache[hash].vi2 = vi2;
@@ -2527,12 +2601,14 @@
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
VTS *vts1, *vts2;
- Thr* diffthr;
+ Thr* diffthr;
+ ThrID diffthrid;
tl_assert(vi1 != vi2);
vts1 = VtsID__to_VTS(vi1);
vts2 = VtsID__to_VTS(vi2);
tl_assert(vts1 != vts2);
- diffthr = VTS__cmpLEQ(vts1, vts2);
+ diffthrid = VTS__cmpLEQ(vts1, vts2);
+ diffthr = Thr__from_ThrID(diffthrid);
tl_assert(diffthr); /* else they are LEQ ! */
return diffthr;
}
@@ -2934,6 +3010,10 @@
has done a low-level exit. */
Bool still_alive;
+ /* A small integer giving a unique identity to this Thr. See
+ comments on the definition of ScalarTS for details. */
+ ThrID thrid : SCALARTS_N_THRBITS;
+
/* A filter that removes references for which we believe that
msmcread/msmcwrite will not change the state, nor report a
race. */
@@ -2949,7 +3029,27 @@
XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};
-static Thr* Thr__new ( void ) {
+
+/* Maps ThrID values to their Thr*s (which contain ThrID values that
+ should point back to the relevant slot in the array. Lowest
+ numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
+static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
+
+/* And a counter to dole out ThrID values. For rationale/background,
+ see comments on definition of ScalarTS (far) above. */
+static ThrID thrid_counter = 1024; /* runs up to 2^SCALARTS_N_THRBITS-1 */
+
+static ThrID Thr__to_ThrID ( Thr* thr ) {
+ return thr->thrid;
+}
+static Thr* Thr__from_ThrID ( UInt thrid ) {
+ Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
+ tl_assert(thr->thrid == thrid);
+ return thr;
+}
+
+static Thr* Thr__new ( void )
+{
Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
thr->viR = VtsID_INVALID;
thr->viW = VtsID_INVALID;
@@ -2963,6 +3063,24 @@
= VG_(newXA)( HG_(zalloc),
"libhb.Thr__new.3 (local_Kws_and_stacks)",
HG_(free), sizeof(ULong_n_EC) );
+
+ /* Add this Thr* <-> ThrID binding to the mapping, and
+ cross-check */
+ if (!thrid_to_thr_map) {
+ thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
+ HG_(free), sizeof(Thr*) );
+ tl_assert(thrid_to_thr_map);
+ }
+
+ if (thrid_counter >= (((ThrID)1) << SCALARTS_N_THRBITS) - 2) {
+ /* We're hosed. We have to stop. */
+ scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
+ }
+
+ thr->thrid = thrid_counter++;
+ Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
+ tl_assert(ix + 1024 == thr->thrid);
+
return thr;
}
@@ -5407,6 +5525,11 @@
{
Thr* thr;
VtsID vi;
+
+ // We will have to have to store a large number of these,
+ // so make sure they're the size we expect them to be.
+ tl_assert(sizeof(ScalarTS) == 8);
+
tl_assert(get_stacktrace);
tl_assert(get_EC);
main_get_stacktrace = get_stacktrace;