Change the representation of VTSs. Instead of using an XArray of
ScalarTSs, have the ScalarTS array as a trailing array directly on the
VTS structure. This reduces the number of malloc'd blocks per VTS
from 3 to 1, since an XArray always requires 2 malloc'd blocks. At
least for tc19_shadowmem this reduces the total amount of heap
turnover in Arena 'tool' by a factor of 3, and modestly improves
performance whilst modestly reducing overall memory use.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11571 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/helgrind/libhb_core.c b/helgrind/libhb_core.c
index ca5cfa2..ef51d22 100644
--- a/helgrind/libhb_core.c
+++ b/helgrind/libhb_core.c
@@ -345,11 +345,19 @@
static UWord stats__vts__join = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
-static UWord stats__vts__cmp_structural_slow = 0; // # calls to VTS__cmp_structural w/ slow case
-static UWord stats__vts__indexat_slow = 0; // # calls to VTS__indexAt_SLOW
-static UWord stats__vts_set__fadoa = 0; // # calls to vts_set__find_and_dealloc__or_add
-static UWord stats__vts_set__fadoa_d = 0; // # calls to vts_set__find_and_dealloc__or_add
- // that lead to a deallocation
+
+// # calls to VTS__cmp_structural w/ slow case
+static UWord stats__vts__cmp_structural_slow = 0;
+
+// # calls to VTS__indexAt_SLOW
+static UWord stats__vts__indexat_slow = 0;
+
+// # calls to vts_set__find__or__clone_and_add
+static UWord stats__vts_set__focaa = 0;
+
+// # calls to vts_set__find__or__clone_and_add that lead to an
+// allocation
+static UWord stats__vts_set__focaa_a = 0;
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
@@ -1536,16 +1544,16 @@
some effort to make them as small as possible. Logically they are
a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
We pack it into 64 bits by representing the Thr* using a ThrID, a
- small integer (20 bits), and a 44 bit integer for the timestamp
- number. The 44/20 split is arbitary, but has the effect that
- Helgrind can only handle programs that create 2^20 or fewer threads
- over their entire lifetime, and have no more than 2^44 timestamp
+ small integer (18 bits), and a 46 bit integer for the timestamp
+ number. The 46/18 split is arbitary, but has the effect that
+ Helgrind can only handle programs that create 2^18 or fewer threads
+ over their entire lifetime, and have no more than 2^46 timestamp
ticks (synchronisation operations on the same thread).
- This doesn't seem like much of a limitation. 2^44 ticks is
- 1.76e+13, and if each tick (optimistically) takes the machine 1000
+ This doesn't seem like much of a limitation. 2^46 ticks is
+ 7.06e+13, and if each tick (optimistically) takes the machine 1000
cycles to process, then the minimum time to process that many ticks
- at a clock rate of 5 GHz is 40.72 days. And that's doing nothing
+ at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
but VTS ticks, which isn't realistic.
NB1: SCALARTS_N_THRBITS must be 32 or lower, so they fit in a ThrID
@@ -1560,8 +1568,18 @@
NB3: this probably also relies on the fact that Thr's are never
deallocated -- they exist forever. Hence the 1-1 mapping from
Thr's to thrid values (set up in Thr__new) persists forever.
+
+ NB4: temp_max_sized_VTS is allocated at startup and never freed.
+ It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS)
+ ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
+ making the memory use for this go sky-high. With
+ SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
+ like an OK tradeoff. If more than 256k threads need to be
+ supported, we could change SCALARTS_N_THRBITS to 20, which would
+ facilitate supporting 1 million threads at the cost of 8MB storage
+ for temp_max_sized_VTS.
*/
-#define SCALARTS_N_THRBITS 20 /* valid range: 13 to 32 inclusive */
+#define SCALARTS_N_THRBITS 18 /* valid range: 11 to 32 inclusive */
#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
struct {
@@ -1570,6 +1588,9 @@
}
ScalarTS;
+#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
+
+
__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
@@ -1580,7 +1601,7 @@
"Sorry. Helgrind can only handle programs that create\n"
"%'llu or fewer threads over their entire lifetime.\n"
"\n";
- VG_(umsg)(s, (1ULL << SCALARTS_N_THRBITS) - 1024);
+ VG_(umsg)(s, ThrID_MAX_VALID - 1024);
} else {
HChar* s =
"\n"
@@ -1609,32 +1630,37 @@
VtsID_INVALID. */
typedef
struct {
- VtsID id;
- XArray* ts; /* XArray* ScalarTS(abstract) */
+ VtsID id;
+ UInt usedTS;
+ UInt sizeTS;
+ ScalarTS ts[0];
}
VTS;
+/* Allocate a VTS capable of storing 'sizeTS' entries. */
+static VTS* VTS__new ( HChar* who, UInt sizeTS );
-/* Create a new, empty VTS. The argument is a hint for the expected
- number of elements that will eventually be added to the array;
- doesn't matter (from a correctness perspective) if it's wrong.
- (Important from a performance perspective though.) Pass zero to
- mean "no hint". */
-static VTS* VTS__new ( Word nElemsHint );
+/* Make a clone of 'vts', resizing the array to exactly match the
+ number of ScalarTSs present. */
+static VTS* VTS__clone ( HChar* who, VTS* vts );
/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );
-/* Create a new singleton VTS. */
-static VTS* VTS__singleton ( Thr* thr, ULong tym );
+/* Create a new singleton VTS in 'out'. Caller must have
+ pre-allocated 'out' sufficiently big to hold the result in all
+ possible cases. */
+static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
-/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
- not modified. */
-static VTS* VTS__tick ( Thr* me, VTS* vts );
+/* Create in 'out' a VTS which is the same as 'vts' except with
+ vts[me]++, so to speak. Caller must have pre-allocated 'out'
+ sufficiently big to hold the result in all possible cases. */
+static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
-/* Return a new VTS constructed as the join (max) of the 2 args.
- Neither arg is modified. */
-static VTS* VTS__join ( VTS* a, VTS* b );
+/* Create in 'out' a VTS which is the join (max) of 'a' and
+ 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
+ the result in all possible cases. */
+static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
/* Compute the partial ordering relation of the two args. Although we
could be completely general and return an enumeration value (EQ,
@@ -1670,11 +1696,17 @@
ScalarTS *st1, *st2;
if (!vts) return False;
if (!vts->ts) return False;
- n = VG_(sizeXA)( vts->ts );
+ n = vts->usedTS;
+ if (n == 1) {
+ st1 = &vts->ts[0];
+ if (st1->tym == 0)
+ return False;
+ }
+ else
if (n >= 2) {
for (i = 0; i < n-1; i++) {
- st1 = VG_(indexXA)( vts->ts, i );
- st2 = VG_(indexXA)( vts->ts, i+1 );
+ st1 = &vts->ts[i];
+ st2 = &vts->ts[i+1];
if (st1->thrid >= st2->thrid)
return False;
if (st1->tym == 0 || st2->tym == 0)
@@ -1685,170 +1717,169 @@
}
-/* Create a new, empty VTS. The argument is a hint for the expected
- number of elements that will eventually be added to the array;
- doesn't matter (from a correctness perspective) if it's wrong.
- (Important from a performance perspective though.) Pass zero to
- mean "no hint".
+/* Create a new, empty VTS.
*/
-VTS* VTS__new ( Word nElemsHint )
+static VTS* VTS__new ( HChar* who, UInt sizeTS )
{
- VTS* vts;
- tl_assert(nElemsHint >= 0);
-
- /* (optional) try to avoid m_mallocfree's tendency to do lengthy
- searches of freelists which don't contain quite big enough free
- blocks (due to containing lots of freed VTSs of size
- nElemsHint-1) by rounding the size up to the next even number,
- thereby halving the number of different sized blocks in
- circulation. NOTE: maps 0 to 0, as required to maintain the "no
- hint" indication.*/
- if (nElemsHint & 1) nElemsHint++;
-
- vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
- tl_assert(vts);
- vts->id = VtsID_INVALID;
- if (nElemsHint == 0) {
- vts->ts = VG_(newXA)(
- HG_(zalloc), "libhb.VTS__new.2",
- HG_(free), sizeof(ScalarTS)
- );
- } else {
- vts->ts = VG_(newSizedXA)(
- HG_(zalloc), "libhb.VTS__new.3",
- HG_(free), sizeof(ScalarTS),
- nElemsHint
- );
- }
- tl_assert(vts->ts);
+ VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
+ tl_assert(vts->usedTS == 0);
+ vts->sizeTS = sizeTS;
+ *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
return vts;
}
+/* Clone this VTS.
+*/
+static VTS* VTS__clone ( HChar* who, VTS* vts )
+{
+ tl_assert(vts);
+ tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
+ UInt nTS = vts->usedTS;
+ VTS* clone = VTS__new(who, nTS);
+ clone->id = vts->id;
+ clone->sizeTS = nTS;
+ clone->usedTS = nTS;
+ UInt i;
+ for (i = 0; i < nTS; i++) {
+ clone->ts[i] = vts->ts[i];
+ }
+ tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
+ return clone;
+}
+
/* Delete this VTS in its entirety.
*/
-void VTS__delete ( VTS* vts )
+static void VTS__delete ( VTS* vts )
{
tl_assert(vts);
- tl_assert(vts->ts);
- VG_(deleteXA)( vts->ts );
+ tl_assert(vts->usedTS <= vts->sizeTS);
+ tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
HG_(free)(vts);
}
/* Create a new singleton VTS.
*/
-VTS* VTS__singleton ( Thr* thr, ULong tym ) {
- ScalarTS st;
- VTS* vts;
+static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
+{
tl_assert(thr);
tl_assert(tym >= 1);
- vts = VTS__new(1);
- st.thrid = Thr__to_ThrID(thr);
- st.tym = tym;
- VG_(addToXA)( vts->ts, &st );
- return vts;
+ tl_assert(out);
+ tl_assert(out->usedTS == 0);
+ tl_assert(out->sizeTS >= 1);
+ UInt hi = out->usedTS++;
+ out->ts[hi].thrid = Thr__to_ThrID(thr);
+ out->ts[hi].tym = tym;
}
/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
not modified.
*/
-VTS* VTS__tick ( Thr* me, VTS* vts )
+static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
ScalarTS* here = NULL;
- ScalarTS tmp;
- VTS* res;
- Word i, n, hintsize;
+ UInt i, n;
ThrID me_thrid;
+ Bool found = False;
stats__vts__tick++;
+ tl_assert(out);
+ tl_assert(out->usedTS == 0);
+ if (vts->usedTS >= ThrID_MAX_VALID)
+ scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
+ tl_assert(out->sizeTS >= 1 + vts->usedTS);
+
tl_assert(me);
me_thrid = Thr__to_ThrID(me);
tl_assert(is_sane_VTS(vts));
- //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
- // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
- n = VG_(sizeXA)( vts->ts );
- hintsize = n;
- res = VTS__new(hintsize);
+ n = vts->usedTS;
/* main loop doesn't handle zero-entry case correctly, so
special-case it. */
if (n == 0) {
- tmp.thrid = me_thrid;
- tmp.tym = 1;
- VG_(addToXA)( res->ts, &tmp );
- tl_assert(is_sane_VTS(res));
- return res;
+ UInt hi = out->usedTS++;
+ out->ts[hi].thrid = me_thrid;
+ out->ts[hi].tym = 1;
+ tl_assert(out->usedTS <= out->sizeTS);
+ return;
}
for (i = 0; i < n; i++) {
- here = VG_(indexXA)( vts->ts, i );
+ here = &vts->ts[i];
if (me_thrid < here->thrid) {
/* We just went past 'me', without seeing it. */
- tmp.thrid = me_thrid;
- tmp.tym = 1;
- VG_(addToXA)( res->ts, &tmp );
- tmp = *here;
- VG_(addToXA)( res->ts, &tmp );
+ UInt hi = out->usedTS++;
+ out->ts[hi].thrid = me_thrid;
+ out->ts[hi].tym = 1;
+ hi = out->usedTS++;
+ out->ts[hi] = *here;
i++;
break;
}
else if (me_thrid == here->thrid) {
- tmp = *here;
- if (UNLIKELY(tmp.tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
+ found = True;
+ if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
/* We're hosed. We have to stop. */
scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
}
- tmp.tym++;
- VG_(addToXA)( res->ts, &tmp );
+ UInt hi = out->usedTS++;
+ out->ts[hi].thrid = here->thrid;
+ out->ts[hi].tym = here->tym + 1;
i++;
break;
}
else /* me_thrid > here->thrid */ {
- tmp = *here;
- VG_(addToXA)( res->ts, &tmp );
+ UInt hi = out->usedTS++;
+ out->ts[hi] = *here;
}
}
tl_assert(i >= 0 && i <= n);
- if (i == n && here && here->thrid < me_thrid) {
- tmp.thrid = me_thrid;
- tmp.tym = 1;
- VG_(addToXA)( res->ts, &tmp );
+ tl_assert(here);
+ if (i == n && here->thrid < me_thrid) {
+ UInt hi = out->usedTS++;
+ out->ts[hi].thrid = me_thrid;
+ out->ts[hi].tym = 1;
} else {
for (/*keepgoing*/; i < n; i++) {
- here = VG_(indexXA)( vts->ts, i );
- tmp = *here;
- VG_(addToXA)( res->ts, &tmp );
+ here = &vts->ts[i];
+ UInt hi = out->usedTS++;
+ out->ts[hi] = *here;
}
}
- tl_assert(is_sane_VTS(res));
- //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
- // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
- return res;
+ tl_assert(is_sane_VTS(out));
+ tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
+ tl_assert(out->usedTS <= out->sizeTS);
}
/* Return a new VTS constructed as the join (max) of the 2 args.
Neither arg is modified.
*/
-VTS* VTS__join ( VTS* a, VTS* b )
+static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
- Word ia, ib, useda, usedb, hintsize;
+ UInt ia, ib, useda, usedb;
ULong tyma, tymb, tymMax;
ThrID thrid;
- VTS* res;
+ UInt ncommon = 0;
stats__vts__join++;
- tl_assert(a && a->ts);
- tl_assert(b && b->ts);
- useda = VG_(sizeXA)( a->ts );
- usedb = VG_(sizeXA)( b->ts );
+ tl_assert(a);
+ tl_assert(b);
+ useda = a->usedTS;
+ usedb = b->usedTS;
- hintsize = useda > usedb ? useda : usedb;
- res = VTS__new(hintsize);
+ tl_assert(out);
+ tl_assert(out->usedTS == 0);
+ /* overly conservative test, but doing better involves comparing
+ the two VTSs, which we don't want to do at this point. */
+ if (useda + usedb >= ThrID_MAX_VALID)
+ scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
+ tl_assert(out->sizeTS >= useda + usedb);
+
ia = ib = 0;
while (1) {
@@ -1866,7 +1897,7 @@
} else if (ia == useda && ib != usedb) {
/* a empty, use up b */
- ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ ScalarTS* tmpb = &b->ts[ib];
thrid = tmpb->thrid;
tyma = 0;
tymb = tmpb->tym;
@@ -1874,7 +1905,7 @@
} else if (ia != useda && ib == usedb) {
/* b empty, use up a */
- ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
+ ScalarTS* tmpa = &a->ts[ia];
thrid = tmpa->thrid;
tyma = tmpa->tym;
tymb = 0;
@@ -1882,8 +1913,8 @@
} else {
/* both not empty; extract lowest-ThrID'd triple */
- ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
- ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ ScalarTS* tmpa = &a->ts[ia];
+ ScalarTS* tmpb = &b->ts[ib];
if (tmpa->thrid < tmpb->thrid) {
/* a has the lowest unconsidered ThrID */
thrid = tmpa->thrid;
@@ -1904,6 +1935,7 @@
tymb = tmpb->tym;
ia++;
ib++;
+ ncommon++;
}
}
@@ -1911,17 +1943,16 @@
useful with it. */
tymMax = tyma > tymb ? tyma : tymb;
if (tymMax > 0) {
- ScalarTS st;
- st.thrid = thrid;
- st.tym = tymMax;
- VG_(addToXA)( res->ts, &st );
+ UInt hi = out->usedTS++;
+ out->ts[hi].thrid = thrid;
+ out->ts[hi].tym = tymMax;
}
}
- tl_assert(is_sane_VTS( res ));
-
- return res;
+ tl_assert(is_sane_VTS(out));
+ tl_assert(out->usedTS <= out->sizeTS);
+ tl_assert(out->usedTS == useda + usedb - ncommon);
}
@@ -1937,10 +1968,10 @@
stats__vts__cmpLEQ++;
- tl_assert(a && a->ts);
- tl_assert(b && b->ts);
- useda = VG_(sizeXA)( a->ts );
- usedb = VG_(sizeXA)( b->ts );
+ tl_assert(a);
+ tl_assert(b);
+ useda = a->usedTS;
+ usedb = b->usedTS;
ia = ib = 0;
@@ -1960,7 +1991,7 @@
} else if (ia == useda && ib != usedb) {
/* a empty, use up b */
- ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ ScalarTS* tmpb = &b->ts[ib];
tyma = 0;
tymb = tmpb->tym;
thrid = tmpb->thrid;
@@ -1968,7 +1999,7 @@
} else if (ia != useda && ib == usedb) {
/* b empty, use up a */
- ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
+ ScalarTS* tmpa = &a->ts[ia];
tyma = tmpa->tym;
thrid = tmpa->thrid;
tymb = 0;
@@ -1976,8 +2007,8 @@
} else {
/* both not empty; extract lowest-ThrID'd triple */
- ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
- ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
+ ScalarTS* tmpa = &a->ts[ia];
+ ScalarTS* tmpb = &b->ts[ib];
if (tmpa->thrid < tmpb->thrid) {
/* a has the lowest unconsidered ThrID */
tyma = tmpa->tym;
@@ -2037,8 +2068,8 @@
tl_assert(a);
tl_assert(b);
- VG_(getContentsXA_UNSAFE)( a->ts, (void**)&ctsa, &useda );
- VG_(getContentsXA_UNSAFE)( b->ts, (void**)&ctsb, &usedb );
+ ctsa = &a->ts[0]; useda = a->usedTS;
+ ctsb = &b->ts[0]; usedb = b->usedTS;
if (LIKELY(useda == usedb)) {
ScalarTS *tmpa = NULL, *tmpb = NULL;
@@ -2078,7 +2109,8 @@
/* Debugging only. Display the given VTS in the buffer.
*/
-void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
+void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
+{
ScalarTS* st;
HChar unit[64];
Word i, n;
@@ -2087,10 +2119,10 @@
tl_assert(nBuf > 16);
buf[0] = '[';
buf[1] = 0;
- n = VG_(sizeXA)( vts->ts );
+ n = vts->usedTS;
for (i = 0; i < n; i++) {
tl_assert(avail >= 40);
- st = VG_(indexXA)( vts->ts, i );
+ st = &vts->ts[i];
VG_(memset)(unit, 0, sizeof(unit));
VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
st->thrid, (ULong)st->tym);
@@ -2109,14 +2141,15 @@
/* Debugging only. Return vts[index], so to speak.
*/
-ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
+ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
+{
UWord i, n;
ThrID idx_thrid = Thr__to_ThrID(idx);
stats__vts__indexat_slow++;
tl_assert(vts && vts->ts);
- n = VG_(sizeXA)( vts->ts );
+ n = vts->usedTS;
for (i = 0; i < n; i++) {
- ScalarTS* st = VG_(indexXA)( vts->ts, i );
+ ScalarTS* st = &vts->ts[i];
if (st->thrid == idx_thrid)
return st->tym;
}
@@ -2160,30 +2193,31 @@
tl_assert(vts_set);
}
-/* Given a newly made VTS, look in vts_set to see if we already have
- an identical one. If yes, free up this one and return instead a
- pointer to the existing one. If no, add this one to the set and
- return the same pointer. Caller differentiates the two cases by
- comparing returned pointer with the supplied one (although that
- does require that the supplied VTS is not already in the set).
-*/
-static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
+/* Given a VTS, look in vts_set to see if we already have a
+ structurally identical one. If yes, return the pair (True, pointer
+ to the existing one). If no, clone this one, add the clone to the
+ set, and return (False, pointer to the clone). */
+static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
UWord keyW, valW;
- stats__vts_set__fadoa++;
+ stats__vts_set__focaa++;
+ tl_assert(cand->id == VtsID_INVALID);
/* lookup cand (by value) */
if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
/* found it */
tl_assert(valW == 0);
/* if this fails, cand (by ref) was already present (!) */
tl_assert(keyW != (UWord)cand);
- stats__vts_set__fadoa_d++;
- VTS__delete(cand);
- return (VTS*)keyW;
+ *res = (VTS*)keyW;
+ return True;
} else {
- /* not present. Add and return pointer to same. */
- VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
- return cand;
+ /* not present. Clone, add and return address of clone. */
+ stats__vts_set__focaa_a++;
+ VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
+ tl_assert(clone != cand);
+ VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
+ *res = clone;
+ return False;
}
}
@@ -2307,29 +2341,30 @@
}
-/* Look up 'cand' in our collection of VTSs. If present, deallocate
- it and return the VtsID for the pre-existing version. If not
- present, add it to both vts_tab and vts_set, allocate a fresh VtsID
- for it, and return that. */
-static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
+/* Look up 'cand' in our collection of VTSs. If present, return the
+ VtsID for the pre-existing version. If not present, clone it, add
+ the clone to both vts_tab and vts_set, allocate a fresh VtsID for
+ it, and return that. */
+static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
- VTS* auld;
+ VTS* in_tab = NULL;
tl_assert(cand->id == VtsID_INVALID);
- auld = vts_set__find_and_dealloc__or_add(cand);
- if (auld != cand) {
- /* We already have an Aulde one. Use that. */
+ Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
+ tl_assert(in_tab);
+ if (already_have) {
+ /* We already have a copy of 'cand'. Use that. */
VtsTE* ie;
- tl_assert(auld->id != VtsID_INVALID);
- ie = VG_(indexXA)( vts_tab, auld->id );
- tl_assert(ie->vts == auld);
- return auld->id;
+ tl_assert(in_tab->id != VtsID_INVALID);
+ ie = VG_(indexXA)( vts_tab, in_tab->id );
+ tl_assert(ie->vts == in_tab);
+ return in_tab->id;
} else {
VtsID ii = get_new_VtsID();
VtsTE* ie = VG_(indexXA)( vts_tab, ii );
- ie->vts = cand;
+ ie->vts = in_tab;
ie->rc = 0;
ie->freelink = VtsID_INVALID;
- cand->id = ii;
+ in_tab->id = ii;
return ii;
}
}
@@ -2449,6 +2484,11 @@
/////////////////////////////////////////////////////////
//////////////////////////
+/* A temporary, max-sized VTS which is used as a temporary (the first
+ argument) in VTS__singleton, VTS__tick and VTS__join operations. */
+static VTS* temp_max_sized_VTS = NULL;
+
+//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses = 0;
static ULong stats__join2_queries = 0;
@@ -2548,7 +2588,7 @@
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
UInt hash;
VtsID res;
- VTS *vts1, *vts2, *nyu;
+ VTS *vts1, *vts2;
//if (vi1 == vi2) return vi1;
tl_assert(vi1 != vi2);
////++
@@ -2561,8 +2601,9 @@
////--
vts1 = VtsID__to_VTS(vi1);
vts2 = VtsID__to_VTS(vi2);
- nyu = VTS__join(vts1,vts2);
- res = vts_tab__find_and_dealloc__or_add(nyu);
+ temp_max_sized_VTS->usedTS = 0;
+ VTS__join(temp_max_sized_VTS, vts1,vts2);
+ res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
////++
join2_cache[hash].vi1 = vi1;
join2_cache[hash].vi2 = vi2;
@@ -2576,15 +2617,17 @@
/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
- VTS* nyu = VTS__singleton(thr,tym);
- return vts_tab__find_and_dealloc__or_add(nyu);
+ temp_max_sized_VTS->usedTS = 0;
+ VTS__singleton(temp_max_sized_VTS, thr,tym);
+ return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}
/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
VTS* vts = VtsID__to_VTS(vi);
- VTS* nyu = VTS__tick(idx,vts);
- return vts_tab__find_and_dealloc__or_add(nyu);
+ temp_max_sized_VTS->usedTS = 0;
+ VTS__tick(temp_max_sized_VTS, idx,vts);
+ return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}
/* index into a VTS (only for assertions) */
@@ -3037,7 +3080,7 @@
/* And a counter to dole out ThrID values. For rationale/background,
see comments on definition of ScalarTS (far) above. */
-static ThrID thrid_counter = 1024; /* runs up to 2^SCALARTS_N_THRBITS-1 */
+static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
static ThrID Thr__to_ThrID ( Thr* thr ) {
return thr->thrid;
@@ -3072,7 +3115,7 @@
tl_assert(thrid_to_thr_map);
}
- if (thrid_counter >= (((ThrID)1) << SCALARTS_N_THRBITS) - 2) {
+ if (thrid_counter >= ThrID_MAX_VALID) {
/* We're hosed. We have to stop. */
scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
}
@@ -5529,6 +5572,8 @@
// We will have to have to store a large number of these,
// so make sure they're the size we expect them to be.
tl_assert(sizeof(ScalarTS) == 8);
+ tl_assert(SCALARTS_N_THRBITS >= 11); /* because first 1024 unusable */
+ tl_assert(SCALARTS_N_THRBITS <= 32); /* so as to fit in a UInt */
tl_assert(get_stacktrace);
tl_assert(get_EC);
@@ -5538,6 +5583,10 @@
// No need to initialise hg_wordfm.
// No need to initialise hg_wordset.
+ /* Allocated once and never deallocated. Used as a temporary in
+ VTS singleton, tick and join operations. */
+ temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
+ temp_max_sized_VTS->id = VtsID_INVALID;
vts_set_init();
vts_tab_init();
event_map_init();
@@ -5675,8 +5724,8 @@
stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
- VG_(printf)( " libhb: VTSset: find_and_dealloc__or_add %'lu (%'lu deallocd)\n",
- stats__vts_set__fadoa, stats__vts_set__fadoa_d );
+ VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
+ stats__vts_set__focaa, stats__vts_set__focaa_a );
VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
stats__vts__indexat_slow );