Allow garbage collection of the LAOG data structure(s). This avoids
quadratic growth on some apparently simple test cases. Fixes #267925.
(Philippe Waroquiers, philippe.waroquiers@skynet.be)
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12201 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index e89ef30..49e7692 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -136,6 +136,9 @@
/* The word-set universes for lock sets. */
static WordSetU* univ_lsets = NULL; /* sets of Lock* */
static WordSetU* univ_laog = NULL; /* sets of Lock*, for LAOG */
+static Int next_gc_univ_laog = 1;
+/* univ_laog will be garbage collected when the number of elements in
+   univ_laog is >= next_gc_univ_laog. */
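+/* The check against this threshold, and the resulting call to
+   univ_laog_do_GC, are done in laog__add_edge and laog__del_edge. */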
/* Allow libhb to get at the universe of locksets stored
here. Sigh. */
@@ -3300,6 +3303,85 @@
VG_(printf)("}\n");
}
+static void univ_laog_do_GC ( void ) {
+ Word i;
+ LAOGLinks* links;
+ Word seen = 0;
+ Int prev_next_gc_univ_laog = next_gc_univ_laog;
+ const UWord univ_laog_cardinality = HG_(cardinalityWSU)( univ_laog);
+
+ Bool *univ_laog_seen = HG_(zalloc) ( "hg.gc_univ_laog.1",
+ (Int) univ_laog_cardinality
+ * sizeof(Bool) );
+ // univ_laog_seen[*] set to 0 (False) by zalloc.
+
+ if (VG_(clo_stats))
+ VG_(message)(Vg_DebugMsg,
+ "univ_laog_do_GC enter cardinality %'10d\n",
+ (Int)univ_laog_cardinality);
+
+ VG_(initIterFM)( laog );
+ links = NULL;
+ while (VG_(nextIterFM)( laog, NULL, (UWord*)&links )) {
+ tl_assert(links);
+ tl_assert(links->inns >= 0 && links->inns < univ_laog_cardinality);
+ univ_laog_seen[links->inns] = True;
+ tl_assert(links->outs >= 0 && links->outs < univ_laog_cardinality);
+ univ_laog_seen[links->outs] = True;
+ links = NULL;
+ }
+ VG_(doneIterFM)( laog );
+
+ for (i = 0; i < (Int)univ_laog_cardinality; i++) {
+ if (univ_laog_seen[i])
+ seen++;
+ else
+ HG_(dieWS) ( univ_laog, (WordSet)i );
+ }
+
+ HG_(free) (univ_laog_seen);
+
+ // We need to decide the next value of next_gc_univ_laog.
+ // Three solutions were looked at:
+ // Sol 1: garbage collect at seen * 2
+ //   This solution was a lot slower, probably because we both do a lot of
+ //   garbage collection and do not keep laog WVs long enough, throwing away
+ //   sets that become useful again very soon.
+ // Sol 2: garbage collect at a percentage increase of the current cardinality
+ //   (with a minimum increase of 1)
+ //   Trials on a small test program with 1%, 5% and 10% increases were done.
+ //   1% was slightly faster than 5%, which in turn was slightly slower than
+ //   10%. However, on a big application this exhausted the memory, as even a
+ //   1% increase in size at each GC adds up to a lot when many GCs are done.
+ // Sol 3: always garbage collect at current cardinality + 1.
+ //   This solution was the fastest of the three, and caused no memory
+ //   exhaustion in the big application.
+ //
+ // With regard to the cost introduced by GC: on the t2t perf test (doing
+ // only lock/unlock operations), running t2t 50 10 2 without garbage
+ // collection was about 25% faster than the version with garbage collection.
+ // With t2t 50 20 2, my machine started to page out, and so the
+ // garbage-collected version was much faster. On smaller lock sets
+ // (e.g. t2t 20 5 2, giving about 100 locks), the performance difference is
+ // insignificant (~ 0.1 s). Of course, it might be that real-life programs
+ // are not well represented by t2t.
+
+ // If ever we want a more sophisticated control (e.g. clo options to control
+ // the percentage increase or a fixed increase), we should do it here, e.g.
+ // next_gc_univ_laog = prev_next_gc_univ_laog + VG_(clo_laog_gc_fixed);
+ // Currently, we just hard-code solution 3 above.
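+ // In effect, with solution 3 the threshold advances by exactly one per GC:
+ // starting from 1, it equals n+1 after the n-th GC, so the next GC is
+ // triggered at the next edge operation once the cardinality of univ_laog
+ // reaches that value.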
+ next_gc_univ_laog = prev_next_gc_univ_laog + 1;
+
+ if (VG_(clo_stats))
+ VG_(message)
+ (Vg_DebugMsg,
+ "univ_laog_do_GC exit seen %'8d next gc at cardinality %'10d\n",
+ (Int)seen, next_gc_univ_laog);
+}
+
+
__attribute__((noinline))
static void laog__add_edge ( Lock* src, Lock* dst ) {
Word keyW;
@@ -3378,13 +3460,16 @@
VG_(addToFM)( laog_exposition, (Word)expo2, (Word)NULL );
}
}
+
+ if (HG_(cardinalityWSU) (univ_laog) >= next_gc_univ_laog)
+ univ_laog_do_GC();
}
__attribute__((noinline))
static void laog__del_edge ( Lock* src, Lock* dst ) {
Word keyW;
LAOGLinks* links;
- if (0) VG_(printf)("laog__del_edge %p %p\n", src, dst);
+ if (0) VG_(printf)("laog__del_edge enter %p %p\n", src, dst);
/* Update the out edges for src */
keyW = 0;
links = NULL;
@@ -3401,6 +3486,27 @@
tl_assert(keyW == (Word)dst);
links->inns = HG_(delFromWS)( univ_laog, links->inns, (Word)src );
}
+
+ /* Remove the exposition of src,dst (if present) */
+ {
+ LAOGLinkExposition *fm_expo;
+
+ LAOGLinkExposition expo;
+ expo.src_ga = src->guestaddr;
+ expo.dst_ga = dst->guestaddr;
+ expo.src_ec = NULL;
+ expo.dst_ec = NULL;
+
+ if (VG_(delFromFM) (laog_exposition,
+ (UWord*)&fm_expo, NULL, (UWord)&expo )) {
+ HG_(free) (fm_expo);
+ }
+ }
+
+ /* Deleting edges can increase the number of WSs in univ_laog, so check for GC. */
+ if (HG_(cardinalityWSU) (univ_laog) >= next_gc_univ_laog)
+ univ_laog_do_GC();
+ if (0) VG_(printf)("laog__del_edge exit\n");
}
__attribute__((noinline))
@@ -3611,6 +3717,21 @@
all_except_Locks__sanity_check("laog__pre_thread_acquires_lock-post");
}
+/* Allocates a duplicate of the 'words' vector. Caller must HG_(free) the result. */
+static UWord* UWordV_dup(UWord* words, Word words_size)
+{
+ UInt i;
+
+ if (words_size == 0)
+ return NULL;
+
+ UWord *dup = HG_(zalloc) ("hg.dup.1", (SizeT) words_size * sizeof(UWord));
+
+ for (i = 0; i < words_size; i++)
+ dup[i] = words[i];
+
+ return dup;
+}
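+/* (Used below in laog__handle_one_lock_deletion: the payload of a laog
+   WordSet must be copied before the laog__del_edge calls, since a univ_laog
+   GC during those calls can invalidate the original payload.) */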
/* Delete from 'laog' any pair mentioning a lock in locksToDelete */
@@ -3624,11 +3745,17 @@
preds = laog__preds( lk );
succs = laog__succs( lk );
+ // We need to duplicate the payloads, as the sets they point into can be
+ // garbage collected during the del/add operations below.
HG_(getPayloadWS)( &preds_words, &preds_size, univ_laog, preds );
+ preds_words = UWordV_dup(preds_words, preds_size);
+
+ HG_(getPayloadWS)( &succs_words, &succs_size, univ_laog, succs );
+ succs_words = UWordV_dup(succs_words, succs_size);
+
for (i = 0; i < preds_size; i++)
laog__del_edge( (Lock*)preds_words[i], lk );
- HG_(getPayloadWS)( &succs_words, &succs_size, univ_laog, succs );
for (j = 0; j < succs_size; j++)
laog__del_edge( lk, (Lock*)succs_words[j] );
@@ -3642,6 +3769,24 @@
}
}
}
+
+ if (preds_words)
+ HG_(free) (preds_words);
+ if (succs_words)
+ HG_(free) (succs_words);
+
+ // Remove lk information from laog links FM
+ {
+ LAOGLinks *links;
+ Lock* linked_lk;
+
+ if (VG_(delFromFM) (laog,
+ (UWord*)&linked_lk, (UWord*)&links, (UWord)lk)) {
+ tl_assert (linked_lk == lk);
+ HG_(free) (links);
+ }
+ }
+ /* FIXME ??? What about removing lock lk data from EXPOSITION ??? */
}
//__attribute__((noinline))
@@ -3654,6 +3799,7 @@
//
//
// HG_(getPayloadWS)( &ws_words, &ws_size, univ_lsets, locksToDelete );
+// UWordV_dup call needed here ...
// for (i = 0; i < ws_size; i++)
// laog__handle_one_lock_deletion( (Lock*)ws_words[i] );
//