Redid the --trace-mem=yes option of Lackey properly. Updated related files
along with it, such as the NEWS file.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@5840 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/lackey/lk_main.c b/lackey/lk_main.c
index 975fb57..c4f6294 100644
--- a/lackey/lk_main.c
+++ b/lackey/lk_main.c
@@ -51,19 +51,86 @@
//
// Specific Details about --trace-mem=yes
// --------------------------------------
-// The address trace produced by --trace-mem=yes is good, but not perfect;
-// see Section 3.3.7 of Nicholas Nethercote's PhD dissertation "Dynamic
-// Binary Analysis and Instrumentation", 2004, for details about the few
-// loads and stores that it misses, and other caveats about the accuracy of
-// the address trace.
+// Lackey's --trace-mem code is a good starting point for building Valgrind
+// tools that act on memory loads and stores. It could also be used as is,
+// with its output fed into a post-mortem processing step (a minimal sketch
+// of such a consumer appears after the example trace below). However,
+// because memory traces can be very large, online analysis is generally
+// better.
//
-// [Actually, the traces aren't quite right because instructions that modify
-// a memory location are treated like a load followed by a store.]
+// It prints memory data access traces that look like this:
+//
+// instr : 0x0023C790, 2 # instruction read at 0x0023C790 of size 2
+// instr : 0x0023C792, 5
+// store : 0xBE80199C, 4 # data store at 0xBE80199C of size 4
+// instr : 0x0025242B, 3
+// load : 0xBE801950, 4 # data load at 0xBE801950 of size 4
+// instr : 0x0023D476, 7
+// modify: 0x0025747C, 1 # data modify at 0x0025747C of size 1
+// instr : 0x0023DC20, 2
+// load : 0x00254962, 1
+// load : 0xBE801FB3, 1
+// instr : 0x00252305, 1
+// load : 0x00254AEB, 1
+// store : 0x00257998, 1
+//
+// Every instruction executed has an "instr" event representing it.
+// Instructions that do memory accesses are followed by one or more "load",
+// "store" or "modify" events. Some instructions do more than one load or
+// store, as in the last two examples in the above trace.
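+//
+// As a purely illustrative sketch (nothing like this ships with Lackey, and
+// the parsing details are just an assumption based on the format shown
+// above), a post-mortem consumer of such a trace could start out like this:
+//
+//    #include <stdio.h>
+//    int main(void)
+//    {
+//       char line[128], kind[8]; unsigned long addr; int size;
+//       while (fgets(line, sizeof line, stdin))
+//          if (sscanf(line, " %7[a-z] : %lx, %d", kind, &addr, &size) == 3)
+//             { /* accumulate per-kind or per-address statistics here */ }
+//       return 0;
+//    }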
+//
+// Here are some examples of x86 instructions that do different combinations
+// of loads, stores, and modifies.
+//
+//   Instruction          Memory accesses                  Event sequence
+//   -----------          ---------------                  --------------
+//   add %eax, %ebx       No loads or stores               instr
+//
+//   movl (%eax), %ebx    loads (%eax)                     instr, load
+//
+//   movl %eax, (%ebx)    stores (%ebx)                    instr, store
+//
+//   incl (%ecx)          modifies (%ecx)                  instr, modify
+//
+//   cmpsb                loads (%esi), loads (%edi)       instr, load, load
+//
+//   call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
+//   pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
+//   movsw                loads (%esi), stores (%edi)      instr, load, store
+//
+// Instructions using x86 "rep" prefixes are traced as if they are repeated
+// N times.
+//
+// Lackey with --trace-mem gives good traces, but they are not perfect, for
+// the following reasons:
+//
+// - It does not trace into the OS kernel, so system calls and other kernel
+// operations (eg. some scheduling and signal handling code) are ignored.
+//
+// - Valgrind replaces some code with its own, notably parts of code for
+// scheduling operations and signal handling. This code is not traced.
+//
+// - There is no consideration of virtual-to-physical address mapping.
+// This may not matter for many purposes.
+//
+// - Valgrind modifies the instruction stream in some very minor ways. For
+// example, on x86 the bts, btc, btr instructions are incorrectly
+// considered to always touch memory (this is a consequence of these
+// instructions being very difficult to simulate).
+//
+// - Valgrind tools lay out memory differently to normal programs, so the
+// addresses you get will not be typical. Thus Lackey (and all Valgrind
+// tools) is suitable for getting relative memory traces -- eg. if you
+// want to analyse locality of memory accesses -- but is not good if
+// absolute addresses are important.
+//
+// Despite all these warnings, Lackey's results should be good enough for a
+// wide range of purposes. For example, Cachegrind shares all the above
+// shortcomings and it is still useful.
//
// For further inspiration, you should look at cachegrind/cg_main.c which
-// handles memory accesses in a more sophisticated way -- it groups them
-// together for processing into twos and threes so that fewer C calls are
-// made and things run faster.
+// uses the same basic technique for tracing memory accesses, but also groups
+// events together for processing into twos and threes so that fewer C calls
+// are made and things run faster.
#include "pub_tool_basics.h"
#include "pub_tool_tooliface.h"
@@ -111,23 +178,25 @@
" --trace-mem=no|yes trace all loads and stores [no]\n"
" --fnname=<name> count calls to <name> (only used if\n"
" --basic-count=yes) [_dl_runtime_resolve]\n"
-
);
}
static void lk_print_debug_usage(void)
{
+ VG_(printf)(
+" (none)\n"
+ );
}
/*------------------------------------------------------------*/
-/*--- Data and helpers for --basic-counts ---*/
+/*--- Stuff for --basic-counts ---*/
/*------------------------------------------------------------*/
/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls = 0;
static ULong n_BBs_entered = 0;
static ULong n_BBs_completed = 0;
-static ULong n_IRStmts = 0;
+static ULong n_IRStmts = 0;
static ULong n_guest_instrs = 0;
static ULong n_Jccs = 0;
static ULong n_Jccs_untaken = 0;
@@ -168,7 +237,7 @@
}
/*------------------------------------------------------------*/
-/*--- Data and helpers for --detailed-counts ---*/
+/*--- Stuff for --detailed-counts ---*/
/*------------------------------------------------------------*/
/* --- Operations --- */
@@ -244,7 +313,6 @@
}
/* Summarize and print the details. */
-
static void print_details ( void )
{
Int typeIx;
@@ -265,17 +333,189 @@
/*------------------------------------------------------------*/
-/*--- Data and helpers for --trace-mem ---*/
+/*--- Stuff for --trace-mem ---*/
/*------------------------------------------------------------*/
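+
+/* A somewhat arbitrary upper bound on the size (in bytes) of a single data
+   access; used here only to sanity-check the sizes passed to addEvent_Dr
+   and addEvent_Dw below. */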
+#define MAX_DSIZE 512
+
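+/* An IRAtom is an IRExpr which is "flat", ie. either a constant or a
+   temporary; the address expressions attached to the events below are
+   always atoms. */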
+typedef
+ IRExpr
+ IRAtom;
+
+typedef
+ enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
+ EventKind;
+
+typedef
+ struct {
+ EventKind ekind;
+ IRAtom* addr;
+ Int size;
+ }
+ Event;
+
+/* Up to this many unnotified events are allowed. Must be at least two,
+ so that reads and writes to the same address can be merged into a modify.
+ Beyond that, larger numbers just potentially induce more spilling due to
+ extending live ranges of address temporaries. */
+#define N_EVENTS 4
+
+/* Maintain an ordered list of memory events which are outstanding, in
+ the sense that no IR has yet been generated to do the relevant
+ helper calls. The BB is scanned top to bottom and memory events
+ are added to the end of the list, merging with the most recently added
+ (as yet unnotified) event where possible (a Dw immediately following a Dr
+ of the same size and EA can be merged into a Dm).
+
+ This merging is done so that for architectures which have
+ load-op-store instructions (x86, amd64), the instr is treated as if
+ it makes just one memory reference (a modify), rather than two (a
+ read followed by a write at the same address).
+
+ At various points the list will need to be flushed, that is, IR
+ generated from it. That must happen before any possible exit from
+ the block (the end, or an IRStmt_Exit). Flushing also takes place
+ when there is no space to add a new event.
+
+ If we required the emitted trace to be up to date with respect to
+ possible memory exceptions, the list would have to be flushed before
+ each memory reference. That's a pain, so we don't bother.
+
+ Flushing the list consists of walking it start to end and emitting
+ instrumentation IR for each event, in the order in which they
+ appear. */
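+
+/* For example, for a load-op-store instruction such as "incl (%ecx)" (see
+   the table at the top of this file), addEvent_Ir() records the IMark,
+   addEvent_Dr() records the load of (%ecx), and addEvent_Dw() then notices
+   that the most recent outstanding event is a Dr of the same size and
+   address, and upgrades it to a Dm rather than adding a separate Dw. */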
+
+static Event events[N_EVENTS];
+static Int events_used = 0;
+
+
+static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
+{
+ VG_(printf)("instr : %08p, %d\n", addr, size);
+}
+
static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
- VG_(printf)("load : %p, %d\n", addr, size);
+ VG_(printf)(" load : %08p, %d\n", addr, size);
}
static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
- VG_(printf)("store: %p, %d\n", addr, size);
+ VG_(printf)(" store : %08p, %d\n", addr, size);
+}
+
+static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
+{
+ VG_(printf)(" modify: %08p, %d\n", addr, size);
+}
+
+
+static void flushEvents(IRBB* bb)
+{
+ Int i;
+ Char* helperName;
+ void* helperAddr;
+ IRExpr** argv;
+ IRDirty* di;
+ Event* ev;
+
+ for (i = 0; i < events_used; i++) {
+
+ ev = &events[i];
+
+ // Decide on helper fn to call and args to pass it.
+ switch (ev->ekind) {
+ case Event_Ir: helperName = "trace_instr";
+ helperAddr = trace_instr; break;
+
+ case Event_Dr: helperName = "trace_load";
+ helperAddr = trace_load; break;
+
+ case Event_Dw: helperName = "trace_store";
+ helperAddr = trace_store; break;
+
+ case Event_Dm: helperName = "trace_modify";
+ helperAddr = trace_modify; break;
+ default:
+ tl_assert(0);
+ }
+
+ // Add the helper.
+ argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
+ di = unsafeIRDirty_0_N( /*regparms*/2,
+ helperName, VG_(fnptr_to_fnentry)( helperAddr ),
+ argv );
+ addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ }
+
+ events_used = 0;
+}
+
+// WARNING: If you aren't interested in instruction reads, you can omit the
+// code that adds calls to trace_instr() in flushEvents(). However, you
+// must still call this function, addEvent_Ir() -- it is necessary to add
+// the Ir events to the events list so that merging of paired load/store
+// events into modify events works correctly.
+static void addEvent_Ir ( IRBB* bb, IRAtom* iaddr, UInt isize )
+{
+ Event* evt;
+ tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
+ || VG_CLREQ_SZB == isize );
+ if (events_used == N_EVENTS)
+ flushEvents(bb);
+ tl_assert(events_used >= 0 && events_used < N_EVENTS);
+ evt = &events[events_used];
+ evt->ekind = Event_Ir;
+ evt->addr = iaddr;
+ evt->size = isize;
+ events_used++;
+}
+
+static
+void addEvent_Dr ( IRBB* bb, IRAtom* daddr, Int dsize )
+{
+ Event* evt;
+ tl_assert(isIRAtom(daddr));
+ tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
+ if (events_used == N_EVENTS)
+ flushEvents(bb);
+ tl_assert(events_used >= 0 && events_used < N_EVENTS);
+ evt = &events[events_used];
+ evt->ekind = Event_Dr;
+ evt->addr = daddr;
+ evt->size = dsize;
+ events_used++;
+}
+
+static
+void addEvent_Dw ( IRBB* bb, IRAtom* daddr, Int dsize )
+{
+ Event* lastEvt;
+ Event* evt;
+ tl_assert(isIRAtom(daddr));
+ tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
+
+ // Is it possible to merge this write with the preceding read?
+ if (events_used > 0) {
+    lastEvt = &events[events_used-1];
+    if (lastEvt->ekind == Event_Dr
+        && lastEvt->size == dsize
+        && eqIRAtom(lastEvt->addr, daddr))
+    {
+       lastEvt->ekind = Event_Dm;
+       return;
+    }
+ }
+
+ // No. Add as normal.
+ if (events_used == N_EVENTS)
+ flushEvents(bb);
+ tl_assert(events_used >= 0 && events_used < N_EVENTS);
+ evt = &events[events_used];
+ evt->ekind = Event_Dw;
+ evt->size = dsize;
+ evt->addr = daddr;
+ events_used++;
}
@@ -296,19 +536,17 @@
static
IRBB* lk_instrument ( VgCallbackClosure* closure,
- IRBB* bb_in,
+ IRBB* bbIn,
VexGuestLayout* layout,
VexGuestExtents* vge,
IRType gWordTy, IRType hWordTy )
{
- IRDirty* di;
- Int i;
- IRBB* bb;
- Char fnname[100];
- IRType type;
- IRExpr** argv;
- IRExpr* addr_expr;
- IRExpr* size_expr;
+ IRDirty* di;
+ Int i;
+ IRBB* bbOut;
+ Char fnname[100];
+ IRType type;
+ IRTypeEnv* tyenv = bbIn->tyenv;
if (gWordTy != hWordTy) {
/* We don't currently support this case. */
@@ -316,15 +554,15 @@
}
/* Set up BB */
- bb = emptyIRBB();
- bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
- bb->next = dopyIRExpr(bb_in->next);
- bb->jumpkind = bb_in->jumpkind;
+ bbOut = emptyIRBB();
+ bbOut->tyenv = dopyIRTypeEnv(bbIn->tyenv);
+ bbOut->next = dopyIRExpr(bbIn->next);
+ bbOut->jumpkind = bbIn->jumpkind;
// Copy verbatim any IR preamble preceding the first IMark
i = 0;
- while (i < bb_in->stmts_used && bb_in->stmts[i]->tag != Ist_IMark) {
- addStmtToIRBB( bb, bb_in->stmts[i] );
+ while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
+ addStmtToIRBB( bbOut, bbIn->stmts[i] );
i++;
}
@@ -333,11 +571,15 @@
di = unsafeIRDirty_0_N( 0, "add_one_BB_entered",
VG_(fnptr_to_fnentry)( &add_one_BB_entered ),
mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
}
- for (/*use current i*/; i < bb_in->stmts_used; i++) {
- IRStmt* st = bb_in->stmts[i];
+ if (clo_trace_mem) {
+ events_used = 0;
+ }
+
+ for (/*use current i*/; i < bbIn->stmts_used; i++) {
+ IRStmt* st = bbIn->stmts[i];
if (!st || st->tag == Ist_NoOp) continue;
if (clo_basic_counts) {
@@ -345,17 +587,25 @@
di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
}
switch (st->tag) {
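+ // None of these statement kinds can involve a guest memory (data)
+ // access, so just copy them through to the output BB unchanged.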
+ case Ist_NoOp:
+ case Ist_AbiHint:
+ case Ist_Put:
+ case Ist_PutI:
+ case Ist_MFence:
+ addStmtToIRBB( bbOut, st );
+ break;
+
case Ist_IMark:
if (clo_basic_counts) {
/* Count guest instruction. */
di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
/* An unconditional branch to a known destination in the
* guest's instructions can be represented, in the IRBB to
@@ -378,53 +628,17 @@
0, "add_one_func_call",
VG_(fnptr_to_fnentry)( &add_one_func_call ),
mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
}
}
- addStmtToIRBB( bb, st );
- break;
-
- case Ist_Exit:
- if (clo_basic_counts) {
- /* Count Jcc */
- di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
- VG_(fnptr_to_fnentry)( &add_one_Jcc ),
- mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
- }
-
- addStmtToIRBB( bb, st );
-
- if (clo_basic_counts) {
- /* Count non-taken Jcc */
- di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
- VG_(fnptr_to_fnentry)(
- &add_one_Jcc_untaken ),
- mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
- }
- break;
-
- case Ist_Store:
- // Add a call to trace_store() if --trace-mem=yes.
if (clo_trace_mem) {
- addr_expr = st->Ist.Store.addr;
- size_expr = mkIRExpr_HWord(
- sizeofIRType(
- typeOfIRExpr(bb->tyenv, st->Ist.Store.data)));
- argv = mkIRExprVec_2( addr_expr, size_expr );
- di = unsafeIRDirty_0_N( /*regparms*/2,
- "trace_store",
- VG_(fnptr_to_fnentry)( trace_store ),
- argv );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ // WARNING: do not remove this function call, even if you
+ // aren't interested in instruction reads. See the comment
+ // above the function itself for more detail.
+ addEvent_Ir( bbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
+ st->Ist.IMark.len );
}
- if (clo_detailed_counts) {
- type = typeOfIRExpr(bb->tyenv, st->Ist.Store.data);
- tl_assert(type != Ity_INVALID);
- instrument_detail( bb, OpStore, type );
- }
- addStmtToIRBB( bb, st );
+ addStmtToIRBB( bbOut, st );
break;
case Ist_Tmp:
@@ -432,40 +646,92 @@
if (clo_trace_mem) {
IRExpr* data = st->Ist.Tmp.data;
if (data->tag == Iex_Load) {
- addr_expr = data->Iex.Load.addr;
- size_expr = mkIRExpr_HWord( sizeofIRType(data->Iex.Load.ty) );
- argv = mkIRExprVec_2( addr_expr, size_expr );
- di = unsafeIRDirty_0_N( /*regparms*/2,
- "trace_load",
- VG_(fnptr_to_fnentry)( trace_load ),
- argv );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ addEvent_Dr( bbOut, data->Iex.Load.addr,
+ sizeofIRType(data->Iex.Load.ty) );
}
}
if (clo_detailed_counts) {
IRExpr* expr = st->Ist.Tmp.data;
- type = typeOfIRExpr(bb->tyenv, expr);
+ type = typeOfIRExpr(bbOut->tyenv, expr);
tl_assert(type != Ity_INVALID);
switch (expr->tag) {
case Iex_Load:
- instrument_detail( bb, OpLoad, type );
+ instrument_detail( bbOut, OpLoad, type );
break;
case Iex_Unop:
case Iex_Binop:
case Iex_Triop:
case Iex_Qop:
case Iex_Mux0X:
- instrument_detail( bb, OpAlu, type );
+ instrument_detail( bbOut, OpAlu, type );
break;
default:
break;
}
}
- addStmtToIRBB( bb, st );
+ addStmtToIRBB( bbOut, st );
+ break;
+
+ case Ist_Store:
+ if (clo_trace_mem) {
+ IRExpr* data = st->Ist.Store.data;
+ addEvent_Dw( bbOut, st->Ist.Store.addr,
+ sizeofIRType(typeOfIRExpr(tyenv, data)) );
+ }
+ if (clo_detailed_counts) {
+ type = typeOfIRExpr(bbOut->tyenv, st->Ist.Store.data);
+ tl_assert(type != Ity_INVALID);
+ instrument_detail( bbOut, OpStore, type );
+ }
+ addStmtToIRBB( bbOut, st );
+ break;
+
+ case Ist_Dirty: {
+ Int dsize;
+ IRDirty* d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None) {
+ // This dirty helper accesses memory. Collect the details.
+ tl_assert(d->mAddr != NULL);
+ tl_assert(d->mSize != 0);
+ dsize = d->mSize;
+ if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
+ addEvent_Dr( bbOut, d->mAddr, dsize );
+ if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
+ addEvent_Dw( bbOut, d->mAddr, dsize );
+ } else {
+ tl_assert(d->mAddr == NULL);
+ tl_assert(d->mSize == 0);
+ }
+ addStmtToIRBB( bbOut, st );
+ break;
+ }
+
+ case Ist_Exit:
+ if (clo_basic_counts) {
+ /* Count Jcc */
+ di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
+ VG_(fnptr_to_fnentry)( &add_one_Jcc ),
+ mkIRExprVec_0() );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
+ }
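+ // Flush any outstanding memory events before this possible side
+ // exit, so the trace stays in program order even when the exit
+ // is taken.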
+ if (clo_trace_mem) {
+ flushEvents(bbOut);
+ }
+
+ addStmtToIRBB( bbOut, st ); // Original statement
+
+ if (clo_basic_counts) {
+ /* Count non-taken Jcc */
+ di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
+ VG_(fnptr_to_fnentry)(
+ &add_one_Jcc_untaken ),
+ mkIRExprVec_0() );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
+ }
break;
default:
- addStmtToIRBB( bb, st );
+ tl_assert(0);
}
}
@@ -474,10 +740,15 @@
di = unsafeIRDirty_0_N( 0, "add_one_BB_completed",
VG_(fnptr_to_fnentry)( &add_one_BB_completed ),
mkIRExprVec_0() );
- addStmtToIRBB( bb, IRStmt_Dirty(di) );
+ addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
}
- return bb;
+ if (clo_trace_mem) {
+ /* At the end of bbIn. Flush any outstanding memory events. */
+ flushEvents(bbOut);
+ }
+
+ return bbOut;
}
static void lk_fini(Int exitcode)