
/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call graph
   profiling programs.

   Copyright (C) 2002-2006, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This skin is derived from and contains code from Cachegrind
   Copyright (C) 2002-2006 Nicholas Nethercote (njn25@cam.ac.uk)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
33
34#include "config.h"
35#include "callgrind.h"
36#include "global.h"
37
38#include <pub_tool_threadstate.h>
39
40/*------------------------------------------------------------*/
41/*--- Global variables ---*/
42/*------------------------------------------------------------*/
43
44/* for all threads */
45CommandLineOptions CLG_(clo);
46Statistics CLG_(stat);
47Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
48
49/* thread and signal handler specific */
50exec_state CLG_(current_state);
51
52
53/*------------------------------------------------------------*/
54/*--- Statistics ---*/
55/*------------------------------------------------------------*/
56
57static void CLG_(init_statistics)(Statistics* s)
58{
59 s->call_counter = 0;
60 s->jcnd_counter = 0;
61 s->jump_counter = 0;
62 s->rec_call_counter = 0;
63 s->ret_counter = 0;
64 s->bb_executions = 0;
65
66 s->context_counter = 0;
67 s->bb_retranslations = 0;
68
69 s->distinct_objs = 0;
70 s->distinct_files = 0;
71 s->distinct_fns = 0;
72 s->distinct_contexts = 0;
73 s->distinct_bbs = 0;
74 s->distinct_bbccs = 0;
75 s->distinct_instrs = 0;
76 s->distinct_skips = 0;
77
78 s->bb_hash_resizes = 0;
79 s->bbcc_hash_resizes = 0;
80 s->jcc_hash_resizes = 0;
81 s->cxt_hash_resizes = 0;
82 s->fn_array_resizes = 0;
83 s->call_stack_resizes = 0;
84 s->fn_stack_resizes = 0;
85
86 s->full_debug_BBs = 0;
87 s->file_line_debug_BBs = 0;
88 s->fn_name_debug_BBs = 0;
89 s->no_debug_BBs = 0;
90 s->bbcc_lru_misses = 0;
91 s->jcc_lru_misses = 0;
92 s->cxt_lru_misses = 0;
93 s->bbcc_clones = 0;
94}
95
96
97
98
99/*------------------------------------------------------------*/
100/*--- Cache simulation instrumentation phase ---*/
101/*------------------------------------------------------------*/
102
103
104static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
105{
106 // I'm assuming that for 'modify' instructions, that Vex always makes
107 // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp
108 // expressions, or both Const expressions.
109 CLG_ASSERT(isIRAtom(loadAddrExpr));
110 CLG_ASSERT(isIRAtom(storeAddrExpr));
111 return eqIRAtom(loadAddrExpr, storeAddrExpr);
112}
113
114static
115EventSet* insert_simcall(IRBB* bbOut, InstrInfo* ii, UInt dataSize,
116 Bool instrIssued,
117 IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
118{
119 HChar* helperName;
120 void* helperAddr;
121 Int argc;
122 EventSet* es;
123 IRExpr *arg1, *arg2 = 0, *arg3 = 0, **argv;
124 IRDirty* di;
125
126 /* Check type of original instruction regarding memory access,
127 * and collect info to be able to generate fitting helper call
128 */
129 if (!loadAddrExpr && !storeAddrExpr) {
130 // no load/store
131 CLG_ASSERT(0 == dataSize);
132 if (instrIssued) {
133 helperName = 0;
134 helperAddr = 0;
135 }
136 else {
137 helperName = CLG_(cachesim).log_1I0D_name;
138 helperAddr = CLG_(cachesim).log_1I0D;
139 }
140 argc = 1;
141 es = CLG_(sets).D0;
142
143 } else if (loadAddrExpr && !storeAddrExpr) {
144 // load
145 CLG_ASSERT( isIRAtom(loadAddrExpr) );
146 if (instrIssued) {
147 helperName = CLG_(cachesim).log_0I1Dr_name;
148 helperAddr = CLG_(cachesim).log_0I1Dr;
149 }
150 else {
151 helperName = CLG_(cachesim).log_1I1Dr_name;
152 helperAddr = CLG_(cachesim).log_1I1Dr;
153 }
154 argc = 2;
155 arg2 = loadAddrExpr;
156 es = CLG_(sets).D1r;
157
158 } else if (!loadAddrExpr && storeAddrExpr) {
159 // store
160 CLG_ASSERT( isIRAtom(storeAddrExpr) );
161 if (instrIssued) {
162 helperName = CLG_(cachesim).log_0I1Dw_name;
163 helperAddr = CLG_(cachesim).log_0I1Dw;
164 }
165 else {
166 helperName = CLG_(cachesim).log_1I1Dw_name;
167 helperAddr = CLG_(cachesim).log_1I1Dw;
168 }
169 argc = 2;
170 arg2 = storeAddrExpr;
171 es = CLG_(sets).D1w;
172
173 } else {
174 CLG_ASSERT( loadAddrExpr && storeAddrExpr );
175 CLG_ASSERT( isIRAtom(loadAddrExpr) );
176 CLG_ASSERT( isIRAtom(storeAddrExpr) );
177
178 if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) {
179 /* modify: suppose write access, as this is
180 * more resource consuming (as in callgrind for VG2)
181 * Cachegrind does a read here (!)
182 * DISCUSS: Best way depends on simulation model?
183 */
184 if (instrIssued) {
185 helperName = CLG_(cachesim).log_0I1Dw_name;
186 helperAddr = CLG_(cachesim).log_0I1Dw;
187 }
188 else {
189 helperName = CLG_(cachesim).log_1I1Dw_name;
190 helperAddr = CLG_(cachesim).log_1I1Dw;
191 }
192 argc = 2;
193 arg2 = storeAddrExpr;
194 es = CLG_(sets).D1w;
195
196 } else {
197 // load/store
198 if (instrIssued) {
199 helperName = CLG_(cachesim).log_0I2D_name;
200 helperAddr = CLG_(cachesim).log_0I2D;
201 }
202 else {
203 helperName = CLG_(cachesim).log_1I2D_name;
204 helperAddr = CLG_(cachesim).log_1I2D;
205 }
206 argc = 3;
207 arg2 = loadAddrExpr;
208 arg3 = storeAddrExpr;
209 es = CLG_(sets).D2;
210 }
211 }
212
213 /* helper could be unset depending on the simulator used */
214 if (helperAddr == 0) return 0;
215
216 /* Setup 1st arg: InstrInfo */
217 arg1 = mkIRExpr_HWord( (HWord)ii );
218
219 // Add call to the instrumentation function
220 if (argc == 1)
221 argv = mkIRExprVec_1(arg1);
222 else if (argc == 2)
223 argv = mkIRExprVec_2(arg1, arg2);
224 else if (argc == 3)
225 argv = mkIRExprVec_3(arg1, arg2, arg3);
226 else
227 VG_(tool_panic)("argc... not 1 or 2 or 3?");
228
sewardj8a95fd32006-04-02 16:21:44 +0000229 di = unsafeIRDirty_0_N( argc, helperName,
230 VG_(fnptr_to_fnentry)( helperAddr ), argv);
weidendoa17f2a32006-03-20 10:27:30 +0000231 addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
232
233 return es;
234}
235
236
237/* Instrumentation before a conditional jump or at the end
238 * of each original instruction.
239 * Fills the InstrInfo struct if not seen before
240 */
241static
242void endOfInstr(IRBB* bbOut, InstrInfo* ii, Bool bb_seen_before,
243 UInt instr_offset, UInt instrLen, UInt dataSize,
244 UInt* cost_offset, Bool instrIssued,
245 IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
246{
247 IRType wordTy;
248 EventSet* es;
249
250 // Stay sane ...
251 CLG_ASSERT(sizeof(HWord) == sizeof(void*));
252 if (sizeof(HWord) == 4) {
253 wordTy = Ity_I32;
254 } else
255 if (sizeof(HWord) == 8) {
256 wordTy = Ity_I64;
257 } else {
258 VG_(tool_panic)("endOfInstr: strange word size");
259 }
260
261 if (loadAddrExpr)
262 CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr));
263 if (storeAddrExpr)
264 CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr));
265
266 // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be
267 // done inaccurately, but they're very rare and this avoids errors from
268 // hitting more than two cache lines in the simulation.
269 if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE;
270
271 /* returns 0 if simulator needs no instrumentation */
272 es = insert_simcall(bbOut, ii, dataSize, instrIssued,
273 loadAddrExpr, storeAddrExpr);
274
weidendoc8e76152006-05-27 15:30:58 +0000275 CLG_DEBUG(5, " Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n",
276 instr_offset, instrLen, dataSize,
277 es ? es->name : (Char*)"(no instrumentation)",
278 es ? es->size : 0);
279
weidendoa17f2a32006-03-20 10:27:30 +0000280 if (bb_seen_before) {
weidendoc8e76152006-05-27 15:30:58 +0000281 CLG_DEBUG(5, " before: Instr +%2d (Size %d, DSize %d)\n",
282 ii->instr_offset, ii->instr_size, ii->data_size);
283
weidendoa17f2a32006-03-20 10:27:30 +0000284 CLG_ASSERT(ii->instr_offset == instr_offset);
285 CLG_ASSERT(ii->instr_size == instrLen);
weidendoa17f2a32006-03-20 10:27:30 +0000286 CLG_ASSERT(ii->cost_offset == *cost_offset);
287 CLG_ASSERT(ii->eventset == es);
weidendoc8e76152006-05-27 15:30:58 +0000288
289 /* Only check size if data size >0.
290 * This is needed: e.g. for rep or cmov x86 instructions, the same InstrInfo
291 * is used both for 2 simulator calls: for the pure instruction fetch and
292 * separately for an memory access (which may not happen depending on flags).
293 * If checked always, this triggers an assertion failure on retranslation.
294 */
295 if (dataSize>0) CLG_ASSERT(ii->data_size == dataSize);
296
weidendoa17f2a32006-03-20 10:27:30 +0000297 }
298 else {
299 ii->instr_offset = instr_offset;
300 ii->instr_size = instrLen;
weidendoa17f2a32006-03-20 10:27:30 +0000301 ii->cost_offset = *cost_offset;
302 ii->eventset = es;
weidendoc8e76152006-05-27 15:30:58 +0000303
304 /* data size only relevant if >0 */
305 if (dataSize > 0) ii->data_size = dataSize;
306
weidendoa17f2a32006-03-20 10:27:30 +0000307
308 CLG_(stat).distinct_instrs++;
309 }
310
311 *cost_offset += es ? es->size : 0;
312
weidendoa17f2a32006-03-20 10:27:30 +0000313}
314
315#if defined(VG_BIGENDIAN)
316# define CLGEndness Iend_BE
317#elif defined(VG_LITTLEENDIAN)
318# define CLGEndness Iend_LE
319#else
320# error "Unknown endianness"
321#endif
322
323static
324Addr IRConst2Addr(IRConst* con)
325{
326 Addr addr;
327
328 if (sizeof(Addr) == 4) {
329 CLG_ASSERT( con->tag == Ico_U32 );
330 addr = con->Ico.U32;
331 }
332 else if (sizeof(Addr) == 8) {
333 CLG_ASSERT( con->tag == Ico_U64 );
334 addr = con->Ico.U64;
335 }
336 else
337 VG_(tool_panic)("Callgrind: invalid Addr type");
338
339 return addr;
340}
341
342/* First pass over a BB to instrument, counting instructions and jumps
343 * This is needed for the size of the BB struct to allocate
344 *
345 * Called from CLG_(get_bb)
346 */
347void CLG_(collectBlockInfo)(IRBB* bbIn,
348 /*INOUT*/ UInt* instrs,
349 /*INOUT*/ UInt* cjmps,
350 /*INOUT*/ Bool* cjmp_inverted)
351{
352 Int i;
353 IRStmt* st;
354 Addr instrAddr =0, jumpDst;
355 UInt instrLen = 0;
356 Bool toNextInstr = False;
357
358 // Ist_Exit has to be ignored in preamble code, before first IMark:
359 // preamble code is added by VEX for self modifying code, and has
360 // nothing to do with client code
361 Bool inPreamble = True;
362
363 if (!bbIn) return;
364
365 for (i = 0; i < bbIn->stmts_used; i++) {
366 st = bbIn->stmts[i];
367 if (Ist_IMark == st->tag) {
368 inPreamble = False;
369
370 instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
371 instrLen = st->Ist.IMark.len;
372
373 (*instrs)++;
374 toNextInstr = False;
375 }
376 if (inPreamble) continue;
377 if (Ist_Exit == st->tag) {
378 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
379 toNextInstr = (jumpDst == instrAddr + instrLen);
380
381 (*cjmps)++;
382 }
383 }
384
385 /* if the last instructions of BB conditionally jumps to next instruction
386 * (= first instruction of next BB in memory), this is a inverted by VEX.
387 */
388 *cjmp_inverted = toNextInstr;
389}
390
391static
392void collectStatementInfo(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st,
393 Addr* instrAddr, UInt* instrLen,
394 IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
395 UInt* dataSize, IRType hWordTy)
396{
397 CLG_ASSERT(isFlatIRStmt(st));
398
399 switch (st->tag) {
400 case Ist_NoOp:
401 break;
402
403 case Ist_AbiHint:
404 /* ABI hints aren't interesting. Ignore. */
405 break;
406
407 case Ist_IMark:
408 /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this
409 to the host's native pointer type; if that is 32 bits then it
410 discards the upper 32 bits. If we are cachegrinding on a
411 32-bit host then we are also ensured that the guest word size
412 is 32 bits, due to the assertion in cg_instrument that the
413 host and guest word sizes must be the same. Hence
414 st->Ist.IMark.addr will have been derived from a 32-bit guest
415 code address and truncation of it is safe. I believe this
416 assignment should be correct for both 32- and 64-bit
417 machines. */
418 *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
419 *instrLen = st->Ist.IMark.len;
420 break;
421
422 case Ist_Tmp: {
423 IRExpr* data = st->Ist.Tmp.data;
424 if (data->tag == Iex_Load) {
425 IRExpr* aexpr = data->Iex.Load.addr;
426 CLG_ASSERT( isIRAtom(aexpr) );
427 // Note also, endianness info is ignored. I guess that's not
428 // interesting.
429 // XXX: repe cmpsb does two loads... the first one is ignored here!
430 //tl_assert( NULL == *loadAddrExpr ); // XXX: ???
431 *loadAddrExpr = aexpr;
432 *dataSize = sizeofIRType(data->Iex.Load.ty);
433 }
434 break;
435 }
436
437 case Ist_Store: {
438 IRExpr* data = st->Ist.Store.data;
439 IRExpr* aexpr = st->Ist.Store.addr;
440 CLG_ASSERT( isIRAtom(aexpr) );
441 if ( NULL == *storeAddrExpr ) {
442 /* this is a kludge: ignore all except the first store from
443 an instruction. */
444 *storeAddrExpr = aexpr;
445 *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
446 }
447 break;
448 }
449
450 case Ist_Dirty: {
451 IRDirty* d = st->Ist.Dirty.details;
452 if (d->mFx != Ifx_None) {
453 /* This dirty helper accesses memory. Collect the
454 details. */
455 CLG_ASSERT(d->mAddr != NULL);
456 CLG_ASSERT(d->mSize != 0);
457 *dataSize = d->mSize;
458 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
459 *loadAddrExpr = d->mAddr;
460 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
461 *storeAddrExpr = d->mAddr;
462 } else {
463 CLG_ASSERT(d->mAddr == NULL);
464 CLG_ASSERT(d->mSize == 0);
465 }
466 break;
467 }
468
469 case Ist_Put:
470 case Ist_PutI:
471 case Ist_MFence:
472 case Ist_Exit:
473 break;
474
475 default:
476 VG_(printf)("\n");
477 ppIRStmt(st);
478 VG_(printf)("\n");
479 VG_(tool_panic)("Callgrind: unhandled IRStmt");
480 }
481}
482
483static
484void addConstMemStoreStmt( IRBB* bbOut, UWord addr, UInt val, IRType hWordTy)
485{
486 addStmtToIRBB( bbOut,
487 IRStmt_Store(CLGEndness,
488 IRExpr_Const(hWordTy == Ity_I32 ?
489 IRConst_U32( addr ) :
490 IRConst_U64( addr )),
491 IRExpr_Const(IRConst_U32(val)) ));
492}
493
494static
495IRBB* CLG_(instrument)( VgCallbackClosure* closure,
496 IRBB* bbIn,
497 VexGuestLayout* layout,
498 VexGuestExtents* vge,
499 IRType gWordTy, IRType hWordTy )
500{
501 Int i;
502 IRBB* bbOut;
503 IRStmt* st, *stnext;
504 Addr instrAddr, origAddr;
505 UInt instrLen = 0, dataSize;
506 UInt instrCount, costOffset;
507 IRExpr *loadAddrExpr, *storeAddrExpr;
508
509 BB* bb;
510
511 IRDirty* di;
512 IRExpr *arg1, **argv;
513
514 Bool bb_seen_before = False;
515 UInt cJumps = 0, cJumpsCorrected;
516 Bool beforeIBoundary, instrIssued;
517
518 if (gWordTy != hWordTy) {
519 /* We don't currently support this case. */
520 VG_(tool_panic)("host/guest word size mismatch");
521 }
522
523 // No instrumentation if it is switched off
524 if (! CLG_(instrument_state)) {
525 CLG_DEBUG(5, "instrument(BB %p) [Instrumentation OFF]\n",
526 (Addr)closure->readdr);
527 return bbIn;
528 }
529
530 CLG_DEBUG(3, "+ instrument(BB %p)\n", (Addr)closure->readdr);
531
532 /* Set up BB for instrumented IR */
njn149aed82006-11-25 22:38:11 +0000533 bbOut = dopyIRBBExceptStmts(bbIn);
weidendoa17f2a32006-03-20 10:27:30 +0000534
535 // Copy verbatim any IR preamble preceding the first IMark
536 i = 0;
537 while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
538 addStmtToIRBB( bbOut, bbIn->stmts[i] );
539 i++;
540 }
541
542 // Get the first statement, and origAddr from it
543 CLG_ASSERT(bbIn->stmts_used > 0);
544 st = bbIn->stmts[i];
545 CLG_ASSERT(Ist_IMark == st->tag);
546 instrAddr = origAddr = (Addr)st->Ist.IMark.addr;
547 CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
548
549 /* Get BB (creating if necessary).
550 * JS: The hash table is keyed with orig_addr_noredir -- important!
551 * JW: Why? If it is because of different chasing of the redirection,
552 * this is not needed, as chasing is switched off in callgrind
553 */
554 bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before);
555 //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before);
556
557 /*
558 * Precondition:
559 * - jmps_passed has number of cond.jumps passed in last executed BB
560 * - current_bbcc has a pointer to the BBCC of the last executed BB
561 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
562 * current_bbcc->bb->jmp_addr
563 * gives the address of the jump source.
564 *
565 * The BBCC setup does 2 things:
566 * - trace call:
567 * * Unwind own call stack, i.e sync our ESP with real ESP
568 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
569 * * For CALLs or JMPs crossing objects, record call arg +
570 * push are on own call stack
571 *
572 * - prepare for cache log functions:
573 * Set current_bbcc to BBCC that gets the costs for this BB execution
574 * attached
575 */
576
577 // helper call to setup_bbcc, with pointer to basic block info struct as argument
578 arg1 = mkIRExpr_HWord( (HWord)bb );
579 argv = mkIRExprVec_1(arg1);
sewardjbb760702006-04-02 15:53:59 +0000580 di = unsafeIRDirty_0_N( 1, "setup_bbcc",
581 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
582 argv);
weidendoa17f2a32006-03-20 10:27:30 +0000583 addStmtToIRBB( bbOut, IRStmt_Dirty(di) );
584
585 instrCount = 0;
586 costOffset = 0;
587
588 // loop for each host instruction (starting from 'i')
589 do {
590
591 // We should be at an IMark statement
592 CLG_ASSERT(Ist_IMark == st->tag);
593
594 // Reset stuff for this original instruction
595 loadAddrExpr = storeAddrExpr = NULL;
596 instrIssued = False;
597 dataSize = 0;
598
599 // Process all the statements for this original instruction (ie. until
600 // the next IMark statement, or the end of the block)
601 do {
602 i++;
603 stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL );
604 beforeIBoundary = !stnext || (Ist_IMark == stnext->tag);
605 collectStatementInfo(bbIn->tyenv, bbOut, st, &instrAddr, &instrLen,
606 &loadAddrExpr, &storeAddrExpr, &dataSize, hWordTy);
607
608 // instrument a simulator call before conditional jumps
609 if (st->tag == Ist_Exit) {
610 // Nb: instrLen will be zero if Vex failed to decode it.
611 // Also Client requests can appear to be very large (eg. 18
612 // bytes on x86) because they are really multiple instructions.
613 CLG_ASSERT( 0 == instrLen ||
614 bbIn->jumpkind == Ijk_ClientReq ||
615 (instrLen >= VG_MIN_INSTR_SZB &&
616 instrLen <= VG_MAX_INSTR_SZB) );
617
618 // Add instrumentation before this statement
619 endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
620 instrAddr - origAddr, instrLen, dataSize, &costOffset,
621 instrIssued, loadAddrExpr, storeAddrExpr);
622
623 // prepare for a possible further simcall in same host instr
624 loadAddrExpr = storeAddrExpr = NULL;
625 instrIssued = True;
626
627 if (!bb_seen_before) {
628 bb->jmp[cJumps].instr = instrCount;
629 bb->jmp[cJumps].skip = False;
630 }
631
632 /* Update global variable jmps_passed (this is before the jump!)
633 * A correction is needed if VEX inverted the last jump condition
634 */
635 cJumpsCorrected = cJumps;
636 if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++;
637 addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
638 cJumpsCorrected, hWordTy);
639
640 cJumps++;
641 }
642
643 addStmtToIRBB( bbOut, st );
644 st = stnext;
645 }
646 while (!beforeIBoundary);
647
648 // Add instrumentation for this original instruction.
649 if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr !=0))
650 endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
651 instrAddr - origAddr, instrLen, dataSize, &costOffset,
652 instrIssued, loadAddrExpr, storeAddrExpr);
653
654 instrCount++;
655 }
656 while (st);
657
658 /* Always update global variable jmps_passed (at end of BB)
659 * A correction is needed if VEX inverted the last jump condition
660 */
661 cJumpsCorrected = cJumps;
662 if (bb->cjmp_inverted) cJumpsCorrected--;
663 addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
664 cJumpsCorrected, hWordTy);
665
666 /* This stores the instr of the call/ret at BB end */
667 bb->jmp[cJumps].instr = instrCount-1;
668
669 CLG_ASSERT(bb->cjmp_count == cJumps);
670 CLG_ASSERT(bb->instr_count == instrCount);
671
672 instrAddr += instrLen;
673 if (bb_seen_before) {
674 CLG_ASSERT(bb->instr_len == instrAddr - origAddr);
675 CLG_ASSERT(bb->cost_count == costOffset);
676 CLG_ASSERT(bb->jmpkind == bbIn->jumpkind);
677 }
678 else {
679 bb->instr_len = instrAddr - origAddr;
680 bb->cost_count = costOffset;
681 bb->jmpkind = bbIn->jumpkind;
682 }
683
684 CLG_DEBUG(3, "- instrument(BB %p): byteLen %u, CJumps %u, CostLen %u\n",
685 origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count);
686 if (cJumps>0) {
687 CLG_DEBUG(3, " [ ");
688 for (i=0;i<cJumps;i++)
689 CLG_DEBUG(3, "%d ", bb->jmp[i].instr);
690 CLG_DEBUG(3, "], last inverted: %s \n", bb->cjmp_inverted ? "yes":"no");
691 }
692
693 return bbOut;
694}
695
696/*--------------------------------------------------------------------*/
697/*--- Discarding BB info ---*/
698/*--------------------------------------------------------------------*/
699
700// Called when a translation is removed from the translation cache for
701// any reason at all: to free up space, because the guest code was
702// unmapped or modified, or for any arbitrary reason.
703static
704void clg_discard_basic_block_info ( Addr64 orig_addr64, VexGuestExtents vge )
705{
706 Addr orig_addr = (Addr)orig_addr64;
707
708 tl_assert(vge.n_used > 0);
709
710 if (0)
711 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
712 (void*)(Addr)orig_addr,
713 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
714
715 // Get BB info, remove from table, free BB info. Simple! Note that we
716 // use orig_addr, not the first instruction address in vge.
717 CLG_(delete_bb)(orig_addr);
718}
719
720
721/*------------------------------------------------------------*/
722/*--- CLG_(fini)() and related function ---*/
723/*------------------------------------------------------------*/
724
725
726
727static void zero_thread_cost(thread_info* t)
728{
729 Int i;
730
731 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
732 if (!CLG_(current_call_stack).entry[i].jcc) continue;
733
734 /* reset call counters to current for active calls */
735 CLG_(copy_cost)( CLG_(sets).full,
736 CLG_(current_call_stack).entry[i].enter_cost,
737 CLG_(current_state).cost );
738 }
739
740 CLG_(forall_bbccs)(CLG_(zero_bbcc));
741
742 /* set counter for last dump */
743 CLG_(copy_cost)( CLG_(sets).full,
744 t->lastdump_cost, CLG_(current_state).cost );
745}
746
747void CLG_(zero_all_cost)(Bool only_current_thread)
748{
749 if (VG_(clo_verbosity) > 1)
750 VG_(message)(Vg_DebugMsg, " Zeroing costs...");
751
752 if (only_current_thread)
753 zero_thread_cost(CLG_(get_current_thread)());
754 else
755 CLG_(forall_threads)(zero_thread_cost);
756
757 if (VG_(clo_verbosity) > 1)
758 VG_(message)(Vg_DebugMsg, " ...done");
759}
760
761static
762void unwind_thread(thread_info* t)
763{
764 /* unwind signal handlers */
765 while(CLG_(current_state).sig !=0)
766 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
767
768 /* unwind regular call stack */
769 while(CLG_(current_call_stack).sp>0)
770 CLG_(pop_call_stack)();
weidendof3e0b492006-09-10 22:34:20 +0000771
772 /* reset context and function stack for context generation */
773 CLG_(init_exec_state)( &CLG_(current_state) );
774 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
weidendoa17f2a32006-03-20 10:27:30 +0000775}
776
777/* Ups, this can go wrong... */
778extern void VG_(discard_translations) ( Addr64 start, ULong range );
779
780void CLG_(set_instrument_state)(Char* reason, Bool state)
781{
782 if (CLG_(instrument_state) == state) {
783 CLG_DEBUG(2, "%s: instrumentation already %s\n",
784 reason, state ? "ON" : "OFF");
785 return;
786 }
787 CLG_(instrument_state) = state;
788 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
789 reason, state ? "ON" : "OFF");
790
791 VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);
792
793 /* reset internal state: call stacks, simulator */
794 CLG_(forall_threads)(unwind_thread);
795 (*CLG_(cachesim).clear)();
796 if (0)
797 CLG_(forall_threads)(zero_thread_cost);
798
weidendoa17f2a32006-03-20 10:27:30 +0000799 if (VG_(clo_verbosity) > 1)
weidendof3e0b492006-09-10 22:34:20 +0000800 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s",
weidendoa17f2a32006-03-20 10:27:30 +0000801 reason, state ? "ON" : "OFF");
802}
803
804
805static
806Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
807{
808 if (!VG_IS_TOOL_USERREQ('C','T',args[0]))
809 return False;
810
811 switch(args[0]) {
812 case VG_USERREQ__DUMP_STATS:
813 CLG_(dump_profile)("Client Request", True);
814 *ret = 0; /* meaningless */
815 break;
816
817 case VG_USERREQ__DUMP_STATS_AT:
818 {
819 Char buf[512];
weidendoca472c52006-03-31 19:34:51 +0000820 VG_(sprintf)(buf,"Client Request: %s", args[1]);
weidendoa17f2a32006-03-20 10:27:30 +0000821 CLG_(dump_profile)(buf, True);
822 *ret = 0; /* meaningless */
823 }
824 break;
825
826 case VG_USERREQ__ZERO_STATS:
827 CLG_(zero_all_cost)(True);
828 *ret = 0; /* meaningless */
829 break;
830
831 case VG_USERREQ__TOGGLE_COLLECT:
832 CLG_(current_state).collect = !CLG_(current_state).collect;
833 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
834 CLG_(current_state).collect ? "ON" : "OFF");
835 *ret = 0; /* meaningless */
836 break;
837
838 case VG_USERREQ__START_INSTRUMENTATION:
839 CLG_(set_instrument_state)("Client Request", True);
840 *ret = 0; /* meaningless */
841 break;
842
843 case VG_USERREQ__STOP_INSTRUMENTATION:
844 CLG_(set_instrument_state)("Client Request", False);
845 *ret = 0; /* meaningless */
846 break;
847
848 default:
849 return False;
850 }
851
852 return True;
853}
854
855
856/* Syscall Timing */
857
858/* struct timeval syscalltime[VG_N_THREADS]; */
859#if CLG_MICROSYSTIME
860#include <sys/time.h>
861#include <sys/syscall.h>
862extern Int VG_(do_syscall) ( UInt, ... );
863
864ULong syscalltime[VG_N_THREADS];
865#else
866UInt syscalltime[VG_N_THREADS];
867#endif
868
869static
870void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno)
871{
872 if (CLG_(clo).collect_systime) {
873#if CLG_MICROSYSTIME
874 struct vki_timeval tv_now;
875 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
876 syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
877#else
878 syscalltime[tid] = VG_(read_millisecond_timer)();
879#endif
880 }
881}
882
883static
884void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, SysRes res)
885{
886 if (CLG_(clo).collect_systime) {
887 Int o = CLG_(sets).off_full_systime;
888#if CLG_MICROSYSTIME
889 struct vki_timeval tv_now;
890 ULong diff;
891
892 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
893 diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
894#else
895 UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
896#endif
897
898 CLG_DEBUG(0," Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff);
899
900 if (o<0) return;
901
902 CLG_(current_state).cost[o] ++;
903 CLG_(current_state).cost[o+1] += diff;
904 if (!CLG_(current_state).bbcc->skipped)
905 CLG_(init_cost_lz)(CLG_(sets).full,
906 &(CLG_(current_state).bbcc->skipped));
907 CLG_(current_state).bbcc->skipped[o] ++;
908 CLG_(current_state).bbcc->skipped[o+1] += diff;
909 }
910}
911
912static
913void finish(void)
914{
915 char buf[RESULTS_BUF_LEN];
916
917 CLG_DEBUG(0, "finish()\n");
918
919 (*CLG_(cachesim).finish)();
920
921 /* pop all remaining items from CallStack for correct sum
922 */
923 CLG_(forall_threads)(unwind_thread);
sewardje45a7992006-10-17 02:24:18 +0000924
weidendoa17f2a32006-03-20 10:27:30 +0000925 CLG_(dump_profile)(0, False);
sewardje45a7992006-10-17 02:24:18 +0000926
weidendoa17f2a32006-03-20 10:27:30 +0000927 CLG_(finish_command)();
sewardje45a7992006-10-17 02:24:18 +0000928
weidendoa17f2a32006-03-20 10:27:30 +0000929 if (VG_(clo_verbosity) == 0) return;
930
931 /* Hash table stats */
932 if (VG_(clo_verbosity) > 1) {
933 int BB_lookups =
934 CLG_(stat).full_debug_BBs +
935 CLG_(stat).fn_name_debug_BBs +
936 CLG_(stat).file_line_debug_BBs +
937 CLG_(stat).no_debug_BBs;
938
939 VG_(message)(Vg_DebugMsg, "");
940 VG_(message)(Vg_DebugMsg, "Distinct objects: %d",
941 CLG_(stat).distinct_objs);
942 VG_(message)(Vg_DebugMsg, "Distinct files: %d",
943 CLG_(stat).distinct_files);
944 VG_(message)(Vg_DebugMsg, "Distinct fns: %d",
945 CLG_(stat).distinct_fns);
946 VG_(message)(Vg_DebugMsg, "Distinct contexts:%d",
947 CLG_(stat).distinct_contexts);
948 VG_(message)(Vg_DebugMsg, "Distinct BBs: %d",
949 CLG_(stat).distinct_bbs);
950 VG_(message)(Vg_DebugMsg, "Cost entries: %d (Chunks %d)",
951 CLG_(costarray_entries), CLG_(costarray_chunks));
952 VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d",
953 CLG_(stat).distinct_bbccs);
954 VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d",
955 CLG_(stat).distinct_jccs);
956 VG_(message)(Vg_DebugMsg, "Distinct skips: %d",
957 CLG_(stat).distinct_skips);
958 VG_(message)(Vg_DebugMsg, "BB lookups: %d",
959 BB_lookups);
960 if (BB_lookups>0) {
961 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
962 CLG_(stat).full_debug_BBs * 100 / BB_lookups,
963 CLG_(stat).full_debug_BBs);
964 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
965 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
966 CLG_(stat).file_line_debug_BBs);
967 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
968 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
969 CLG_(stat).fn_name_debug_BBs);
970 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
971 CLG_(stat).no_debug_BBs * 100 / BB_lookups,
972 CLG_(stat).no_debug_BBs);
973 }
974 VG_(message)(Vg_DebugMsg, "BBCC Clones: %d",
975 CLG_(stat).bbcc_clones);
976 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d",
977 CLG_(stat).bb_retranslations);
978 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d",
979 CLG_(stat).distinct_instrs);
980 VG_(message)(Vg_DebugMsg, "");
981
982 VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d",
983 CLG_(stat).cxt_lru_misses);
984 VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d",
985 CLG_(stat).bbcc_lru_misses);
986 VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d",
987 CLG_(stat).jcc_lru_misses);
988 VG_(message)(Vg_DebugMsg, "BBs Executed: %llu",
989 CLG_(stat).bb_executions);
990 VG_(message)(Vg_DebugMsg, "Calls: %llu",
991 CLG_(stat).call_counter);
992 VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu",
993 CLG_(stat).jcnd_counter);
994 VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu",
995 CLG_(stat).jump_counter);
996 VG_(message)(Vg_DebugMsg, "Recursive calls: %llu",
997 CLG_(stat).rec_call_counter);
998 VG_(message)(Vg_DebugMsg, "Returns: %llu",
999 CLG_(stat).ret_counter);
1000
1001 VG_(message)(Vg_DebugMsg, "");
1002 }
1003
1004 CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
1005 VG_(message)(Vg_UserMsg, "Events : %s", buf);
1006 CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
1007 VG_(message)(Vg_UserMsg, "Collected : %s", buf);
1008 VG_(message)(Vg_UserMsg, "");
1009
1010 // if (CLG_(clo).simulate_cache)
1011 (*CLG_(cachesim).printstat)();
1012}
1013
1014
1015void CLG_(fini)(Int exitcode)
1016{
1017 finish();
1018}
1019
1020
1021/*--------------------------------------------------------------------*/
1022/*--- Setup ---*/
1023/*--------------------------------------------------------------------*/
1024
sewardj97561812006-12-23 01:21:12 +00001025static void clg_thread_runstate_callback ( ThreadId tid,
1026 Bool is_running,
1027 ULong blocks_done )
1028{
1029 if (0)
1030 VG_(printf)("%d %c %llu\n",
1031 (Int)tid, is_running ? 'R' : 's', blocks_done);
1032 /* Simply call onwards to CLG_(run_thread). Maybe this can be
1033 simplified later? */
1034 if (is_running)
1035 CLG_(run_thread)( tid );
1036}
1037
weidendoa17f2a32006-03-20 10:27:30 +00001038static
1039void CLG_(post_clo_init)(void)
1040{
weidendoa17f2a32006-03-20 10:27:30 +00001041 VG_(clo_vex_control).iropt_unroll_thresh = 0;
1042 VG_(clo_vex_control).guest_chase_thresh = 0;
1043
1044 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
1045 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
1046 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);
1047
1048 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
1049 VG_(message)(Vg_UserMsg, "Using source line as position.");
1050 CLG_(clo).dump_line = True;
1051 }
1052
weidendo4ce5e792006-09-20 21:29:39 +00001053 CLG_(init_dumps)();
1054 CLG_(init_command)();
weidendoa17f2a32006-03-20 10:27:30 +00001055
1056 (*CLG_(cachesim).post_clo_init)();
1057
1058 CLG_(init_eventsets)(0);
1059 CLG_(init_statistics)(& CLG_(stat));
1060 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
1061
1062 /* initialize hash tables */
1063 CLG_(init_obj_table)();
1064 CLG_(init_cxt_table)();
1065 CLG_(init_bb_hash)();
1066
1067 CLG_(init_threads)();
1068 CLG_(run_thread)(1);
1069
1070 CLG_(instrument_state) = CLG_(clo).instrument_atstart;
1071
weidendoca472c52006-03-31 19:34:51 +00001072 if (VG_(clo_verbosity > 0)) {
weidendoca472c52006-03-31 19:34:51 +00001073 VG_(message)(Vg_UserMsg,
1074 "For interactive control, run 'callgrind_control -h'.");
1075 }
weidendoa17f2a32006-03-20 10:27:30 +00001076}
1077
1078static
1079void CLG_(pre_clo_init)(void)
1080{
1081 VG_(details_name) ("Callgrind");
weidendoca472c52006-03-31 19:34:51 +00001082 VG_(details_version) (NULL);
weidendoa17f2a32006-03-20 10:27:30 +00001083 VG_(details_description) ("a call-graph generating cache profiler");
1084 VG_(details_copyright_author)("Copyright (C) 2002-2006, and GNU GPL'd, "
weidendoca472c52006-03-31 19:34:51 +00001085 "by Josef Weidendorfer et al.");
weidendodb70ed72006-05-27 15:39:45 +00001086 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardje45a7992006-10-17 02:24:18 +00001087 VG_(details_avg_translation_sizeB) ( 500 );
weidendoa17f2a32006-03-20 10:27:30 +00001088
1089 VG_(basic_tool_funcs) (CLG_(post_clo_init),
1090 CLG_(instrument),
1091 CLG_(fini));
1092
1093 VG_(needs_basic_block_discards)(clg_discard_basic_block_info);
1094
1095
1096 VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
1097 CLG_(print_usage),
1098 CLG_(print_debug_usage));
1099
1100 VG_(needs_client_requests)(CLG_(handle_client_request));
1101 VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
1102 CLG_(post_syscalltime));
1103
sewardj97561812006-12-23 01:21:12 +00001104 VG_(track_thread_runstate) ( & clg_thread_runstate_callback );
weidendoa17f2a32006-03-20 10:27:30 +00001105 VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
1106 VG_(track_post_deliver_signal) ( & CLG_(post_signal) );
1107
1108 CLG_(set_clo_defaults)();
1109}
1110
1111VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
1112
1113/*--------------------------------------------------------------------*/
1114/*--- end main.c ---*/
1115/*--------------------------------------------------------------------*/