/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call-graph
   profiling of programs.

   Copyright (C) 2002-2006, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2006 Nicholas Nethercote (njn25@cam.ac.uk)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "config.h"
#include "callgrind.h"
#include "global.h"

#include <pub_tool_threadstate.h>

/*------------------------------------------------------------*/
/*--- Global variables                                     ---*/
/*------------------------------------------------------------*/

/* for all threads */
CommandLineOptions CLG_(clo);
Statistics         CLG_(stat);
Bool               CLG_(instrument_state) = True; /* Instrumentation on ? */

/* thread and signal handler specific */
exec_state CLG_(current_state);


/*------------------------------------------------------------*/
/*--- Statistics                                           ---*/
/*------------------------------------------------------------*/

static void CLG_(init_statistics)(Statistics* s)
{
   s->call_counter        = 0;
   s->jcnd_counter        = 0;
   s->jump_counter        = 0;
   s->rec_call_counter    = 0;
   s->ret_counter         = 0;
   s->bb_executions       = 0;

   s->context_counter     = 0;
   s->bb_retranslations   = 0;

   s->distinct_objs       = 0;
   s->distinct_files      = 0;
   s->distinct_fns        = 0;
   s->distinct_contexts   = 0;
   s->distinct_bbs        = 0;
   s->distinct_bbccs      = 0;
   s->distinct_instrs     = 0;
   s->distinct_skips      = 0;

   s->bb_hash_resizes     = 0;
   s->bbcc_hash_resizes   = 0;
   s->jcc_hash_resizes    = 0;
   s->cxt_hash_resizes    = 0;
   s->fn_array_resizes    = 0;
   s->call_stack_resizes  = 0;
   s->fn_stack_resizes    = 0;

   s->full_debug_BBs      = 0;
   s->file_line_debug_BBs = 0;
   s->fn_name_debug_BBs   = 0;
   s->no_debug_BBs        = 0;
   s->bbcc_lru_misses     = 0;
   s->jcc_lru_misses      = 0;
   s->cxt_lru_misses      = 0;
   s->bbcc_clones         = 0;
}




/*------------------------------------------------------------*/
/*--- Cache simulation instrumentation phase               ---*/
/*------------------------------------------------------------*/


static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
{
   // I'm assuming that for 'modify' instructions, Vex always makes
   // loadAddrExpr and storeAddrExpr be of the same kind, i.e. both Tmp
   // expressions or both Const expressions.
   CLG_ASSERT(isIRAtom(loadAddrExpr));
   CLG_ASSERT(isIRAtom(storeAddrExpr));
   return eqIRAtom(loadAddrExpr, storeAddrExpr);
}

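/* Add a call into bbOut to the cache-simulator helper matching the
 * memory behaviour of one original instruction (no access, load, store,
 * modify, or independent load+store), passing the InstrInfo and the
 * access address expression(s) as arguments.  Returns the event set
 * whose counters that helper updates, or 0 if the active simulator
 * provides no helper for this case.
 */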
static
EventSet* insert_simcall(IRBB* bbOut, InstrInfo* ii, UInt dataSize,
                         Bool instrIssued,
                         IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
{
   HChar*    helperName;
   void*     helperAddr;
   Int       argc;
   EventSet* es;
   IRExpr   *arg1, *arg2 = 0, *arg3 = 0, **argv;
   IRDirty*  di;

   /* Check the memory access type of the original instruction and
    * collect the info needed to generate a fitting helper call.
    */
   if (!loadAddrExpr && !storeAddrExpr) {
      // no load/store
      CLG_ASSERT(0 == dataSize);
      if (instrIssued) {
         helperName = 0;
         helperAddr = 0;
      }
      else {
         helperName = CLG_(cachesim).log_1I0D_name;
         helperAddr = CLG_(cachesim).log_1I0D;
      }
      argc = 1;
      es = CLG_(sets).D0;

   } else if (loadAddrExpr && !storeAddrExpr) {
      // load
      CLG_ASSERT( isIRAtom(loadAddrExpr) );
      if (instrIssued) {
         helperName = CLG_(cachesim).log_0I1Dr_name;
         helperAddr = CLG_(cachesim).log_0I1Dr;
      }
      else {
         helperName = CLG_(cachesim).log_1I1Dr_name;
         helperAddr = CLG_(cachesim).log_1I1Dr;
      }
      argc = 2;
      arg2 = loadAddrExpr;
      es = CLG_(sets).D1r;

   } else if (!loadAddrExpr && storeAddrExpr) {
      // store
      CLG_ASSERT( isIRAtom(storeAddrExpr) );
      if (instrIssued) {
         helperName = CLG_(cachesim).log_0I1Dw_name;
         helperAddr = CLG_(cachesim).log_0I1Dw;
      }
      else {
         helperName = CLG_(cachesim).log_1I1Dw_name;
         helperAddr = CLG_(cachesim).log_1I1Dw;
      }
      argc = 2;
      arg2 = storeAddrExpr;
      es = CLG_(sets).D1w;

   } else {
      CLG_ASSERT( loadAddrExpr && storeAddrExpr );
      CLG_ASSERT( isIRAtom(loadAddrExpr) );
      CLG_ASSERT( isIRAtom(storeAddrExpr) );

      if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) {
         /* modify: assume a write access, as this is more resource
          * consuming (as in Callgrind for VG2).
          * Cachegrind does a read here (!)
          * DISCUSS: the best choice depends on the simulation model?
          */
         if (instrIssued) {
            helperName = CLG_(cachesim).log_0I1Dw_name;
            helperAddr = CLG_(cachesim).log_0I1Dw;
         }
         else {
            helperName = CLG_(cachesim).log_1I1Dw_name;
            helperAddr = CLG_(cachesim).log_1I1Dw;
         }
         argc = 2;
         arg2 = storeAddrExpr;
         es = CLG_(sets).D1w;

      } else {
         // load/store
         if (instrIssued) {
            helperName = CLG_(cachesim).log_0I2D_name;
            helperAddr = CLG_(cachesim).log_0I2D;
         }
         else {
            helperName = CLG_(cachesim).log_1I2D_name;
            helperAddr = CLG_(cachesim).log_1I2D;
         }
         argc = 3;
         arg2 = loadAddrExpr;
         arg3 = storeAddrExpr;
         es = CLG_(sets).D2;
      }
   }

   /* helper could be unset depending on the simulator used */
   if (helperAddr == 0) return 0;

   /* Setup 1st arg: InstrInfo */
   arg1 = mkIRExpr_HWord( (HWord)ii );

   // Add call to the instrumentation function
   if (argc == 1)
      argv = mkIRExprVec_1(arg1);
   else if (argc == 2)
      argv = mkIRExprVec_2(arg1, arg2);
   else if (argc == 3)
      argv = mkIRExprVec_3(arg1, arg2, arg3);
   else
      VG_(tool_panic)("argc... not 1 or 2 or 3?");

   di = unsafeIRDirty_0_N( argc, helperName,
                           VG_(fnptr_to_fnentry)( helperAddr ), argv);
   addStmtToIRBB( bbOut, IRStmt_Dirty(di) );

   return es;
}


/* Instrumentation before a conditional jump or at the end
 * of each original instruction.
 * Fills in the InstrInfo struct if the BB has not been seen before.
 */
static
void endOfInstr(IRBB* bbOut, InstrInfo* ii, Bool bb_seen_before,
                UInt instr_offset, UInt instrLen, UInt dataSize,
                UInt* cost_offset, Bool instrIssued,
                IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
{
   IRType    wordTy;
   EventSet* es;

   // Stay sane ...
   CLG_ASSERT(sizeof(HWord) == sizeof(void*));
   if (sizeof(HWord) == 4) {
      wordTy = Ity_I32;
   } else
   if (sizeof(HWord) == 8) {
      wordTy = Ity_I64;
   } else {
      VG_(tool_panic)("endOfInstr: strange word size");
   }

   if (loadAddrExpr)
      CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr));
   if (storeAddrExpr)
      CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr));

   // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be
   // done inaccurately, but they're very rare and this avoids errors from
   // hitting more than two cache lines in the simulation.
   if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE;

   /* returns 0 if simulator needs no instrumentation */
   es = insert_simcall(bbOut, ii, dataSize, instrIssued,
                       loadAddrExpr, storeAddrExpr);

   CLG_DEBUG(5, " Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n",
             instr_offset, instrLen, dataSize,
             es ? es->name : (Char*)"(no instrumentation)",
             es ? es->size : 0);

   if (bb_seen_before) {
      CLG_DEBUG(5, " before: Instr +%2d (Size %d, DSize %d)\n",
                ii->instr_offset, ii->instr_size, ii->data_size);

      CLG_ASSERT(ii->instr_offset == instr_offset);
      CLG_ASSERT(ii->instr_size == instrLen);
      CLG_ASSERT(ii->cost_offset == *cost_offset);
      CLG_ASSERT(ii->eventset == es);

      /* Only check the data size if it is > 0.
       * This is needed: e.g. for rep or cmov x86 instructions, the same
       * InstrInfo is used for two simulator calls: one for the pure
       * instruction fetch and a separate one for the memory access
       * (which may not happen at all, depending on flags).
       * If always checked, this triggers an assertion failure on
       * retranslation.
       */
      if (dataSize>0) CLG_ASSERT(ii->data_size == dataSize);

   }
   else {
      ii->instr_offset = instr_offset;
      ii->instr_size = instrLen;
      ii->cost_offset = *cost_offset;
      ii->eventset = es;

      /* data size only relevant if >0 */
      if (dataSize > 0) ii->data_size = dataSize;


      CLG_(stat).distinct_instrs++;
   }

   *cost_offset += es ? es->size : 0;

}

#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
# define CLGEndness Iend_LE
#else
# error "Unknown endianness"
#endif

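/* Extract the guest code address from an IRConst, which must be a
 * U32 or U64 constant matching the size of Addr on this host.
 */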
static
Addr IRConst2Addr(IRConst* con)
{
   Addr addr;

   if (sizeof(Addr) == 4) {
      CLG_ASSERT( con->tag == Ico_U32 );
      addr = con->Ico.U32;
   }
   else if (sizeof(Addr) == 8) {
      CLG_ASSERT( con->tag == Ico_U64 );
      addr = con->Ico.U64;
   }
   else
      VG_(tool_panic)("Callgrind: invalid Addr type");

   return addr;
}

/* First pass over a BB to instrument, counting instructions and
 * conditional jumps. This is needed to know the size of the BB struct
 * to allocate.
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRBB* bbIn,
                            /*INOUT*/ UInt* instrs,
                            /*INOUT*/ UInt* cjmps,
                            /*INOUT*/ Bool* cjmp_inverted)
{
   Int i;
   IRStmt* st;
   Addr instrAddr = 0, jumpDst;
   UInt instrLen = 0;
   Bool toNextInstr = False;

   // Ist_Exit has to be ignored in preamble code, before the first IMark:
   // preamble code is added by VEX for self-modifying code and has
   // nothing to do with client code
   Bool inPreamble = True;

   if (!bbIn) return;

   for (i = 0; i < bbIn->stmts_used; i++) {
      st = bbIn->stmts[i];
      if (Ist_IMark == st->tag) {
         inPreamble = False;

         instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
         instrLen  = st->Ist.IMark.len;

         (*instrs)++;
         toNextInstr = False;
      }
      if (inPreamble) continue;
      if (Ist_Exit == st->tag) {
         jumpDst = IRConst2Addr(st->Ist.Exit.dst);
         toNextInstr = (jumpDst == instrAddr + instrLen);

         (*cjmps)++;
      }
   }

   /* If the last instruction of the BB conditionally jumps to the next
    * instruction (= the first instruction of the next BB in memory),
    * this jump is inverted by VEX.
    */
   *cjmp_inverted = toNextInstr;
}

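/* Look at one flat IR statement of the current original instruction
 * and record what is relevant for instrumentation: instruction address
 * and length from an IMark, and the address expression and size of a
 * load and/or store (memory-accessing dirty helpers are treated the
 * same way).  Only one load and one store per original instruction are
 * tracked; see the kludge notes below.
 */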
static
void collectStatementInfo(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st,
                          Addr* instrAddr, UInt* instrLen,
                          IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
                          UInt* dataSize, IRType hWordTy)
{
   CLG_ASSERT(isFlatIRStmt(st));

   switch (st->tag) {
   case Ist_NoOp:
      break;

   case Ist_AbiHint:
      /* ABI hints aren't interesting. Ignore. */
      break;

   case Ist_IMark:
      /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this
         to the host's native pointer type; if that is 32 bits then it
         discards the upper 32 bits. If we are running on a 32-bit
         host then we are also guaranteed that the guest word size is
         32 bits, due to the assertion in the instrumentation function
         that the host and guest word sizes must be the same. Hence
         st->Ist.IMark.addr will have been derived from a 32-bit guest
         code address and truncation of it is safe. I believe this
         assignment should be correct for both 32- and 64-bit
         machines. */
      *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
      *instrLen  = st->Ist.IMark.len;
      break;

   case Ist_Tmp: {
      IRExpr* data = st->Ist.Tmp.data;
      if (data->tag == Iex_Load) {
         IRExpr* aexpr = data->Iex.Load.addr;
         CLG_ASSERT( isIRAtom(aexpr) );
         // Note also, endianness info is ignored. I guess that's not
         // interesting.
         // XXX: repe cmpsb does two loads... the first one is ignored here!
         //tl_assert( NULL == *loadAddrExpr ); // XXX: ???
         *loadAddrExpr = aexpr;
         *dataSize = sizeofIRType(data->Iex.Load.ty);
      }
      break;
   }

   case Ist_Store: {
      IRExpr* data  = st->Ist.Store.data;
      IRExpr* aexpr = st->Ist.Store.addr;
      CLG_ASSERT( isIRAtom(aexpr) );
      if ( NULL == *storeAddrExpr ) {
         /* this is a kludge: ignore all except the first store from
            an instruction. */
         *storeAddrExpr = aexpr;
         *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
      }
      break;
   }

   case Ist_Dirty: {
      IRDirty* d = st->Ist.Dirty.details;
      if (d->mFx != Ifx_None) {
         /* This dirty helper accesses memory. Collect the details. */
         CLG_ASSERT(d->mAddr != NULL);
         CLG_ASSERT(d->mSize != 0);
         *dataSize = d->mSize;
         if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
            *loadAddrExpr = d->mAddr;
         if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
            *storeAddrExpr = d->mAddr;
      } else {
         CLG_ASSERT(d->mAddr == NULL);
         CLG_ASSERT(d->mSize == 0);
      }
      break;
   }

   case Ist_Put:
   case Ist_PutI:
   case Ist_MFence:
   case Ist_Exit:
      break;

   default:
      VG_(printf)("\n");
      ppIRStmt(st);
      VG_(printf)("\n");
      VG_(tool_panic)("Callgrind: unhandled IRStmt");
   }
}

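/* Append a store to bbOut which writes the 32-bit constant val to the
 * fixed address addr (emitted as a 32- or 64-bit address constant
 * depending on the host word type).  Used to update
 * CLG_(current_state).jmps_passed from the generated code.
 */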
static
void addConstMemStoreStmt( IRBB* bbOut, UWord addr, UInt val, IRType hWordTy)
{
   addStmtToIRBB( bbOut,
                  IRStmt_Store(CLGEndness,
                               IRExpr_Const(hWordTy == Ity_I32 ?
                                            IRConst_U32( addr ) :
                                            IRConst_U64( addr )),
                               IRExpr_Const(IRConst_U32(val)) ));
}

static
IRBB* CLG_(instrument)( VgCallbackClosure* closure,
                        IRBB* bbIn,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Int      i;
   IRBB*    bbOut;
   IRStmt*  st, *stnext;
   Addr     instrAddr, origAddr;
   UInt     instrLen = 0, dataSize;
   UInt     instrCount, costOffset;
   IRExpr  *loadAddrExpr, *storeAddrExpr;

   BB*      bb;

   IRDirty* di;
   IRExpr  *arg1, **argv;

   Bool bb_seen_before = False;
   UInt cJumps = 0, cJumpsCorrected;
   Bool beforeIBoundary, instrIssued;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // No instrumentation if it is switched off
   if (! CLG_(instrument_state)) {
      CLG_DEBUG(5, "instrument(BB %p) [Instrumentation OFF]\n",
                (Addr)closure->readdr);
      return bbIn;
   }

   CLG_DEBUG(3, "+ instrument(BB %p)\n", (Addr)closure->readdr);

   /* Set up BB for instrumented IR */
   bbOut = emptyIRBB();
   bbOut->tyenv = dopyIRTypeEnv(bbIn->tyenv);
   bbOut->next = dopyIRExpr(bbIn->next);
   bbOut->jumpkind = bbIn->jumpkind;

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRBB( bbOut, bbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and origAddr from it
   CLG_ASSERT(bbIn->stmts_used > 0);
   st = bbIn->stmts[i];
   CLG_ASSERT(Ist_IMark == st->tag);
   instrAddr = origAddr = (Addr)st->Ist.IMark.addr;
   CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow

   /* Get BB (creating if necessary).
    * JS: The hash table is keyed with orig_addr_noredir -- important!
    * JW: Why? If it is because of different chasing of the redirection,
    *     this is not needed, as chasing is switched off in callgrind
    */
   bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before);
   //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before);

   /*
    * Precondition:
    * - jmps_passed has the number of cond. jumps passed in the last executed BB
    * - current_bbcc has a pointer to the BBCC of the last executed BB
    *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
    *     current_bbcc->bb->jmp_addr
    *   gives the address of the jump source.
    *
    * The BBCC setup does 2 things:
    * - trace call:
    *   * Unwind own call stack, i.e. sync our ESP with the real ESP
    *     This is for ESP manipulation (longjmps, C++ exception handling) and RET
    *   * For CALLs or JMPs crossing objects, record the call arg and
    *     push onto our own call stack
    *
    * - prepare for cache log functions:
    *   set current_bbcc to the BBCC that gets the costs for this BB execution
    *   attached
    */

   // helper call to setup_bbcc, with a pointer to the BB info struct as argument
   arg1 = mkIRExpr_HWord( (HWord)bb );
   argv = mkIRExprVec_1(arg1);
   di = unsafeIRDirty_0_N( 1, "setup_bbcc",
                           VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
                           argv);
   addStmtToIRBB( bbOut, IRStmt_Dirty(di) );

   instrCount = 0;
   costOffset = 0;

   // loop for each host instruction (starting from 'i')
   do {

      // We should be at an IMark statement
      CLG_ASSERT(Ist_IMark == st->tag);

      // Reset stuff for this original instruction
      loadAddrExpr = storeAddrExpr = NULL;
      instrIssued = False;
      dataSize = 0;

      // Process all the statements for this original instruction (ie. until
      // the next IMark statement, or the end of the block)
      do {
         i++;
         stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL );
         beforeIBoundary = !stnext || (Ist_IMark == stnext->tag);
         collectStatementInfo(bbIn->tyenv, bbOut, st, &instrAddr, &instrLen,
                              &loadAddrExpr, &storeAddrExpr, &dataSize, hWordTy);

         // instrument a simulator call before conditional jumps
         if (st->tag == Ist_Exit) {
            // Nb: instrLen will be zero if Vex failed to decode it.
            // Also, client requests can appear to be very large (eg. 18
            // bytes on x86) because they are really multiple instructions.
            CLG_ASSERT( 0 == instrLen ||
                        bbIn->jumpkind == Ijk_ClientReq ||
                        (instrLen >= VG_MIN_INSTR_SZB &&
                         instrLen <= VG_MAX_INSTR_SZB) );

            // Add instrumentation before this statement
            endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
                       instrAddr - origAddr, instrLen, dataSize, &costOffset,
                       instrIssued, loadAddrExpr, storeAddrExpr);

            // prepare for a possible further simcall in the same host instr
            loadAddrExpr = storeAddrExpr = NULL;
            instrIssued = True;

            if (!bb_seen_before) {
               bb->jmp[cJumps].instr = instrCount;
               bb->jmp[cJumps].skip = False;
            }

            /* Update the global variable jmps_passed (this is before the jump!)
             * A correction is needed if VEX inverted the last jump condition
             */
            cJumpsCorrected = cJumps;
            if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++;
            addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
                                  cJumpsCorrected, hWordTy);

            cJumps++;
         }

         addStmtToIRBB( bbOut, st );
         st = stnext;
      }
      while (!beforeIBoundary);

      // Add instrumentation for this original instruction.
      if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr != 0))
         endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
                    instrAddr - origAddr, instrLen, dataSize, &costOffset,
                    instrIssued, loadAddrExpr, storeAddrExpr);

      instrCount++;
   }
   while (st);

   /* Always update the global variable jmps_passed (at the end of the BB).
    * A correction is needed if VEX inverted the last jump condition
    */
   cJumpsCorrected = cJumps;
   if (bb->cjmp_inverted) cJumpsCorrected--;
   addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
                         cJumpsCorrected, hWordTy);

   /* This stores the instr index of the call/ret at the BB end */
   bb->jmp[cJumps].instr = instrCount-1;

   CLG_ASSERT(bb->cjmp_count == cJumps);
   CLG_ASSERT(bb->instr_count == instrCount);

   instrAddr += instrLen;
   if (bb_seen_before) {
      CLG_ASSERT(bb->instr_len == instrAddr - origAddr);
      CLG_ASSERT(bb->cost_count == costOffset);
      CLG_ASSERT(bb->jmpkind == bbIn->jumpkind);
   }
   else {
      bb->instr_len = instrAddr - origAddr;
      bb->cost_count = costOffset;
      bb->jmpkind = bbIn->jumpkind;
   }

   CLG_DEBUG(3, "- instrument(BB %p): byteLen %u, CJumps %u, CostLen %u\n",
             origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count);
   if (cJumps>0) {
      CLG_DEBUG(3, " [ ");
      for (i=0; i<cJumps; i++)
         CLG_DEBUG(3, "%d ", bb->jmp[i].instr);
      CLG_DEBUG(3, "], last inverted: %s \n", bb->cjmp_inverted ? "yes":"no");
   }

   return bbOut;
}

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void clg_discard_basic_block_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   Addr orig_addr = (Addr)orig_addr64;

   tl_assert(vge.n_used > 0);

   if (0)
      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info. Simple! Note that we
   // use orig_addr, not the first instruction address in vge.
   CLG_(delete_bb)(orig_addr);
}


/*------------------------------------------------------------*/
/*--- CLG_(fini)() and related functions                   ---*/
/*------------------------------------------------------------*/



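/* Throw away the costs collected so far for one thread: reset the
 * enter cost of all active call-stack entries to the current cost,
 * zero all BBCCs, and remember the current cost as the state of the
 * last dump.
 */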
static void zero_thread_cost(thread_info* t)
{
   Int i;

   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      if (!CLG_(current_call_stack).entry[i].jcc) continue;

      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
                       CLG_(current_call_stack).entry[i].enter_cost,
                       CLG_(current_state).cost );
   }

   CLG_(forall_bbccs)(CLG_(zero_bbcc));

   /* set counter for last dump */
   CLG_(copy_cost)( CLG_(sets).full,
                    t->lastdump_cost, CLG_(current_state).cost );
}

void CLG_(zero_all_cost)(Bool only_current_thread)
{
   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, " Zeroing costs...");

   if (only_current_thread)
      zero_thread_cost(CLG_(get_current_thread)());
   else
      CLG_(forall_threads)(zero_thread_cost);

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, " ...done");
}

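/* Finish everything that is still pending for a thread: leave any
 * signal handlers currently being traced, then pop all remaining
 * entries from the regular call stack.
 */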
static
void unwind_thread(thread_info* t)
{
   /* unwind signal handlers */
   while(CLG_(current_state).sig != 0)
      CLG_(post_signal)(CLG_(current_tid), CLG_(current_state).sig);

   /* unwind regular call stack */
   while(CLG_(current_call_stack).sp > 0)
      CLG_(pop_call_stack)();
}

/* Oops, this can go wrong... */
extern void VG_(discard_translations) ( Addr64 start, ULong range );

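/* Switch instrumentation on or off at runtime.  All translations are
 * discarded so that every BB is retranslated, and thus instrumented or
 * not according to the new state, the next time it is executed; the
 * call stacks and the simulator state are reset as well.
 */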
void CLG_(set_instrument_state)(Char* reason, Bool state)
{
   if (CLG_(instrument_state) == state) {
      CLG_DEBUG(2, "%s: instrumentation already %s\n",
                reason, state ? "ON" : "OFF");
      return;
   }
   CLG_(instrument_state) = state;
   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
             reason, state ? "ON" : "OFF");

   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl);

   /* reset internal state: call stacks, simulator */
   CLG_(forall_threads)(unwind_thread);
   (*CLG_(cachesim).clear)();
   if (0)
      CLG_(forall_threads)(zero_thread_cost);

   if (!state)
      CLG_(init_exec_state)( &CLG_(current_state) );

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
                   reason, state ? "ON" : "OFF");
}


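/* Handle the CALLGRIND_* client requests declared in callgrind.h.
 * Returns False for requests not addressed to Callgrind
 * (tool code 'C','T').
 */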
static
Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
{
   if (!VG_IS_TOOL_USERREQ('C','T',args[0]))
      return False;

   switch(args[0]) {
   case VG_USERREQ__DUMP_STATS:
      CLG_(dump_profile)("Client Request", True);
      *ret = 0;                 /* meaningless */
      break;

   case VG_USERREQ__DUMP_STATS_AT:
      {
         Char buf[512];
         VG_(sprintf)(buf, "Client Request: %s", (Char*)args[1]);
         CLG_(dump_profile)(buf, True);
         *ret = 0;                 /* meaningless */
      }
      break;

   case VG_USERREQ__ZERO_STATS:
      CLG_(zero_all_cost)(True);
      *ret = 0;                 /* meaningless */
      break;

   case VG_USERREQ__TOGGLE_COLLECT:
      CLG_(current_state).collect = !CLG_(current_state).collect;
      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
                CLG_(current_state).collect ? "ON" : "OFF");
      *ret = 0;                 /* meaningless */
      break;

   case VG_USERREQ__START_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", True);
      *ret = 0;                 /* meaningless */
      break;

   case VG_USERREQ__STOP_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", False);
      *ret = 0;                 /* meaningless */
      break;

   default:
      return False;
   }

   return True;
}


/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
#include <sys/time.h>
#include <sys/syscall.h>
extern Int VG_(do_syscall) ( UInt, ... );

ULong syscalltime[VG_N_THREADS];
#else
UInt syscalltime[VG_N_THREADS];
#endif

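/* Syscall timing hooks: pre_syscalltime records a per-thread timestamp
 * at syscall entry (microseconds via gettimeofday if CLG_MICROSYSTIME,
 * milliseconds otherwise); post_syscalltime adds the syscall count and
 * the elapsed time to the system-time event counters of the current
 * cost center.
 */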
static
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno)
{
   if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
      syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
   }
}

static
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, SysRes res)
{
   if (CLG_(clo).collect_systime) {
      Int o = CLG_(sets).off_full_systime;
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      ULong diff;

      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
      UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif

      CLG_DEBUG(0, " Time (Off %d) for Syscall %d: %llu\n",
                o, syscallno, (ULong)diff);

      if (o<0) return;

      CLG_(current_state).cost[o] ++;
      CLG_(current_state).cost[o+1] += diff;
      if (!CLG_(current_state).bbcc->skipped)
         CLG_(init_cost_lz)(CLG_(sets).full,
                            &(CLG_(current_state).bbcc->skipped));
      CLG_(current_state).bbcc->skipped[o] ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
   }
}

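/* Actions at program termination: flush the cache simulator, unwind
 * all thread call stacks so that costs are summed up correctly, write
 * the final profile dump, and print statistics according to the
 * verbosity level.
 */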
static
void finish(void)
{
   char buf[RESULTS_BUF_LEN];

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* pop all remaining items from CallStack for correct sum
    */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   CLG_(finish_command)();

   if (VG_(clo_verbosity) == 0) return;

   /* Hash table stats */
   if (VG_(clo_verbosity) > 1) {
      int BB_lookups =
         CLG_(stat).full_debug_BBs +
         CLG_(stat).fn_name_debug_BBs +
         CLG_(stat).file_line_debug_BBs +
         CLG_(stat).no_debug_BBs;

      VG_(message)(Vg_DebugMsg, "");
      VG_(message)(Vg_DebugMsg, "Distinct objects: %d",
                   CLG_(stat).distinct_objs);
      VG_(message)(Vg_DebugMsg, "Distinct files:   %d",
                   CLG_(stat).distinct_files);
      VG_(message)(Vg_DebugMsg, "Distinct fns:     %d",
                   CLG_(stat).distinct_fns);
      VG_(message)(Vg_DebugMsg, "Distinct contexts:%d",
                   CLG_(stat).distinct_contexts);
      VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d",
                   CLG_(stat).distinct_bbs);
      VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)",
                   CLG_(costarray_entries), CLG_(costarray_chunks));
      VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d",
                   CLG_(stat).distinct_bbccs);
      VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d",
                   CLG_(stat).distinct_jccs);
      VG_(message)(Vg_DebugMsg, "Distinct skips:   %d",
                   CLG_(stat).distinct_skips);
      VG_(message)(Vg_DebugMsg, "BB lookups:       %d",
                   BB_lookups);
      if (BB_lookups>0) {
         VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)",
                      CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).full_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
                      CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).file_line_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)",
                      CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).fn_name_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)",
                      CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).no_debug_BBs);
      }
      VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d",
                   CLG_(stat).bbcc_clones);
      VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d",
                   CLG_(stat).bb_retranslations);
      VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d",
                   CLG_(stat).distinct_instrs);
      VG_(message)(Vg_DebugMsg, "");

      VG_(message)(Vg_DebugMsg, "LRU Context Misses: %d",
                   CLG_(stat).cxt_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:    %d",
                   CLG_(stat).bbcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU JCC Misses:     %d",
                   CLG_(stat).jcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "BBs Executed:       %llu",
                   CLG_(stat).bb_executions);
      VG_(message)(Vg_DebugMsg, "Calls:              %llu",
                   CLG_(stat).call_counter);
      VG_(message)(Vg_DebugMsg, "CondJMP followed:   %llu",
                   CLG_(stat).jcnd_counter);
      VG_(message)(Vg_DebugMsg, "Boring JMPs:        %llu",
                   CLG_(stat).jump_counter);
      VG_(message)(Vg_DebugMsg, "Recursive calls:    %llu",
                   CLG_(stat).rec_call_counter);
      VG_(message)(Vg_DebugMsg, "Returns:            %llu",
                   CLG_(stat).ret_counter);

      VG_(message)(Vg_DebugMsg, "");
   }

   CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events    : %s", buf);
   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s", buf);
   VG_(message)(Vg_UserMsg, "");

   // if (CLG_(clo).simulate_cache)
   (*CLG_(cachesim).printstat)();
}


void CLG_(fini)(Int exitcode)
{
   finish();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

static
void CLG_(post_clo_init)(void)
{
   Char *dir = 0, *fname = 0;

   VG_(clo_vex_control).iropt_unroll_thresh = 0;
   VG_(clo_vex_control).guest_chase_thresh = 0;

   CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
      VG_(message)(Vg_UserMsg, "Using source line as position.");
      CLG_(clo).dump_line = True;
   }

   CLG_(init_files)(&dir,&fname);
   CLG_(init_command)(dir,fname);

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)(0);
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control -h'.");
   }
}

static
void CLG_(pre_clo_init)(void)
{
   VG_(details_name)            ("Callgrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2006, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 245 );

   VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                 CLG_(instrument),
                                 CLG_(fini));

   VG_(needs_basic_block_discards)(clg_discard_basic_block_info);


   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_usage),
                                   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                              CLG_(post_syscalltime));

   VG_(track_thread_run)          ( & CLG_(run_thread) );
   VG_(track_pre_deliver_signal)  ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal) ( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
}

VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/