weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Callgrind ---*/ |
| 4 | /*--- main.c ---*/ |
| 5 | /*--------------------------------------------------------------------*/ |
| 6 | |
| 7 | /* |
| 8 | This file is part of Callgrind, a Valgrind tool for call graph |
| 9 | profiling programs. |
| 10 | |
sewardj | 9ebd6e0 | 2007-01-08 06:01:59 +0000 | [diff] [blame] | 11 | Copyright (C) 2002-2007, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 12 | |
njn | 9a0cba4 | 2007-04-15 22:15:57 +0000 | [diff] [blame^] | 13 | This tool is derived from and contains code from Cachegrind |
| 14 | Copyright (C) 2002-2007 Nicholas Nethercote (njn@valgrind.org) |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 15 | |
| 16 | This program is free software; you can redistribute it and/or |
| 17 | modify it under the terms of the GNU General Public License as |
| 18 | published by the Free Software Foundation; either version 2 of the |
| 19 | License, or (at your option) any later version. |
| 20 | |
| 21 | This program is distributed in the hope that it will be useful, but |
| 22 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 24 | General Public License for more details. |
| 25 | |
| 26 | You should have received a copy of the GNU General Public License |
| 27 | along with this program; if not, write to the Free Software |
| 28 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 29 | 02111-1307, USA. |
| 30 | |
| 31 | The GNU General Public License is contained in the file COPYING. |
| 32 | */ |
| 33 | |
| 34 | #include "config.h" |
| 35 | #include "callgrind.h" |
| 36 | #include "global.h" |
| 37 | |
| 38 | #include <pub_tool_threadstate.h> |
| 39 | |
| 40 | /*------------------------------------------------------------*/ |
| 41 | /*--- Global variables ---*/ |
| 42 | /*------------------------------------------------------------*/ |
| 43 | |
| 44 | /* for all threads */ |
| 45 | CommandLineOptions CLG_(clo); |
| 46 | Statistics CLG_(stat); |
| 47 | Bool CLG_(instrument_state) = True; /* Instrumentation on ? */ |
| 48 | |
| 49 | /* thread and signal handler specific */ |
| 50 | exec_state CLG_(current_state); |
| 51 | |
| 52 | |
| 53 | /*------------------------------------------------------------*/ |
| 54 | /*--- Statistics ---*/ |
| 55 | /*------------------------------------------------------------*/ |
| 56 | |
| 57 | static void CLG_(init_statistics)(Statistics* s) |
| 58 | { |
| 59 | s->call_counter = 0; |
| 60 | s->jcnd_counter = 0; |
| 61 | s->jump_counter = 0; |
| 62 | s->rec_call_counter = 0; |
| 63 | s->ret_counter = 0; |
| 64 | s->bb_executions = 0; |
| 65 | |
| 66 | s->context_counter = 0; |
| 67 | s->bb_retranslations = 0; |
| 68 | |
| 69 | s->distinct_objs = 0; |
| 70 | s->distinct_files = 0; |
| 71 | s->distinct_fns = 0; |
| 72 | s->distinct_contexts = 0; |
| 73 | s->distinct_bbs = 0; |
| 74 | s->distinct_bbccs = 0; |
| 75 | s->distinct_instrs = 0; |
| 76 | s->distinct_skips = 0; |
| 77 | |
| 78 | s->bb_hash_resizes = 0; |
| 79 | s->bbcc_hash_resizes = 0; |
| 80 | s->jcc_hash_resizes = 0; |
| 81 | s->cxt_hash_resizes = 0; |
| 82 | s->fn_array_resizes = 0; |
| 83 | s->call_stack_resizes = 0; |
| 84 | s->fn_stack_resizes = 0; |
| 85 | |
| 86 | s->full_debug_BBs = 0; |
| 87 | s->file_line_debug_BBs = 0; |
| 88 | s->fn_name_debug_BBs = 0; |
| 89 | s->no_debug_BBs = 0; |
| 90 | s->bbcc_lru_misses = 0; |
| 91 | s->jcc_lru_misses = 0; |
| 92 | s->cxt_lru_misses = 0; |
| 93 | s->bbcc_clones = 0; |
| 94 | } |
| 95 | |
| 96 | |
| 97 | |
| 98 | |
| 99 | /*------------------------------------------------------------*/ |
| 100 | /*--- Cache simulation instrumentation phase ---*/ |
| 101 | /*------------------------------------------------------------*/ |
| 102 | |
| 103 | |
| 104 | static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) |
| 105 | { |
| 106 | // I'm assuming that for 'modify' instructions, that Vex always makes |
| 107 | // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp |
| 108 | // expressions, or both Const expressions. |
| 109 | CLG_ASSERT(isIRAtom(loadAddrExpr)); |
| 110 | CLG_ASSERT(isIRAtom(storeAddrExpr)); |
| 111 | return eqIRAtom(loadAddrExpr, storeAddrExpr); |
| 112 | } |
| 113 | |
| 114 | static |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 115 | EventSet* insert_simcall(IRSB* bbOut, InstrInfo* ii, UInt dataSize, |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 116 | Bool instrIssued, |
| 117 | IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) |
| 118 | { |
| 119 | HChar* helperName; |
| 120 | void* helperAddr; |
| 121 | Int argc; |
| 122 | EventSet* es; |
| 123 | IRExpr *arg1, *arg2 = 0, *arg3 = 0, **argv; |
| 124 | IRDirty* di; |
| 125 | |
| 126 | /* Check type of original instruction regarding memory access, |
| 127 | * and collect info to be able to generate fitting helper call |
| 128 | */ |
| 129 | if (!loadAddrExpr && !storeAddrExpr) { |
| 130 | // no load/store |
| 131 | CLG_ASSERT(0 == dataSize); |
| 132 | if (instrIssued) { |
| 133 | helperName = 0; |
| 134 | helperAddr = 0; |
| 135 | } |
| 136 | else { |
| 137 | helperName = CLG_(cachesim).log_1I0D_name; |
| 138 | helperAddr = CLG_(cachesim).log_1I0D; |
| 139 | } |
| 140 | argc = 1; |
| 141 | es = CLG_(sets).D0; |
| 142 | |
| 143 | } else if (loadAddrExpr && !storeAddrExpr) { |
| 144 | // load |
| 145 | CLG_ASSERT( isIRAtom(loadAddrExpr) ); |
| 146 | if (instrIssued) { |
| 147 | helperName = CLG_(cachesim).log_0I1Dr_name; |
| 148 | helperAddr = CLG_(cachesim).log_0I1Dr; |
| 149 | } |
| 150 | else { |
| 151 | helperName = CLG_(cachesim).log_1I1Dr_name; |
| 152 | helperAddr = CLG_(cachesim).log_1I1Dr; |
| 153 | } |
| 154 | argc = 2; |
| 155 | arg2 = loadAddrExpr; |
| 156 | es = CLG_(sets).D1r; |
| 157 | |
| 158 | } else if (!loadAddrExpr && storeAddrExpr) { |
| 159 | // store |
| 160 | CLG_ASSERT( isIRAtom(storeAddrExpr) ); |
| 161 | if (instrIssued) { |
| 162 | helperName = CLG_(cachesim).log_0I1Dw_name; |
| 163 | helperAddr = CLG_(cachesim).log_0I1Dw; |
| 164 | } |
| 165 | else { |
| 166 | helperName = CLG_(cachesim).log_1I1Dw_name; |
| 167 | helperAddr = CLG_(cachesim).log_1I1Dw; |
| 168 | } |
| 169 | argc = 2; |
| 170 | arg2 = storeAddrExpr; |
| 171 | es = CLG_(sets).D1w; |
| 172 | |
| 173 | } else { |
| 174 | CLG_ASSERT( loadAddrExpr && storeAddrExpr ); |
| 175 | CLG_ASSERT( isIRAtom(loadAddrExpr) ); |
| 176 | CLG_ASSERT( isIRAtom(storeAddrExpr) ); |
| 177 | |
| 178 | if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) { |
| 179 | /* modify: suppose write access, as this is |
| 180 | * more resource consuming (as in callgrind for VG2) |
| 181 | * Cachegrind does a read here (!) |
| 182 | * DISCUSS: Best way depends on simulation model? |
| 183 | */ |
| 184 | if (instrIssued) { |
| 185 | helperName = CLG_(cachesim).log_0I1Dw_name; |
| 186 | helperAddr = CLG_(cachesim).log_0I1Dw; |
| 187 | } |
| 188 | else { |
| 189 | helperName = CLG_(cachesim).log_1I1Dw_name; |
| 190 | helperAddr = CLG_(cachesim).log_1I1Dw; |
| 191 | } |
| 192 | argc = 2; |
| 193 | arg2 = storeAddrExpr; |
| 194 | es = CLG_(sets).D1w; |
| 195 | |
| 196 | } else { |
| 197 | // load/store |
| 198 | if (instrIssued) { |
| 199 | helperName = CLG_(cachesim).log_0I2D_name; |
| 200 | helperAddr = CLG_(cachesim).log_0I2D; |
| 201 | } |
| 202 | else { |
| 203 | helperName = CLG_(cachesim).log_1I2D_name; |
| 204 | helperAddr = CLG_(cachesim).log_1I2D; |
| 205 | } |
| 206 | argc = 3; |
| 207 | arg2 = loadAddrExpr; |
| 208 | arg3 = storeAddrExpr; |
| 209 | es = CLG_(sets).D2; |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | /* helper could be unset depending on the simulator used */ |
| 214 | if (helperAddr == 0) return 0; |
| 215 | |
| 216 | /* Setup 1st arg: InstrInfo */ |
| 217 | arg1 = mkIRExpr_HWord( (HWord)ii ); |
| 218 | |
| 219 | // Add call to the instrumentation function |
| 220 | if (argc == 1) |
| 221 | argv = mkIRExprVec_1(arg1); |
| 222 | else if (argc == 2) |
| 223 | argv = mkIRExprVec_2(arg1, arg2); |
| 224 | else if (argc == 3) |
| 225 | argv = mkIRExprVec_3(arg1, arg2, arg3); |
| 226 | else |
| 227 | VG_(tool_panic)("argc... not 1 or 2 or 3?"); |
| 228 | |
sewardj | 8a95fd3 | 2006-04-02 16:21:44 +0000 | [diff] [blame] | 229 | di = unsafeIRDirty_0_N( argc, helperName, |
| 230 | VG_(fnptr_to_fnentry)( helperAddr ), argv); |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 231 | addStmtToIRSB( bbOut, IRStmt_Dirty(di) ); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 232 | |
| 233 | return es; |
| 234 | } |
| 235 | |
| 236 | |
| 237 | /* Instrumentation before a conditional jump or at the end |
| 238 | * of each original instruction. |
| 239 | * Fills the InstrInfo struct if not seen before |
| 240 | */ |
| 241 | static |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 242 | void endOfInstr(IRSB* bbOut, InstrInfo* ii, Bool bb_seen_before, |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 243 | UInt instr_offset, UInt instrLen, UInt dataSize, |
| 244 | UInt* cost_offset, Bool instrIssued, |
| 245 | IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) |
| 246 | { |
| 247 | IRType wordTy; |
| 248 | EventSet* es; |
| 249 | |
| 250 | // Stay sane ... |
| 251 | CLG_ASSERT(sizeof(HWord) == sizeof(void*)); |
| 252 | if (sizeof(HWord) == 4) { |
| 253 | wordTy = Ity_I32; |
| 254 | } else |
| 255 | if (sizeof(HWord) == 8) { |
| 256 | wordTy = Ity_I64; |
| 257 | } else { |
| 258 | VG_(tool_panic)("endOfInstr: strange word size"); |
| 259 | } |
| 260 | |
| 261 | if (loadAddrExpr) |
| 262 | CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr)); |
| 263 | if (storeAddrExpr) |
| 264 | CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr)); |
| 265 | |
| 266 | // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be |
| 267 | // done inaccurately, but they're very rare and this avoids errors from |
| 268 | // hitting more than two cache lines in the simulation. |
| 269 | if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE; |
| 270 | |
| 271 | /* returns 0 if simulator needs no instrumentation */ |
| 272 | es = insert_simcall(bbOut, ii, dataSize, instrIssued, |
| 273 | loadAddrExpr, storeAddrExpr); |
| 274 | |
weidendo | c8e7615 | 2006-05-27 15:30:58 +0000 | [diff] [blame] | 275 | CLG_DEBUG(5, " Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n", |
| 276 | instr_offset, instrLen, dataSize, |
| 277 | es ? es->name : (Char*)"(no instrumentation)", |
| 278 | es ? es->size : 0); |
| 279 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 280 | if (bb_seen_before) { |
weidendo | c8e7615 | 2006-05-27 15:30:58 +0000 | [diff] [blame] | 281 | CLG_DEBUG(5, " before: Instr +%2d (Size %d, DSize %d)\n", |
| 282 | ii->instr_offset, ii->instr_size, ii->data_size); |
| 283 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 284 | CLG_ASSERT(ii->instr_offset == instr_offset); |
| 285 | CLG_ASSERT(ii->instr_size == instrLen); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 286 | CLG_ASSERT(ii->cost_offset == *cost_offset); |
| 287 | CLG_ASSERT(ii->eventset == es); |
weidendo | c8e7615 | 2006-05-27 15:30:58 +0000 | [diff] [blame] | 288 | |
| 289 | /* Only check size if data size >0. |
| 290 | * This is needed: e.g. for rep or cmov x86 instructions, the same InstrInfo |
| 291 | * is used both for 2 simulator calls: for the pure instruction fetch and |
| 292 | * separately for an memory access (which may not happen depending on flags). |
| 293 | * If checked always, this triggers an assertion failure on retranslation. |
| 294 | */ |
| 295 | if (dataSize>0) CLG_ASSERT(ii->data_size == dataSize); |
| 296 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 297 | } |
| 298 | else { |
| 299 | ii->instr_offset = instr_offset; |
| 300 | ii->instr_size = instrLen; |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 301 | ii->cost_offset = *cost_offset; |
| 302 | ii->eventset = es; |
weidendo | c8e7615 | 2006-05-27 15:30:58 +0000 | [diff] [blame] | 303 | |
| 304 | /* data size only relevant if >0 */ |
| 305 | if (dataSize > 0) ii->data_size = dataSize; |
| 306 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 307 | |
| 308 | CLG_(stat).distinct_instrs++; |
| 309 | } |
| 310 | |
| 311 | *cost_offset += es ? es->size : 0; |
| 312 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 313 | } |
| 314 | |
| 315 | #if defined(VG_BIGENDIAN) |
| 316 | # define CLGEndness Iend_BE |
| 317 | #elif defined(VG_LITTLEENDIAN) |
| 318 | # define CLGEndness Iend_LE |
| 319 | #else |
| 320 | # error "Unknown endianness" |
| 321 | #endif |
| 322 | |
| 323 | static |
| 324 | Addr IRConst2Addr(IRConst* con) |
| 325 | { |
| 326 | Addr addr; |
| 327 | |
| 328 | if (sizeof(Addr) == 4) { |
| 329 | CLG_ASSERT( con->tag == Ico_U32 ); |
| 330 | addr = con->Ico.U32; |
| 331 | } |
| 332 | else if (sizeof(Addr) == 8) { |
| 333 | CLG_ASSERT( con->tag == Ico_U64 ); |
| 334 | addr = con->Ico.U64; |
| 335 | } |
| 336 | else |
| 337 | VG_(tool_panic)("Callgrind: invalid Addr type"); |
| 338 | |
| 339 | return addr; |
| 340 | } |
| 341 | |
| 342 | /* First pass over a BB to instrument, counting instructions and jumps |
| 343 | * This is needed for the size of the BB struct to allocate |
| 344 | * |
| 345 | * Called from CLG_(get_bb) |
| 346 | */ |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 347 | void CLG_(collectBlockInfo)(IRSB* bbIn, |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 348 | /*INOUT*/ UInt* instrs, |
| 349 | /*INOUT*/ UInt* cjmps, |
| 350 | /*INOUT*/ Bool* cjmp_inverted) |
| 351 | { |
| 352 | Int i; |
| 353 | IRStmt* st; |
| 354 | Addr instrAddr =0, jumpDst; |
| 355 | UInt instrLen = 0; |
| 356 | Bool toNextInstr = False; |
| 357 | |
| 358 | // Ist_Exit has to be ignored in preamble code, before first IMark: |
| 359 | // preamble code is added by VEX for self modifying code, and has |
| 360 | // nothing to do with client code |
| 361 | Bool inPreamble = True; |
| 362 | |
| 363 | if (!bbIn) return; |
| 364 | |
| 365 | for (i = 0; i < bbIn->stmts_used; i++) { |
| 366 | st = bbIn->stmts[i]; |
| 367 | if (Ist_IMark == st->tag) { |
| 368 | inPreamble = False; |
| 369 | |
| 370 | instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr); |
| 371 | instrLen = st->Ist.IMark.len; |
| 372 | |
| 373 | (*instrs)++; |
| 374 | toNextInstr = False; |
| 375 | } |
| 376 | if (inPreamble) continue; |
| 377 | if (Ist_Exit == st->tag) { |
| 378 | jumpDst = IRConst2Addr(st->Ist.Exit.dst); |
| 379 | toNextInstr = (jumpDst == instrAddr + instrLen); |
| 380 | |
| 381 | (*cjmps)++; |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | /* if the last instructions of BB conditionally jumps to next instruction |
| 386 | * (= first instruction of next BB in memory), this is a inverted by VEX. |
| 387 | */ |
| 388 | *cjmp_inverted = toNextInstr; |
| 389 | } |
| 390 | |
| 391 | static |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 392 | void collectStatementInfo(IRTypeEnv* tyenv, IRSB* bbOut, IRStmt* st, |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 393 | Addr* instrAddr, UInt* instrLen, |
| 394 | IRExpr** loadAddrExpr, IRExpr** storeAddrExpr, |
| 395 | UInt* dataSize, IRType hWordTy) |
| 396 | { |
| 397 | CLG_ASSERT(isFlatIRStmt(st)); |
| 398 | |
| 399 | switch (st->tag) { |
| 400 | case Ist_NoOp: |
| 401 | break; |
| 402 | |
| 403 | case Ist_AbiHint: |
| 404 | /* ABI hints aren't interesting. Ignore. */ |
| 405 | break; |
| 406 | |
| 407 | case Ist_IMark: |
| 408 | /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this |
| 409 | to the host's native pointer type; if that is 32 bits then it |
| 410 | discards the upper 32 bits. If we are cachegrinding on a |
| 411 | 32-bit host then we are also ensured that the guest word size |
| 412 | is 32 bits, due to the assertion in cg_instrument that the |
| 413 | host and guest word sizes must be the same. Hence |
| 414 | st->Ist.IMark.addr will have been derived from a 32-bit guest |
| 415 | code address and truncation of it is safe. I believe this |
| 416 | assignment should be correct for both 32- and 64-bit |
| 417 | machines. */ |
| 418 | *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr); |
| 419 | *instrLen = st->Ist.IMark.len; |
| 420 | break; |
| 421 | |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 422 | case Ist_WrTmp: { |
| 423 | IRExpr* data = st->Ist.WrTmp.data; |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 424 | if (data->tag == Iex_Load) { |
| 425 | IRExpr* aexpr = data->Iex.Load.addr; |
| 426 | CLG_ASSERT( isIRAtom(aexpr) ); |
| 427 | // Note also, endianness info is ignored. I guess that's not |
| 428 | // interesting. |
| 429 | // XXX: repe cmpsb does two loads... the first one is ignored here! |
| 430 | //tl_assert( NULL == *loadAddrExpr ); // XXX: ??? |
| 431 | *loadAddrExpr = aexpr; |
| 432 | *dataSize = sizeofIRType(data->Iex.Load.ty); |
| 433 | } |
| 434 | break; |
| 435 | } |
| 436 | |
| 437 | case Ist_Store: { |
| 438 | IRExpr* data = st->Ist.Store.data; |
| 439 | IRExpr* aexpr = st->Ist.Store.addr; |
| 440 | CLG_ASSERT( isIRAtom(aexpr) ); |
| 441 | if ( NULL == *storeAddrExpr ) { |
| 442 | /* this is a kludge: ignore all except the first store from |
| 443 | an instruction. */ |
| 444 | *storeAddrExpr = aexpr; |
| 445 | *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data)); |
| 446 | } |
| 447 | break; |
| 448 | } |
| 449 | |
| 450 | case Ist_Dirty: { |
| 451 | IRDirty* d = st->Ist.Dirty.details; |
| 452 | if (d->mFx != Ifx_None) { |
| 453 | /* This dirty helper accesses memory. Collect the |
| 454 | details. */ |
| 455 | CLG_ASSERT(d->mAddr != NULL); |
| 456 | CLG_ASSERT(d->mSize != 0); |
| 457 | *dataSize = d->mSize; |
| 458 | if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) |
| 459 | *loadAddrExpr = d->mAddr; |
| 460 | if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) |
| 461 | *storeAddrExpr = d->mAddr; |
| 462 | } else { |
| 463 | CLG_ASSERT(d->mAddr == NULL); |
| 464 | CLG_ASSERT(d->mSize == 0); |
| 465 | } |
| 466 | break; |
| 467 | } |
| 468 | |
| 469 | case Ist_Put: |
| 470 | case Ist_PutI: |
| 471 | case Ist_MFence: |
| 472 | case Ist_Exit: |
| 473 | break; |
| 474 | |
| 475 | default: |
| 476 | VG_(printf)("\n"); |
| 477 | ppIRStmt(st); |
| 478 | VG_(printf)("\n"); |
| 479 | VG_(tool_panic)("Callgrind: unhandled IRStmt"); |
| 480 | } |
| 481 | } |
| 482 | |
| 483 | static |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 484 | void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy) |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 485 | { |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 486 | addStmtToIRSB( bbOut, |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 487 | IRStmt_Store(CLGEndness, |
| 488 | IRExpr_Const(hWordTy == Ity_I32 ? |
| 489 | IRConst_U32( addr ) : |
| 490 | IRConst_U64( addr )), |
| 491 | IRExpr_Const(IRConst_U32(val)) )); |
| 492 | } |
| 493 | |
| 494 | static |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 495 | IRSB* CLG_(instrument)( VgCallbackClosure* closure, |
| 496 | IRSB* bbIn, |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 497 | VexGuestLayout* layout, |
| 498 | VexGuestExtents* vge, |
| 499 | IRType gWordTy, IRType hWordTy ) |
| 500 | { |
| 501 | Int i; |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 502 | IRSB* bbOut; |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 503 | IRStmt* st, *stnext; |
| 504 | Addr instrAddr, origAddr; |
| 505 | UInt instrLen = 0, dataSize; |
| 506 | UInt instrCount, costOffset; |
| 507 | IRExpr *loadAddrExpr, *storeAddrExpr; |
| 508 | |
| 509 | BB* bb; |
| 510 | |
| 511 | IRDirty* di; |
| 512 | IRExpr *arg1, **argv; |
| 513 | |
| 514 | Bool bb_seen_before = False; |
| 515 | UInt cJumps = 0, cJumpsCorrected; |
| 516 | Bool beforeIBoundary, instrIssued; |
| 517 | |
| 518 | if (gWordTy != hWordTy) { |
| 519 | /* We don't currently support this case. */ |
| 520 | VG_(tool_panic)("host/guest word size mismatch"); |
| 521 | } |
| 522 | |
| 523 | // No instrumentation if it is switched off |
| 524 | if (! CLG_(instrument_state)) { |
| 525 | CLG_DEBUG(5, "instrument(BB %p) [Instrumentation OFF]\n", |
| 526 | (Addr)closure->readdr); |
| 527 | return bbIn; |
| 528 | } |
| 529 | |
| 530 | CLG_DEBUG(3, "+ instrument(BB %p)\n", (Addr)closure->readdr); |
| 531 | |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 532 | /* Set up SB for instrumented IR */ |
| 533 | bbOut = deepCopyIRSBExceptStmts(bbIn); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 534 | |
| 535 | // Copy verbatim any IR preamble preceding the first IMark |
| 536 | i = 0; |
| 537 | while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) { |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 538 | addStmtToIRSB( bbOut, bbIn->stmts[i] ); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 539 | i++; |
| 540 | } |
| 541 | |
| 542 | // Get the first statement, and origAddr from it |
| 543 | CLG_ASSERT(bbIn->stmts_used > 0); |
| 544 | st = bbIn->stmts[i]; |
| 545 | CLG_ASSERT(Ist_IMark == st->tag); |
| 546 | instrAddr = origAddr = (Addr)st->Ist.IMark.addr; |
| 547 | CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow |
| 548 | |
| 549 | /* Get BB (creating if necessary). |
| 550 | * JS: The hash table is keyed with orig_addr_noredir -- important! |
| 551 | * JW: Why? If it is because of different chasing of the redirection, |
| 552 | * this is not needed, as chasing is switched off in callgrind |
| 553 | */ |
| 554 | bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before); |
| 555 | //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before); |
| 556 | |
| 557 | /* |
| 558 | * Precondition: |
| 559 | * - jmps_passed has number of cond.jumps passed in last executed BB |
| 560 | * - current_bbcc has a pointer to the BBCC of the last executed BB |
| 561 | * Thus, if bbcc_jmpkind is != -1 (JmpNone), |
| 562 | * current_bbcc->bb->jmp_addr |
| 563 | * gives the address of the jump source. |
| 564 | * |
| 565 | * The BBCC setup does 2 things: |
| 566 | * - trace call: |
| 567 | * * Unwind own call stack, i.e sync our ESP with real ESP |
| 568 | * This is for ESP manipulation (longjmps, C++ exec handling) and RET |
| 569 | * * For CALLs or JMPs crossing objects, record call arg + |
| 570 | * push are on own call stack |
| 571 | * |
| 572 | * - prepare for cache log functions: |
| 573 | * Set current_bbcc to BBCC that gets the costs for this BB execution |
| 574 | * attached |
| 575 | */ |
| 576 | |
| 577 | // helper call to setup_bbcc, with pointer to basic block info struct as argument |
| 578 | arg1 = mkIRExpr_HWord( (HWord)bb ); |
| 579 | argv = mkIRExprVec_1(arg1); |
sewardj | bb76070 | 2006-04-02 15:53:59 +0000 | [diff] [blame] | 580 | di = unsafeIRDirty_0_N( 1, "setup_bbcc", |
| 581 | VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ), |
| 582 | argv); |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 583 | addStmtToIRSB( bbOut, IRStmt_Dirty(di) ); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 584 | |
| 585 | instrCount = 0; |
| 586 | costOffset = 0; |
| 587 | |
| 588 | // loop for each host instruction (starting from 'i') |
| 589 | do { |
| 590 | |
| 591 | // We should be at an IMark statement |
| 592 | CLG_ASSERT(Ist_IMark == st->tag); |
| 593 | |
| 594 | // Reset stuff for this original instruction |
| 595 | loadAddrExpr = storeAddrExpr = NULL; |
| 596 | instrIssued = False; |
| 597 | dataSize = 0; |
| 598 | |
| 599 | // Process all the statements for this original instruction (ie. until |
| 600 | // the next IMark statement, or the end of the block) |
| 601 | do { |
| 602 | i++; |
| 603 | stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL ); |
| 604 | beforeIBoundary = !stnext || (Ist_IMark == stnext->tag); |
| 605 | collectStatementInfo(bbIn->tyenv, bbOut, st, &instrAddr, &instrLen, |
| 606 | &loadAddrExpr, &storeAddrExpr, &dataSize, hWordTy); |
| 607 | |
| 608 | // instrument a simulator call before conditional jumps |
| 609 | if (st->tag == Ist_Exit) { |
| 610 | // Nb: instrLen will be zero if Vex failed to decode it. |
| 611 | // Also Client requests can appear to be very large (eg. 18 |
| 612 | // bytes on x86) because they are really multiple instructions. |
| 613 | CLG_ASSERT( 0 == instrLen || |
| 614 | bbIn->jumpkind == Ijk_ClientReq || |
| 615 | (instrLen >= VG_MIN_INSTR_SZB && |
| 616 | instrLen <= VG_MAX_INSTR_SZB) ); |
| 617 | |
| 618 | // Add instrumentation before this statement |
| 619 | endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before, |
| 620 | instrAddr - origAddr, instrLen, dataSize, &costOffset, |
| 621 | instrIssued, loadAddrExpr, storeAddrExpr); |
| 622 | |
| 623 | // prepare for a possible further simcall in same host instr |
| 624 | loadAddrExpr = storeAddrExpr = NULL; |
| 625 | instrIssued = True; |
| 626 | |
| 627 | if (!bb_seen_before) { |
| 628 | bb->jmp[cJumps].instr = instrCount; |
| 629 | bb->jmp[cJumps].skip = False; |
| 630 | } |
| 631 | |
| 632 | /* Update global variable jmps_passed (this is before the jump!) |
| 633 | * A correction is needed if VEX inverted the last jump condition |
| 634 | */ |
| 635 | cJumpsCorrected = cJumps; |
| 636 | if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++; |
| 637 | addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed, |
| 638 | cJumpsCorrected, hWordTy); |
| 639 | |
| 640 | cJumps++; |
| 641 | } |
| 642 | |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 643 | addStmtToIRSB( bbOut, st ); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 644 | st = stnext; |
| 645 | } |
| 646 | while (!beforeIBoundary); |
| 647 | |
| 648 | // Add instrumentation for this original instruction. |
| 649 | if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr !=0)) |
| 650 | endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before, |
| 651 | instrAddr - origAddr, instrLen, dataSize, &costOffset, |
| 652 | instrIssued, loadAddrExpr, storeAddrExpr); |
| 653 | |
| 654 | instrCount++; |
| 655 | } |
| 656 | while (st); |
| 657 | |
| 658 | /* Always update global variable jmps_passed (at end of BB) |
| 659 | * A correction is needed if VEX inverted the last jump condition |
| 660 | */ |
| 661 | cJumpsCorrected = cJumps; |
| 662 | if (bb->cjmp_inverted) cJumpsCorrected--; |
| 663 | addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed, |
| 664 | cJumpsCorrected, hWordTy); |
| 665 | |
| 666 | /* This stores the instr of the call/ret at BB end */ |
| 667 | bb->jmp[cJumps].instr = instrCount-1; |
| 668 | |
| 669 | CLG_ASSERT(bb->cjmp_count == cJumps); |
| 670 | CLG_ASSERT(bb->instr_count == instrCount); |
| 671 | |
| 672 | instrAddr += instrLen; |
| 673 | if (bb_seen_before) { |
| 674 | CLG_ASSERT(bb->instr_len == instrAddr - origAddr); |
| 675 | CLG_ASSERT(bb->cost_count == costOffset); |
| 676 | CLG_ASSERT(bb->jmpkind == bbIn->jumpkind); |
| 677 | } |
| 678 | else { |
| 679 | bb->instr_len = instrAddr - origAddr; |
| 680 | bb->cost_count = costOffset; |
| 681 | bb->jmpkind = bbIn->jumpkind; |
| 682 | } |
| 683 | |
| 684 | CLG_DEBUG(3, "- instrument(BB %p): byteLen %u, CJumps %u, CostLen %u\n", |
| 685 | origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count); |
| 686 | if (cJumps>0) { |
| 687 | CLG_DEBUG(3, " [ "); |
| 688 | for (i=0;i<cJumps;i++) |
| 689 | CLG_DEBUG(3, "%d ", bb->jmp[i].instr); |
| 690 | CLG_DEBUG(3, "], last inverted: %s \n", bb->cjmp_inverted ? "yes":"no"); |
| 691 | } |
| 692 | |
| 693 | return bbOut; |
| 694 | } |
| 695 | |
| 696 | /*--------------------------------------------------------------------*/ |
| 697 | /*--- Discarding BB info ---*/ |
| 698 | /*--------------------------------------------------------------------*/ |
| 699 | |
| 700 | // Called when a translation is removed from the translation cache for |
| 701 | // any reason at all: to free up space, because the guest code was |
| 702 | // unmapped or modified, or for any arbitrary reason. |
| 703 | static |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 704 | void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge ) |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 705 | { |
| 706 | Addr orig_addr = (Addr)orig_addr64; |
| 707 | |
| 708 | tl_assert(vge.n_used > 0); |
| 709 | |
| 710 | if (0) |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 711 | VG_(printf)( "discard_superblock_info: %p, %p, %llu\n", |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 712 | (void*)(Addr)orig_addr, |
| 713 | (void*)(Addr)vge.base[0], (ULong)vge.len[0]); |
| 714 | |
| 715 | // Get BB info, remove from table, free BB info. Simple! Note that we |
| 716 | // use orig_addr, not the first instruction address in vge. |
| 717 | CLG_(delete_bb)(orig_addr); |
| 718 | } |
| 719 | |
| 720 | |
| 721 | /*------------------------------------------------------------*/ |
| 722 | /*--- CLG_(fini)() and related function ---*/ |
| 723 | /*------------------------------------------------------------*/ |
| 724 | |
| 725 | |
| 726 | |
| 727 | static void zero_thread_cost(thread_info* t) |
| 728 | { |
| 729 | Int i; |
| 730 | |
| 731 | for(i = 0; i < CLG_(current_call_stack).sp; i++) { |
| 732 | if (!CLG_(current_call_stack).entry[i].jcc) continue; |
| 733 | |
| 734 | /* reset call counters to current for active calls */ |
| 735 | CLG_(copy_cost)( CLG_(sets).full, |
| 736 | CLG_(current_call_stack).entry[i].enter_cost, |
| 737 | CLG_(current_state).cost ); |
| 738 | } |
| 739 | |
| 740 | CLG_(forall_bbccs)(CLG_(zero_bbcc)); |
| 741 | |
| 742 | /* set counter for last dump */ |
| 743 | CLG_(copy_cost)( CLG_(sets).full, |
| 744 | t->lastdump_cost, CLG_(current_state).cost ); |
| 745 | } |
| 746 | |
| 747 | void CLG_(zero_all_cost)(Bool only_current_thread) |
| 748 | { |
| 749 | if (VG_(clo_verbosity) > 1) |
| 750 | VG_(message)(Vg_DebugMsg, " Zeroing costs..."); |
| 751 | |
| 752 | if (only_current_thread) |
| 753 | zero_thread_cost(CLG_(get_current_thread)()); |
| 754 | else |
| 755 | CLG_(forall_threads)(zero_thread_cost); |
| 756 | |
| 757 | if (VG_(clo_verbosity) > 1) |
| 758 | VG_(message)(Vg_DebugMsg, " ...done"); |
| 759 | } |
| 760 | |
| 761 | static |
| 762 | void unwind_thread(thread_info* t) |
| 763 | { |
| 764 | /* unwind signal handlers */ |
| 765 | while(CLG_(current_state).sig !=0) |
| 766 | CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig); |
| 767 | |
| 768 | /* unwind regular call stack */ |
| 769 | while(CLG_(current_call_stack).sp>0) |
| 770 | CLG_(pop_call_stack)(); |
weidendo | f3e0b49 | 2006-09-10 22:34:20 +0000 | [diff] [blame] | 771 | |
| 772 | /* reset context and function stack for context generation */ |
| 773 | CLG_(init_exec_state)( &CLG_(current_state) ); |
| 774 | CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom; |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 775 | } |
| 776 | |
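/* Oops, this can go wrong: the prototype below is re-declared locally
 * (presumably because no header included here provides it), so it has to
 * be kept in sync by hand with the real definition. */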
| 778 | extern void VG_(discard_translations) ( Addr64 start, ULong range ); |
| 779 | |
| 780 | void CLG_(set_instrument_state)(Char* reason, Bool state) |
| 781 | { |
| 782 | if (CLG_(instrument_state) == state) { |
| 783 | CLG_DEBUG(2, "%s: instrumentation already %s\n", |
| 784 | reason, state ? "ON" : "OFF"); |
| 785 | return; |
| 786 | } |
| 787 | CLG_(instrument_state) = state; |
| 788 | CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n", |
| 789 | reason, state ? "ON" : "OFF"); |
| 790 | |
| 791 | VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl); |
| 792 | |
| 793 | /* reset internal state: call stacks, simulator */ |
| 794 | CLG_(forall_threads)(unwind_thread); |
| 795 | (*CLG_(cachesim).clear)(); |
| 796 | if (0) |
| 797 | CLG_(forall_threads)(zero_thread_cost); |
| 798 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 799 | if (VG_(clo_verbosity) > 1) |
weidendo | f3e0b49 | 2006-09-10 22:34:20 +0000 | [diff] [blame] | 800 | VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s", |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 801 | reason, state ? "ON" : "OFF"); |
| 802 | } |
| 803 | |
| 804 | |
| 805 | static |
| 806 | Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret) |
| 807 | { |
| 808 | if (!VG_IS_TOOL_USERREQ('C','T',args[0])) |
| 809 | return False; |
| 810 | |
| 811 | switch(args[0]) { |
| 812 | case VG_USERREQ__DUMP_STATS: |
| 813 | CLG_(dump_profile)("Client Request", True); |
| 814 | *ret = 0; /* meaningless */ |
| 815 | break; |
| 816 | |
| 817 | case VG_USERREQ__DUMP_STATS_AT: |
| 818 | { |
| 819 | Char buf[512]; |
weidendo | ca472c5 | 2006-03-31 19:34:51 +0000 | [diff] [blame] | 820 | VG_(sprintf)(buf,"Client Request: %s", args[1]); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 821 | CLG_(dump_profile)(buf, True); |
| 822 | *ret = 0; /* meaningless */ |
| 823 | } |
| 824 | break; |
| 825 | |
| 826 | case VG_USERREQ__ZERO_STATS: |
| 827 | CLG_(zero_all_cost)(True); |
| 828 | *ret = 0; /* meaningless */ |
| 829 | break; |
| 830 | |
| 831 | case VG_USERREQ__TOGGLE_COLLECT: |
| 832 | CLG_(current_state).collect = !CLG_(current_state).collect; |
| 833 | CLG_DEBUG(2, "Client Request: toggled collection state to %s\n", |
| 834 | CLG_(current_state).collect ? "ON" : "OFF"); |
| 835 | *ret = 0; /* meaningless */ |
| 836 | break; |
| 837 | |
| 838 | case VG_USERREQ__START_INSTRUMENTATION: |
| 839 | CLG_(set_instrument_state)("Client Request", True); |
| 840 | *ret = 0; /* meaningless */ |
| 841 | break; |
| 842 | |
| 843 | case VG_USERREQ__STOP_INSTRUMENTATION: |
| 844 | CLG_(set_instrument_state)("Client Request", False); |
| 845 | *ret = 0; /* meaningless */ |
| 846 | break; |
| 847 | |
| 848 | default: |
| 849 | return False; |
| 850 | } |
| 851 | |
| 852 | return True; |
| 853 | } |
| 854 | |
| 855 | |
| 856 | /* Syscall Timing */ |
| 857 | |
| 858 | /* struct timeval syscalltime[VG_N_THREADS]; */ |
| 859 | #if CLG_MICROSYSTIME |
| 860 | #include <sys/time.h> |
| 861 | #include <sys/syscall.h> |
| 862 | extern Int VG_(do_syscall) ( UInt, ... ); |
| 863 | |
| 864 | ULong syscalltime[VG_N_THREADS]; |
| 865 | #else |
| 866 | UInt syscalltime[VG_N_THREADS]; |
| 867 | #endif |
| 868 | |
| 869 | static |
| 870 | void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno) |
| 871 | { |
| 872 | if (CLG_(clo).collect_systime) { |
| 873 | #if CLG_MICROSYSTIME |
| 874 | struct vki_timeval tv_now; |
| 875 | VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL); |
| 876 | syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec; |
| 877 | #else |
| 878 | syscalltime[tid] = VG_(read_millisecond_timer)(); |
| 879 | #endif |
| 880 | } |
| 881 | } |
| 882 | |
| 883 | static |
| 884 | void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno, SysRes res) |
| 885 | { |
weidendo | ae0bb6f | 2007-02-16 13:12:43 +0000 | [diff] [blame] | 886 | if (CLG_(clo).collect_systime && |
| 887 | CLG_(current_state).bbcc) { |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 888 | Int o = CLG_(sets).off_full_systime; |
| 889 | #if CLG_MICROSYSTIME |
| 890 | struct vki_timeval tv_now; |
| 891 | ULong diff; |
| 892 | |
| 893 | VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL); |
| 894 | diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid]; |
| 895 | #else |
| 896 | UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid]; |
| 897 | #endif |
| 898 | |
| 899 | CLG_DEBUG(0," Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff); |
| 900 | |
| 901 | if (o<0) return; |
| 902 | |
| 903 | CLG_(current_state).cost[o] ++; |
| 904 | CLG_(current_state).cost[o+1] += diff; |
| 905 | if (!CLG_(current_state).bbcc->skipped) |
| 906 | CLG_(init_cost_lz)(CLG_(sets).full, |
| 907 | &(CLG_(current_state).bbcc->skipped)); |
| 908 | CLG_(current_state).bbcc->skipped[o] ++; |
| 909 | CLG_(current_state).bbcc->skipped[o+1] += diff; |
| 910 | } |
| 911 | } |
| 912 | |
| 913 | static |
| 914 | void finish(void) |
| 915 | { |
| 916 | char buf[RESULTS_BUF_LEN]; |
| 917 | |
| 918 | CLG_DEBUG(0, "finish()\n"); |
| 919 | |
| 920 | (*CLG_(cachesim).finish)(); |
| 921 | |
| 922 | /* pop all remaining items from CallStack for correct sum |
| 923 | */ |
| 924 | CLG_(forall_threads)(unwind_thread); |
sewardj | e45a799 | 2006-10-17 02:24:18 +0000 | [diff] [blame] | 925 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 926 | CLG_(dump_profile)(0, False); |
sewardj | e45a799 | 2006-10-17 02:24:18 +0000 | [diff] [blame] | 927 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 928 | CLG_(finish_command)(); |
sewardj | e45a799 | 2006-10-17 02:24:18 +0000 | [diff] [blame] | 929 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 930 | if (VG_(clo_verbosity) == 0) return; |
| 931 | |
| 932 | /* Hash table stats */ |
| 933 | if (VG_(clo_verbosity) > 1) { |
| 934 | int BB_lookups = |
| 935 | CLG_(stat).full_debug_BBs + |
| 936 | CLG_(stat).fn_name_debug_BBs + |
| 937 | CLG_(stat).file_line_debug_BBs + |
| 938 | CLG_(stat).no_debug_BBs; |
| 939 | |
| 940 | VG_(message)(Vg_DebugMsg, ""); |
| 941 | VG_(message)(Vg_DebugMsg, "Distinct objects: %d", |
| 942 | CLG_(stat).distinct_objs); |
| 943 | VG_(message)(Vg_DebugMsg, "Distinct files: %d", |
| 944 | CLG_(stat).distinct_files); |
| 945 | VG_(message)(Vg_DebugMsg, "Distinct fns: %d", |
| 946 | CLG_(stat).distinct_fns); |
| 947 | VG_(message)(Vg_DebugMsg, "Distinct contexts:%d", |
| 948 | CLG_(stat).distinct_contexts); |
| 949 | VG_(message)(Vg_DebugMsg, "Distinct BBs: %d", |
| 950 | CLG_(stat).distinct_bbs); |
| 951 | VG_(message)(Vg_DebugMsg, "Cost entries: %d (Chunks %d)", |
| 952 | CLG_(costarray_entries), CLG_(costarray_chunks)); |
| 953 | VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d", |
| 954 | CLG_(stat).distinct_bbccs); |
| 955 | VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d", |
| 956 | CLG_(stat).distinct_jccs); |
| 957 | VG_(message)(Vg_DebugMsg, "Distinct skips: %d", |
| 958 | CLG_(stat).distinct_skips); |
| 959 | VG_(message)(Vg_DebugMsg, "BB lookups: %d", |
| 960 | BB_lookups); |
| 961 | if (BB_lookups>0) { |
| 962 | VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)", |
| 963 | CLG_(stat).full_debug_BBs * 100 / BB_lookups, |
| 964 | CLG_(stat).full_debug_BBs); |
| 965 | VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)", |
| 966 | CLG_(stat).file_line_debug_BBs * 100 / BB_lookups, |
| 967 | CLG_(stat).file_line_debug_BBs); |
| 968 | VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)", |
| 969 | CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups, |
| 970 | CLG_(stat).fn_name_debug_BBs); |
| 971 | VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)", |
| 972 | CLG_(stat).no_debug_BBs * 100 / BB_lookups, |
| 973 | CLG_(stat).no_debug_BBs); |
| 974 | } |
| 975 | VG_(message)(Vg_DebugMsg, "BBCC Clones: %d", |
| 976 | CLG_(stat).bbcc_clones); |
| 977 | VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", |
| 978 | CLG_(stat).bb_retranslations); |
| 979 | VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", |
| 980 | CLG_(stat).distinct_instrs); |
| 981 | VG_(message)(Vg_DebugMsg, ""); |
| 982 | |
| 983 | VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d", |
| 984 | CLG_(stat).cxt_lru_misses); |
| 985 | VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d", |
| 986 | CLG_(stat).bbcc_lru_misses); |
| 987 | VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d", |
| 988 | CLG_(stat).jcc_lru_misses); |
| 989 | VG_(message)(Vg_DebugMsg, "BBs Executed: %llu", |
| 990 | CLG_(stat).bb_executions); |
| 991 | VG_(message)(Vg_DebugMsg, "Calls: %llu", |
| 992 | CLG_(stat).call_counter); |
| 993 | VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu", |
| 994 | CLG_(stat).jcnd_counter); |
| 995 | VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu", |
| 996 | CLG_(stat).jump_counter); |
| 997 | VG_(message)(Vg_DebugMsg, "Recursive calls: %llu", |
| 998 | CLG_(stat).rec_call_counter); |
| 999 | VG_(message)(Vg_DebugMsg, "Returns: %llu", |
| 1000 | CLG_(stat).ret_counter); |
| 1001 | |
| 1002 | VG_(message)(Vg_DebugMsg, ""); |
| 1003 | } |
| 1004 | |
| 1005 | CLG_(sprint_eventmapping)(buf, CLG_(dumpmap)); |
| 1006 | VG_(message)(Vg_UserMsg, "Events : %s", buf); |
| 1007 | CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost)); |
| 1008 | VG_(message)(Vg_UserMsg, "Collected : %s", buf); |
| 1009 | VG_(message)(Vg_UserMsg, ""); |
| 1010 | |
| 1011 | // if (CLG_(clo).simulate_cache) |
| 1012 | (*CLG_(cachesim).printstat)(); |
| 1013 | } |
| 1014 | |
| 1015 | |
| 1016 | void CLG_(fini)(Int exitcode) |
| 1017 | { |
| 1018 | finish(); |
| 1019 | } |
| 1020 | |
| 1021 | |
| 1022 | /*--------------------------------------------------------------------*/ |
| 1023 | /*--- Setup ---*/ |
| 1024 | /*--------------------------------------------------------------------*/ |
| 1025 | |
njn | 3e32c87 | 2006-12-24 07:51:17 +0000 | [diff] [blame] | 1026 | static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done ) |
sewardj | 9756181 | 2006-12-23 01:21:12 +0000 | [diff] [blame] | 1027 | { |
weidendo | 134657c | 2006-12-23 23:11:20 +0000 | [diff] [blame] | 1028 | static ULong last_blocks_done = 0; |
| 1029 | |
sewardj | 9756181 | 2006-12-23 01:21:12 +0000 | [diff] [blame] | 1030 | if (0) |
njn | 3e32c87 | 2006-12-24 07:51:17 +0000 | [diff] [blame] | 1031 | VG_(printf)("%d R %llu\n", (Int)tid, blocks_done); |
weidendo | 134657c | 2006-12-23 23:11:20 +0000 | [diff] [blame] | 1032 | |
| 1033 | /* throttle calls to CLG_(run_thread) by number of BBs executed */ |
| 1034 | if (blocks_done - last_blocks_done < 5000) return; |
| 1035 | last_blocks_done = blocks_done; |
| 1036 | |
| 1037 | CLG_(run_thread)( tid ); |
sewardj | 9756181 | 2006-12-23 01:21:12 +0000 | [diff] [blame] | 1038 | } |
| 1039 | |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1040 | static |
| 1041 | void CLG_(post_clo_init)(void) |
| 1042 | { |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1043 | VG_(clo_vex_control).iropt_unroll_thresh = 0; |
| 1044 | VG_(clo_vex_control).guest_chase_thresh = 0; |
| 1045 | |
| 1046 | CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No"); |
| 1047 | CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers); |
| 1048 | CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions); |
| 1049 | |
| 1050 | if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) { |
| 1051 | VG_(message)(Vg_UserMsg, "Using source line as position."); |
| 1052 | CLG_(clo).dump_line = True; |
| 1053 | } |
| 1054 | |
weidendo | 4ce5e79 | 2006-09-20 21:29:39 +0000 | [diff] [blame] | 1055 | CLG_(init_dumps)(); |
| 1056 | CLG_(init_command)(); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1057 | |
| 1058 | (*CLG_(cachesim).post_clo_init)(); |
| 1059 | |
| 1060 | CLG_(init_eventsets)(0); |
| 1061 | CLG_(init_statistics)(& CLG_(stat)); |
| 1062 | CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) ); |
| 1063 | |
| 1064 | /* initialize hash tables */ |
| 1065 | CLG_(init_obj_table)(); |
| 1066 | CLG_(init_cxt_table)(); |
| 1067 | CLG_(init_bb_hash)(); |
| 1068 | |
| 1069 | CLG_(init_threads)(); |
| 1070 | CLG_(run_thread)(1); |
| 1071 | |
| 1072 | CLG_(instrument_state) = CLG_(clo).instrument_atstart; |
| 1073 | |
weidendo | ca472c5 | 2006-03-31 19:34:51 +0000 | [diff] [blame] | 1074 | if (VG_(clo_verbosity > 0)) { |
weidendo | ca472c5 | 2006-03-31 19:34:51 +0000 | [diff] [blame] | 1075 | VG_(message)(Vg_UserMsg, |
| 1076 | "For interactive control, run 'callgrind_control -h'."); |
| 1077 | } |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1078 | } |
| 1079 | |
| 1080 | static |
| 1081 | void CLG_(pre_clo_init)(void) |
| 1082 | { |
| 1083 | VG_(details_name) ("Callgrind"); |
weidendo | ca472c5 | 2006-03-31 19:34:51 +0000 | [diff] [blame] | 1084 | VG_(details_version) (NULL); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1085 | VG_(details_description) ("a call-graph generating cache profiler"); |
sewardj | 9ebd6e0 | 2007-01-08 06:01:59 +0000 | [diff] [blame] | 1086 | VG_(details_copyright_author)("Copyright (C) 2002-2007, and GNU GPL'd, " |
weidendo | ca472c5 | 2006-03-31 19:34:51 +0000 | [diff] [blame] | 1087 | "by Josef Weidendorfer et al."); |
weidendo | db70ed7 | 2006-05-27 15:39:45 +0000 | [diff] [blame] | 1088 | VG_(details_bug_reports_to) (VG_BUGS_TO); |
sewardj | e45a799 | 2006-10-17 02:24:18 +0000 | [diff] [blame] | 1089 | VG_(details_avg_translation_sizeB) ( 500 ); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1090 | |
| 1091 | VG_(basic_tool_funcs) (CLG_(post_clo_init), |
| 1092 | CLG_(instrument), |
| 1093 | CLG_(fini)); |
| 1094 | |
sewardj | 0b9d74a | 2006-12-24 02:24:11 +0000 | [diff] [blame] | 1095 | VG_(needs_superblock_discards)(clg_discard_superblock_info); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1096 | |
| 1097 | |
| 1098 | VG_(needs_command_line_options)(CLG_(process_cmd_line_option), |
| 1099 | CLG_(print_usage), |
| 1100 | CLG_(print_debug_usage)); |
| 1101 | |
| 1102 | VG_(needs_client_requests)(CLG_(handle_client_request)); |
| 1103 | VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime), |
| 1104 | CLG_(post_syscalltime)); |
| 1105 | |
njn | 3e32c87 | 2006-12-24 07:51:17 +0000 | [diff] [blame] | 1106 | VG_(track_start_client_code) ( & clg_start_client_code_callback ); |
| 1107 | VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) ); |
| 1108 | VG_(track_post_deliver_signal)( & CLG_(post_signal) ); |
weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1109 | |
| 1110 | CLG_(set_clo_defaults)(); |
| 1111 | } |
| 1112 | |
| 1113 | VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init)) |
| 1114 | |
| 1115 | /*--------------------------------------------------------------------*/ |
| 1116 | /*--- end main.c ---*/ |
| 1117 | /*--------------------------------------------------------------------*/ |