weidendo | a17f2a3 | 2006-03-20 10:27:30 +0000 | [diff] [blame] | 1 | /*--------------------------------------------------------------------*/ |
| 2 | /*--- Callgrind ---*/ |
| 3 | /*--- bbcc.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Callgrind, a Valgrind tool for call tracing. |
| 8 | |
| 9 | Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) |
| 10 | |
| 11 | This program is free software; you can redistribute it and/or |
| 12 | modify it under the terms of the GNU General Public License as |
| 13 | published by the Free Software Foundation; either version 2 of the |
| 14 | License, or (at your option) any later version. |
| 15 | |
| 16 | This program is distributed in the hope that it will be useful, but |
| 17 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 19 | General Public License for more details. |
| 20 | |
| 21 | You should have received a copy of the GNU General Public License |
| 22 | along with this program; if not, write to the Free Software |
| 23 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 24 | 02111-1307, USA. |
| 25 | |
| 26 | The GNU General Public License is contained in the file COPYING. |
| 27 | */ |
| 28 | |
| 29 | #include "global.h" |
| 30 | #include "costs.h" |
| 31 | |
| 32 | #include <pub_tool_threadstate.h> |
| 33 | |
| 34 | /*------------------------------------------------------------*/ |
| 35 | /*--- BBCC operations ---*/ |
| 36 | /*------------------------------------------------------------*/ |
| 37 | |
| 38 | #define N_BBCC_INITIAL_ENTRIES 10437 |
| 39 | |
| 40 | /* BBCC table (key is BB/Context), per thread, resizable */ |
| 41 | bbcc_hash current_bbccs; |
| 42 | |
| 43 | void CLG_(init_bbcc_hash)(bbcc_hash* bbccs) |
| 44 | { |
| 45 | Int i; |
| 46 | |
| 47 | CLG_ASSERT(bbccs != 0); |
| 48 | |
| 49 | bbccs->size = N_BBCC_INITIAL_ENTRIES; |
| 50 | bbccs->entries = 0; |
| 51 | bbccs->table = (BBCC**) CLG_MALLOC(bbccs->size * sizeof(BBCC*)); |
| 52 | |
| 53 | for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL; |
| 54 | } |
| 55 | |
| 56 | void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst) |
| 57 | { |
| 58 | CLG_ASSERT(dst != 0); |
| 59 | |
| 60 | dst->size = current_bbccs.size; |
| 61 | dst->entries = current_bbccs.entries; |
| 62 | dst->table = current_bbccs.table; |
| 63 | } |
| 64 | |
| 65 | bbcc_hash* CLG_(get_current_bbcc_hash)() |
| 66 | { |
| 67 | return ¤t_bbccs; |
| 68 | } |
| 69 | |
| 70 | void CLG_(set_current_bbcc_hash)(bbcc_hash* h) |
| 71 | { |
| 72 | CLG_ASSERT(h != 0); |
| 73 | |
| 74 | current_bbccs.size = h->size; |
| 75 | current_bbccs.entries = h->entries; |
| 76 | current_bbccs.table = h->table; |
| 77 | } |
| 78 | |
| 79 | /* |
| 80 | * Zero all costs of a BBCC |
| 81 | */ |
| 82 | void CLG_(zero_bbcc)(BBCC* bbcc) |
| 83 | { |
| 84 | Int i; |
| 85 | jCC* jcc; |
| 86 | |
| 87 | CLG_ASSERT(bbcc->cxt != 0); |
| 88 | CLG_DEBUG(1, " zero_bbcc: BB %p, Cxt %d " |
| 89 | "(fn '%s', rec %d)\n", |
| 90 | bb_addr(bbcc->bb), |
| 91 | bbcc->cxt->base_number + bbcc->rec_index, |
| 92 | bbcc->cxt->fn[0]->name, |
| 93 | bbcc->rec_index); |
| 94 | |
| 95 | if ((bbcc->ecounter_sum ==0) && |
| 96 | (bbcc->ret_counter ==0)) return; |
| 97 | |
| 98 | for(i=0;i<bbcc->bb->cost_count;i++) |
| 99 | bbcc->cost[i] = 0; |
| 100 | for(i=0;i <= bbcc->bb->cjmp_count;i++) { |
| 101 | bbcc->jmp[i].ecounter = 0; |
| 102 | for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from) |
| 103 | CLG_(init_cost)( CLG_(sets).full, jcc->cost ); |
| 104 | } |
| 105 | bbcc->ecounter_sum = 0; |
| 106 | bbcc->ret_counter = 0; |
| 107 | } |
| 108 | |
| 109 | |
| 110 | |
| 111 | void CLG_(forall_bbccs)(void (*func)(BBCC*)) |
| 112 | { |
| 113 | BBCC *bbcc, *bbcc2; |
| 114 | int i, j; |
| 115 | |
| 116 | for (i = 0; i < current_bbccs.size; i++) { |
| 117 | if ((bbcc=current_bbccs.table[i]) == NULL) continue; |
| 118 | while (bbcc) { |
| 119 | /* every bbcc should have a rec_array */ |
| 120 | CLG_ASSERT(bbcc->rec_array != 0); |
| 121 | |
| 122 | for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) { |
| 123 | if ((bbcc2 = bbcc->rec_array[j]) == 0) continue; |
| 124 | |
| 125 | (*func)(bbcc2); |
| 126 | } |
| 127 | bbcc = bbcc->next; |
| 128 | } |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | |
| 133 | /* All BBCCs for recursion level 0 are inserted into a |
| 134 | * thread specific hash table with key |
| 135 | * - address of BB structure (unique, as never freed) |
| 136 | * - current context (includes caller chain) |
| 137 | * BBCCs for other recursion levels are in bbcc->rec_array. |
| 138 | * |
| 139 | * The hash is used in setup_bb(), i.e. to find the cost |
| 140 | * counters to be changed in the execution of a BB. |
| 141 | */ |
| 142 | |
| 143 | static __inline__ |
| 144 | UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size) |
| 145 | { |
| 146 | CLG_ASSERT(bb != 0); |
| 147 | CLG_ASSERT(cxt != 0); |
| 148 | |
| 149 | return ((Addr)bb + (Addr)cxt) % size; |
| 150 | } |
| 151 | |
| 152 | |
| 153 | /* Lookup for a BBCC in hash. |
| 154 | */ |
| 155 | static |
| 156 | BBCC* lookup_bbcc(BB* bb, Context* cxt) |
| 157 | { |
| 158 | BBCC* bbcc = bb->last_bbcc; |
| 159 | UInt idx; |
| 160 | |
| 161 | /* check LRU */ |
| 162 | if (bbcc->cxt == cxt) { |
| 163 | if (!CLG_(clo).separate_threads) { |
| 164 | /* if we don't dump threads separate, tid doesn't have to match */ |
| 165 | return bbcc; |
| 166 | } |
| 167 | if (bbcc->tid == CLG_(current_tid)) return bbcc; |
| 168 | } |
| 169 | |
| 170 | CLG_(stat).bbcc_lru_misses++; |
| 171 | |
| 172 | idx = bbcc_hash_idx(bb, cxt, current_bbccs.size); |
| 173 | bbcc = current_bbccs.table[idx]; |
| 174 | while (bbcc && |
| 175 | (bb != bbcc->bb || |
| 176 | cxt != bbcc->cxt)) { |
| 177 | bbcc = bbcc->next; |
| 178 | } |
| 179 | |
| 180 | CLG_DEBUG(2," lookup_bbcc(BB %p, Cxt %d, fn '%s'): %p (tid %d)\n", |
| 181 | bb_addr(bb), cxt->base_number, cxt->fn[0]->name, |
| 182 | bbcc, bbcc ? bbcc->tid : 0); |
| 183 | |
| 184 | CLG_DEBUGIF(2) |
| 185 | if (bbcc) CLG_(print_bbcc)(-2,bbcc,False); |
| 186 | |
| 187 | return bbcc; |
| 188 | } |
| 189 | |
| 190 | |
| 191 | /* double size of hash table 1 (addr->BBCC) */ |
| 192 | static void resize_bbcc_hash(void) |
| 193 | { |
| 194 | Int i, new_size, conflicts1 = 0, conflicts2 = 0; |
| 195 | BBCC** new_table; |
| 196 | UInt new_idx; |
| 197 | BBCC *curr_BBCC, *next_BBCC; |
| 198 | |
| 199 | new_size = 2*current_bbccs.size+3; |
| 200 | new_table = (BBCC**) CLG_MALLOC(new_size * sizeof(BBCC*)); |
| 201 | |
| 202 | if (!new_table) return; |
| 203 | |
| 204 | for (i = 0; i < new_size; i++) |
| 205 | new_table[i] = NULL; |
| 206 | |
| 207 | for (i = 0; i < current_bbccs.size; i++) { |
| 208 | if (current_bbccs.table[i] == NULL) continue; |
| 209 | |
| 210 | curr_BBCC = current_bbccs.table[i]; |
| 211 | while (NULL != curr_BBCC) { |
| 212 | next_BBCC = curr_BBCC->next; |
| 213 | |
| 214 | new_idx = bbcc_hash_idx(curr_BBCC->bb, |
| 215 | curr_BBCC->cxt, |
| 216 | new_size); |
| 217 | |
| 218 | curr_BBCC->next = new_table[new_idx]; |
| 219 | new_table[new_idx] = curr_BBCC; |
| 220 | if (curr_BBCC->next) { |
| 221 | conflicts1++; |
| 222 | if (curr_BBCC->next->next) |
| 223 | conflicts2++; |
| 224 | } |
| 225 | |
| 226 | curr_BBCC = next_BBCC; |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | VG_(free)(current_bbccs.table); |
| 231 | |
| 232 | |
| 233 | CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n", |
| 234 | current_bbccs.size, new_size, |
| 235 | current_bbccs.entries, conflicts1, conflicts2); |
| 236 | |
| 237 | current_bbccs.size = new_size; |
| 238 | current_bbccs.table = new_table; |
| 239 | CLG_(stat).bbcc_hash_resizes++; |
| 240 | } |
| 241 | |
| 242 | |
| 243 | static __inline |
| 244 | BBCC** new_recursion(int size) |
| 245 | { |
| 246 | BBCC** bbccs; |
| 247 | int i; |
| 248 | |
| 249 | bbccs = (BBCC**) CLG_MALLOC(sizeof(BBCC*) * size); |
| 250 | for(i=0;i<size;i++) |
| 251 | bbccs[i] = 0; |
| 252 | |
| 253 | CLG_DEBUG(3," new_recursion(size %d): %p\n", size, bbccs); |
| 254 | |
| 255 | return bbccs; |
| 256 | } |
| 257 | |
| 258 | |
| 259 | /* |
| 260 | * Allocate a new BBCC |
| 261 | * |
| 262 | * Uninitialized: |
| 263 | * cxt, rec_index, rec_array, next_bbcc, next1, next2 |
| 264 | */ |
| 265 | static __inline__ |
| 266 | BBCC* new_bbcc(BB* bb) |
| 267 | { |
| 268 | BBCC* new; |
| 269 | Int i; |
| 270 | |
| 271 | /* We need cjmp_count+1 JmpData structs: |
| 272 | * the last is for the unconditional jump/call/ret at end of BB |
| 273 | */ |
| 274 | new = (BBCC*)CLG_MALLOC(sizeof(BBCC) + |
| 275 | (bb->cjmp_count+1) * sizeof(JmpData)); |
| 276 | new->bb = bb; |
| 277 | new->tid = CLG_(current_tid); |
| 278 | |
| 279 | new->ret_counter = 0; |
| 280 | new->skipped = 0; |
| 281 | new->cost = CLG_(get_costarray)(bb->cost_count); |
| 282 | for(i=0;i<bb->cost_count;i++) |
| 283 | new->cost[i] = 0; |
| 284 | for(i=0; i<=bb->cjmp_count; i++) { |
| 285 | new->jmp[i].ecounter = 0; |
| 286 | new->jmp[i].jcc_list = 0; |
| 287 | } |
| 288 | new->ecounter_sum = 0; |
| 289 | |
| 290 | /* Init pointer caches (LRU) */ |
| 291 | new->lru_next_bbcc = 0; |
| 292 | new->lru_from_jcc = 0; |
| 293 | new->lru_to_jcc = 0; |
| 294 | |
| 295 | CLG_(stat).distinct_bbccs++; |
| 296 | |
| 297 | CLG_DEBUG(3, " new_bbcc(BB %p): %p (now %d)\n", |
| 298 | bb_addr(bb), new, CLG_(stat).distinct_bbccs); |
| 299 | |
| 300 | return new; |
| 301 | } |
| 302 | |
| 303 | |
| 304 | /** |
| 305 | * Inserts a new BBCC into hashes. |
| 306 | * BBCC specific items must be set as this is used for the hash |
| 307 | * keys: |
| 308 | * fn : current function |
| 309 | * tid : current thread ID |
| 310 | * from : position where current function is called from |
| 311 | * |
| 312 | * Recursion level doesn't need to be set as this is not included |
| 313 | * in the hash key: Only BBCCs with rec level 0 are in hashes. |
| 314 | */ |
| 315 | static |
| 316 | void insert_bbcc_into_hash(BBCC* bbcc) |
| 317 | { |
| 318 | UInt idx; |
| 319 | |
| 320 | CLG_ASSERT(bbcc->cxt != 0); |
| 321 | |
| 322 | CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %p, fn '%s')\n", |
| 323 | bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name); |
| 324 | |
| 325 | /* check fill degree of hash and resize if needed (>90%) */ |
| 326 | current_bbccs.entries++; |
| 327 | if (100 * current_bbccs.entries / current_bbccs.size > 90) |
| 328 | resize_bbcc_hash(); |
| 329 | |
| 330 | idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size); |
| 331 | bbcc->next = current_bbccs.table[idx]; |
| 332 | current_bbccs.table[idx] = bbcc; |
| 333 | |
| 334 | CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n", |
| 335 | current_bbccs.entries); |
| 336 | } |
| 337 | |
| 338 | static Char* mangled_cxt(Context* cxt, int rec_index) |
| 339 | { |
| 340 | static Char mangled[FN_NAME_LEN]; |
| 341 | int i, p; |
| 342 | |
| 343 | if (!cxt) return "(no context)"; |
| 344 | |
| 345 | p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name); |
| 346 | if (rec_index >0) |
| 347 | p += VG_(sprintf)(mangled+p, "'%d", rec_index +1); |
| 348 | for(i=1;i<cxt->size;i++) |
| 349 | p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name); |
| 350 | |
| 351 | return mangled; |
| 352 | } |
| 353 | |
| 354 | |
| 355 | /* Create a new BBCC as a copy of an existing one, |
| 356 | * but with costs set to 0 and jcc chains empty. |
| 357 | * |
| 358 | * This is needed when a BB is executed in another context than |
| 359 | * the one at instrumentation time of the BB. |
| 360 | * |
| 361 | * Use cases: |
| 362 | * rec_index == 0: clone from a BBCC with differing tid/cxt |
| 363 | * and insert into hashes |
| 364 | * rec_index >0 : clone from a BBCC with same tid/cxt and rec_index 0 |
| 365 | * don't insert into hashes |
| 366 | */ |
| 367 | static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index) |
| 368 | { |
| 369 | BBCC* new; |
| 370 | |
| 371 | CLG_DEBUG(3,"+ clone_bbcc(BB %p, rec %d, fn %s)\n", |
| 372 | bb_addr(orig->bb), rec_index, cxt->fn[0]->name); |
| 373 | |
| 374 | new = new_bbcc(orig->bb); |
| 375 | |
| 376 | if (rec_index == 0) { |
| 377 | |
| 378 | /* hash insertion is only allowed if tid or cxt is different */ |
| 379 | CLG_ASSERT((orig->tid != CLG_(current_tid)) || |
| 380 | (orig->cxt != cxt)); |
| 381 | |
| 382 | new->rec_index = 0; |
| 383 | new->cxt = cxt; |
| 384 | new->rec_array = new_recursion(cxt->fn[0]->separate_recursions); |
| 385 | new->rec_array[0] = new; |
| 386 | |
| 387 | insert_bbcc_into_hash(new); |
| 388 | } |
| 389 | else { |
| 390 | if (CLG_(clo).separate_threads) |
| 391 | CLG_ASSERT(orig->tid == CLG_(current_tid)); |
| 392 | |
| 393 | CLG_ASSERT(orig->cxt == cxt); |
| 394 | CLG_ASSERT(orig->rec_array); |
| 395 | CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index); |
| 396 | CLG_ASSERT(orig->rec_array[rec_index] ==0); |
| 397 | |
| 398 | /* new BBCC will only have differing recursion level */ |
| 399 | new->rec_index = rec_index; |
| 400 | new->cxt = cxt; |
| 401 | new->rec_array = orig->rec_array; |
| 402 | new->rec_array[rec_index] = new; |
| 403 | } |
| 404 | |
| 405 | /* update list of BBCCs for same BB */ |
| 406 | new->next_bbcc = orig->bb->bbcc_list; |
| 407 | orig->bb->bbcc_list = new; |
| 408 | |
| 409 | |
| 410 | CLG_DEBUGIF(3) |
| 411 | CLG_(print_bbcc)(-2, new, False); |
| 412 | |
| 413 | CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %p\n" |
| 414 | " orig %s\n" |
| 415 | " new %s\n", |
| 416 | orig, rec_index, bb_addr(orig->bb), |
| 417 | mangled_cxt(orig->cxt, orig->rec_index), |
| 418 | mangled_cxt(new->cxt, new->rec_index)); |
| 419 | |
| 420 | CLG_(stat).bbcc_clones++; |
| 421 | |
| 422 | return new; |
| 423 | }; |
| 424 | |
| 425 | |
| 426 | |
| 427 | /* Get a pointer to the cost centre structure for given basic block |
| 428 | * address. If created, the BBCC is inserted into the BBCC hash. |
| 429 | * Also sets BB_seen_before by reference. |
| 430 | * |
| 431 | */ |
| 432 | BBCC* CLG_(get_bbcc)(BB* bb) |
| 433 | { |
| 434 | BBCC* bbcc; |
| 435 | |
| 436 | CLG_DEBUG(3, "+ get_bbcc(BB %p)\n", bb_addr(bb)); |
| 437 | |
| 438 | bbcc = bb->bbcc_list; |
| 439 | |
| 440 | if (!bbcc) { |
| 441 | bbcc = new_bbcc(bb); |
| 442 | |
| 443 | /* initialize BBCC */ |
| 444 | bbcc->cxt = 0; |
| 445 | bbcc->rec_array = 0; |
| 446 | bbcc->rec_index = 0; |
| 447 | |
| 448 | bbcc->next_bbcc = bb->bbcc_list; |
| 449 | bb->bbcc_list = bbcc; |
| 450 | bb->last_bbcc = bbcc; |
| 451 | |
| 452 | CLG_DEBUGIF(3) |
| 453 | CLG_(print_bbcc)(-2, bbcc, False); |
| 454 | } |
| 455 | |
| 456 | CLG_DEBUG(3, "- get_bbcc(BB %p): BBCC %p\n", |
| 457 | bb_addr(bb), bbcc); |
| 458 | |
| 459 | return bbcc; |
| 460 | } |
| 461 | |
| 462 | |
| 463 | /* Callgrind manages its own call stack for each thread. |
| 464 | * When leaving a function, a underflow can happen when |
| 465 | * Callgrind's tracing was switched on in the middle of |
| 466 | * a run, i.e. when Callgrind was not able to trace the |
| 467 | * call instruction. |
| 468 | * This function tries to reconstruct the original call. |
| 469 | * As we know the return address (the address following |
| 470 | * the CALL instruction), we can detect the function |
| 471 | * we return back to, but the original call site is unknown. |
| 472 | * We suppose a call site at return address - 1. |
| 473 | * (TODO: other heuristic: lookup info of instrumented BBs). |
| 474 | */ |
| 475 | static void handleUnderflow(BB* bb) |
| 476 | { |
| 477 | /* RET at top of call stack */ |
| 478 | BBCC* source_bbcc; |
| 479 | BB* source_bb; |
| 480 | jCC* jcc; |
| 481 | Bool seen_before; |
| 482 | fn_node* caller; |
| 483 | int fn_number, *pactive; |
| 484 | call_entry* call_entry_up; |
| 485 | |
| 486 | CLG_DEBUG(1," Callstack underflow !\n"); |
| 487 | |
| 488 | /* we emulate an old call from the function we return to |
| 489 | * by using (<return address> -1) */ |
| 490 | source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before); |
| 491 | source_bbcc = CLG_(get_bbcc)(source_bb); |
| 492 | |
| 493 | /* seen_before can be true if RET from a signal handler */ |
| 494 | if (!seen_before) { |
| 495 | source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0; |
| 496 | } |
| 497 | else if (CLG_(current_state).collect) |
| 498 | source_bbcc->ecounter_sum++; |
| 499 | |
| 500 | /* Force a new top context, will be set active by push_cxt() */ |
| 501 | CLG_(current_fn_stack).top--; |
| 502 | CLG_(current_state).cxt = 0; |
| 503 | caller = CLG_(get_fn_node)(bb); |
| 504 | CLG_(push_cxt)( caller ); |
| 505 | |
| 506 | if (!seen_before) { |
| 507 | /* set rec array for source BBCC: this is at rec level 1 */ |
| 508 | source_bbcc->rec_array = new_recursion(caller->separate_recursions); |
| 509 | source_bbcc->rec_array[0] = source_bbcc; |
| 510 | |
| 511 | CLG_ASSERT(source_bbcc->cxt == 0); |
| 512 | source_bbcc->cxt = CLG_(current_state).cxt; |
| 513 | insert_bbcc_into_hash(source_bbcc); |
| 514 | } |
| 515 | CLG_ASSERT(CLG_(current_state).bbcc); |
| 516 | |
| 517 | /* correct active counts */ |
| 518 | fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number; |
| 519 | pactive = CLG_(get_fn_entry)(fn_number); |
| 520 | (*pactive)--; |
| 521 | |
| 522 | /* This assertion is not correct for reentrant |
| 523 | * signal handlers */ |
| 524 | /* CLG_ASSERT(*pactive == 0); */ |
| 525 | |
| 526 | CLG_(current_state).nonskipped = 0; /* we didn't skip this function */ |
| 527 | /* back to current context */ |
| 528 | CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] ); |
| 529 | CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc, |
| 530 | (Addr)-1, False); |
| 531 | call_entry_up = |
| 532 | &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]); |
| 533 | jcc = call_entry_up->jcc; |
| 534 | /* assume this call is lasting since last dump or |
| 535 | * for a signal handler since it's call */ |
| 536 | if (CLG_(current_state).sig == 0) |
| 537 | CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost, |
| 538 | CLG_(get_current_thread)()->lastdump_cost ); |
| 539 | else |
| 540 | CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost ); |
| 541 | } |
| 542 | |
| 543 | |
| 544 | /* |
| 545 | * Helper function called at start of each instrumented BB to setup |
| 546 | * pointer to costs for current thread/context/recursion level |
| 547 | */ |
| 548 | |
| 549 | VG_REGPARM(1) |
| 550 | void CLG_(setup_bbcc)(BB* bb) |
| 551 | { |
| 552 | BBCC *bbcc, *last_bbcc; |
| 553 | Bool call_emulation = False, delayed_push = False, skip = False; |
| 554 | Addr sp; |
| 555 | BB* last_bb; |
| 556 | ThreadId tid; |
| 557 | Int jmpkind, passed = 0, csp; |
| 558 | Bool ret_without_call = False; |
| 559 | Int popcount_on_return = 1; |
| 560 | |
| 561 | CLG_DEBUG(3,"+ setup_bbcc(BB %p)\n", bb_addr(bb)); |
| 562 | |
| 563 | /* This is needed because thread switches can not reliable be tracked |
| 564 | * with callback CLG_(run_thread) only: we have otherwise no way to get |
| 565 | * the thread ID after a signal handler returns. |
| 566 | * This could be removed again if that bug is fixed in Valgrind. |
| 567 | * This is in the hot path but hopefully not to costly. |
| 568 | */ |
| 569 | tid = VG_(get_running_tid)(); |
| 570 | #if 1 |
| 571 | CLG_(switch_thread)(tid); |
| 572 | #else |
| 573 | CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid)); |
| 574 | #endif |
| 575 | |
| 576 | sp = VG_(get_SP)(tid); |
| 577 | last_bbcc = CLG_(current_state).bbcc; |
| 578 | last_bb = last_bbcc ? last_bbcc->bb : 0; |
| 579 | |
| 580 | if (last_bb) { |
| 581 | passed = CLG_(current_state).jmps_passed; |
| 582 | if (passed == last_bb->cjmp_count) { |
| 583 | jmpkind = last_bb->jmpkind; |
| 584 | |
| 585 | /* VEX always gives a Boring jump kind also when passed trough */ |
| 586 | if ((jmpkind == Ijk_Boring) && |
| 587 | (last_bb->offset + last_bb->instr_len == bb->offset)) |
| 588 | jmpkind = JmpNone; |
| 589 | } |
| 590 | else |
| 591 | jmpkind = JmpCond; |
| 592 | |
| 593 | /* if we are in a function which is skipped in the call graph, we |
| 594 | * do not increment the exe counter to produce cost (if simulation off), |
| 595 | * which would lead to dumping this BB to be skipped |
| 596 | */ |
| 597 | if (CLG_(current_state).collect && !CLG_(current_state).nonskipped) { |
| 598 | last_bbcc->ecounter_sum++; |
| 599 | last_bbcc->jmp[passed].ecounter++; |
| 600 | if (!CLG_(clo).simulate_cache) { |
| 601 | /* update Ir cost */ |
| 602 | int instr_count = last_bb->jmp[passed].instr+1; |
| 603 | CLG_(current_state).cost[CLG_(sets).off_sim_Ir] += instr_count; |
| 604 | } |
| 605 | } |
| 606 | |
| 607 | CLG_DEBUGIF(4) { |
| 608 | CLG_(print_execstate)(-2, &CLG_(current_state) ); |
| 609 | CLG_(print_bbcc_cost)(-2, last_bbcc); |
| 610 | } |
| 611 | } |
| 612 | else { |
| 613 | jmpkind = JmpNone; |
| 614 | } |
| 615 | |
| 616 | /* Manipulate JmpKind if needed, only using BB specific info */ |
| 617 | |
| 618 | csp = CLG_(current_call_stack).sp; |
| 619 | |
| 620 | /* A return not matching the top call in our callstack is a jump */ |
| 621 | if ( (jmpkind == Ijk_Ret) && (csp >0)) { |
| 622 | Int csp_up = csp-1; |
| 623 | call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]); |
| 624 | |
| 625 | /* We have a real return if |
| 626 | * - the stack pointer (SP) left the current stack frame, or |
| 627 | * - SP has the same value as when reaching the current function |
| 628 | * and the address of this BB is the return address of last call |
| 629 | * (we even allow to leave multiple frames if the SP stays the |
| 630 | * same and we find a matching return address) |
| 631 | * The latter condition is needed because on PPC, SP can stay |
| 632 | * the same over CALL=b(c)l / RET=b(c)lr boundaries |
| 633 | */ |
| 634 | if (sp < top_ce->sp) popcount_on_return = 0; |
| 635 | else if (top_ce->sp == sp) { |
| 636 | while(1) { |
| 637 | if (top_ce->ret_addr == bb_addr(bb)) break; |
| 638 | if (csp_up>0) { |
| 639 | csp_up--; |
| 640 | top_ce = &(CLG_(current_call_stack).entry[csp_up]); |
| 641 | if (top_ce->sp == sp) { |
| 642 | popcount_on_return++; |
| 643 | continue; |
| 644 | } |
| 645 | } |
| 646 | popcount_on_return = 0; |
| 647 | break; |
| 648 | } |
| 649 | } |
| 650 | if (popcount_on_return == 0) { |
| 651 | jmpkind = Ijk_Boring; |
| 652 | ret_without_call = True; |
| 653 | } |
| 654 | } |
| 655 | |
| 656 | /* Should this jump be converted to call or pop/call ? */ |
| 657 | if (( jmpkind != Ijk_Ret) && |
| 658 | ( jmpkind != Ijk_Call) && last_bb) { |
| 659 | |
| 660 | /* We simulate a JMP/Cont to be a CALL if |
| 661 | * - jump is in another ELF object or section kind |
| 662 | * - jump is to first instruction of a function (tail recursion) |
| 663 | */ |
| 664 | if (ret_without_call || |
| 665 | /* This is for detection of optimized tail recursion. |
| 666 | * On PPC, this is only detected as call when going to another |
| 667 | * function. The problem is that on PPC it can go wrong |
| 668 | * more easily (no stack frame setup needed) |
| 669 | */ |
| 670 | #if defined(VGA_ppc32) |
| 671 | (bb->is_entry && (last_bb->fn != bb->fn)) || |
| 672 | #else |
| 673 | bb->is_entry || |
| 674 | #endif |
| 675 | (last_bb->sect_kind != bb->sect_kind) || |
| 676 | (last_bb->obj->number != bb->obj->number)) { |
| 677 | |
| 678 | CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n", |
| 679 | last_bb->fn->name, last_bb->obj->name, |
| 680 | bb->fn->name, bb->obj->name, |
| 681 | ret_without_call?" (RET w/o CALL)":""); |
| 682 | |
| 683 | if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) { |
| 684 | |
| 685 | call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]); |
| 686 | |
| 687 | if (top_ce->jcc) { |
| 688 | |
| 689 | CLG_DEBUG(1," Pop on Jump!\n"); |
| 690 | |
| 691 | /* change source for delayed push */ |
| 692 | CLG_(current_state).bbcc = top_ce->jcc->from; |
| 693 | sp = top_ce->sp; |
| 694 | CLG_(pop_call_stack)(); |
| 695 | } |
| 696 | else { |
| 697 | CLG_ASSERT(CLG_(current_state).nonskipped != 0); |
| 698 | } |
| 699 | } |
| 700 | |
| 701 | jmpkind = Ijk_Call; |
| 702 | call_emulation = True; |
| 703 | } |
| 704 | } |
| 705 | |
| 706 | if (jmpkind == Ijk_Call) |
| 707 | skip = CLG_(get_fn_node)(bb)->skip; |
| 708 | |
| 709 | CLG_DEBUGIF(1) { |
| 710 | if (jmpkind == JmpCond) |
| 711 | VG_(printf)("Conditional"); |
| 712 | else if (jmpkind == JmpNone) |
| 713 | VG_(printf)("None"); |
| 714 | else |
| 715 | ppIRJumpKind( jmpkind ); |
| 716 | |
| 717 | VG_(printf)(" %08x -> %08x, SP %08x\n", |
| 718 | last_bb ? bb_jmpaddr(last_bb) : 0, |
| 719 | bb_addr(bb), sp); |
| 720 | } |
| 721 | |
| 722 | /* Handle CALL/RET and update context to get correct BBCC */ |
| 723 | |
| 724 | if (jmpkind == Ijk_Ret) { |
| 725 | |
| 726 | if ((csp == 0) || |
| 727 | ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) && |
| 728 | ( *(CLG_(current_fn_stack).top-1)==0)) ) { |
| 729 | |
| 730 | /* On an empty call stack or at a signal separation marker, |
| 731 | * a RETURN generates an call stack underflow. |
| 732 | */ |
| 733 | handleUnderflow(bb); |
| 734 | CLG_(pop_call_stack)(); |
| 735 | } |
| 736 | else { |
| 737 | CLG_ASSERT(popcount_on_return >0); |
| 738 | CLG_(unwind_call_stack)(sp, popcount_on_return); |
| 739 | } |
| 740 | } |
| 741 | else { |
| 742 | CLG_(unwind_call_stack)(sp, 0); |
| 743 | |
| 744 | if (jmpkind == Ijk_Call) { |
| 745 | delayed_push = True; |
| 746 | |
| 747 | csp = CLG_(current_call_stack).sp; |
| 748 | if (call_emulation && csp>0) |
| 749 | sp = CLG_(current_call_stack).entry[csp-1].sp; |
| 750 | |
| 751 | } |
| 752 | } |
| 753 | |
| 754 | /* Change new context if needed, taking delayed_push into account */ |
| 755 | if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) { |
| 756 | CLG_(push_cxt)(CLG_(get_fn_node)(bb)); |
| 757 | } |
| 758 | CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom); |
| 759 | |
| 760 | /* If there is a fresh instrumented BBCC, assign current context */ |
| 761 | bbcc = CLG_(get_bbcc)(bb); |
| 762 | if (bbcc->cxt == 0) { |
| 763 | CLG_ASSERT(bbcc->rec_array == 0); |
| 764 | |
| 765 | bbcc->cxt = CLG_(current_state).cxt; |
| 766 | bbcc->rec_array = |
| 767 | new_recursion((*CLG_(current_fn_stack).top)->separate_recursions); |
| 768 | bbcc->rec_array[0] = bbcc; |
| 769 | |
| 770 | insert_bbcc_into_hash(bbcc); |
| 771 | } |
| 772 | else { |
| 773 | /* get BBCC with current context */ |
| 774 | |
| 775 | /* first check LRU of last bbcc executed */ |
| 776 | |
| 777 | if (last_bbcc) { |
| 778 | bbcc = last_bbcc->lru_next_bbcc; |
| 779 | if (bbcc && |
| 780 | ((bbcc->bb != bb) || |
| 781 | (bbcc->cxt != CLG_(current_state).cxt))) |
| 782 | bbcc = 0; |
| 783 | } |
| 784 | else |
| 785 | bbcc = 0; |
| 786 | |
| 787 | if (!bbcc) |
| 788 | bbcc = lookup_bbcc(bb, CLG_(current_state).cxt); |
| 789 | if (!bbcc) |
| 790 | bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0); |
| 791 | |
| 792 | bb->last_bbcc = bbcc; |
| 793 | } |
| 794 | |
| 795 | /* save for fast lookup */ |
| 796 | if (last_bbcc) |
| 797 | last_bbcc->lru_next_bbcc = bbcc; |
| 798 | |
| 799 | if ((*CLG_(current_fn_stack).top)->separate_recursions >1) { |
| 800 | UInt level, idx; |
| 801 | fn_node* top = *(CLG_(current_fn_stack).top); |
| 802 | |
| 803 | level = *CLG_(get_fn_entry)(top->number); |
| 804 | |
| 805 | if (delayed_push && !skip) { |
| 806 | if (CLG_(clo).skip_direct_recursion) { |
| 807 | /* do not increment rec. level if called from |
| 808 | * same function */ |
| 809 | if (!CLG_(current_state).bbcc || |
| 810 | (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])) |
| 811 | level++; |
| 812 | } |
| 813 | else level++; |
| 814 | } |
| 815 | if (level> top->separate_recursions) |
| 816 | level = top->separate_recursions; |
| 817 | |
| 818 | if (level == 0) { |
| 819 | /* can only happen if instrumentation just was switched on */ |
| 820 | level = 1; |
| 821 | *CLG_(get_fn_entry)(top->number) = 1; |
| 822 | } |
| 823 | |
| 824 | idx = level -1; |
| 825 | if (bbcc->rec_array[idx]) |
| 826 | bbcc = bbcc->rec_array[idx]; |
| 827 | else |
| 828 | bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx); |
| 829 | |
| 830 | CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc); |
| 831 | } |
| 832 | |
| 833 | if (delayed_push) { |
| 834 | if (!skip && CLG_(current_state).nonskipped) { |
| 835 | /* a call from skipped to nonskipped */ |
| 836 | CLG_(current_state).bbcc = CLG_(current_state).nonskipped; |
| 837 | } |
| 838 | CLG_(push_call_stack)(CLG_(current_state).bbcc, passed, |
| 839 | bbcc, sp, skip); |
| 840 | } |
| 841 | |
| 842 | if (CLG_(clo).collect_jumps && |
| 843 | ((jmpkind == JmpCond) || (jmpkind == Ijk_Boring))) { |
| 844 | |
| 845 | /* Handle conditional jumps followed, i.e. trace arcs |
| 846 | * This uses JCC structures, too */ |
| 847 | |
| 848 | jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc); |
| 849 | CLG_ASSERT(jcc != 0); |
| 850 | // Change from default, and check if already changed |
| 851 | if (jcc->jmpkind == Ijk_Call) |
| 852 | jcc->jmpkind = jmpkind; |
| 853 | else { |
| 854 | // FIXME: Why can this fail? |
| 855 | // CLG_ASSERT(jcc->jmpkind == jmpkind); |
| 856 | } |
| 857 | |
| 858 | jcc->call_counter++; |
| 859 | if (jmpkind == JmpCond) |
| 860 | CLG_(stat).jcnd_counter++; |
| 861 | else |
| 862 | CLG_(stat).jump_counter++; |
| 863 | } |
| 864 | |
| 865 | CLG_(current_state).bbcc = bbcc; |
| 866 | |
| 867 | CLG_DEBUGIF(1) { |
| 868 | VG_(printf)(" "); |
| 869 | CLG_(print_bbcc_fn)(bbcc); |
| 870 | VG_(printf)("\n"); |
| 871 | } |
| 872 | |
| 873 | CLG_DEBUG(3,"- setup_bbcc (BB %p): Cost %p (Len %d), Instrs %d (Len %d)\n", |
| 874 | bb_addr(bb), bbcc->cost, bb->cost_count, |
| 875 | bb->instr_count, bb->instr_len); |
| 876 | CLG_DEBUGIF(3) |
| 877 | CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index); |
| 878 | CLG_DEBUG(3,"\n"); |
| 879 | |
| 880 | (*CLG_(cachesim).after_bbsetup)(); |
| 881 | |
| 882 | CLG_(stat).bb_executions++; |
| 883 | } |