blob: d2eb4b93bb24c0eb4fe44b5a0eed4c56d591e3c0 [file] [log] [blame]
weidendoa17f2a32006-03-20 10:27:30 +00001/*--------------------------------------------------------------------*/
2/*--- Callgrind ---*/
3/*--- bbcc.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Callgrind, a Valgrind tool for call tracing.
8
9 Copyright (C) 2002-2004, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
10
11 This program is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as
13 published by the Free Software Foundation; either version 2 of the
14 License, or (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 02111-1307, USA.
25
26 The GNU General Public License is contained in the file COPYING.
27*/
28
29#include "global.h"
30#include "costs.h"
31
32#include <pub_tool_threadstate.h>
33
34/*------------------------------------------------------------*/
35/*--- BBCC operations ---*/
36/*------------------------------------------------------------*/
37
38#define N_BBCC_INITIAL_ENTRIES 10437
39
40/* BBCC table (key is BB/Context), per thread, resizable */
41bbcc_hash current_bbccs;
42
43void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
44{
45 Int i;
46
47 CLG_ASSERT(bbccs != 0);
48
49 bbccs->size = N_BBCC_INITIAL_ENTRIES;
50 bbccs->entries = 0;
51 bbccs->table = (BBCC**) CLG_MALLOC(bbccs->size * sizeof(BBCC*));
52
53 for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
54}
55
56void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
57{
58 CLG_ASSERT(dst != 0);
59
60 dst->size = current_bbccs.size;
61 dst->entries = current_bbccs.entries;
62 dst->table = current_bbccs.table;
63}
64
65bbcc_hash* CLG_(get_current_bbcc_hash)()
66{
67 return &current_bbccs;
68}
69
70void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
71{
72 CLG_ASSERT(h != 0);
73
74 current_bbccs.size = h->size;
75 current_bbccs.entries = h->entries;
76 current_bbccs.table = h->table;
77}
78
79/*
80 * Zero all costs of a BBCC
81 */
82void CLG_(zero_bbcc)(BBCC* bbcc)
83{
84 Int i;
85 jCC* jcc;
86
87 CLG_ASSERT(bbcc->cxt != 0);
88 CLG_DEBUG(1, " zero_bbcc: BB %p, Cxt %d "
89 "(fn '%s', rec %d)\n",
90 bb_addr(bbcc->bb),
91 bbcc->cxt->base_number + bbcc->rec_index,
92 bbcc->cxt->fn[0]->name,
93 bbcc->rec_index);
94
95 if ((bbcc->ecounter_sum ==0) &&
96 (bbcc->ret_counter ==0)) return;
97
98 for(i=0;i<bbcc->bb->cost_count;i++)
99 bbcc->cost[i] = 0;
100 for(i=0;i <= bbcc->bb->cjmp_count;i++) {
101 bbcc->jmp[i].ecounter = 0;
102 for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from)
103 CLG_(init_cost)( CLG_(sets).full, jcc->cost );
104 }
105 bbcc->ecounter_sum = 0;
106 bbcc->ret_counter = 0;
107}
108
109
110
111void CLG_(forall_bbccs)(void (*func)(BBCC*))
112{
113 BBCC *bbcc, *bbcc2;
114 int i, j;
115
116 for (i = 0; i < current_bbccs.size; i++) {
117 if ((bbcc=current_bbccs.table[i]) == NULL) continue;
118 while (bbcc) {
119 /* every bbcc should have a rec_array */
120 CLG_ASSERT(bbcc->rec_array != 0);
121
122 for(j=0;j<bbcc->cxt->fn[0]->separate_recursions;j++) {
123 if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;
124
125 (*func)(bbcc2);
126 }
127 bbcc = bbcc->next;
128 }
129 }
130}
131
132
133/* All BBCCs for recursion level 0 are inserted into a
134 * thread specific hash table with key
135 * - address of BB structure (unique, as never freed)
136 * - current context (includes caller chain)
137 * BBCCs for other recursion levels are in bbcc->rec_array.
138 *
139 * The hash is used in setup_bb(), i.e. to find the cost
140 * counters to be changed in the execution of a BB.
141 */
142
143static __inline__
144UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
145{
146 CLG_ASSERT(bb != 0);
147 CLG_ASSERT(cxt != 0);
148
149 return ((Addr)bb + (Addr)cxt) % size;
150}
151
152
153/* Lookup for a BBCC in hash.
154 */
155static
156BBCC* lookup_bbcc(BB* bb, Context* cxt)
157{
158 BBCC* bbcc = bb->last_bbcc;
159 UInt idx;
160
161 /* check LRU */
162 if (bbcc->cxt == cxt) {
163 if (!CLG_(clo).separate_threads) {
164 /* if we don't dump threads separate, tid doesn't have to match */
165 return bbcc;
166 }
167 if (bbcc->tid == CLG_(current_tid)) return bbcc;
168 }
169
170 CLG_(stat).bbcc_lru_misses++;
171
172 idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
173 bbcc = current_bbccs.table[idx];
174 while (bbcc &&
175 (bb != bbcc->bb ||
176 cxt != bbcc->cxt)) {
177 bbcc = bbcc->next;
178 }
179
180 CLG_DEBUG(2," lookup_bbcc(BB %p, Cxt %d, fn '%s'): %p (tid %d)\n",
181 bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
182 bbcc, bbcc ? bbcc->tid : 0);
183
184 CLG_DEBUGIF(2)
185 if (bbcc) CLG_(print_bbcc)(-2,bbcc,False);
186
187 return bbcc;
188}
189
190
191/* double size of hash table 1 (addr->BBCC) */
192static void resize_bbcc_hash(void)
193{
194 Int i, new_size, conflicts1 = 0, conflicts2 = 0;
195 BBCC** new_table;
196 UInt new_idx;
197 BBCC *curr_BBCC, *next_BBCC;
198
199 new_size = 2*current_bbccs.size+3;
200 new_table = (BBCC**) CLG_MALLOC(new_size * sizeof(BBCC*));
201
202 if (!new_table) return;
203
204 for (i = 0; i < new_size; i++)
205 new_table[i] = NULL;
206
207 for (i = 0; i < current_bbccs.size; i++) {
208 if (current_bbccs.table[i] == NULL) continue;
209
210 curr_BBCC = current_bbccs.table[i];
211 while (NULL != curr_BBCC) {
212 next_BBCC = curr_BBCC->next;
213
214 new_idx = bbcc_hash_idx(curr_BBCC->bb,
215 curr_BBCC->cxt,
216 new_size);
217
218 curr_BBCC->next = new_table[new_idx];
219 new_table[new_idx] = curr_BBCC;
220 if (curr_BBCC->next) {
221 conflicts1++;
222 if (curr_BBCC->next->next)
223 conflicts2++;
224 }
225
226 curr_BBCC = next_BBCC;
227 }
228 }
229
230 VG_(free)(current_bbccs.table);
231
232
233 CLG_DEBUG(0,"Resize BBCC Hash: %d => %d (entries %d, conflicts %d/%d)\n",
234 current_bbccs.size, new_size,
235 current_bbccs.entries, conflicts1, conflicts2);
236
237 current_bbccs.size = new_size;
238 current_bbccs.table = new_table;
239 CLG_(stat).bbcc_hash_resizes++;
240}
241
242
243static __inline
244BBCC** new_recursion(int size)
245{
246 BBCC** bbccs;
247 int i;
248
249 bbccs = (BBCC**) CLG_MALLOC(sizeof(BBCC*) * size);
250 for(i=0;i<size;i++)
251 bbccs[i] = 0;
252
253 CLG_DEBUG(3," new_recursion(size %d): %p\n", size, bbccs);
254
255 return bbccs;
256}
257
258
259/*
260 * Allocate a new BBCC
261 *
262 * Uninitialized:
263 * cxt, rec_index, rec_array, next_bbcc, next1, next2
264 */
265static __inline__
266BBCC* new_bbcc(BB* bb)
267{
268 BBCC* new;
269 Int i;
270
271 /* We need cjmp_count+1 JmpData structs:
272 * the last is for the unconditional jump/call/ret at end of BB
273 */
274 new = (BBCC*)CLG_MALLOC(sizeof(BBCC) +
275 (bb->cjmp_count+1) * sizeof(JmpData));
276 new->bb = bb;
277 new->tid = CLG_(current_tid);
278
279 new->ret_counter = 0;
280 new->skipped = 0;
281 new->cost = CLG_(get_costarray)(bb->cost_count);
282 for(i=0;i<bb->cost_count;i++)
283 new->cost[i] = 0;
284 for(i=0; i<=bb->cjmp_count; i++) {
285 new->jmp[i].ecounter = 0;
286 new->jmp[i].jcc_list = 0;
287 }
288 new->ecounter_sum = 0;
289
290 /* Init pointer caches (LRU) */
291 new->lru_next_bbcc = 0;
292 new->lru_from_jcc = 0;
293 new->lru_to_jcc = 0;
294
295 CLG_(stat).distinct_bbccs++;
296
297 CLG_DEBUG(3, " new_bbcc(BB %p): %p (now %d)\n",
298 bb_addr(bb), new, CLG_(stat).distinct_bbccs);
299
300 return new;
301}
302
303
304/**
305 * Inserts a new BBCC into hashes.
306 * BBCC specific items must be set as this is used for the hash
307 * keys:
308 * fn : current function
309 * tid : current thread ID
310 * from : position where current function is called from
311 *
312 * Recursion level doesn't need to be set as this is not included
313 * in the hash key: Only BBCCs with rec level 0 are in hashes.
314 */
315static
316void insert_bbcc_into_hash(BBCC* bbcc)
317{
318 UInt idx;
319
320 CLG_ASSERT(bbcc->cxt != 0);
321
322 CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %p, fn '%s')\n",
323 bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);
324
325 /* check fill degree of hash and resize if needed (>90%) */
326 current_bbccs.entries++;
327 if (100 * current_bbccs.entries / current_bbccs.size > 90)
328 resize_bbcc_hash();
329
330 idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
331 bbcc->next = current_bbccs.table[idx];
332 current_bbccs.table[idx] = bbcc;
333
334 CLG_DEBUG(3,"- insert_bbcc_into_hash: %d entries\n",
335 current_bbccs.entries);
336}
337
338static Char* mangled_cxt(Context* cxt, int rec_index)
339{
340 static Char mangled[FN_NAME_LEN];
341 int i, p;
342
343 if (!cxt) return "(no context)";
344
345 p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
346 if (rec_index >0)
347 p += VG_(sprintf)(mangled+p, "'%d", rec_index +1);
348 for(i=1;i<cxt->size;i++)
349 p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);
350
351 return mangled;
352}
353
354
355/* Create a new BBCC as a copy of an existing one,
356 * but with costs set to 0 and jcc chains empty.
357 *
358 * This is needed when a BB is executed in another context than
359 * the one at instrumentation time of the BB.
360 *
361 * Use cases:
362 * rec_index == 0: clone from a BBCC with differing tid/cxt
363 * and insert into hashes
364 * rec_index >0 : clone from a BBCC with same tid/cxt and rec_index 0
365 * don't insert into hashes
366 */
367static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
368{
369 BBCC* new;
370
371 CLG_DEBUG(3,"+ clone_bbcc(BB %p, rec %d, fn %s)\n",
372 bb_addr(orig->bb), rec_index, cxt->fn[0]->name);
373
374 new = new_bbcc(orig->bb);
375
376 if (rec_index == 0) {
377
378 /* hash insertion is only allowed if tid or cxt is different */
379 CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
380 (orig->cxt != cxt));
381
382 new->rec_index = 0;
383 new->cxt = cxt;
384 new->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
385 new->rec_array[0] = new;
386
387 insert_bbcc_into_hash(new);
388 }
389 else {
390 if (CLG_(clo).separate_threads)
391 CLG_ASSERT(orig->tid == CLG_(current_tid));
392
393 CLG_ASSERT(orig->cxt == cxt);
394 CLG_ASSERT(orig->rec_array);
395 CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
396 CLG_ASSERT(orig->rec_array[rec_index] ==0);
397
398 /* new BBCC will only have differing recursion level */
399 new->rec_index = rec_index;
400 new->cxt = cxt;
401 new->rec_array = orig->rec_array;
402 new->rec_array[rec_index] = new;
403 }
404
405 /* update list of BBCCs for same BB */
406 new->next_bbcc = orig->bb->bbcc_list;
407 orig->bb->bbcc_list = new;
408
409
410 CLG_DEBUGIF(3)
411 CLG_(print_bbcc)(-2, new, False);
412
413 CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %p\n"
414 " orig %s\n"
415 " new %s\n",
416 orig, rec_index, bb_addr(orig->bb),
417 mangled_cxt(orig->cxt, orig->rec_index),
418 mangled_cxt(new->cxt, new->rec_index));
419
420 CLG_(stat).bbcc_clones++;
421
422 return new;
423};
424
425
426
427/* Get a pointer to the cost centre structure for given basic block
428 * address. If created, the BBCC is inserted into the BBCC hash.
429 * Also sets BB_seen_before by reference.
430 *
431 */
432BBCC* CLG_(get_bbcc)(BB* bb)
433{
434 BBCC* bbcc;
435
436 CLG_DEBUG(3, "+ get_bbcc(BB %p)\n", bb_addr(bb));
437
438 bbcc = bb->bbcc_list;
439
440 if (!bbcc) {
441 bbcc = new_bbcc(bb);
442
443 /* initialize BBCC */
444 bbcc->cxt = 0;
445 bbcc->rec_array = 0;
446 bbcc->rec_index = 0;
447
448 bbcc->next_bbcc = bb->bbcc_list;
449 bb->bbcc_list = bbcc;
450 bb->last_bbcc = bbcc;
451
452 CLG_DEBUGIF(3)
453 CLG_(print_bbcc)(-2, bbcc, False);
454 }
455
456 CLG_DEBUG(3, "- get_bbcc(BB %p): BBCC %p\n",
457 bb_addr(bb), bbcc);
458
459 return bbcc;
460}
461
462
463/* Callgrind manages its own call stack for each thread.
464 * When leaving a function, a underflow can happen when
465 * Callgrind's tracing was switched on in the middle of
466 * a run, i.e. when Callgrind was not able to trace the
467 * call instruction.
468 * This function tries to reconstruct the original call.
469 * As we know the return address (the address following
470 * the CALL instruction), we can detect the function
471 * we return back to, but the original call site is unknown.
472 * We suppose a call site at return address - 1.
473 * (TODO: other heuristic: lookup info of instrumented BBs).
474 */
475static void handleUnderflow(BB* bb)
476{
477 /* RET at top of call stack */
478 BBCC* source_bbcc;
479 BB* source_bb;
480 jCC* jcc;
481 Bool seen_before;
482 fn_node* caller;
483 int fn_number, *pactive;
484 call_entry* call_entry_up;
485
486 CLG_DEBUG(1," Callstack underflow !\n");
487
488 /* we emulate an old call from the function we return to
489 * by using (<return address> -1) */
490 source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
491 source_bbcc = CLG_(get_bbcc)(source_bb);
492
493 /* seen_before can be true if RET from a signal handler */
494 if (!seen_before) {
495 source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
496 }
497 else if (CLG_(current_state).collect)
498 source_bbcc->ecounter_sum++;
499
500 /* Force a new top context, will be set active by push_cxt() */
501 CLG_(current_fn_stack).top--;
502 CLG_(current_state).cxt = 0;
503 caller = CLG_(get_fn_node)(bb);
504 CLG_(push_cxt)( caller );
505
506 if (!seen_before) {
507 /* set rec array for source BBCC: this is at rec level 1 */
508 source_bbcc->rec_array = new_recursion(caller->separate_recursions);
509 source_bbcc->rec_array[0] = source_bbcc;
510
511 CLG_ASSERT(source_bbcc->cxt == 0);
512 source_bbcc->cxt = CLG_(current_state).cxt;
513 insert_bbcc_into_hash(source_bbcc);
514 }
515 CLG_ASSERT(CLG_(current_state).bbcc);
516
517 /* correct active counts */
518 fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
519 pactive = CLG_(get_fn_entry)(fn_number);
520 (*pactive)--;
521
522 /* This assertion is not correct for reentrant
523 * signal handlers */
524 /* CLG_ASSERT(*pactive == 0); */
525
526 CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
527 /* back to current context */
528 CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
529 CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
530 (Addr)-1, False);
531 call_entry_up =
532 &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
533 jcc = call_entry_up->jcc;
534 /* assume this call is lasting since last dump or
535 * for a signal handler since it's call */
536 if (CLG_(current_state).sig == 0)
537 CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
538 CLG_(get_current_thread)()->lastdump_cost );
539 else
540 CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
541}
542
543
544/*
545 * Helper function called at start of each instrumented BB to setup
546 * pointer to costs for current thread/context/recursion level
547 */
548
549VG_REGPARM(1)
550void CLG_(setup_bbcc)(BB* bb)
551{
552 BBCC *bbcc, *last_bbcc;
553 Bool call_emulation = False, delayed_push = False, skip = False;
554 Addr sp;
555 BB* last_bb;
556 ThreadId tid;
557 Int jmpkind, passed = 0, csp;
558 Bool ret_without_call = False;
559 Int popcount_on_return = 1;
560
561 CLG_DEBUG(3,"+ setup_bbcc(BB %p)\n", bb_addr(bb));
562
563 /* This is needed because thread switches can not reliable be tracked
564 * with callback CLG_(run_thread) only: we have otherwise no way to get
565 * the thread ID after a signal handler returns.
566 * This could be removed again if that bug is fixed in Valgrind.
567 * This is in the hot path but hopefully not to costly.
568 */
569 tid = VG_(get_running_tid)();
570#if 1
571 CLG_(switch_thread)(tid);
572#else
573 CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
574#endif
575
576 sp = VG_(get_SP)(tid);
577 last_bbcc = CLG_(current_state).bbcc;
578 last_bb = last_bbcc ? last_bbcc->bb : 0;
579
580 if (last_bb) {
581 passed = CLG_(current_state).jmps_passed;
582 if (passed == last_bb->cjmp_count) {
583 jmpkind = last_bb->jmpkind;
584
585 /* VEX always gives a Boring jump kind also when passed trough */
586 if ((jmpkind == Ijk_Boring) &&
587 (last_bb->offset + last_bb->instr_len == bb->offset))
588 jmpkind = JmpNone;
589 }
590 else
591 jmpkind = JmpCond;
592
593 /* if we are in a function which is skipped in the call graph, we
594 * do not increment the exe counter to produce cost (if simulation off),
595 * which would lead to dumping this BB to be skipped
596 */
597 if (CLG_(current_state).collect && !CLG_(current_state).nonskipped) {
598 last_bbcc->ecounter_sum++;
599 last_bbcc->jmp[passed].ecounter++;
600 if (!CLG_(clo).simulate_cache) {
601 /* update Ir cost */
602 int instr_count = last_bb->jmp[passed].instr+1;
603 CLG_(current_state).cost[CLG_(sets).off_sim_Ir] += instr_count;
604 }
605 }
606
607 CLG_DEBUGIF(4) {
608 CLG_(print_execstate)(-2, &CLG_(current_state) );
609 CLG_(print_bbcc_cost)(-2, last_bbcc);
610 }
611 }
612 else {
613 jmpkind = JmpNone;
614 }
615
616 /* Manipulate JmpKind if needed, only using BB specific info */
617
618 csp = CLG_(current_call_stack).sp;
619
620 /* A return not matching the top call in our callstack is a jump */
621 if ( (jmpkind == Ijk_Ret) && (csp >0)) {
622 Int csp_up = csp-1;
623 call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);
624
625 /* We have a real return if
626 * - the stack pointer (SP) left the current stack frame, or
627 * - SP has the same value as when reaching the current function
628 * and the address of this BB is the return address of last call
629 * (we even allow to leave multiple frames if the SP stays the
630 * same and we find a matching return address)
631 * The latter condition is needed because on PPC, SP can stay
632 * the same over CALL=b(c)l / RET=b(c)lr boundaries
633 */
634 if (sp < top_ce->sp) popcount_on_return = 0;
635 else if (top_ce->sp == sp) {
636 while(1) {
637 if (top_ce->ret_addr == bb_addr(bb)) break;
638 if (csp_up>0) {
639 csp_up--;
640 top_ce = &(CLG_(current_call_stack).entry[csp_up]);
641 if (top_ce->sp == sp) {
642 popcount_on_return++;
643 continue;
644 }
645 }
646 popcount_on_return = 0;
647 break;
648 }
649 }
650 if (popcount_on_return == 0) {
651 jmpkind = Ijk_Boring;
652 ret_without_call = True;
653 }
654 }
655
656 /* Should this jump be converted to call or pop/call ? */
657 if (( jmpkind != Ijk_Ret) &&
658 ( jmpkind != Ijk_Call) && last_bb) {
659
660 /* We simulate a JMP/Cont to be a CALL if
661 * - jump is in another ELF object or section kind
662 * - jump is to first instruction of a function (tail recursion)
663 */
664 if (ret_without_call ||
665 /* This is for detection of optimized tail recursion.
666 * On PPC, this is only detected as call when going to another
667 * function. The problem is that on PPC it can go wrong
668 * more easily (no stack frame setup needed)
669 */
670#if defined(VGA_ppc32)
671 (bb->is_entry && (last_bb->fn != bb->fn)) ||
672#else
673 bb->is_entry ||
674#endif
675 (last_bb->sect_kind != bb->sect_kind) ||
676 (last_bb->obj->number != bb->obj->number)) {
677
678 CLG_DEBUG(1," JMP: %s[%s] to %s[%s]%s!\n",
679 last_bb->fn->name, last_bb->obj->name,
680 bb->fn->name, bb->obj->name,
681 ret_without_call?" (RET w/o CALL)":"");
682
683 if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp>0)) {
684
685 call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);
686
687 if (top_ce->jcc) {
688
689 CLG_DEBUG(1," Pop on Jump!\n");
690
691 /* change source for delayed push */
692 CLG_(current_state).bbcc = top_ce->jcc->from;
693 sp = top_ce->sp;
694 CLG_(pop_call_stack)();
695 }
696 else {
697 CLG_ASSERT(CLG_(current_state).nonskipped != 0);
698 }
699 }
700
701 jmpkind = Ijk_Call;
702 call_emulation = True;
703 }
704 }
705
706 if (jmpkind == Ijk_Call)
707 skip = CLG_(get_fn_node)(bb)->skip;
708
709 CLG_DEBUGIF(1) {
710 if (jmpkind == JmpCond)
711 VG_(printf)("Conditional");
712 else if (jmpkind == JmpNone)
713 VG_(printf)("None");
714 else
715 ppIRJumpKind( jmpkind );
716
717 VG_(printf)(" %08x -> %08x, SP %08x\n",
718 last_bb ? bb_jmpaddr(last_bb) : 0,
719 bb_addr(bb), sp);
720 }
721
722 /* Handle CALL/RET and update context to get correct BBCC */
723
724 if (jmpkind == Ijk_Ret) {
725
726 if ((csp == 0) ||
727 ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
728 ( *(CLG_(current_fn_stack).top-1)==0)) ) {
729
730 /* On an empty call stack or at a signal separation marker,
731 * a RETURN generates an call stack underflow.
732 */
733 handleUnderflow(bb);
734 CLG_(pop_call_stack)();
735 }
736 else {
737 CLG_ASSERT(popcount_on_return >0);
738 CLG_(unwind_call_stack)(sp, popcount_on_return);
739 }
740 }
741 else {
742 CLG_(unwind_call_stack)(sp, 0);
743
744 if (jmpkind == Ijk_Call) {
745 delayed_push = True;
746
747 csp = CLG_(current_call_stack).sp;
748 if (call_emulation && csp>0)
749 sp = CLG_(current_call_stack).entry[csp-1].sp;
750
751 }
752 }
753
754 /* Change new context if needed, taking delayed_push into account */
755 if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
756 CLG_(push_cxt)(CLG_(get_fn_node)(bb));
757 }
758 CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);
759
760 /* If there is a fresh instrumented BBCC, assign current context */
761 bbcc = CLG_(get_bbcc)(bb);
762 if (bbcc->cxt == 0) {
763 CLG_ASSERT(bbcc->rec_array == 0);
764
765 bbcc->cxt = CLG_(current_state).cxt;
766 bbcc->rec_array =
767 new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
768 bbcc->rec_array[0] = bbcc;
769
770 insert_bbcc_into_hash(bbcc);
771 }
772 else {
773 /* get BBCC with current context */
774
775 /* first check LRU of last bbcc executed */
776
777 if (last_bbcc) {
778 bbcc = last_bbcc->lru_next_bbcc;
779 if (bbcc &&
780 ((bbcc->bb != bb) ||
781 (bbcc->cxt != CLG_(current_state).cxt)))
782 bbcc = 0;
783 }
784 else
785 bbcc = 0;
786
787 if (!bbcc)
788 bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
789 if (!bbcc)
790 bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);
791
792 bb->last_bbcc = bbcc;
793 }
794
795 /* save for fast lookup */
796 if (last_bbcc)
797 last_bbcc->lru_next_bbcc = bbcc;
798
799 if ((*CLG_(current_fn_stack).top)->separate_recursions >1) {
800 UInt level, idx;
801 fn_node* top = *(CLG_(current_fn_stack).top);
802
803 level = *CLG_(get_fn_entry)(top->number);
804
805 if (delayed_push && !skip) {
806 if (CLG_(clo).skip_direct_recursion) {
807 /* do not increment rec. level if called from
808 * same function */
809 if (!CLG_(current_state).bbcc ||
810 (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0]))
811 level++;
812 }
813 else level++;
814 }
815 if (level> top->separate_recursions)
816 level = top->separate_recursions;
817
818 if (level == 0) {
819 /* can only happen if instrumentation just was switched on */
820 level = 1;
821 *CLG_(get_fn_entry)(top->number) = 1;
822 }
823
824 idx = level -1;
825 if (bbcc->rec_array[idx])
826 bbcc = bbcc->rec_array[idx];
827 else
828 bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);
829
830 CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
831 }
832
833 if (delayed_push) {
834 if (!skip && CLG_(current_state).nonskipped) {
835 /* a call from skipped to nonskipped */
836 CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
837 }
838 CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
839 bbcc, sp, skip);
840 }
841
842 if (CLG_(clo).collect_jumps &&
843 ((jmpkind == JmpCond) || (jmpkind == Ijk_Boring))) {
844
845 /* Handle conditional jumps followed, i.e. trace arcs
846 * This uses JCC structures, too */
847
848 jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
849 CLG_ASSERT(jcc != 0);
850 // Change from default, and check if already changed
851 if (jcc->jmpkind == Ijk_Call)
852 jcc->jmpkind = jmpkind;
853 else {
854 // FIXME: Why can this fail?
855 // CLG_ASSERT(jcc->jmpkind == jmpkind);
856 }
857
858 jcc->call_counter++;
859 if (jmpkind == JmpCond)
860 CLG_(stat).jcnd_counter++;
861 else
862 CLG_(stat).jump_counter++;
863 }
864
865 CLG_(current_state).bbcc = bbcc;
866
867 CLG_DEBUGIF(1) {
868 VG_(printf)(" ");
869 CLG_(print_bbcc_fn)(bbcc);
870 VG_(printf)("\n");
871 }
872
873 CLG_DEBUG(3,"- setup_bbcc (BB %p): Cost %p (Len %d), Instrs %d (Len %d)\n",
874 bb_addr(bb), bbcc->cost, bb->cost_count,
875 bb->instr_count, bb->instr_len);
876 CLG_DEBUGIF(3)
877 CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
878 CLG_DEBUG(3,"\n");
879
880 (*CLG_(cachesim).after_bbsetup)();
881
882 CLG_(stat).bb_executions++;
883}