blob: 00c076b7abe216bf8c00f02d5413b23b9f158aad [file] [log] [blame]
weidendoa17f2a32006-03-20 10:27:30 +00001
2/*--------------------------------------------------------------------*/
3/*--- Callgrind ---*/
4/*--- main.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Callgrind, a Valgrind tool for call graph
9 profiling programs.
10
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2002-2010, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
weidendoa17f2a32006-03-20 10:27:30 +000012
njn9a0cba42007-04-15 22:15:57 +000013 This tool is derived from and contains code from Cachegrind
sewardj9eecbbb2010-05-03 21:37:12 +000014 Copyright (C) 2002-2010 Nicholas Nethercote (njn@valgrind.org)
weidendoa17f2a32006-03-20 10:27:30 +000015
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
20
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
30
31 The GNU General Public License is contained in the file COPYING.
32*/
33
34#include "config.h"
35#include "callgrind.h"
36#include "global.h"
37
sewardje7a50822011-04-20 11:54:32 +000038#include "pub_tool_threadstate.h"
sewardj3b290482011-05-06 21:02:55 +000039#include "pub_tool_gdbserver.h"
weidendoa17f2a32006-03-20 10:27:30 +000040
weidendo320705f2010-07-02 19:56:23 +000041#include "cg_branchpred.c"
42
weidendoa17f2a32006-03-20 10:27:30 +000043/*------------------------------------------------------------*/
44/*--- Global variables ---*/
45/*------------------------------------------------------------*/
46
47/* for all threads */
48CommandLineOptions CLG_(clo);
49Statistics CLG_(stat);
50Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
51
52/* thread and signal handler specific */
53exec_state CLG_(current_state);
54
55
56/*------------------------------------------------------------*/
57/*--- Statistics ---*/
58/*------------------------------------------------------------*/
59
60static void CLG_(init_statistics)(Statistics* s)
61{
62 s->call_counter = 0;
63 s->jcnd_counter = 0;
64 s->jump_counter = 0;
65 s->rec_call_counter = 0;
66 s->ret_counter = 0;
67 s->bb_executions = 0;
68
69 s->context_counter = 0;
70 s->bb_retranslations = 0;
71
72 s->distinct_objs = 0;
73 s->distinct_files = 0;
74 s->distinct_fns = 0;
75 s->distinct_contexts = 0;
76 s->distinct_bbs = 0;
77 s->distinct_bbccs = 0;
78 s->distinct_instrs = 0;
79 s->distinct_skips = 0;
80
81 s->bb_hash_resizes = 0;
82 s->bbcc_hash_resizes = 0;
83 s->jcc_hash_resizes = 0;
84 s->cxt_hash_resizes = 0;
85 s->fn_array_resizes = 0;
86 s->call_stack_resizes = 0;
87 s->fn_stack_resizes = 0;
88
89 s->full_debug_BBs = 0;
90 s->file_line_debug_BBs = 0;
91 s->fn_name_debug_BBs = 0;
92 s->no_debug_BBs = 0;
93 s->bbcc_lru_misses = 0;
94 s->jcc_lru_misses = 0;
95 s->cxt_lru_misses = 0;
96 s->bbcc_clones = 0;
97}
98
99
weidendoa17f2a32006-03-20 10:27:30 +0000100/*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)                ---*/
102/*------------------------------------------------------------*/
103
104VG_REGPARM(1)
105static void log_global_event(InstrInfo* ii)
106{
107 ULong* cost_Bus;
108
weidendo320705f2010-07-02 19:56:23 +0000109 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
weidendoaeb86222010-06-09 22:33:02 +0000110 CLG_(bb_base) + ii->instr_offset, ii->instr_size);
111
112 if (!CLG_(current_state).collect) return;
113
weidendo320705f2010-07-02 19:56:23 +0000114 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
115
weidendoaeb86222010-06-09 22:33:02 +0000116 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
117
118 if (CLG_(current_state).nonskipped)
119 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
120 else
121 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
122 cost_Bus[0]++;
123}
124
125
weidendo320705f2010-07-02 19:56:23 +0000126/* For branches, we consult two different predictors, one which
127 predicts taken/untaken for conditional branches, and the other
128 which predicts the branch target address for indirect branches
129 (jump-to-register style ones). */
130
131static VG_REGPARM(2)
132void log_cond_branch(InstrInfo* ii, Word taken)
133{
134 Bool miss;
135 Int fullOffset_Bc;
136 ULong* cost_Bc;
137
138 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %lu\n",
139 CLG_(bb_base) + ii->instr_offset, taken);
140
141 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
142
143 if (!CLG_(current_state).collect) return;
144
145 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
146
147 if (CLG_(current_state).nonskipped)
148 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
149 else
150 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
151
152 fullOffset_Bc = fullOffset(EG_BC);
153 CLG_(current_state).cost[ fullOffset_Bc ]++;
154 cost_Bc[0]++;
155 if (miss) {
156 CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
157 cost_Bc[1]++;
158 }
159}
160
161static VG_REGPARM(2)
162void log_ind_branch(InstrInfo* ii, UWord actual_dst)
163{
164 Bool miss;
165 Int fullOffset_Bi;
166 ULong* cost_Bi;
167
168 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
169 CLG_(bb_base) + ii->instr_offset, actual_dst);
170
171 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
172
173 if (!CLG_(current_state).collect) return;
174
175 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
176
177 if (CLG_(current_state).nonskipped)
178 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
179 else
180 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
181
182 fullOffset_Bi = fullOffset(EG_BI);
183 CLG_(current_state).cost[ fullOffset_Bi ]++;
184 cost_Bi[0]++;
185 if (miss) {
186 CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
187 cost_Bi[1]++;
188 }
189}
190
weidendoaeb86222010-06-09 22:33:02 +0000191/*------------------------------------------------------------*/
weidendo0a1951d2009-06-15 00:16:36 +0000192/*--- Instrumentation structures and event queue handling ---*/
weidendoa17f2a32006-03-20 10:27:30 +0000193/*------------------------------------------------------------*/
194
weidendo0a1951d2009-06-15 00:16:36 +0000195/* Maintain an ordered list of memory events which are outstanding, in
196 the sense that no IR has yet been generated to do the relevant
197 helper calls. The BB is scanned top to bottom and memory events
198 are added to the end of the list, merging with the most recent
199 notified event where possible (Dw immediately following Dr and
200 having the same size and EA can be merged).
weidendoa17f2a32006-03-20 10:27:30 +0000201
weidendo0a1951d2009-06-15 00:16:36 +0000202 This merging is done so that for architectures which have
203 load-op-store instructions (x86, amd64), the insn is treated as if
204 it makes just one memory reference (a modify), rather than two (a
205 read followed by a write at the same address).
206
207 At various points the list will need to be flushed, that is, IR
208 generated from it. That must happen before any possible exit from
209 the block (the end, or an IRStmt_Exit). Flushing also takes place
210 when there is no space to add a new event.
211
212 If we require the simulation statistics to be up to date with
213 respect to possible memory exceptions, then the list would have to
214 be flushed before each memory reference. That would however lose
215 performance by inhibiting event-merging during flushing.
216
217 Flushing the list consists of walking it start to end and emitting
218 instrumentation IR for each event, in the order in which they
219 appear. It may be possible to emit a single call for two adjacent
220 events in order to reduce the number of helper function calls made.
221 For example, it could well be profitable to handle two adjacent Ir
222 events with a single helper call. */
223
224typedef
225 IRExpr
226 IRAtom;
227
/* Kind of a queued event; selects the member of Event.Ev in use. */
typedef
   enum {
      Ev_Ir,  // Instruction read
      Ev_Dr,  // Data read
      Ev_Dw,  // Data write
      Ev_Dm,  // Data modify (read then write)
      Ev_Bc,  // branch conditional
      Ev_Bi,  // branch indirect (to unknown destination)
      Ev_G    // Global bus event
   }
   EventTag;
239
240typedef
241 struct {
242 EventTag tag;
243 InstrInfo* inode;
244 union {
245 struct {
246 } Ir;
247 struct {
248 IRAtom* ea;
249 Int szB;
250 } Dr;
251 struct {
252 IRAtom* ea;
253 Int szB;
254 } Dw;
255 struct {
256 IRAtom* ea;
257 Int szB;
258 } Dm;
weidendo320705f2010-07-02 19:56:23 +0000259 struct {
260 IRAtom* taken; /* :: Ity_I1 */
261 } Bc;
262 struct {
263 IRAtom* dst;
264 } Bi;
weidendoaeb86222010-06-09 22:33:02 +0000265 struct {
266 } G;
weidendo0a1951d2009-06-15 00:16:36 +0000267 } Ev;
268 }
269 Event;
270
271static void init_Event ( Event* ev ) {
272 VG_(memset)(ev, 0, sizeof(Event));
weidendoa17f2a32006-03-20 10:27:30 +0000273}
274
weidendo0a1951d2009-06-15 00:16:36 +0000275static IRAtom* get_Event_dea ( Event* ev ) {
276 switch (ev->tag) {
277 case Ev_Dr: return ev->Ev.Dr.ea;
278 case Ev_Dw: return ev->Ev.Dw.ea;
279 case Ev_Dm: return ev->Ev.Dm.ea;
280 default: tl_assert(0);
281 }
282}
weidendoa17f2a32006-03-20 10:27:30 +0000283
weidendo0a1951d2009-06-15 00:16:36 +0000284static Int get_Event_dszB ( Event* ev ) {
285 switch (ev->tag) {
286 case Ev_Dr: return ev->Ev.Dr.szB;
287 case Ev_Dw: return ev->Ev.Dw.szB;
288 case Ev_Dm: return ev->Ev.Dm.szB;
289 default: tl_assert(0);
290 }
291}
292
293
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16
299
300
301/* A struct which holds all the running state during instrumentation.
302 Mostly to avoid passing loads of parameters everywhere. */
303typedef struct {
304 /* The current outstanding-memory-event list. */
305 Event events[N_EVENTS];
306 Int events_used;
307
308 /* The array of InstrInfo's is part of BB struct. */
309 BB* bb;
310
311 /* BB seen before (ie. re-instrumentation) */
312 Bool seen_before;
313
314 /* Number InstrInfo bins 'used' so far. */
315 UInt ii_index;
316
317 // current offset of guest instructions from BB start
318 UInt instr_offset;
319
320 /* The output SB being constructed. */
321 IRSB* sbOut;
322} ClgState;
323
324
325static void showEvent ( Event* ev )
326{
327 switch (ev->tag) {
328 case Ev_Ir:
329 VG_(printf)("Ir (InstrInfo %p) at +%d\n",
330 ev->inode, ev->inode->instr_offset);
331 break;
332 case Ev_Dr:
333 VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
334 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
335 ppIRExpr(ev->Ev.Dr.ea);
336 VG_(printf)("\n");
337 break;
338 case Ev_Dw:
339 VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
340 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
341 ppIRExpr(ev->Ev.Dw.ea);
342 VG_(printf)("\n");
343 break;
344 case Ev_Dm:
345 VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
346 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
347 ppIRExpr(ev->Ev.Dm.ea);
348 VG_(printf)("\n");
349 break;
weidendo320705f2010-07-02 19:56:23 +0000350 case Ev_Bc:
351 VG_(printf)("Bc %p GA=", ev->inode);
352 ppIRExpr(ev->Ev.Bc.taken);
353 VG_(printf)("\n");
354 break;
355 case Ev_Bi:
356 VG_(printf)("Bi %p DST=", ev->inode);
357 ppIRExpr(ev->Ev.Bi.dst);
358 VG_(printf)("\n");
359 break;
weidendoaeb86222010-06-09 22:33:02 +0000360 case Ev_G:
361 VG_(printf)("G %p\n", ev->inode);
362 break;
weidendo0a1951d2009-06-15 00:16:36 +0000363 default:
364 tl_assert(0);
365 break;
366 }
367}
368
369/* Generate code for all outstanding memory events, and mark the queue
370 empty. Code is generated into cgs->sbOut, and this activity
371 'consumes' slots in cgs->bb. */
372
373static void flushEvents ( ClgState* clgs )
374{
375 Int i, regparms, inew;
376 Char* helperName;
377 void* helperAddr;
378 IRExpr** argv;
379 IRExpr* i_node_expr;
380 IRDirty* di;
381 Event* ev;
382 Event* ev2;
383 Event* ev3;
384
385 if (!clgs->seen_before) {
386 // extend event sets as needed
387 // available sets: D0 Dr
388 for(i=0; i<clgs->events_used; i++) {
389 ev = &clgs->events[i];
390 switch(ev->tag) {
391 case Ev_Ir:
392 // Ir event always is first for a guest instruction
393 CLG_ASSERT(ev->inode->eventset == 0);
weidendo5bba5252010-06-09 22:32:53 +0000394 ev->inode->eventset = CLG_(sets).base;
weidendo0a1951d2009-06-15 00:16:36 +0000395 break;
396 case Ev_Dr:
weidendo320705f2010-07-02 19:56:23 +0000397 // extend event set by Dr counters
weidendo5bba5252010-06-09 22:32:53 +0000398 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
399 EG_DR);
weidendo0a1951d2009-06-15 00:16:36 +0000400 break;
401 case Ev_Dw:
402 case Ev_Dm:
weidendo320705f2010-07-02 19:56:23 +0000403 // extend event set by Dw counters
weidendo5bba5252010-06-09 22:32:53 +0000404 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
405 EG_DW);
weidendo0a1951d2009-06-15 00:16:36 +0000406 break;
weidendo320705f2010-07-02 19:56:23 +0000407 case Ev_Bc:
408 // extend event set by Bc counters
409 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
410 EG_BC);
411 break;
412 case Ev_Bi:
413 // extend event set by Bi counters
414 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
415 EG_BI);
416 break;
weidendoaeb86222010-06-09 22:33:02 +0000417 case Ev_G:
weidendo320705f2010-07-02 19:56:23 +0000418 // extend event set by Bus counter
weidendoaeb86222010-06-09 22:33:02 +0000419 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
420 EG_BUS);
421 break;
weidendo0a1951d2009-06-15 00:16:36 +0000422 default:
423 tl_assert(0);
424 }
425 }
426 }
427
428 for(i = 0; i < clgs->events_used; i = inew) {
429
430 helperName = NULL;
431 helperAddr = NULL;
432 argv = NULL;
433 regparms = 0;
434
435 /* generate IR to notify event i and possibly the ones
436 immediately following it. */
437 tl_assert(i >= 0 && i < clgs->events_used);
438
439 ev = &clgs->events[i];
440 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
441 ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
442
443 CLG_DEBUGIF(5) {
444 VG_(printf)(" flush ");
445 showEvent( ev );
446 }
447
448 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
449
450 /* Decide on helper fn to call and args to pass it, and advance
451 i appropriately.
452 Dm events have same effect as Dw events */
453 switch (ev->tag) {
454 case Ev_Ir:
455 /* Merge an Ir with a following Dr. */
456 if (ev2 && ev2->tag == Ev_Dr) {
457 /* Why is this true? It's because we're merging an Ir
458 with a following Dr. The Ir derives from the
459 instruction's IMark and the Dr from data
460 references which follow it. In short it holds
461 because each insn starts with an IMark, hence an
462 Ev_Ir, and so these Dr must pertain to the
463 immediately preceding Ir. Same applies to analogous
464 assertions in the subsequent cases. */
465 tl_assert(ev2->inode == ev->inode);
466 helperName = CLG_(cachesim).log_1I1Dr_name;
467 helperAddr = CLG_(cachesim).log_1I1Dr;
468 argv = mkIRExprVec_3( i_node_expr,
469 get_Event_dea(ev2),
470 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
471 regparms = 3;
472 inew = i+2;
473 }
474 /* Merge an Ir with a following Dw/Dm. */
475 else
476 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
477 tl_assert(ev2->inode == ev->inode);
478 helperName = CLG_(cachesim).log_1I1Dw_name;
479 helperAddr = CLG_(cachesim).log_1I1Dw;
480 argv = mkIRExprVec_3( i_node_expr,
481 get_Event_dea(ev2),
482 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
483 regparms = 3;
484 inew = i+2;
485 }
486 /* Merge an Ir with two following Irs. */
487 else
488 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
489 helperName = CLG_(cachesim).log_3I0D_name;
490 helperAddr = CLG_(cachesim).log_3I0D;
491 argv = mkIRExprVec_3( i_node_expr,
492 mkIRExpr_HWord( (HWord)ev2->inode ),
493 mkIRExpr_HWord( (HWord)ev3->inode ) );
494 regparms = 3;
495 inew = i+3;
496 }
497 /* Merge an Ir with one following Ir. */
498 else
499 if (ev2 && ev2->tag == Ev_Ir) {
500 helperName = CLG_(cachesim).log_2I0D_name;
501 helperAddr = CLG_(cachesim).log_2I0D;
502 argv = mkIRExprVec_2( i_node_expr,
503 mkIRExpr_HWord( (HWord)ev2->inode ) );
504 regparms = 2;
505 inew = i+2;
506 }
507 /* No merging possible; emit as-is. */
508 else {
509 helperName = CLG_(cachesim).log_1I0D_name;
510 helperAddr = CLG_(cachesim).log_1I0D;
511 argv = mkIRExprVec_1( i_node_expr );
512 regparms = 1;
513 inew = i+1;
514 }
515 break;
516 case Ev_Dr:
517 /* Data read or modify */
weidendoa17f2a32006-03-20 10:27:30 +0000518 helperName = CLG_(cachesim).log_0I1Dr_name;
519 helperAddr = CLG_(cachesim).log_0I1Dr;
weidendo0a1951d2009-06-15 00:16:36 +0000520 argv = mkIRExprVec_3( i_node_expr,
521 get_Event_dea(ev),
522 mkIRExpr_HWord( get_Event_dszB(ev) ) );
523 regparms = 3;
524 inew = i+1;
525 break;
526 case Ev_Dw:
527 case Ev_Dm:
528 /* Data write */
weidendoa17f2a32006-03-20 10:27:30 +0000529 helperName = CLG_(cachesim).log_0I1Dw_name;
530 helperAddr = CLG_(cachesim).log_0I1Dw;
weidendo0a1951d2009-06-15 00:16:36 +0000531 argv = mkIRExprVec_3( i_node_expr,
532 get_Event_dea(ev),
533 mkIRExpr_HWord( get_Event_dszB(ev) ) );
534 regparms = 3;
535 inew = i+1;
536 break;
weidendo320705f2010-07-02 19:56:23 +0000537 case Ev_Bc:
538 /* Conditional branch */
539 helperName = "log_cond_branch";
540 helperAddr = &log_cond_branch;
541 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
542 regparms = 2;
543 inew = i+1;
544 break;
545 case Ev_Bi:
546 /* Branch to an unknown destination */
547 helperName = "log_ind_branch";
548 helperAddr = &log_ind_branch;
549 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
550 regparms = 2;
551 inew = i+1;
552 break;
weidendoaeb86222010-06-09 22:33:02 +0000553 case Ev_G:
554 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
555 helperName = "log_global_event";
556 helperAddr = &log_global_event;
557 argv = mkIRExprVec_1( i_node_expr );
558 regparms = 1;
559 inew = i+1;
560 break;
weidendo0a1951d2009-06-15 00:16:36 +0000561 default:
562 tl_assert(0);
563 }
weidendoa17f2a32006-03-20 10:27:30 +0000564
weidendo0a1951d2009-06-15 00:16:36 +0000565 CLG_DEBUGIF(5) {
566 if (inew > i+1) {
567 VG_(printf)(" merge ");
568 showEvent( ev2 );
569 }
570 if (inew > i+2) {
571 VG_(printf)(" merge ");
572 showEvent( ev3 );
573 }
574 if (helperAddr)
575 VG_(printf)(" call %s (%p)\n",
576 helperName, helperAddr);
577 }
weidendoa17f2a32006-03-20 10:27:30 +0000578
weidendo0a1951d2009-06-15 00:16:36 +0000579 /* helper could be unset depending on the simulator used */
580 if (helperAddr == 0) continue;
weidendoa17f2a32006-03-20 10:27:30 +0000581
weidendo0a1951d2009-06-15 00:16:36 +0000582 /* Add the helper. */
583 tl_assert(helperName);
584 tl_assert(helperAddr);
585 tl_assert(argv);
586 di = unsafeIRDirty_0_N( regparms,
587 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
588 argv );
589 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
weidendoa17f2a32006-03-20 10:27:30 +0000590 }
591
weidendo0a1951d2009-06-15 00:16:36 +0000592 clgs->events_used = 0;
593}
weidendoa17f2a32006-03-20 10:27:30 +0000594
weidendo0a1951d2009-06-15 00:16:36 +0000595static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
596{
597 Event* evt;
598 tl_assert(clgs->seen_before || (inode->eventset == 0));
599 if (!CLG_(clo).simulate_cache) return;
weidendoa17f2a32006-03-20 10:27:30 +0000600
weidendo0a1951d2009-06-15 00:16:36 +0000601 if (clgs->events_used == N_EVENTS)
602 flushEvents(clgs);
603 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
604 evt = &clgs->events[clgs->events_used];
605 init_Event(evt);
606 evt->tag = Ev_Ir;
607 evt->inode = inode;
608 clgs->events_used++;
609}
weidendoa17f2a32006-03-20 10:27:30 +0000610
weidendo0a1951d2009-06-15 00:16:36 +0000611static
612void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
613{
614 Event* evt;
615 tl_assert(isIRAtom(ea));
616 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
617 if (!CLG_(clo).simulate_cache) return;
weidendoc8e76152006-05-27 15:30:58 +0000618
weidendo0a1951d2009-06-15 00:16:36 +0000619 if (clgs->events_used == N_EVENTS)
620 flushEvents(clgs);
621 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
622 evt = &clgs->events[clgs->events_used];
623 init_Event(evt);
624 evt->tag = Ev_Dr;
625 evt->inode = inode;
626 evt->Ev.Dr.szB = datasize;
627 evt->Ev.Dr.ea = ea;
628 clgs->events_used++;
629}
weidendoc8e76152006-05-27 15:30:58 +0000630
weidendo0a1951d2009-06-15 00:16:36 +0000631static
632void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
633{
634 Event* lastEvt;
635 Event* evt;
636 tl_assert(isIRAtom(ea));
637 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
638 if (!CLG_(clo).simulate_cache) return;
weidendoc8e76152006-05-27 15:30:58 +0000639
weidendo0a1951d2009-06-15 00:16:36 +0000640 /* Is it possible to merge this write with the preceding read? */
641 lastEvt = &clgs->events[clgs->events_used-1];
642 if (clgs->events_used > 0
643 && lastEvt->tag == Ev_Dr
644 && lastEvt->Ev.Dr.szB == datasize
645 && lastEvt->inode == inode
646 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
647 {
648 lastEvt->tag = Ev_Dm;
649 return;
650 }
weidendoc8e76152006-05-27 15:30:58 +0000651
weidendo0a1951d2009-06-15 00:16:36 +0000652 /* No. Add as normal. */
653 if (clgs->events_used == N_EVENTS)
654 flushEvents(clgs);
655 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
656 evt = &clgs->events[clgs->events_used];
657 init_Event(evt);
658 evt->tag = Ev_Dw;
659 evt->inode = inode;
660 evt->Ev.Dw.szB = datasize;
661 evt->Ev.Dw.ea = ea;
662 clgs->events_used++;
663}
664
weidendoaeb86222010-06-09 22:33:02 +0000665static
weidendo320705f2010-07-02 19:56:23 +0000666void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
667{
668 Event* evt;
669 tl_assert(isIRAtom(guard));
670 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
671 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
672 if (!CLG_(clo).simulate_branch) return;
673
674 if (clgs->events_used == N_EVENTS)
675 flushEvents(clgs);
676 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
677 evt = &clgs->events[clgs->events_used];
678 init_Event(evt);
679 evt->tag = Ev_Bc;
680 evt->inode = inode;
681 evt->Ev.Bc.taken = guard;
682 clgs->events_used++;
683}
684
685static
686void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
687{
688 Event* evt;
689 tl_assert(isIRAtom(whereTo));
690 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
691 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
692 if (!CLG_(clo).simulate_branch) return;
693
694 if (clgs->events_used == N_EVENTS)
695 flushEvents(clgs);
696 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
697 evt = &clgs->events[clgs->events_used];
698 init_Event(evt);
699 evt->tag = Ev_Bi;
700 evt->inode = inode;
701 evt->Ev.Bi.dst = whereTo;
702 clgs->events_used++;
703}
704
705static
weidendoaeb86222010-06-09 22:33:02 +0000706void addEvent_G ( ClgState* clgs, InstrInfo* inode )
707{
708 Event* evt;
709 if (!CLG_(clo).collect_bus) return;
weidendo320705f2010-07-02 19:56:23 +0000710
weidendoaeb86222010-06-09 22:33:02 +0000711 if (clgs->events_used == N_EVENTS)
712 flushEvents(clgs);
713 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
714 evt = &clgs->events[clgs->events_used];
715 init_Event(evt);
716 evt->tag = Ev_G;
717 evt->inode = inode;
718 clgs->events_used++;
719}
720
weidendo0a1951d2009-06-15 00:16:36 +0000721/* Initialise or check (if already seen before) an InstrInfo for next insn.
722 We only can set instr_offset/instr_size here. The required event set and
723 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
724 instructions. The event set is extended as required on flush of the event
725 queue (when Dm events were determined), cost offsets are determined at
726 end of BB instrumentation. */
727static
728InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
729{
730 InstrInfo* ii;
731 tl_assert(clgs->ii_index >= 0);
732 tl_assert(clgs->ii_index < clgs->bb->instr_count);
733 ii = &clgs->bb->instr[ clgs->ii_index ];
734
735 if (clgs->seen_before) {
736 CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
737 CLG_ASSERT(ii->instr_size == instr_size);
weidendoa17f2a32006-03-20 10:27:30 +0000738 }
739 else {
weidendo0a1951d2009-06-15 00:16:36 +0000740 ii->instr_offset = clgs->instr_offset;
741 ii->instr_size = instr_size;
742 ii->cost_offset = 0;
743 ii->eventset = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000744 }
745
weidendo0a1951d2009-06-15 00:16:36 +0000746 clgs->ii_index++;
747 clgs->instr_offset += instr_size;
748 CLG_(stat).distinct_instrs++;
weidendoa17f2a32006-03-20 10:27:30 +0000749
weidendo0a1951d2009-06-15 00:16:36 +0000750 return ii;
weidendoa17f2a32006-03-20 10:27:30 +0000751}
752
weidendo0a1951d2009-06-15 00:16:36 +0000753// return total number of cost values needed for this BB
754static
755UInt update_cost_offsets( ClgState* clgs )
756{
757 Int i;
758 InstrInfo* ii;
759 UInt cost_offset = 0;
760
761 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
762 for(i=0; i<clgs->ii_index; i++) {
763 ii = &clgs->bb->instr[i];
764 if (clgs->seen_before) {
765 CLG_ASSERT(ii->cost_offset == cost_offset);
766 } else
767 ii->cost_offset = cost_offset;
768 cost_offset += ii->eventset ? ii->eventset->size : 0;
769 }
770
771 return cost_offset;
772}
773
774/*------------------------------------------------------------*/
775/*--- Instrumentation ---*/
776/*------------------------------------------------------------*/
777
weidendoa17f2a32006-03-20 10:27:30 +0000778#if defined(VG_BIGENDIAN)
779# define CLGEndness Iend_BE
780#elif defined(VG_LITTLEENDIAN)
781# define CLGEndness Iend_LE
782#else
783# error "Unknown endianness"
784#endif
785
786static
787Addr IRConst2Addr(IRConst* con)
788{
789 Addr addr;
790
791 if (sizeof(Addr) == 4) {
792 CLG_ASSERT( con->tag == Ico_U32 );
793 addr = con->Ico.U32;
794 }
795 else if (sizeof(Addr) == 8) {
796 CLG_ASSERT( con->tag == Ico_U64 );
797 addr = con->Ico.U64;
798 }
799 else
800 VG_(tool_panic)("Callgrind: invalid Addr type");
801
802 return addr;
803}
804
805/* First pass over a BB to instrument, counting instructions and jumps
806 * This is needed for the size of the BB struct to allocate
807 *
808 * Called from CLG_(get_bb)
809 */
weidendo0a1951d2009-06-15 00:16:36 +0000810void CLG_(collectBlockInfo)(IRSB* sbIn,
weidendoa17f2a32006-03-20 10:27:30 +0000811 /*INOUT*/ UInt* instrs,
812 /*INOUT*/ UInt* cjmps,
813 /*INOUT*/ Bool* cjmp_inverted)
814{
815 Int i;
816 IRStmt* st;
817 Addr instrAddr =0, jumpDst;
818 UInt instrLen = 0;
819 Bool toNextInstr = False;
820
821 // Ist_Exit has to be ignored in preamble code, before first IMark:
822 // preamble code is added by VEX for self modifying code, and has
823 // nothing to do with client code
824 Bool inPreamble = True;
825
weidendo0a1951d2009-06-15 00:16:36 +0000826 if (!sbIn) return;
weidendoa17f2a32006-03-20 10:27:30 +0000827
weidendo0a1951d2009-06-15 00:16:36 +0000828 for (i = 0; i < sbIn->stmts_used; i++) {
829 st = sbIn->stmts[i];
weidendoa17f2a32006-03-20 10:27:30 +0000830 if (Ist_IMark == st->tag) {
831 inPreamble = False;
832
833 instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
834 instrLen = st->Ist.IMark.len;
835
836 (*instrs)++;
837 toNextInstr = False;
838 }
839 if (inPreamble) continue;
840 if (Ist_Exit == st->tag) {
841 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
842 toNextInstr = (jumpDst == instrAddr + instrLen);
weidendo0a1951d2009-06-15 00:16:36 +0000843
weidendoa17f2a32006-03-20 10:27:30 +0000844 (*cjmps)++;
845 }
846 }
847
848 /* if the last instructions of BB conditionally jumps to next instruction
849 * (= first instruction of next BB in memory), this is a inverted by VEX.
850 */
851 *cjmp_inverted = toNextInstr;
852}
853
854static
sewardj0b9d74a2006-12-24 02:24:11 +0000855void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
weidendoa17f2a32006-03-20 10:27:30 +0000856{
sewardj0b9d74a2006-12-24 02:24:11 +0000857 addStmtToIRSB( bbOut,
weidendoa17f2a32006-03-20 10:27:30 +0000858 IRStmt_Store(CLGEndness,
859 IRExpr_Const(hWordTy == Ity_I32 ?
860 IRConst_U32( addr ) :
861 IRConst_U64( addr )),
862 IRExpr_Const(IRConst_U32(val)) ));
863}
864
weidendo0a1951d2009-06-15 00:16:36 +0000865
866/* add helper call to setup_bbcc, with pointer to BB struct as argument
867 *
868 * precondition for setup_bbcc:
869 * - jmps_passed has number of cond.jumps passed in last executed BB
870 * - current_bbcc has a pointer to the BBCC of the last executed BB
871 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
872 * current_bbcc->bb->jmp_addr
873 * gives the address of the jump source.
874 *
875 * the setup does 2 things:
876 * - trace call:
877 * * Unwind own call stack, i.e sync our ESP with real ESP
878 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
879 * * For CALLs or JMPs crossing objects, record call arg +
880 * push are on own call stack
881 *
882 * - prepare for cache log functions:
883 * set current_bbcc to BBCC that gets the costs for this BB execution
884 * attached
885 */
886static
887void addBBSetupCall(ClgState* clgs)
888{
889 IRDirty* di;
890 IRExpr *arg1, **argv;
891
892 arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
893 argv = mkIRExprVec_1(arg1);
894 di = unsafeIRDirty_0_N( 1, "setup_bbcc",
895 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
896 argv);
897 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
898}
899
900
weidendoa17f2a32006-03-20 10:27:30 +0000901static
sewardj0b9d74a2006-12-24 02:24:11 +0000902IRSB* CLG_(instrument)( VgCallbackClosure* closure,
weidendo0a1951d2009-06-15 00:16:36 +0000903 IRSB* sbIn,
weidendoa17f2a32006-03-20 10:27:30 +0000904 VexGuestLayout* layout,
905 VexGuestExtents* vge,
906 IRType gWordTy, IRType hWordTy )
907{
weidendo0a1951d2009-06-15 00:16:36 +0000908 Int i, isize;
909 IRStmt* st;
910 Addr origAddr;
weidendo320705f2010-07-02 19:56:23 +0000911 Addr64 cia; /* address of current insn */
weidendo0a1951d2009-06-15 00:16:36 +0000912 InstrInfo* curr_inode = NULL;
913 ClgState clgs;
914 UInt cJumps = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000915
weidendoa17f2a32006-03-20 10:27:30 +0000916
917 if (gWordTy != hWordTy) {
918 /* We don't currently support this case. */
919 VG_(tool_panic)("host/guest word size mismatch");
920 }
921
922 // No instrumentation if it is switched off
923 if (! CLG_(instrument_state)) {
barta0b6b2c2008-07-07 06:49:24 +0000924 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
weidendoa17f2a32006-03-20 10:27:30 +0000925 (Addr)closure->readdr);
weidendo0a1951d2009-06-15 00:16:36 +0000926 return sbIn;
weidendoa17f2a32006-03-20 10:27:30 +0000927 }
928
barta0b6b2c2008-07-07 06:49:24 +0000929 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
weidendoa17f2a32006-03-20 10:27:30 +0000930
sewardj0b9d74a2006-12-24 02:24:11 +0000931 /* Set up SB for instrumented IR */
weidendo0a1951d2009-06-15 00:16:36 +0000932 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
weidendoa17f2a32006-03-20 10:27:30 +0000933
934 // Copy verbatim any IR preamble preceding the first IMark
935 i = 0;
weidendo0a1951d2009-06-15 00:16:36 +0000936 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
937 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
weidendoa17f2a32006-03-20 10:27:30 +0000938 i++;
939 }
940
941 // Get the first statement, and origAddr from it
weidendo0a1951d2009-06-15 00:16:36 +0000942 CLG_ASSERT(sbIn->stmts_used >0);
943 CLG_ASSERT(i < sbIn->stmts_used);
944 st = sbIn->stmts[i];
weidendoa17f2a32006-03-20 10:27:30 +0000945 CLG_ASSERT(Ist_IMark == st->tag);
weidendo0a1951d2009-06-15 00:16:36 +0000946
947 origAddr = (Addr)st->Ist.IMark.addr;
weidendo320705f2010-07-02 19:56:23 +0000948 cia = st->Ist.IMark.addr;
949 isize = st->Ist.IMark.len;
weidendoa17f2a32006-03-20 10:27:30 +0000950 CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
951
weidendo0a1951d2009-06-15 00:16:36 +0000952 /* Get BB struct (creating if necessary).
weidendoa17f2a32006-03-20 10:27:30 +0000953 * JS: The hash table is keyed with orig_addr_noredir -- important!
954 * JW: Why? If it is because of different chasing of the redirection,
955 * this is not needed, as chasing is switched off in callgrind
956 */
weidendo0a1951d2009-06-15 00:16:36 +0000957 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
weidendoa17f2a32006-03-20 10:27:30 +0000958
weidendo0a1951d2009-06-15 00:16:36 +0000959 addBBSetupCall(&clgs);
weidendoa17f2a32006-03-20 10:27:30 +0000960
weidendo0a1951d2009-06-15 00:16:36 +0000961 // Set up running state
962 clgs.events_used = 0;
963 clgs.ii_index = 0;
964 clgs.instr_offset = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000965
weidendo0a1951d2009-06-15 00:16:36 +0000966 for (/*use current i*/; i < sbIn->stmts_used; i++) {
weidendoa17f2a32006-03-20 10:27:30 +0000967
weidendo0a1951d2009-06-15 00:16:36 +0000968 st = sbIn->stmts[i];
969 CLG_ASSERT(isFlatIRStmt(st));
weidendoa17f2a32006-03-20 10:27:30 +0000970
weidendo0a1951d2009-06-15 00:16:36 +0000971 switch (st->tag) {
972 case Ist_NoOp:
973 case Ist_AbiHint:
974 case Ist_Put:
975 case Ist_PutI:
976 case Ist_MBE:
977 break;
weidendoa17f2a32006-03-20 10:27:30 +0000978
weidendo0a1951d2009-06-15 00:16:36 +0000979 case Ist_IMark: {
weidendo320705f2010-07-02 19:56:23 +0000980 cia = st->Ist.IMark.addr;
981 isize = st->Ist.IMark.len;
982 CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
weidendo0a1951d2009-06-15 00:16:36 +0000983 // If Vex fails to decode an instruction, the size will be zero.
984 // Pretend otherwise.
985 if (isize == 0) isize = VG_MIN_INSTR_SZB;
weidendoa17f2a32006-03-20 10:27:30 +0000986
weidendo0a1951d2009-06-15 00:16:36 +0000987 // Sanity-check size.
988 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
989 || VG_CLREQ_SZB == isize );
weidendoa17f2a32006-03-20 10:27:30 +0000990
weidendo0a1951d2009-06-15 00:16:36 +0000991 // Init the inode, record it as the current one.
992 // Subsequent Dr/Dw/Dm events from the same instruction will
993 // also use it.
994 curr_inode = next_InstrInfo (&clgs, isize);
weidendoa17f2a32006-03-20 10:27:30 +0000995
weidendo0a1951d2009-06-15 00:16:36 +0000996 addEvent_Ir( &clgs, curr_inode );
997 break;
998 }
weidendoa17f2a32006-03-20 10:27:30 +0000999
weidendo0a1951d2009-06-15 00:16:36 +00001000 case Ist_WrTmp: {
1001 IRExpr* data = st->Ist.WrTmp.data;
1002 if (data->tag == Iex_Load) {
1003 IRExpr* aexpr = data->Iex.Load.addr;
1004 // Note also, endianness info is ignored. I guess
1005 // that's not interesting.
1006 addEvent_Dr( &clgs, curr_inode,
1007 sizeofIRType(data->Iex.Load.ty), aexpr );
1008 }
1009 break;
1010 }
weidendoa17f2a32006-03-20 10:27:30 +00001011
weidendo0a1951d2009-06-15 00:16:36 +00001012 case Ist_Store: {
1013 IRExpr* data = st->Ist.Store.data;
1014 IRExpr* aexpr = st->Ist.Store.addr;
1015 addEvent_Dw( &clgs, curr_inode,
1016 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
1017 break;
1018 }
weidendoa17f2a32006-03-20 10:27:30 +00001019
weidendo0a1951d2009-06-15 00:16:36 +00001020 case Ist_Dirty: {
1021 Int dataSize;
1022 IRDirty* d = st->Ist.Dirty.details;
1023 if (d->mFx != Ifx_None) {
1024 /* This dirty helper accesses memory. Collect the details. */
1025 tl_assert(d->mAddr != NULL);
1026 tl_assert(d->mSize != 0);
1027 dataSize = d->mSize;
1028 // Large (eg. 28B, 108B, 512B on x86) data-sized
1029 // instructions will be done inaccurately, but they're
1030 // very rare and this avoids errors from hitting more
1031 // than two cache lines in the simulation.
1032 if (dataSize > MIN_LINE_SIZE)
1033 dataSize = MIN_LINE_SIZE;
1034 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1035 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
1036 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1037 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
1038 } else {
1039 tl_assert(d->mAddr == NULL);
1040 tl_assert(d->mSize == 0);
1041 }
1042 break;
1043 }
weidendoa17f2a32006-03-20 10:27:30 +00001044
sewardj1c0ce7a2009-07-01 08:10:49 +00001045 case Ist_CAS: {
1046 /* We treat it as a read and a write of the location. I
1047 think that is the same behaviour as it was before IRCAS
1048 was introduced, since prior to that point, the Vex
1049 front ends would translate a lock-prefixed instruction
1050 into a (normal) read followed by a (normal) write. */
1051 Int dataSize;
1052 IRCAS* cas = st->Ist.CAS.details;
1053 CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
1054 CLG_ASSERT(cas->dataLo);
1055 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
1056 if (cas->dataHi != NULL)
1057 dataSize *= 2; /* since this is a doubleword-cas */
1058 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
1059 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
weidendoaeb86222010-06-09 22:33:02 +00001060 addEvent_G( &clgs, curr_inode );
sewardj1c0ce7a2009-07-01 08:10:49 +00001061 break;
1062 }
sewardjdb5907d2009-11-26 17:20:21 +00001063
1064 case Ist_LLSC: {
1065 IRType dataTy;
1066 if (st->Ist.LLSC.storedata == NULL) {
1067 /* LL */
1068 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
1069 addEvent_Dr( &clgs, curr_inode,
1070 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1071 } else {
1072 /* SC */
1073 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
1074 addEvent_Dw( &clgs, curr_inode,
1075 sizeofIRType(dataTy), st->Ist.LLSC.addr );
weidendoaeb86222010-06-09 22:33:02 +00001076 /* I don't know whether the global-bus-lock cost should
1077 be attributed to the LL or the SC, but it doesn't
1078 really matter since they always have to be used in
1079 pairs anyway. Hence put it (quite arbitrarily) on
1080 the SC. */
1081 addEvent_G( &clgs, curr_inode );
sewardjdb5907d2009-11-26 17:20:21 +00001082 }
1083 break;
1084 }
1085
1086 case Ist_Exit: {
weidendo320705f2010-07-02 19:56:23 +00001087 Bool guest_exit, inverted;
1088
1089 /* VEX code generation sometimes inverts conditional branches.
1090 * As Callgrind counts (conditional) jumps, it has to correct
1091 * inversions. The heuristic is the following:
1092 * (1) Callgrind switches off SB chasing and unrolling, and
1093 * therefore it assumes that a candidate for inversion only is
1094 * the last conditional branch in an SB.
1095 * (2) inversion is assumed if the branch jumps to the address of
1096 * the next guest instruction in memory.
1097 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1098 *
1099 * Branching behavior is also used for branch prediction. Note that
1100 * above heuristic is different from what Cachegrind does.
1101 * Cachegrind uses (2) for all branches.
1102 */
1103 if (cJumps+1 == clgs.bb->cjmp_count)
1104 inverted = clgs.bb->cjmp_inverted;
1105 else
1106 inverted = False;
1107
1108 // call branch predictor only if this is a branch in guest code
1109 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
1110 (st->Ist.Exit.jk == Ijk_Call) ||
1111 (st->Ist.Exit.jk == Ijk_Ret);
1112
1113 if (guest_exit) {
1114 /* Stuff to widen the guard expression to a host word, so
1115 we can pass it to the branch predictor simulation
1116 functions easily. */
1117 IRType tyW = hWordTy;
1118 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1119 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1120 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
1121 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
1122 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW);
1123 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1124 : IRExpr_Const(IRConst_U64(1));
1125
1126 /* Widen the guard expression. */
1127 addStmtToIRSB( clgs.sbOut,
1128 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1129 addStmtToIRSB( clgs.sbOut,
1130 IRStmt_WrTmp( guardW,
1131 IRExpr_Unop(widen,
1132 IRExpr_RdTmp(guard1))) );
1133 /* If the exit is inverted, invert the sense of the guard. */
1134 addStmtToIRSB(
1135 clgs.sbOut,
1136 IRStmt_WrTmp(
1137 guard,
1138 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1139 : IRExpr_RdTmp(guardW)
1140 ));
1141 /* And post the event. */
1142 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
1143 }
weidendoa17f2a32006-03-20 10:27:30 +00001144
weidendo0a1951d2009-06-15 00:16:36 +00001145 /* We may never reach the next statement, so need to flush
1146 all outstanding transactions now. */
1147 flushEvents( &clgs );
weidendoa17f2a32006-03-20 10:27:30 +00001148
weidendo0a1951d2009-06-15 00:16:36 +00001149 CLG_ASSERT(clgs.ii_index>0);
1150 if (!clgs.seen_before) {
1151 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1152 clgs.bb->jmp[cJumps].skip = False;
1153 }
1154
1155 /* Update global variable jmps_passed before the jump
1156 * A correction is needed if VEX inverted the last jump condition
1157 */
weidendo0a1951d2009-06-15 00:16:36 +00001158 addConstMemStoreStmt( clgs.sbOut,
1159 (UWord) &CLG_(current_state).jmps_passed,
weidendo320705f2010-07-02 19:56:23 +00001160 inverted ? cJumps+1 : cJumps, hWordTy);
weidendo0a1951d2009-06-15 00:16:36 +00001161 cJumps++;
1162
1163 break;
1164 }
1165
1166 default:
1167 tl_assert(0);
1168 break;
1169 }
1170
1171 /* Copy the original statement */
1172 addStmtToIRSB( clgs.sbOut, st );
1173
1174 CLG_DEBUGIF(5) {
1175 VG_(printf)(" pass ");
1176 ppIRStmt(st);
1177 VG_(printf)("\n");
1178 }
weidendoa17f2a32006-03-20 10:27:30 +00001179 }
weidendoa17f2a32006-03-20 10:27:30 +00001180
weidendo320705f2010-07-02 19:56:23 +00001181 /* Deal with branches to unknown destinations. Except ignore ones
1182 which are function returns as we assume the return stack
1183 predictor never mispredicts. */
weidendocb3ccf32010-07-07 18:51:59 +00001184 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
weidendo320705f2010-07-02 19:56:23 +00001185 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1186 switch (sbIn->next->tag) {
1187 case Iex_Const:
1188 break; /* boring - branch to known address */
1189 case Iex_RdTmp:
1190 /* looks like an indirect branch (branch to unknown) */
1191 addEvent_Bi( &clgs, curr_inode, sbIn->next );
1192 break;
1193 default:
1194 /* shouldn't happen - if the incoming IR is properly
1195 flattened, should only have tmp and const cases to
1196 consider. */
1197 tl_assert(0);
1198 }
1199 }
1200
weidendo0a1951d2009-06-15 00:16:36 +00001201 /* At the end of the bb. Flush outstandings. */
1202 flushEvents( &clgs );
1203
1204 /* Always update global variable jmps_passed at end of bb.
weidendoa17f2a32006-03-20 10:27:30 +00001205 * A correction is needed if VEX inverted the last jump condition
1206 */
weidendo0a1951d2009-06-15 00:16:36 +00001207 {
1208 UInt jmps_passed = cJumps;
1209 if (clgs.bb->cjmp_inverted) jmps_passed--;
1210 addConstMemStoreStmt( clgs.sbOut,
1211 (UWord) &CLG_(current_state).jmps_passed,
1212 jmps_passed, hWordTy);
1213 }
1214 CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
1215 CLG_ASSERT(clgs.bb->instr_count = clgs.ii_index);
weidendoa17f2a32006-03-20 10:27:30 +00001216
1217 /* This stores the instr of the call/ret at BB end */
weidendo0a1951d2009-06-15 00:16:36 +00001218 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
weidendoa17f2a32006-03-20 10:27:30 +00001219
weidendo0a1951d2009-06-15 00:16:36 +00001220 if (clgs.seen_before) {
1221 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
1222 CLG_ASSERT(clgs.bb->instr_len = clgs.instr_offset);
1223 CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
weidendoa17f2a32006-03-20 10:27:30 +00001224 }
1225 else {
weidendo0a1951d2009-06-15 00:16:36 +00001226 clgs.bb->cost_count = update_cost_offsets(&clgs);
1227 clgs.bb->instr_len = clgs.instr_offset;
1228 clgs.bb->jmpkind = sbIn->jumpkind;
weidendoa17f2a32006-03-20 10:27:30 +00001229 }
weidendo0a1951d2009-06-15 00:16:36 +00001230
barta0b6b2c2008-07-07 06:49:24 +00001231 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
weidendo0a1951d2009-06-15 00:16:36 +00001232 origAddr, clgs.bb->instr_len,
1233 clgs.bb->cjmp_count, clgs.bb->cost_count);
weidendoa17f2a32006-03-20 10:27:30 +00001234 if (cJumps>0) {
1235 CLG_DEBUG(3, " [ ");
1236 for (i=0;i<cJumps;i++)
weidendo0a1951d2009-06-15 00:16:36 +00001237 CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
1238 CLG_DEBUG(3, "], last inverted: %s \n",
1239 clgs.bb->cjmp_inverted ? "yes":"no");
weidendoa17f2a32006-03-20 10:27:30 +00001240 }
1241
weidendo0a1951d2009-06-15 00:16:36 +00001242 return clgs.sbOut;
weidendoa17f2a32006-03-20 10:27:30 +00001243}
1244
1245/*--------------------------------------------------------------------*/
1246/*--- Discarding BB info ---*/
1247/*--------------------------------------------------------------------*/
1248
1249// Called when a translation is removed from the translation cache for
1250// any reason at all: to free up space, because the guest code was
1251// unmapped or modified, or for any arbitrary reason.
1252static
sewardj0b9d74a2006-12-24 02:24:11 +00001253void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
weidendoa17f2a32006-03-20 10:27:30 +00001254{
1255 Addr orig_addr = (Addr)orig_addr64;
1256
1257 tl_assert(vge.n_used > 0);
1258
1259 if (0)
sewardj0b9d74a2006-12-24 02:24:11 +00001260 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001261 (void*)(Addr)orig_addr,
1262 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
1263
1264 // Get BB info, remove from table, free BB info. Simple! Note that we
1265 // use orig_addr, not the first instruction address in vge.
1266 CLG_(delete_bb)(orig_addr);
1267}
1268
1269
1270/*------------------------------------------------------------*/
1271/*--- CLG_(fini)() and related function ---*/
1272/*------------------------------------------------------------*/
1273
1274
1275
1276static void zero_thread_cost(thread_info* t)
1277{
1278 Int i;
1279
1280 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1281 if (!CLG_(current_call_stack).entry[i].jcc) continue;
1282
1283 /* reset call counters to current for active calls */
1284 CLG_(copy_cost)( CLG_(sets).full,
1285 CLG_(current_call_stack).entry[i].enter_cost,
1286 CLG_(current_state).cost );
weidendoceb06de2009-08-11 20:53:57 +00001287 CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001288 }
1289
1290 CLG_(forall_bbccs)(CLG_(zero_bbcc));
1291
1292 /* set counter for last dump */
1293 CLG_(copy_cost)( CLG_(sets).full,
1294 t->lastdump_cost, CLG_(current_state).cost );
1295}
1296
1297void CLG_(zero_all_cost)(Bool only_current_thread)
1298{
1299 if (VG_(clo_verbosity) > 1)
sewardj0f33adf2009-07-15 14:51:03 +00001300 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n");
weidendoa17f2a32006-03-20 10:27:30 +00001301
1302 if (only_current_thread)
1303 zero_thread_cost(CLG_(get_current_thread)());
1304 else
1305 CLG_(forall_threads)(zero_thread_cost);
1306
1307 if (VG_(clo_verbosity) > 1)
sewardj0f33adf2009-07-15 14:51:03 +00001308 VG_(message)(Vg_DebugMsg, " ...done\n");
weidendoa17f2a32006-03-20 10:27:30 +00001309}
1310
1311static
1312void unwind_thread(thread_info* t)
1313{
1314 /* unwind signal handlers */
1315 while(CLG_(current_state).sig !=0)
1316 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1317
1318 /* unwind regular call stack */
1319 while(CLG_(current_call_stack).sp>0)
1320 CLG_(pop_call_stack)();
weidendof3e0b492006-09-10 22:34:20 +00001321
1322 /* reset context and function stack for context generation */
1323 CLG_(init_exec_state)( &CLG_(current_state) );
1324 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
weidendoa17f2a32006-03-20 10:27:30 +00001325}
1326
weidendoe8914872009-08-11 20:53:59 +00001327static
1328void zero_state_cost(thread_info* t)
1329{
1330 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
1331}
1332
weidendo722be392011-07-21 20:24:54 +00001333/* Ups, this can go very wrong... */
1334extern void VG_(discard_translations) ( Addr64 start, ULong range, HChar* who );
weidendoa17f2a32006-03-20 10:27:30 +00001335
1336void CLG_(set_instrument_state)(Char* reason, Bool state)
1337{
1338 if (CLG_(instrument_state) == state) {
1339 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1340 reason, state ? "ON" : "OFF");
1341 return;
1342 }
1343 CLG_(instrument_state) = state;
1344 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1345 reason, state ? "ON" : "OFF");
1346
weidendo722be392011-07-21 20:24:54 +00001347 VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl, "callgrind");
weidendoa17f2a32006-03-20 10:27:30 +00001348
1349 /* reset internal state: call stacks, simulator */
1350 CLG_(forall_threads)(unwind_thread);
weidendoe8914872009-08-11 20:53:59 +00001351 CLG_(forall_threads)(zero_state_cost);
weidendoa17f2a32006-03-20 10:27:30 +00001352 (*CLG_(cachesim).clear)();
weidendoa17f2a32006-03-20 10:27:30 +00001353
weidendoa17f2a32006-03-20 10:27:30 +00001354 if (VG_(clo_verbosity) > 1)
sewardj0f33adf2009-07-15 14:51:03 +00001355 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
weidendoa17f2a32006-03-20 10:27:30 +00001356 reason, state ? "ON" : "OFF");
1357}
weidendo584c9ef2011-07-11 14:46:41 +00001358
1359/* helper for dump_state_togdb */
1360static void dump_state_of_thread_togdb(thread_info* ti)
1361{
1362 static Char buf[512];
1363 static FullCost sum = 0, tmp = 0;
1364 Int t, p, i;
1365 BBCC *from, *to;
1366 call_entry* ce;
1367
1368 t = CLG_(current_tid);
1369 CLG_(init_cost_lz)( CLG_(sets).full, &sum );
1370 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
1371 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
1372 ti->states.entry[0]->cost);
1373 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
1374 CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), sum);
1375 VG_(gdb_printf)("events-%d: %s\n", t, buf);
1376 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);
1377
1378 ce = 0;
1379 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1380 ce = CLG_(get_call_entry)(i);
1381 /* if this frame is skipped, we don't have counters */
1382 if (!ce->jcc) continue;
1383
1384 from = ce->jcc->from;
1385 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name);
1386 VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);
1387
1388 /* FIXME: EventSets! */
1389 CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
1390 CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
1391 CLG_(add_diff_cost)( CLG_(sets).full, sum,
1392 ce->enter_cost, CLG_(current_state).cost );
1393 CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );
1394
1395 p = VG_(sprintf)(buf, "events-%d-%d: ",t, i);
1396 CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum );
1397 VG_(gdb_printf)("%s\n", buf);
1398 }
1399 if (ce && ce->jcc) {
1400 to = ce->jcc->to;
1401 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
1402 }
1403}
1404
1405/* Dump current state */
1406static void dump_state_togdb(void)
1407{
1408 static Char buf[512];
1409 thread_info** th;
1410 int t, p;
1411 Int orig_tid = CLG_(current_tid);
1412
1413 VG_(gdb_printf)("instrumentation: %s\n",
1414 CLG_(instrument_state) ? "on":"off");
1415 if (!CLG_(instrument_state)) return;
1416
1417 VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
1418 VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
1419 VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
1420 VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
1421 VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
1422 VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
1423
1424 /* "events:" line. Given here because it will be dynamic in the future */
1425 p = VG_(sprintf)(buf, "events: ");
1426 CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap));
1427 VG_(gdb_printf)("%s\n", buf);
1428 /* "part:" line (number of last part. Is 0 at start */
1429 VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());
1430
1431 /* threads */
1432 th = CLG_(get_threads)();
1433 p = VG_(sprintf)(buf, "threads:");
1434 for(t=1;t<VG_N_THREADS;t++) {
1435 if (!th[t]) continue;
1436 p += VG_(sprintf)(buf+p, " %d", t);
1437 }
1438 VG_(gdb_printf)("%s\n", buf);
1439 VG_(gdb_printf)("current-tid: %d\n", orig_tid);
1440 CLG_(forall_threads)(dump_state_of_thread_togdb);
1441}
1442
weidendoa17f2a32006-03-20 10:27:30 +00001443
sewardj3b290482011-05-06 21:02:55 +00001444static void print_monitor_help ( void )
1445{
1446 VG_(gdb_printf) ("\n");
1447 VG_(gdb_printf) ("callgrind monitor commands:\n");
sewardj30b3eca2011-06-28 08:20:39 +00001448 VG_(gdb_printf) (" dump [<dump_hint>]\n");
sewardj3b290482011-05-06 21:02:55 +00001449 VG_(gdb_printf) (" dump counters\n");
sewardj30b3eca2011-06-28 08:20:39 +00001450 VG_(gdb_printf) (" zero\n");
sewardj3b290482011-05-06 21:02:55 +00001451 VG_(gdb_printf) (" zero counters\n");
weidendo584c9ef2011-07-11 14:46:41 +00001452 VG_(gdb_printf) (" status\n");
1453 VG_(gdb_printf) (" print status (statistics and shadow stacks)\n");
1454 VG_(gdb_printf) (" instrumentation [on|off]\n");
1455 VG_(gdb_printf) (" get/set (if on/off given) instrumentation state\n");
sewardj3b290482011-05-06 21:02:55 +00001456 VG_(gdb_printf) ("\n");
1457}
1458
1459/* return True if request recognised, False otherwise */
1460static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
1461{
1462 Char* wcmd;
1463 Char s[VG_(strlen(req))]; /* copy for strtok_r */
1464 Char *ssaveptr;
1465
1466 VG_(strcpy) (s, req);
1467
1468 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
weidendo584c9ef2011-07-11 14:46:41 +00001469 switch (VG_(keyword_id) ("help dump zero status instrumentation",
sewardj3b290482011-05-06 21:02:55 +00001470 wcmd, kwd_report_duplicated_matches)) {
1471 case -2: /* multiple matches */
1472 return True;
1473 case -1: /* not found */
1474 return False;
1475 case 0: /* help */
1476 print_monitor_help();
1477 return True;
sewardj30b3eca2011-06-28 08:20:39 +00001478 case 1: { /* dump */
sewardj3b290482011-05-06 21:02:55 +00001479 CLG_(dump_profile)(req, False);
1480 return True;
1481 }
sewardj30b3eca2011-06-28 08:20:39 +00001482 case 2: { /* zero */
sewardj3b290482011-05-06 21:02:55 +00001483 CLG_(zero_all_cost)(False);
1484 return True;
1485 }
weidendo584c9ef2011-07-11 14:46:41 +00001486 case 3: { /* status */
1487 dump_state_togdb();
1488 return True;
1489 }
1490 case 4: { /* instrumentation */
1491 Char* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1492 if (!arg) {
1493 VG_(gdb_printf)("instrumentation: %s\n",
1494 CLG_(instrument_state) ? "on":"off");
1495 }
1496 else
1497 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
1498 return True;
1499 }
sewardj3b290482011-05-06 21:02:55 +00001500
1501 default:
1502 tl_assert(0);
1503 return False;
1504 }
1505}
weidendoa17f2a32006-03-20 10:27:30 +00001506
1507static
1508Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1509{
sewardj3b290482011-05-06 21:02:55 +00001510 if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1511 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
weidendoa17f2a32006-03-20 10:27:30 +00001512 return False;
1513
1514 switch(args[0]) {
1515 case VG_USERREQ__DUMP_STATS:
1516 CLG_(dump_profile)("Client Request", True);
1517 *ret = 0; /* meaningless */
1518 break;
1519
1520 case VG_USERREQ__DUMP_STATS_AT:
1521 {
1522 Char buf[512];
njn8a7b41b2007-09-23 00:51:24 +00001523 VG_(sprintf)(buf,"Client Request: %s", (Char*)args[1]);
weidendoa17f2a32006-03-20 10:27:30 +00001524 CLG_(dump_profile)(buf, True);
1525 *ret = 0; /* meaningless */
1526 }
1527 break;
1528
1529 case VG_USERREQ__ZERO_STATS:
1530 CLG_(zero_all_cost)(True);
1531 *ret = 0; /* meaningless */
1532 break;
1533
1534 case VG_USERREQ__TOGGLE_COLLECT:
1535 CLG_(current_state).collect = !CLG_(current_state).collect;
1536 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1537 CLG_(current_state).collect ? "ON" : "OFF");
1538 *ret = 0; /* meaningless */
1539 break;
1540
1541 case VG_USERREQ__START_INSTRUMENTATION:
1542 CLG_(set_instrument_state)("Client Request", True);
1543 *ret = 0; /* meaningless */
1544 break;
1545
1546 case VG_USERREQ__STOP_INSTRUMENTATION:
1547 CLG_(set_instrument_state)("Client Request", False);
1548 *ret = 0; /* meaningless */
1549 break;
1550
sewardj3b290482011-05-06 21:02:55 +00001551 case VG_USERREQ__GDB_MONITOR_COMMAND: {
1552 Bool handled = handle_gdb_monitor_command (tid, (Char*)args[1]);
1553 if (handled)
1554 *ret = 1;
1555 else
1556 *ret = 0;
1557 return handled;
1558 }
weidendoa17f2a32006-03-20 10:27:30 +00001559 default:
1560 return False;
1561 }
1562
1563 return True;
1564}
1565
1566
1567/* Syscall Timing */
1568
1569/* struct timeval syscalltime[VG_N_THREADS]; */
1570#if CLG_MICROSYSTIME
1571#include <sys/time.h>
1572#include <sys/syscall.h>
1573extern Int VG_(do_syscall) ( UInt, ... );
1574
1575ULong syscalltime[VG_N_THREADS];
1576#else
1577UInt syscalltime[VG_N_THREADS];
1578#endif
1579
1580static
sewardj1c0ce7a2009-07-01 08:10:49 +00001581void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1582 UWord* args, UInt nArgs)
weidendoa17f2a32006-03-20 10:27:30 +00001583{
1584 if (CLG_(clo).collect_systime) {
1585#if CLG_MICROSYSTIME
1586 struct vki_timeval tv_now;
1587 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
1588 syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
1589#else
1590 syscalltime[tid] = VG_(read_millisecond_timer)();
1591#endif
1592 }
1593}
1594
1595static
sewardj1c0ce7a2009-07-01 08:10:49 +00001596void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1597 UWord* args, UInt nArgs, SysRes res)
weidendoa17f2a32006-03-20 10:27:30 +00001598{
weidendoae0bb6f2007-02-16 13:12:43 +00001599 if (CLG_(clo).collect_systime &&
1600 CLG_(current_state).bbcc) {
weidendo5bba5252010-06-09 22:32:53 +00001601 Int o;
weidendoa17f2a32006-03-20 10:27:30 +00001602#if CLG_MICROSYSTIME
1603 struct vki_timeval tv_now;
1604 ULong diff;
1605
1606 VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
1607 diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
1608#else
1609 UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
1610#endif
weidendo5bba5252010-06-09 22:32:53 +00001611
1612 /* offset o is for "SysCount", o+1 for "SysTime" */
1613 o = fullOffset(EG_SYS);
1614 CLG_ASSERT(o>=0);
weidendoa17f2a32006-03-20 10:27:30 +00001615 CLG_DEBUG(0," Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff);
1616
weidendoa17f2a32006-03-20 10:27:30 +00001617 CLG_(current_state).cost[o] ++;
1618 CLG_(current_state).cost[o+1] += diff;
1619 if (!CLG_(current_state).bbcc->skipped)
1620 CLG_(init_cost_lz)(CLG_(sets).full,
1621 &(CLG_(current_state).bbcc->skipped));
1622 CLG_(current_state).bbcc->skipped[o] ++;
1623 CLG_(current_state).bbcc->skipped[o+1] += diff;
1624 }
1625}
1626
/* Return the number of characters needed to print n in decimal with a
 * thousands separator after every third digit (counted from the right).
 * Zero counts as one digit. */
static UInt ULong_width(ULong n)
{
    UInt digits = 0;

    /* Count decimal digits; the loop body runs at least once, so the
       value 0 yields a width of one digit. */
    do {
        digits++;
        n /= 10;
    } while (n > 0);

    /* one separator per complete group of three digits left of the
       leading group */
    return digits + (digits - 1) / 3;
}
1637
1638static
1639void branchsim_printstat(int l1, int l2, int l3)
1640{
1641 static Char buf1[128], buf2[128], buf3[128], fmt[128];
1642 FullCost total;
1643 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1644 ULong B_total_b, B_total_mp;
1645
1646 total = CLG_(total_cost);
1647 Bc_total_b = total[ fullOffset(EG_BC) ];
1648 Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1649 Bi_total_b = total[ fullOffset(EG_BI) ];
1650 Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1651
1652 /* Make format string, getting width right for numbers */
1653 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1654 l1, l2, l3);
1655
1656 if (0 == Bc_total_b) Bc_total_b = 1;
1657 if (0 == Bi_total_b) Bi_total_b = 1;
1658 B_total_b = Bc_total_b + Bi_total_b;
1659 B_total_mp = Bc_total_mp + Bi_total_mp;
1660
1661 VG_(umsg)("\n");
1662 VG_(umsg)(fmt, "Branches: ",
1663 B_total_b, Bc_total_b, Bi_total_b);
1664
1665 VG_(umsg)(fmt, "Mispredicts: ",
1666 B_total_mp, Bc_total_mp, Bi_total_mp);
1667
1668 VG_(percentify)(B_total_mp, B_total_b, 1, l1+1, buf1);
1669 VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
1670 VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);
1671
1672 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
1673}
1674
1675
weidendoa17f2a32006-03-20 10:27:30 +00001676static
1677void finish(void)
1678{
weidendo0ecd49e2010-10-04 19:17:21 +00001679 Char buf[32+COSTS_LEN], fmt[128];
weidendo320705f2010-07-02 19:56:23 +00001680 Int l1, l2, l3;
1681 FullCost total;
weidendoa17f2a32006-03-20 10:27:30 +00001682
1683 CLG_DEBUG(0, "finish()\n");
1684
1685 (*CLG_(cachesim).finish)();
1686
1687 /* pop all remaining items from CallStack for correct sum
1688 */
1689 CLG_(forall_threads)(unwind_thread);
sewardje45a7992006-10-17 02:24:18 +00001690
weidendoa17f2a32006-03-20 10:27:30 +00001691 CLG_(dump_profile)(0, False);
sewardje45a7992006-10-17 02:24:18 +00001692
weidendoa17f2a32006-03-20 10:27:30 +00001693 CLG_(finish_command)();
sewardje45a7992006-10-17 02:24:18 +00001694
weidendoa17f2a32006-03-20 10:27:30 +00001695 if (VG_(clo_verbosity) == 0) return;
1696
1697 /* Hash table stats */
sewardj2d9e8742009-08-07 15:46:56 +00001698 if (VG_(clo_stats)) {
weidendoa17f2a32006-03-20 10:27:30 +00001699 int BB_lookups =
1700 CLG_(stat).full_debug_BBs +
1701 CLG_(stat).fn_name_debug_BBs +
1702 CLG_(stat).file_line_debug_BBs +
1703 CLG_(stat).no_debug_BBs;
1704
sewardj0f33adf2009-07-15 14:51:03 +00001705 VG_(message)(Vg_DebugMsg, "\n");
1706 VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001707 CLG_(stat).distinct_objs);
sewardj0f33adf2009-07-15 14:51:03 +00001708 VG_(message)(Vg_DebugMsg, "Distinct files: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001709 CLG_(stat).distinct_files);
sewardj0f33adf2009-07-15 14:51:03 +00001710 VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001711 CLG_(stat).distinct_fns);
sewardj0f33adf2009-07-15 14:51:03 +00001712 VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001713 CLG_(stat).distinct_contexts);
sewardj0f33adf2009-07-15 14:51:03 +00001714 VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001715 CLG_(stat).distinct_bbs);
sewardj0f33adf2009-07-15 14:51:03 +00001716 VG_(message)(Vg_DebugMsg, "Cost entries: %d (Chunks %d)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001717 CLG_(costarray_entries), CLG_(costarray_chunks));
sewardj0f33adf2009-07-15 14:51:03 +00001718 VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001719 CLG_(stat).distinct_bbccs);
sewardj0f33adf2009-07-15 14:51:03 +00001720 VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001721 CLG_(stat).distinct_jccs);
sewardj0f33adf2009-07-15 14:51:03 +00001722 VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001723 CLG_(stat).distinct_skips);
sewardj0f33adf2009-07-15 14:51:03 +00001724 VG_(message)(Vg_DebugMsg, "BB lookups: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001725 BB_lookups);
1726 if (BB_lookups>0) {
sewardj0f33adf2009-07-15 14:51:03 +00001727 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001728 CLG_(stat).full_debug_BBs * 100 / BB_lookups,
1729 CLG_(stat).full_debug_BBs);
sewardj0f33adf2009-07-15 14:51:03 +00001730 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001731 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
1732 CLG_(stat).file_line_debug_BBs);
sewardj0f33adf2009-07-15 14:51:03 +00001733 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001734 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
1735 CLG_(stat).fn_name_debug_BBs);
sewardj0f33adf2009-07-15 14:51:03 +00001736 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001737 CLG_(stat).no_debug_BBs * 100 / BB_lookups,
1738 CLG_(stat).no_debug_BBs);
1739 }
sewardj0f33adf2009-07-15 14:51:03 +00001740 VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001741 CLG_(stat).bbcc_clones);
sewardj0f33adf2009-07-15 14:51:03 +00001742 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001743 CLG_(stat).bb_retranslations);
sewardj0f33adf2009-07-15 14:51:03 +00001744 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001745 CLG_(stat).distinct_instrs);
1746 VG_(message)(Vg_DebugMsg, "");
1747
sewardj0f33adf2009-07-15 14:51:03 +00001748 VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001749 CLG_(stat).cxt_lru_misses);
sewardj0f33adf2009-07-15 14:51:03 +00001750 VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001751 CLG_(stat).bbcc_lru_misses);
sewardj0f33adf2009-07-15 14:51:03 +00001752 VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n",
weidendoa17f2a32006-03-20 10:27:30 +00001753 CLG_(stat).jcc_lru_misses);
sewardj0f33adf2009-07-15 14:51:03 +00001754 VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001755 CLG_(stat).bb_executions);
sewardj0f33adf2009-07-15 14:51:03 +00001756 VG_(message)(Vg_DebugMsg, "Calls: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001757 CLG_(stat).call_counter);
sewardj0f33adf2009-07-15 14:51:03 +00001758 VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001759 CLG_(stat).jcnd_counter);
sewardj0f33adf2009-07-15 14:51:03 +00001760 VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001761 CLG_(stat).jump_counter);
sewardj0f33adf2009-07-15 14:51:03 +00001762 VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001763 CLG_(stat).rec_call_counter);
sewardj0f33adf2009-07-15 14:51:03 +00001764 VG_(message)(Vg_DebugMsg, "Returns: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001765 CLG_(stat).ret_counter);
1766
1767 VG_(message)(Vg_DebugMsg, "");
1768 }
1769
1770 CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
sewardj0f33adf2009-07-15 14:51:03 +00001771 VG_(message)(Vg_UserMsg, "Events : %s\n", buf);
weidendoa17f2a32006-03-20 10:27:30 +00001772 CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
sewardj0f33adf2009-07-15 14:51:03 +00001773 VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
1774 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001775
weidendo320705f2010-07-02 19:56:23 +00001776 /* determine value widths for statistics */
1777 total = CLG_(total_cost);
1778 l1 = ULong_width( total[fullOffset(EG_IR)] );
1779 l2 = l3 = 0;
1780 if (CLG_(clo).simulate_cache) {
1781 l2 = ULong_width( total[fullOffset(EG_DR)] );
1782 l3 = ULong_width( total[fullOffset(EG_DW)] );
1783 }
1784 if (CLG_(clo).simulate_branch) {
1785 int l2b = ULong_width( total[fullOffset(EG_BC)] );
1786 int l3b = ULong_width( total[fullOffset(EG_BI)] );
1787 if (l2b > l2) l2 = l2b;
1788 if (l3b > l3) l3 = l3b;
1789 }
1790
1791 /* Make format string, getting width right for numbers */
1792 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1793
1794 /* Always print this */
1795 VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] );
1796
1797 if (CLG_(clo).simulate_cache)
1798 (*CLG_(cachesim).printstat)(l1, l2, l3);
1799
1800 if (CLG_(clo).simulate_branch)
1801 branchsim_printstat(l1, l2, l3);
1802
weidendoa17f2a32006-03-20 10:27:30 +00001803}
1804
1805
1806void CLG_(fini)(Int exitcode)
1807{
1808 finish();
1809}
1810
1811
1812/*--------------------------------------------------------------------*/
1813/*--- Setup ---*/
1814/*--------------------------------------------------------------------*/
1815
njn3e32c872006-12-24 07:51:17 +00001816static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
sewardj97561812006-12-23 01:21:12 +00001817{
weidendo134657c2006-12-23 23:11:20 +00001818 static ULong last_blocks_done = 0;
1819
sewardj97561812006-12-23 01:21:12 +00001820 if (0)
njn3e32c872006-12-24 07:51:17 +00001821 VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);
weidendo134657c2006-12-23 23:11:20 +00001822
1823 /* throttle calls to CLG_(run_thread) by number of BBs executed */
1824 if (blocks_done - last_blocks_done < 5000) return;
1825 last_blocks_done = blocks_done;
1826
1827 CLG_(run_thread)( tid );
sewardj97561812006-12-23 01:21:12 +00001828}
1829
weidendoa17f2a32006-03-20 10:27:30 +00001830static
1831void CLG_(post_clo_init)(void)
1832{
weidendoa17f2a32006-03-20 10:27:30 +00001833 VG_(clo_vex_control).iropt_unroll_thresh = 0;
1834 VG_(clo_vex_control).guest_chase_thresh = 0;
1835
1836 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
1837 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
1838 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);
1839
1840 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
sewardj0f33adf2009-07-15 14:51:03 +00001841 VG_(message)(Vg_UserMsg, "Using source line as position.\n");
weidendoa17f2a32006-03-20 10:27:30 +00001842 CLG_(clo).dump_line = True;
1843 }
1844
weidendo4ce5e792006-09-20 21:29:39 +00001845 CLG_(init_dumps)();
1846 CLG_(init_command)();
weidendoa17f2a32006-03-20 10:27:30 +00001847
1848 (*CLG_(cachesim).post_clo_init)();
1849
weidendo5bba5252010-06-09 22:32:53 +00001850 CLG_(init_eventsets)();
weidendoa17f2a32006-03-20 10:27:30 +00001851 CLG_(init_statistics)(& CLG_(stat));
1852 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
1853
1854 /* initialize hash tables */
1855 CLG_(init_obj_table)();
1856 CLG_(init_cxt_table)();
1857 CLG_(init_bb_hash)();
1858
1859 CLG_(init_threads)();
1860 CLG_(run_thread)(1);
1861
1862 CLG_(instrument_state) = CLG_(clo).instrument_atstart;
1863
weidendoca472c52006-03-31 19:34:51 +00001864 if (VG_(clo_verbosity > 0)) {
weidendoca472c52006-03-31 19:34:51 +00001865 VG_(message)(Vg_UserMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001866 "For interactive control, run 'callgrind_control -h'.\n");
weidendoca472c52006-03-31 19:34:51 +00001867 }
weidendoa17f2a32006-03-20 10:27:30 +00001868}
1869
1870static
1871void CLG_(pre_clo_init)(void)
1872{
1873 VG_(details_name) ("Callgrind");
weidendoca472c52006-03-31 19:34:51 +00001874 VG_(details_version) (NULL);
weidendoa17f2a32006-03-20 10:27:30 +00001875 VG_(details_description) ("a call-graph generating cache profiler");
sewardj9eecbbb2010-05-03 21:37:12 +00001876 VG_(details_copyright_author)("Copyright (C) 2002-2010, and GNU GPL'd, "
weidendoca472c52006-03-31 19:34:51 +00001877 "by Josef Weidendorfer et al.");
weidendodb70ed72006-05-27 15:39:45 +00001878 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardje45a7992006-10-17 02:24:18 +00001879 VG_(details_avg_translation_sizeB) ( 500 );
weidendoa17f2a32006-03-20 10:27:30 +00001880
1881 VG_(basic_tool_funcs) (CLG_(post_clo_init),
1882 CLG_(instrument),
1883 CLG_(fini));
1884
sewardj0b9d74a2006-12-24 02:24:11 +00001885 VG_(needs_superblock_discards)(clg_discard_superblock_info);
weidendoa17f2a32006-03-20 10:27:30 +00001886
1887
1888 VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
1889 CLG_(print_usage),
1890 CLG_(print_debug_usage));
1891
1892 VG_(needs_client_requests)(CLG_(handle_client_request));
1893 VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
1894 CLG_(post_syscalltime));
1895
njn3e32c872006-12-24 07:51:17 +00001896 VG_(track_start_client_code) ( & clg_start_client_code_callback );
1897 VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
1898 VG_(track_post_deliver_signal)( & CLG_(post_signal) );
weidendoa17f2a32006-03-20 10:27:30 +00001899
1900 CLG_(set_clo_defaults)();
1901}
1902
1903VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
1904
1905/*--------------------------------------------------------------------*/
1906/*--- end main.c ---*/
1907/*--------------------------------------------------------------------*/