blob: 8cd633f97681154d18b8b6e98e6cf46cf97d80bc [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njn101e5722005-04-21 02:37:54 +00003/*--- Cachegrind: everything but the simulation itself. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
sewardj03f8d3f2012-08-05 15:46:46 +000011 Copyright (C) 2002-2012 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000012 njn@valgrind.org
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
sewardj4cfea4f2006-10-14 19:26:10 +000033#include "pub_tool_vki.h"
njnea27e462005-05-31 02:38:09 +000034#include "pub_tool_debuginfo.h"
njn97405b22005-06-02 03:39:33 +000035#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000036#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +000037#include "pub_tool_libcfile.h"
njn36a20fa2005-06-03 03:08:39 +000038#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +000039#include "pub_tool_libcproc.h"
njnf536bbb2005-06-13 04:21:38 +000040#include "pub_tool_machine.h"
njn717cde52005-05-10 02:47:21 +000041#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000042#include "pub_tool_options.h"
njnd3bef4f2005-10-15 17:46:18 +000043#include "pub_tool_oset.h"
njn43b9a8a2005-05-10 04:37:01 +000044#include "pub_tool_tooliface.h"
sewardj14c7cc52007-02-25 15:08:24 +000045#include "pub_tool_xarray.h"
sewardj45f4e7c2005-09-27 19:20:21 +000046#include "pub_tool_clientstate.h"
sewardj5bb86822005-12-23 12:47:42 +000047#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
njn25e49d8e72002-09-23 09:36:25 +000048
nethercoteb35a8b92004-09-11 16:45:27 +000049#include "cg_arch.h"
nethercote27fc1da2004-01-04 16:56:57 +000050#include "cg_sim.c"
sewardj8badbaa2007-05-08 09:20:25 +000051#include "cg_branchpred.c"
njn4f9c9342002-04-29 16:03:24 +000052
njn25e49d8e72002-09-23 09:36:25 +000053/*------------------------------------------------------------*/
54/*--- Constants ---*/
55/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000056
sewardj5155dec2005-10-12 10:09:23 +000057/* Set to 1 for very verbose debugging */
58#define DEBUG_CG 0
59
nethercote9313ac42004-07-06 21:54:20 +000060#define MIN_LINE_SIZE 16
njnd3bef4f2005-10-15 17:46:18 +000061#define FILE_LEN VKI_PATH_MAX
nethercote9313ac42004-07-06 21:54:20 +000062#define FN_LEN 256
njn7cf0bd32002-06-08 13:36:03 +000063
64/*------------------------------------------------------------*/
sewardj8badbaa2007-05-08 09:20:25 +000065/*--- Options ---*/
66/*------------------------------------------------------------*/
67
njn374a36d2007-11-23 01:41:32 +000068static Bool clo_cache_sim = True; /* do cache simulation? */
69static Bool clo_branch_sim = False; /* do branch simulation? */
florian19f91bb2012-11-10 22:29:54 +000070static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
sewardj8badbaa2007-05-08 09:20:25 +000071
72/*------------------------------------------------------------*/
sewardj98763d52012-06-03 22:40:07 +000073/*--- Cachesim configuration ---*/
74/*------------------------------------------------------------*/
75
76static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
77
78/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000079/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000080/*------------------------------------------------------------*/
81
sewardj8badbaa2007-05-08 09:20:25 +000082typedef
83 struct {
84 ULong a; /* total # memory accesses of this kind */
85 ULong m1; /* misses in the first level cache */
njn2d853a12010-10-06 22:46:31 +000086 ULong mL; /* misses in the second level cache */
sewardj8badbaa2007-05-08 09:20:25 +000087 }
88 CacheCC;
89
90typedef
91 struct {
92 ULong b; /* total # branches of this kind */
93 ULong mp; /* number of branches mispredicted */
94 }
95 BranchCC;
njn4f9c9342002-04-29 16:03:24 +000096
nethercote9313ac42004-07-06 21:54:20 +000097//------------------------------------------------------------
98// Primary data structure #1: CC table
99// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
njnd3bef4f2005-10-15 17:46:18 +0000100// - an ordered set of CCs. CC indexing done by file/function/line (as
101// determined from the instrAddr).
nethercote9313ac42004-07-06 21:54:20 +0000102// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +0000103
njnd3bef4f2005-10-15 17:46:18 +0000104typedef struct {
florian19f91bb2012-11-10 22:29:54 +0000105 HChar* file;
106 HChar* fn;
107 Int line;
njnd3bef4f2005-10-15 17:46:18 +0000108}
109CodeLoc;
njn4f9c9342002-04-29 16:03:24 +0000110
sewardj8badbaa2007-05-08 09:20:25 +0000111typedef struct {
112 CodeLoc loc; /* Source location that these counts pertain to */
113 CacheCC Ir; /* Insn read counts */
114 CacheCC Dr; /* Data read counts */
115 CacheCC Dw; /* Data write/modify counts */
116 BranchCC Bc; /* Conditional branch counts */
117 BranchCC Bi; /* Indirect branch counts */
118} LineCC;
njn4f9c9342002-04-29 16:03:24 +0000119
njnd3bef4f2005-10-15 17:46:18 +0000120// First compare file, then fn, then line.
tom5a835d52007-12-30 12:28:26 +0000121static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
njnd3bef4f2005-10-15 17:46:18 +0000122{
njnafa12262005-12-24 03:10:56 +0000123 Word res;
njnd3bef4f2005-10-15 17:46:18 +0000124 CodeLoc* a = (CodeLoc*)vloc;
125 CodeLoc* b = &(((LineCC*)vcc)->loc);
njn4f9c9342002-04-29 16:03:24 +0000126
njnd3bef4f2005-10-15 17:46:18 +0000127 res = VG_(strcmp)(a->file, b->file);
128 if (0 != res)
129 return res;
njn4f9c9342002-04-29 16:03:24 +0000130
njnd3bef4f2005-10-15 17:46:18 +0000131 res = VG_(strcmp)(a->fn, b->fn);
132 if (0 != res)
133 return res;
134
135 return a->line - b->line;
136}
137
138static OSet* CC_table;
njn4f9c9342002-04-29 16:03:24 +0000139
nethercote9313ac42004-07-06 21:54:20 +0000140//------------------------------------------------------------
njnd3bef4f2005-10-15 17:46:18 +0000141// Primary data structure #2: InstrInfo table
nethercote9313ac42004-07-06 21:54:20 +0000142// - Holds the cached info about each instr that is used for simulation.
sewardj0b9d74a2006-12-24 02:24:11 +0000143// - table(SB_start_addr, list(InstrInfo))
144// - For each SB, each InstrInfo in the list holds info about the
njnd3bef4f2005-10-15 17:46:18 +0000145// instruction (instrLen, instrAddr, etc), plus a pointer to its line
nethercote9313ac42004-07-06 21:54:20 +0000146// CC. This node is what's passed to the simulation function.
sewardj0b9d74a2006-12-24 02:24:11 +0000147// - When SBs are discarded the relevant list(instr_details) is freed.
nethercote9313ac42004-07-06 21:54:20 +0000148
njnd3bef4f2005-10-15 17:46:18 +0000149typedef struct _InstrInfo InstrInfo;
150struct _InstrInfo {
nethercoteca1f2dc2004-07-21 08:49:02 +0000151 Addr instr_addr;
njn6a3009b2005-03-20 00:20:06 +0000152 UChar instr_len;
njnd3bef4f2005-10-15 17:46:18 +0000153 LineCC* parent; // parent line-CC
nethercote9313ac42004-07-06 21:54:20 +0000154};
155
sewardj0b9d74a2006-12-24 02:24:11 +0000156typedef struct _SB_info SB_info;
157struct _SB_info {
158 Addr SB_addr; // key; MUST BE FIRST
njnd3bef4f2005-10-15 17:46:18 +0000159 Int n_instrs;
160 InstrInfo instrs[0];
nethercote9313ac42004-07-06 21:54:20 +0000161};
162
njnd3bef4f2005-10-15 17:46:18 +0000163static OSet* instrInfoTable;
164
165//------------------------------------------------------------
166// Secondary data structure: string table
167// - holds strings, avoiding dups
168// - used for filenames and function names, each of which will be
169// pointed to by one or more CCs.
170// - it also allows equality checks just by pointer comparison, which
171// is good when printing the output file at the end.
172
173static OSet* stringTable;
nethercote9313ac42004-07-06 21:54:20 +0000174
175//------------------------------------------------------------
176// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000177static Int distinct_files = 0;
178static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000179static Int distinct_lines = 0;
weidendo6fc0de02012-10-30 00:28:29 +0000180static Int distinct_instrsGen = 0;
181static Int distinct_instrsNoX = 0;
nethercote9313ac42004-07-06 21:54:20 +0000182
njnd3bef4f2005-10-15 17:46:18 +0000183static Int full_debugs = 0;
184static Int file_line_debugs = 0;
185static Int fn_debugs = 0;
186static Int no_debugs = 0;
njn4f9c9342002-04-29 16:03:24 +0000187
nethercote9313ac42004-07-06 21:54:20 +0000188/*------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +0000189/*--- String table operations ---*/
190/*------------------------------------------------------------*/
191
tom5a835d52007-12-30 12:28:26 +0000192static Word stringCmp( const void* key, const void* elem )
njnd3bef4f2005-10-15 17:46:18 +0000193{
florian19f91bb2012-11-10 22:29:54 +0000194 return VG_(strcmp)(*(HChar**)key, *(HChar**)elem);
njnd3bef4f2005-10-15 17:46:18 +0000195}
196
197// Get a permanent string; either pull it out of the string table if it's
198// been encountered before, or dup it and put it into the string table.
florian19f91bb2012-11-10 22:29:54 +0000199static HChar* get_perm_string(HChar* s)
njnd3bef4f2005-10-15 17:46:18 +0000200{
florian19f91bb2012-11-10 22:29:54 +0000201 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
njnd3bef4f2005-10-15 17:46:18 +0000202 if (s_ptr) {
203 return *s_ptr;
204 } else {
florian19f91bb2012-11-10 22:29:54 +0000205 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
sewardj9c606bd2008-09-18 18:12:50 +0000206 *s_node = VG_(strdup)("cg.main.gps.1", s);
njne2a9ad32007-09-17 05:30:48 +0000207 VG_(OSetGen_Insert)(stringTable, s_node);
njnd3bef4f2005-10-15 17:46:18 +0000208 return *s_node;
209 }
210}
211
212/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000213/*--- CC table operations ---*/
214/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000215
florian19f91bb2012-11-10 22:29:54 +0000216static void get_debug_info(Addr instr_addr, HChar file[FILE_LEN],
217 HChar fn[FN_LEN], UInt* line)
njn4f9c9342002-04-29 16:03:24 +0000218{
florian19f91bb2012-11-10 22:29:54 +0000219 HChar dir[FILE_LEN];
njnf3b61d62007-09-17 00:41:07 +0000220 Bool found_dirname;
sewardj7cee6f92005-06-13 17:39:06 +0000221 Bool found_file_line = VG_(get_filename_linenum)(
222 instr_addr,
223 file, FILE_LEN,
njnf3b61d62007-09-17 00:41:07 +0000224 dir, FILE_LEN, &found_dirname,
sewardj7cee6f92005-06-13 17:39:06 +0000225 line
226 );
nethercote9313ac42004-07-06 21:54:20 +0000227 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000228
nethercote9313ac42004-07-06 21:54:20 +0000229 if (!found_file_line) {
230 VG_(strcpy)(file, "???");
231 *line = 0;
232 }
233 if (!found_fn) {
234 VG_(strcpy)(fn, "???");
235 }
njnf3b61d62007-09-17 00:41:07 +0000236
237 if (found_dirname) {
238 // +1 for the '/'.
239 tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
240 VG_(strcat)(dir, "/"); // Append '/'
241 VG_(strcat)(dir, file); // Append file to dir
242 VG_(strcpy)(file, dir); // Move dir+file to file
243 }
244
nethercote9313ac42004-07-06 21:54:20 +0000245 if (found_file_line) {
njnd3bef4f2005-10-15 17:46:18 +0000246 if (found_fn) full_debugs++;
247 else file_line_debugs++;
nethercote9313ac42004-07-06 21:54:20 +0000248 } else {
njnd3bef4f2005-10-15 17:46:18 +0000249 if (found_fn) fn_debugs++;
250 else no_debugs++;
njn4f9c9342002-04-29 16:03:24 +0000251 }
252}
253
nethercote9313ac42004-07-06 21:54:20 +0000254// Do a three step traversal: by file, then fn, then line.
njnd3bef4f2005-10-15 17:46:18 +0000255// Returns a pointer to the line CC, creates a new one if necessary.
256static LineCC* get_lineCC(Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000257{
florian19f91bb2012-11-10 22:29:54 +0000258 HChar file[FILE_LEN], fn[FN_LEN];
259 UInt line;
njnd3bef4f2005-10-15 17:46:18 +0000260 CodeLoc loc;
261 LineCC* lineCC;
nethercote9313ac42004-07-06 21:54:20 +0000262
njn6a3009b2005-03-20 00:20:06 +0000263 get_debug_info(origAddr, file, fn, &line);
nethercote9313ac42004-07-06 21:54:20 +0000264
njnd3bef4f2005-10-15 17:46:18 +0000265 loc.file = file;
266 loc.fn = fn;
267 loc.line = line;
njn4f9c9342002-04-29 16:03:24 +0000268
njne2a9ad32007-09-17 05:30:48 +0000269 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
njnd3bef4f2005-10-15 17:46:18 +0000270 if (!lineCC) {
271 // Allocate and zero a new node.
njne2a9ad32007-09-17 05:30:48 +0000272 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
njnd3bef4f2005-10-15 17:46:18 +0000273 lineCC->loc.file = get_perm_string(loc.file);
274 lineCC->loc.fn = get_perm_string(loc.fn);
275 lineCC->loc.line = loc.line;
njn0a8db5c2007-04-02 03:11:41 +0000276 lineCC->Ir.a = 0;
277 lineCC->Ir.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000278 lineCC->Ir.mL = 0;
njn0a8db5c2007-04-02 03:11:41 +0000279 lineCC->Dr.a = 0;
280 lineCC->Dr.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000281 lineCC->Dr.mL = 0;
njn0a8db5c2007-04-02 03:11:41 +0000282 lineCC->Dw.a = 0;
283 lineCC->Dw.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000284 lineCC->Dw.mL = 0;
sewardj8badbaa2007-05-08 09:20:25 +0000285 lineCC->Bc.b = 0;
286 lineCC->Bc.mp = 0;
287 lineCC->Bi.b = 0;
288 lineCC->Bi.mp = 0;
njne2a9ad32007-09-17 05:30:48 +0000289 VG_(OSetGen_Insert)(CC_table, lineCC);
njn4f9c9342002-04-29 16:03:24 +0000290 }
nethercote9313ac42004-07-06 21:54:20 +0000291
njnd3bef4f2005-10-15 17:46:18 +0000292 return lineCC;
njn4f9c9342002-04-29 16:03:24 +0000293}
294
295/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000296/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000297/*------------------------------------------------------------*/
298
/* A common case for an instruction read event is that the
 * bytes read belong to the same cache line in both L1I and LL
 * (if cache line sizes of L1 and LL are the same).
 * As this can be detected at instrumentation time, and results
 * in faster simulation, special-casing is beneficial.
 *
 * Abbreviations used in var/function names:
 *  IrNoX - instruction read does not cross cache lines
 *  IrGen - generic instruction read; not detected as IrNoX
 *  Ir    - not known / not important whether it is an IrNoX
 */
310
njnc52b9322010-09-27 02:20:38 +0000311// Only used with --cache-sim=no.
312static VG_REGPARM(1)
weidendo6fc0de02012-10-30 00:28:29 +0000313void log_1Ir(InstrInfo* n)
njnc52b9322010-09-27 02:20:38 +0000314{
315 n->parent->Ir.a++;
316}
317
318// Only used with --cache-sim=no.
319static VG_REGPARM(2)
weidendo6fc0de02012-10-30 00:28:29 +0000320void log_2Ir(InstrInfo* n, InstrInfo* n2)
njnc52b9322010-09-27 02:20:38 +0000321{
322 n->parent->Ir.a++;
323 n2->parent->Ir.a++;
324}
325
326// Only used with --cache-sim=no.
327static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000328void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
njnc52b9322010-09-27 02:20:38 +0000329{
330 n->parent->Ir.a++;
331 n2->parent->Ir.a++;
332 n3->parent->Ir.a++;
333}
334
weidendo6fc0de02012-10-30 00:28:29 +0000335// Generic case for instruction reads: may cross cache lines.
336// All other Ir handlers expect IrNoX instruction reads.
njnaf839f52005-06-23 03:27:57 +0000337static VG_REGPARM(1)
weidendo6fc0de02012-10-30 00:28:29 +0000338void log_1IrGen_0D_cache_access(InstrInfo* n)
njn25e49d8e72002-09-23 09:36:25 +0000339{
weidendo6fc0de02012-10-30 00:28:29 +0000340 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000341 // n, n->instr_addr, n->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000342 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
343 &n->parent->Ir.m1, &n->parent->Ir.mL);
344 n->parent->Ir.a++;
345}
346
347static VG_REGPARM(1)
348void log_1IrNoX_0D_cache_access(InstrInfo* n)
349{
350 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
351 // n, n->instr_addr, n->instr_len);
352 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
353 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000354 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000355}
356
njnaf839f52005-06-23 03:27:57 +0000357static VG_REGPARM(2)
weidendo6fc0de02012-10-30 00:28:29 +0000358void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
njn25e49d8e72002-09-23 09:36:25 +0000359{
weidendo6fc0de02012-10-30 00:28:29 +0000360 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
361 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000362 // n, n->instr_addr, n->instr_len,
363 // n2, n2->instr_addr, n2->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000364 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
365 &n->parent->Ir.m1, &n->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000366 n->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000367 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
368 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000369 n2->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000370}
371
372static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000373void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
sewardj5155dec2005-10-12 10:09:23 +0000374{
weidendo6fc0de02012-10-30 00:28:29 +0000375 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
376 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
377 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000378 // n, n->instr_addr, n->instr_len,
379 // n2, n2->instr_addr, n2->instr_len,
380 // n3, n3->instr_addr, n3->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000381 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
382 &n->parent->Ir.m1, &n->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000383 n->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000384 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
385 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000386 n2->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000387 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
388 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000389 n3->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000390}
391
392static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000393void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000394{
weidendo6fc0de02012-10-30 00:28:29 +0000395 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
sewardj5155dec2005-10-12 10:09:23 +0000396 // " daddr=0x%010lx, dsize=%lu\n",
397 // n, n->instr_addr, n->instr_len, data_addr, data_size);
weidendo6fc0de02012-10-30 00:28:29 +0000398 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
399 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000400 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000401
sewardj5155dec2005-10-12 10:09:23 +0000402 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000403 &n->parent->Dr.m1, &n->parent->Dr.mL);
nethercote9313ac42004-07-06 21:54:20 +0000404 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000405}
406
sewardj5155dec2005-10-12 10:09:23 +0000407static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000408void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000409{
weidendo6fc0de02012-10-30 00:28:29 +0000410 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
sewardj5155dec2005-10-12 10:09:23 +0000411 // " daddr=0x%010lx, dsize=%lu\n",
412 // n, n->instr_addr, n->instr_len, data_addr, data_size);
weidendo6fc0de02012-10-30 00:28:29 +0000413 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
414 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000415 n->parent->Ir.a++;
416
sewardj5155dec2005-10-12 10:09:23 +0000417 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000418 &n->parent->Dw.m1, &n->parent->Dw.mL);
nethercote9313ac42004-07-06 21:54:20 +0000419 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000420}
421
njnaf839f52005-06-23 03:27:57 +0000422static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000423void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000424{
weidendo6fc0de02012-10-30 00:28:29 +0000425 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000426 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000427 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000428 &n->parent->Dr.m1, &n->parent->Dr.mL);
nethercote9313ac42004-07-06 21:54:20 +0000429 n->parent->Dr.a++;
sewardj5155dec2005-10-12 10:09:23 +0000430}
431
432static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000433void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000434{
weidendo6fc0de02012-10-30 00:28:29 +0000435 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000436 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000437 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000438 &n->parent->Dw.m1, &n->parent->Dw.mL);
nethercote9313ac42004-07-06 21:54:20 +0000439 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000440}
441
sewardj8badbaa2007-05-08 09:20:25 +0000442/* For branches, we consult two different predictors, one which
443 predicts taken/untaken for conditional branches, and the other
444 which predicts the branch target address for indirect branches
445 (jump-to-register style ones). */
446
447static VG_REGPARM(2)
448void log_cond_branch(InstrInfo* n, Word taken)
449{
450 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
451 // n, taken);
452 n->parent->Bc.b++;
453 n->parent->Bc.mp
454 += (1 & do_cond_branch_predict(n->instr_addr, taken));
455}
456
457static VG_REGPARM(2)
458void log_ind_branch(InstrInfo* n, UWord actual_dst)
459{
460 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
461 // n, actual_dst);
462 n->parent->Bi.b++;
463 n->parent->Bi.mp
464 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
465}
466
467
nethercote9313ac42004-07-06 21:54:20 +0000468/*------------------------------------------------------------*/
sewardj5155dec2005-10-12 10:09:23 +0000469/*--- Instrumentation types and structures ---*/
470/*------------------------------------------------------------*/
471
472/* Maintain an ordered list of memory events which are outstanding, in
473 the sense that no IR has yet been generated to do the relevant
474 helper calls. The BB is scanned top to bottom and memory events
475 are added to the end of the list, merging with the most recent
476 notified event where possible (Dw immediately following Dr and
477 having the same size and EA can be merged).
478
479 This merging is done so that for architectures which have
480 load-op-store instructions (x86, amd64), the insn is treated as if
481 it makes just one memory reference (a modify), rather than two (a
482 read followed by a write at the same address).
483
484 At various points the list will need to be flushed, that is, IR
485 generated from it. That must happen before any possible exit from
486 the block (the end, or an IRStmt_Exit). Flushing also takes place
487 when there is no space to add a new event.
488
489 If we require the simulation statistics to be up to date with
490 respect to possible memory exceptions, then the list would have to
491 be flushed before each memory reference. That would however lose
492 performance by inhibiting event-merging during flushing.
493
494 Flushing the list consists of walking it start to end and emitting
495 instrumentation IR for each event, in the order in which they
496 appear. It may be possible to emit a single call for two adjacent
497 events in order to reduce the number of helper function calls made.
498 For example, it could well be profitable to handle two adjacent Ir
499 events with a single helper call. */
500
501typedef
502 IRExpr
503 IRAtom;
504
/* The kind of an outstanding Event.  The order of the enumerators is
   significant: Ev_IrNoX is value 0, which is the tag an Event has after
   init_Event() has zeroed it. */
typedef enum {
   Ev_IrNoX,   // Instruction read not crossing cache lines
   Ev_IrGen,   // Generic Ir, not being detected as IrNoX
   Ev_Dr,      // Data read
   Ev_Dw,      // Data write
   Ev_Dm,      // Data modify (read then write)
   Ev_Bc,      // branch conditional
   Ev_Bi       // branch indirect (to unknown destination)
} EventTag;
sewardj5155dec2005-10-12 10:09:23 +0000516
517typedef
518 struct {
sewardj8badbaa2007-05-08 09:20:25 +0000519 EventTag tag;
520 InstrInfo* inode;
521 union {
522 struct {
weidendo6fc0de02012-10-30 00:28:29 +0000523 } IrGen;
524 struct {
525 } IrNoX;
sewardj8badbaa2007-05-08 09:20:25 +0000526 struct {
527 IRAtom* ea;
528 Int szB;
529 } Dr;
530 struct {
531 IRAtom* ea;
532 Int szB;
533 } Dw;
534 struct {
535 IRAtom* ea;
536 Int szB;
537 } Dm;
538 struct {
539 IRAtom* taken; /* :: Ity_I1 */
540 } Bc;
541 struct {
542 IRAtom* dst;
543 } Bi;
544 } Ev;
sewardj5155dec2005-10-12 10:09:23 +0000545 }
546 Event;
547
sewardj8badbaa2007-05-08 09:20:25 +0000548static void init_Event ( Event* ev ) {
549 VG_(memset)(ev, 0, sizeof(Event));
550}
551
552static IRAtom* get_Event_dea ( Event* ev ) {
553 switch (ev->tag) {
554 case Ev_Dr: return ev->Ev.Dr.ea;
555 case Ev_Dw: return ev->Ev.Dw.ea;
556 case Ev_Dm: return ev->Ev.Dm.ea;
557 default: tl_assert(0);
558 }
559}
560
561static Int get_Event_dszB ( Event* ev ) {
562 switch (ev->tag) {
563 case Ev_Dr: return ev->Ev.Dr.szB;
564 case Ev_Dw: return ev->Ev.Dw.szB;
565 case Ev_Dm: return ev->Ev.Dm.szB;
566 default: tl_assert(0);
567 }
568}
569
570
sewardj5155dec2005-10-12 10:09:23 +0000571/* Up to this many unnotified events are allowed. Number is
572 arbitrary. Larger numbers allow more event merging to occur, but
573 potentially induce more spilling due to extending live ranges of
574 address temporaries. */
575#define N_EVENTS 16
576
577
578/* A struct which holds all the running state during instrumentation.
579 Mostly to avoid passing loads of parameters everywhere. */
580typedef
581 struct {
582 /* The current outstanding-memory-event list. */
583 Event events[N_EVENTS];
584 Int events_used;
585
njnd3bef4f2005-10-15 17:46:18 +0000586 /* The array of InstrInfo bins for the BB. */
sewardj0b9d74a2006-12-24 02:24:11 +0000587 SB_info* sbInfo;
sewardj5155dec2005-10-12 10:09:23 +0000588
njnd3bef4f2005-10-15 17:46:18 +0000589 /* Number InstrInfo bins 'used' so far. */
sewardj0b9d74a2006-12-24 02:24:11 +0000590 Int sbInfo_i;
sewardj5155dec2005-10-12 10:09:23 +0000591
sewardj0b9d74a2006-12-24 02:24:11 +0000592 /* The output SB being constructed. */
593 IRSB* sbOut;
sewardj5155dec2005-10-12 10:09:23 +0000594 }
595 CgState;
596
597
sewardj5155dec2005-10-12 10:09:23 +0000598/*------------------------------------------------------------*/
599/*--- Instrumentation main ---*/
nethercote9313ac42004-07-06 21:54:20 +0000600/*------------------------------------------------------------*/
601
sewardj4ba057c2005-10-18 12:04:18 +0000602// Note that origAddr is the real origAddr, not the address of the first
603// instruction in the block (they can be different due to redirection).
nethercote564b2b02004-08-07 15:54:53 +0000604static
sewardj0b9d74a2006-12-24 02:24:11 +0000605SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000606{
njn4bd67b52005-08-11 00:47:10 +0000607 Int i, n_instrs;
608 IRStmt* st;
sewardj0b9d74a2006-12-24 02:24:11 +0000609 SB_info* sbInfo;
njnd3bef4f2005-10-15 17:46:18 +0000610
sewardj0b9d74a2006-12-24 02:24:11 +0000611 // Count number of original instrs in SB
njn6a3009b2005-03-20 00:20:06 +0000612 n_instrs = 0;
sewardj0b9d74a2006-12-24 02:24:11 +0000613 for (i = 0; i < sbIn->stmts_used; i++) {
614 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +0000615 if (Ist_IMark == st->tag) n_instrs++;
nethercote9313ac42004-07-06 21:54:20 +0000616 }
617
njnf7d26092005-10-12 16:45:17 +0000618 // Check that we don't have an entry for this BB in the instr-info table.
619 // If this assertion fails, there has been some screwup: some
620 // translations must have been discarded but Cachegrind hasn't discarded
621 // the corresponding entries in the instr-info table.
njne2a9ad32007-09-17 05:30:48 +0000622 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
sewardj0b9d74a2006-12-24 02:24:11 +0000623 tl_assert(NULL == sbInfo);
sewardja3a29a52005-10-12 16:16:03 +0000624
njnd3bef4f2005-10-15 17:46:18 +0000625 // BB never translated before (at this address, at least; could have
626 // been unloaded and then reloaded elsewhere in memory)
njne2a9ad32007-09-17 05:30:48 +0000627 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
sewardj0b9d74a2006-12-24 02:24:11 +0000628 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
629 sbInfo->SB_addr = origAddr;
630 sbInfo->n_instrs = n_instrs;
njne2a9ad32007-09-17 05:30:48 +0000631 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
sewardja3a29a52005-10-12 16:16:03 +0000632
sewardj0b9d74a2006-12-24 02:24:11 +0000633 return sbInfo;
nethercote9313ac42004-07-06 21:54:20 +0000634}
njn6a3009b2005-03-20 00:20:06 +0000635
nethercote9313ac42004-07-06 21:54:20 +0000636
sewardj5155dec2005-10-12 10:09:23 +0000637static void showEvent ( Event* ev )
nethercote9313ac42004-07-06 21:54:20 +0000638{
sewardj8badbaa2007-05-08 09:20:25 +0000639 switch (ev->tag) {
weidendo6fc0de02012-10-30 00:28:29 +0000640 case Ev_IrGen:
641 VG_(printf)("IrGen %p\n", ev->inode);
642 break;
643 case Ev_IrNoX:
644 VG_(printf)("IrNoX %p\n", ev->inode);
sewardj5155dec2005-10-12 10:09:23 +0000645 break;
sewardj8badbaa2007-05-08 09:20:25 +0000646 case Ev_Dr:
647 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
648 ppIRExpr(ev->Ev.Dr.ea);
sewardj5155dec2005-10-12 10:09:23 +0000649 VG_(printf)("\n");
650 break;
sewardj8badbaa2007-05-08 09:20:25 +0000651 case Ev_Dw:
652 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
653 ppIRExpr(ev->Ev.Dw.ea);
sewardj5155dec2005-10-12 10:09:23 +0000654 VG_(printf)("\n");
655 break;
sewardj8badbaa2007-05-08 09:20:25 +0000656 case Ev_Dm:
657 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
658 ppIRExpr(ev->Ev.Dm.ea);
659 VG_(printf)("\n");
660 break;
661 case Ev_Bc:
662 VG_(printf)("Bc %p GA=", ev->inode);
663 ppIRExpr(ev->Ev.Bc.taken);
664 VG_(printf)("\n");
665 break;
666 case Ev_Bi:
667 VG_(printf)("Bi %p DST=", ev->inode);
668 ppIRExpr(ev->Ev.Bi.dst);
sewardj5155dec2005-10-12 10:09:23 +0000669 VG_(printf)("\n");
670 break;
671 default:
672 tl_assert(0);
673 break;
674 }
njn6a3009b2005-03-20 00:20:06 +0000675}
676
njnfd9f6222005-10-16 00:17:37 +0000677// Reserve and initialise an InstrInfo for the first mention of a new insn.
678static
679InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
njn6a3009b2005-03-20 00:20:06 +0000680{
njnd3bef4f2005-10-15 17:46:18 +0000681 InstrInfo* i_node;
sewardj0b9d74a2006-12-24 02:24:11 +0000682 tl_assert(cgs->sbInfo_i >= 0);
683 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
684 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
njnfd9f6222005-10-16 00:17:37 +0000685 i_node->instr_addr = instr_addr;
686 i_node->instr_len = instr_len;
687 i_node->parent = get_lineCC(instr_addr);
sewardj0b9d74a2006-12-24 02:24:11 +0000688 cgs->sbInfo_i++;
sewardj5155dec2005-10-12 10:09:23 +0000689 return i_node;
690}
sewardj17a56bf2005-03-21 01:35:02 +0000691
sewardj17a56bf2005-03-21 01:35:02 +0000692
sewardj5155dec2005-10-12 10:09:23 +0000693/* Generate code for all outstanding memory events, and mark the queue
694 empty. Code is generated into cgs->bbOut, and this activity
sewardj0b9d74a2006-12-24 02:24:11 +0000695 'consumes' slots in cgs->sbInfo. */
njn6a3009b2005-03-20 00:20:06 +0000696
sewardj5155dec2005-10-12 10:09:23 +0000697static void flushEvents ( CgState* cgs )
698{
njnd3bef4f2005-10-15 17:46:18 +0000699 Int i, regparms;
florianee90c8a2012-10-21 02:39:42 +0000700 const HChar* helperName;
njnd3bef4f2005-10-15 17:46:18 +0000701 void* helperAddr;
702 IRExpr** argv;
703 IRExpr* i_node_expr;
njnd3bef4f2005-10-15 17:46:18 +0000704 IRDirty* di;
njnc285dca2005-10-15 22:07:28 +0000705 Event* ev;
706 Event* ev2;
707 Event* ev3;
njn6a3009b2005-03-20 00:20:06 +0000708
sewardj5155dec2005-10-12 10:09:23 +0000709 i = 0;
710 while (i < cgs->events_used) {
njn6a3009b2005-03-20 00:20:06 +0000711
sewardj5155dec2005-10-12 10:09:23 +0000712 helperName = NULL;
713 helperAddr = NULL;
714 argv = NULL;
715 regparms = 0;
716
717 /* generate IR to notify event i and possibly the ones
718 immediately following it. */
719 tl_assert(i >= 0 && i < cgs->events_used);
njnc285dca2005-10-15 22:07:28 +0000720
721 ev = &cgs->events[i];
722 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
723 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
724
sewardj5155dec2005-10-12 10:09:23 +0000725 if (DEBUG_CG) {
726 VG_(printf)(" flush ");
njnc285dca2005-10-15 22:07:28 +0000727 showEvent( ev );
njn4f9c9342002-04-29 16:03:24 +0000728 }
sewardj5155dec2005-10-12 10:09:23 +0000729
njnfd9f6222005-10-16 00:17:37 +0000730 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
sewardj5155dec2005-10-12 10:09:23 +0000731
732 /* Decide on helper fn to call and args to pass it, and advance
733 i appropriately. */
sewardj8badbaa2007-05-08 09:20:25 +0000734 switch (ev->tag) {
weidendo6fc0de02012-10-30 00:28:29 +0000735 case Ev_IrNoX:
736 /* Merge an IrNoX with a following Dr/Dm. */
sewardj8badbaa2007-05-08 09:20:25 +0000737 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
738 /* Why is this true? It's because we're merging an Ir
739 with a following Dr or Dm. The Ir derives from the
740 instruction's IMark and the Dr/Dm from data
741 references which follow it. In short it holds
742 because each insn starts with an IMark, hence an
743 Ev_Ir, and so these Dr/Dm must pertain to the
744 immediately preceding Ir. Same applies to analogous
745 assertions in the subsequent cases. */
njnfd9f6222005-10-16 00:17:37 +0000746 tl_assert(ev2->inode == ev->inode);
weidendo6fc0de02012-10-30 00:28:29 +0000747 helperName = "log_1IrNoX_1Dr_cache_access";
748 helperAddr = &log_1IrNoX_1Dr_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000749 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000750 get_Event_dea(ev2),
751 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000752 regparms = 3;
753 i += 2;
754 }
weidendo6fc0de02012-10-30 00:28:29 +0000755 /* Merge an IrNoX with a following Dw. */
sewardj5155dec2005-10-12 10:09:23 +0000756 else
sewardj8badbaa2007-05-08 09:20:25 +0000757 if (ev2 && ev2->tag == Ev_Dw) {
njnfd9f6222005-10-16 00:17:37 +0000758 tl_assert(ev2->inode == ev->inode);
weidendo6fc0de02012-10-30 00:28:29 +0000759 helperName = "log_1IrNoX_1Dw_cache_access";
760 helperAddr = &log_1IrNoX_1Dw_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000761 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000762 get_Event_dea(ev2),
763 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000764 regparms = 3;
765 i += 2;
766 }
weidendo6fc0de02012-10-30 00:28:29 +0000767 /* Merge an IrNoX with two following IrNoX's. */
sewardj5155dec2005-10-12 10:09:23 +0000768 else
weidendo6fc0de02012-10-30 00:28:29 +0000769 if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
njnc285dca2005-10-15 22:07:28 +0000770 {
njnc52b9322010-09-27 02:20:38 +0000771 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000772 helperName = "log_3IrNoX_0D_cache_access";
773 helperAddr = &log_3IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000774 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000775 helperName = "log_3Ir";
776 helperAddr = &log_3Ir;
njnc52b9322010-09-27 02:20:38 +0000777 }
njnfd9f6222005-10-16 00:17:37 +0000778 argv = mkIRExprVec_3( i_node_expr,
779 mkIRExpr_HWord( (HWord)ev2->inode ),
780 mkIRExpr_HWord( (HWord)ev3->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000781 regparms = 3;
782 i += 3;
783 }
weidendo6fc0de02012-10-30 00:28:29 +0000784 /* Merge an IrNoX with one following IrNoX. */
sewardj5155dec2005-10-12 10:09:23 +0000785 else
weidendo6fc0de02012-10-30 00:28:29 +0000786 if (ev2 && ev2->tag == Ev_IrNoX) {
njnc52b9322010-09-27 02:20:38 +0000787 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000788 helperName = "log_2IrNoX_0D_cache_access";
789 helperAddr = &log_2IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000790 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000791 helperName = "log_2Ir";
792 helperAddr = &log_2Ir;
njnc52b9322010-09-27 02:20:38 +0000793 }
njnfd9f6222005-10-16 00:17:37 +0000794 argv = mkIRExprVec_2( i_node_expr,
795 mkIRExpr_HWord( (HWord)ev2->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000796 regparms = 2;
797 i += 2;
798 }
799 /* No merging possible; emit as-is. */
800 else {
njnc52b9322010-09-27 02:20:38 +0000801 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000802 helperName = "log_1IrNoX_0D_cache_access";
803 helperAddr = &log_1IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000804 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000805 helperName = "log_1Ir";
806 helperAddr = &log_1Ir;
njnc52b9322010-09-27 02:20:38 +0000807 }
sewardj5155dec2005-10-12 10:09:23 +0000808 argv = mkIRExprVec_1( i_node_expr );
809 regparms = 1;
810 i++;
811 }
812 break;
weidendo6fc0de02012-10-30 00:28:29 +0000813 case Ev_IrGen:
814 if (clo_cache_sim) {
815 helperName = "log_1IrGen_0D_cache_access";
816 helperAddr = &log_1IrGen_0D_cache_access;
817 } else {
818 helperName = "log_1Ir";
819 helperAddr = &log_1Ir;
820 }
821 argv = mkIRExprVec_1( i_node_expr );
822 regparms = 1;
823 i++;
824 break;
sewardj8badbaa2007-05-08 09:20:25 +0000825 case Ev_Dr:
826 case Ev_Dm:
827 /* Data read or modify */
weidendo6fc0de02012-10-30 00:28:29 +0000828 helperName = "log_0Ir_1Dr_cache_access";
829 helperAddr = &log_0Ir_1Dr_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000830 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000831 get_Event_dea(ev),
832 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000833 regparms = 3;
834 i++;
835 break;
sewardj8badbaa2007-05-08 09:20:25 +0000836 case Ev_Dw:
837 /* Data write */
weidendo6fc0de02012-10-30 00:28:29 +0000838 helperName = "log_0Ir_1Dw_cache_access";
839 helperAddr = &log_0Ir_1Dw_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000840 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000841 get_Event_dea(ev),
842 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000843 regparms = 3;
844 i++;
845 break;
sewardj8badbaa2007-05-08 09:20:25 +0000846 case Ev_Bc:
847 /* Conditional branch */
848 helperName = "log_cond_branch";
849 helperAddr = &log_cond_branch;
850 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
851 regparms = 2;
852 i++;
853 break;
854 case Ev_Bi:
855 /* Branch to an unknown destination */
856 helperName = "log_ind_branch";
857 helperAddr = &log_ind_branch;
858 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
859 regparms = 2;
860 i++;
861 break;
sewardj5155dec2005-10-12 10:09:23 +0000862 default:
863 tl_assert(0);
864 }
865
866 /* Add the helper. */
867 tl_assert(helperName);
868 tl_assert(helperAddr);
869 tl_assert(argv);
sewardj5bb86822005-12-23 12:47:42 +0000870 di = unsafeIRDirty_0_N( regparms,
871 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
872 argv );
sewardj0b9d74a2006-12-24 02:24:11 +0000873 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
njn4f9c9342002-04-29 16:03:24 +0000874 }
875
sewardj5155dec2005-10-12 10:09:23 +0000876 cgs->events_used = 0;
njn4f9c9342002-04-29 16:03:24 +0000877}
njn14d01ce2004-11-26 11:30:14 +0000878
njnfd9f6222005-10-16 00:17:37 +0000879static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
sewardj5155dec2005-10-12 10:09:23 +0000880{
881 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000882 if (cgs->events_used == N_EVENTS)
883 flushEvents(cgs);
884 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
885 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000886 init_Event(evt);
njnfd9f6222005-10-16 00:17:37 +0000887 evt->inode = inode;
weidendo6fc0de02012-10-30 00:28:29 +0000888 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
889 evt->tag = Ev_IrNoX;
890 distinct_instrsNoX++;
891 } else {
892 evt->tag = Ev_IrGen;
893 distinct_instrsGen++;
894 }
sewardj5155dec2005-10-12 10:09:23 +0000895 cgs->events_used++;
896}
897
njnfd9f6222005-10-16 00:17:37 +0000898static
899void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
sewardj5155dec2005-10-12 10:09:23 +0000900{
njnfd9f6222005-10-16 00:17:37 +0000901 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000902 tl_assert(isIRAtom(ea));
sewardj98763d52012-06-03 22:40:07 +0000903 tl_assert(datasize >= 1 && datasize <= min_line_size);
sewardj8badbaa2007-05-08 09:20:25 +0000904 if (!clo_cache_sim)
905 return;
njnfd9f6222005-10-16 00:17:37 +0000906 if (cgs->events_used == N_EVENTS)
907 flushEvents(cgs);
908 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
909 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000910 init_Event(evt);
911 evt->tag = Ev_Dr;
912 evt->inode = inode;
913 evt->Ev.Dr.szB = datasize;
914 evt->Ev.Dr.ea = ea;
njnfd9f6222005-10-16 00:17:37 +0000915 cgs->events_used++;
916}
sewardj5155dec2005-10-12 10:09:23 +0000917
njnfd9f6222005-10-16 00:17:37 +0000918static
919void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
920{
921 Event* lastEvt;
922 Event* evt;
923
924 tl_assert(isIRAtom(ea));
sewardj98763d52012-06-03 22:40:07 +0000925 tl_assert(datasize >= 1 && datasize <= min_line_size);
njnfd9f6222005-10-16 00:17:37 +0000926
sewardj8badbaa2007-05-08 09:20:25 +0000927 if (!clo_cache_sim)
928 return;
929
njnfd9f6222005-10-16 00:17:37 +0000930 /* Is it possible to merge this write with the preceding read? */
931 lastEvt = &cgs->events[cgs->events_used-1];
sewardj5155dec2005-10-12 10:09:23 +0000932 if (cgs->events_used > 0
sewardj8badbaa2007-05-08 09:20:25 +0000933 && lastEvt->tag == Ev_Dr
934 && lastEvt->Ev.Dr.szB == datasize
935 && lastEvt->inode == inode
936 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
njnfd9f6222005-10-16 00:17:37 +0000937 {
sewardj8badbaa2007-05-08 09:20:25 +0000938 lastEvt->tag = Ev_Dm;
sewardj5155dec2005-10-12 10:09:23 +0000939 return;
940 }
941
942 /* No. Add as normal. */
943 if (cgs->events_used == N_EVENTS)
944 flushEvents(cgs);
945 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
njnfd9f6222005-10-16 00:17:37 +0000946 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000947 init_Event(evt);
948 evt->tag = Ev_Dw;
949 evt->inode = inode;
950 evt->Ev.Dw.szB = datasize;
951 evt->Ev.Dw.ea = ea;
952 cgs->events_used++;
953}
954
955static
956void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
957{
958 Event* evt;
959 tl_assert(isIRAtom(guard));
960 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
961 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
962 if (!clo_branch_sim)
963 return;
964 if (cgs->events_used == N_EVENTS)
965 flushEvents(cgs);
966 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
967 evt = &cgs->events[cgs->events_used];
968 init_Event(evt);
969 evt->tag = Ev_Bc;
970 evt->inode = inode;
971 evt->Ev.Bc.taken = guard;
972 cgs->events_used++;
973}
974
975static
976void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
977{
978 Event* evt;
979 tl_assert(isIRAtom(whereTo));
980 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
981 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
982 if (!clo_branch_sim)
983 return;
984 if (cgs->events_used == N_EVENTS)
985 flushEvents(cgs);
986 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
987 evt = &cgs->events[cgs->events_used];
988 init_Event(evt);
989 evt->tag = Ev_Bi;
990 evt->inode = inode;
991 evt->Ev.Bi.dst = whereTo;
sewardj5155dec2005-10-12 10:09:23 +0000992 cgs->events_used++;
993}
994
995////////////////////////////////////////////////////////////
996
997
sewardj4ba057c2005-10-18 12:04:18 +0000998static
sewardj0b9d74a2006-12-24 02:24:11 +0000999IRSB* cg_instrument ( VgCallbackClosure* closure,
1000 IRSB* sbIn,
sewardj461df9c2006-01-17 02:06:39 +00001001 VexGuestLayout* layout,
1002 VexGuestExtents* vge,
florianca503be2012-10-07 21:59:42 +00001003 VexArchInfo* archinfo_host,
sewardj4ba057c2005-10-18 12:04:18 +00001004 IRType gWordTy, IRType hWordTy )
njn14d01ce2004-11-26 11:30:14 +00001005{
njnfd9f6222005-10-16 00:17:37 +00001006 Int i, isize;
sewardj5155dec2005-10-12 10:09:23 +00001007 IRStmt* st;
1008 Addr64 cia; /* address of current insn */
1009 CgState cgs;
sewardj0b9d74a2006-12-24 02:24:11 +00001010 IRTypeEnv* tyenv = sbIn->tyenv;
njnfd9f6222005-10-16 00:17:37 +00001011 InstrInfo* curr_inode = NULL;
sewardj5155dec2005-10-12 10:09:23 +00001012
sewardjd54babf2005-03-21 00:55:49 +00001013 if (gWordTy != hWordTy) {
1014 /* We don't currently support this case. */
1015 VG_(tool_panic)("host/guest word size mismatch");
1016 }
1017
sewardj0b9d74a2006-12-24 02:24:11 +00001018 // Set up new SB
1019 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
njn6a3009b2005-03-20 00:20:06 +00001020
sewardja9f538c2005-10-23 12:06:55 +00001021 // Copy verbatim any IR preamble preceding the first IMark
njn6a3009b2005-03-20 00:20:06 +00001022 i = 0;
sewardj0b9d74a2006-12-24 02:24:11 +00001023 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1024 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
sewardja9f538c2005-10-23 12:06:55 +00001025 i++;
1026 }
1027
1028 // Get the first statement, and initial cia from it
sewardj0b9d74a2006-12-24 02:24:11 +00001029 tl_assert(sbIn->stmts_used > 0);
1030 tl_assert(i < sbIn->stmts_used);
1031 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +00001032 tl_assert(Ist_IMark == st->tag);
sewardj8badbaa2007-05-08 09:20:25 +00001033
1034 cia = st->Ist.IMark.addr;
1035 isize = st->Ist.IMark.len;
1036 // If Vex fails to decode an instruction, the size will be zero.
1037 // Pretend otherwise.
1038 if (isize == 0) isize = VG_MIN_INSTR_SZB;
njn6a3009b2005-03-20 00:20:06 +00001039
sewardj5155dec2005-10-12 10:09:23 +00001040 // Set up running state and get block info
sewardj3a384b32006-01-22 01:12:51 +00001041 tl_assert(closure->readdr == vge->base[0]);
sewardj5155dec2005-10-12 10:09:23 +00001042 cgs.events_used = 0;
sewardj0b9d74a2006-12-24 02:24:11 +00001043 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
1044 cgs.sbInfo_i = 0;
njn6a3009b2005-03-20 00:20:06 +00001045
sewardj5155dec2005-10-12 10:09:23 +00001046 if (DEBUG_CG)
1047 VG_(printf)("\n\n---------- cg_instrument ----------\n");
njn6a3009b2005-03-20 00:20:06 +00001048
njnfd9f6222005-10-16 00:17:37 +00001049 // Traverse the block, initialising inodes, adding events and flushing as
1050 // necessary.
sewardj0b9d74a2006-12-24 02:24:11 +00001051 for (/*use current i*/; i < sbIn->stmts_used; i++) {
njn6a3009b2005-03-20 00:20:06 +00001052
sewardj0b9d74a2006-12-24 02:24:11 +00001053 st = sbIn->stmts[i];
sewardj5155dec2005-10-12 10:09:23 +00001054 tl_assert(isFlatIRStmt(st));
njnb3507ea2005-08-02 23:07:02 +00001055
sewardj5155dec2005-10-12 10:09:23 +00001056 switch (st->tag) {
1057 case Ist_NoOp:
1058 case Ist_AbiHint:
1059 case Ist_Put:
1060 case Ist_PutI:
sewardj72d75132007-11-09 23:06:35 +00001061 case Ist_MBE:
sewardj5155dec2005-10-12 10:09:23 +00001062 break;
njn20677cc2005-08-12 23:47:51 +00001063
sewardj5155dec2005-10-12 10:09:23 +00001064 case Ist_IMark:
njnfd9f6222005-10-16 00:17:37 +00001065 cia = st->Ist.IMark.addr;
1066 isize = st->Ist.IMark.len;
1067
1068 // If Vex fails to decode an instruction, the size will be zero.
1069 // Pretend otherwise.
1070 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1071
njna5ad9ba2005-11-10 15:20:37 +00001072 // Sanity-check size.
1073 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1074 || VG_CLREQ_SZB == isize );
njnfd9f6222005-10-16 00:17:37 +00001075
1076 // Get space for and init the inode, record it as the current one.
1077 // Subsequent Dr/Dw/Dm events from the same instruction will
1078 // also use it.
1079 curr_inode = setup_InstrInfo(&cgs, cia, isize);
1080
1081 addEvent_Ir( &cgs, curr_inode );
sewardj5155dec2005-10-12 10:09:23 +00001082 break;
1083
sewardj0b9d74a2006-12-24 02:24:11 +00001084 case Ist_WrTmp: {
1085 IRExpr* data = st->Ist.WrTmp.data;
sewardj5155dec2005-10-12 10:09:23 +00001086 if (data->tag == Iex_Load) {
1087 IRExpr* aexpr = data->Iex.Load.addr;
sewardj5155dec2005-10-12 10:09:23 +00001088 // Note also, endianness info is ignored. I guess
1089 // that's not interesting.
njnfd9f6222005-10-16 00:17:37 +00001090 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1091 aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001092 }
1093 break;
njnb3507ea2005-08-02 23:07:02 +00001094 }
1095
sewardj5155dec2005-10-12 10:09:23 +00001096 case Ist_Store: {
1097 IRExpr* data = st->Ist.Store.data;
1098 IRExpr* aexpr = st->Ist.Store.addr;
njnfd9f6222005-10-16 00:17:37 +00001099 addEvent_Dw( &cgs, curr_inode,
1100 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001101 break;
1102 }
njnb3507ea2005-08-02 23:07:02 +00001103
sewardj5155dec2005-10-12 10:09:23 +00001104 case Ist_Dirty: {
1105 Int dataSize;
1106 IRDirty* d = st->Ist.Dirty.details;
1107 if (d->mFx != Ifx_None) {
njnfd9f6222005-10-16 00:17:37 +00001108 /* This dirty helper accesses memory. Collect the details. */
sewardj5155dec2005-10-12 10:09:23 +00001109 tl_assert(d->mAddr != NULL);
1110 tl_assert(d->mSize != 0);
1111 dataSize = d->mSize;
1112 // Large (eg. 28B, 108B, 512B on x86) data-sized
1113 // instructions will be done inaccurately, but they're
1114 // very rare and this avoids errors from hitting more
1115 // than two cache lines in the simulation.
sewardj98763d52012-06-03 22:40:07 +00001116 if (dataSize > min_line_size)
1117 dataSize = min_line_size;
sewardj5155dec2005-10-12 10:09:23 +00001118 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001119 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001120 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001121 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001122 } else {
1123 tl_assert(d->mAddr == NULL);
1124 tl_assert(d->mSize == 0);
1125 }
1126 break;
1127 }
njn6a3009b2005-03-20 00:20:06 +00001128
sewardj1c0ce7a2009-07-01 08:10:49 +00001129 case Ist_CAS: {
1130 /* We treat it as a read and a write of the location. I
1131 think that is the same behaviour as it was before IRCAS
1132 was introduced, since prior to that point, the Vex
1133 front ends would translate a lock-prefixed instruction
1134 into a (normal) read followed by a (normal) write. */
1135 Int dataSize;
1136 IRCAS* cas = st->Ist.CAS.details;
1137 tl_assert(cas->addr != NULL);
1138 tl_assert(cas->dataLo != NULL);
1139 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1140 if (cas->dataHi != NULL)
1141 dataSize *= 2; /* since it's a doubleword-CAS */
1142 /* I don't think this can ever happen, but play safe. */
sewardj98763d52012-06-03 22:40:07 +00001143 if (dataSize > min_line_size)
1144 dataSize = min_line_size;
sewardj1c0ce7a2009-07-01 08:10:49 +00001145 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1146 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1147 break;
1148 }
1149
sewardjdb5907d2009-11-26 17:20:21 +00001150 case Ist_LLSC: {
1151 IRType dataTy;
1152 if (st->Ist.LLSC.storedata == NULL) {
1153 /* LL */
1154 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1155 addEvent_Dr( &cgs, curr_inode,
1156 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1157 } else {
1158 /* SC */
1159 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1160 addEvent_Dw( &cgs, curr_inode,
1161 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1162 }
1163 break;
1164 }
1165
sewardj8badbaa2007-05-08 09:20:25 +00001166 case Ist_Exit: {
weidendo374a48f2010-09-02 17:06:49 +00001167 // call branch predictor only if this is a branch in guest code
1168 if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1169 (st->Ist.Exit.jk == Ijk_Call) ||
1170 (st->Ist.Exit.jk == Ijk_Ret) )
1171 {
1172 /* Stuff to widen the guard expression to a host word, so
1173 we can pass it to the branch predictor simulation
1174 functions easily. */
1175 Bool inverted;
1176 Addr64 nia, sea;
1177 IRConst* dst;
1178 IRType tyW = hWordTy;
1179 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1180 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1181 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1182 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1183 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1184 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1185 : IRExpr_Const(IRConst_U64(1));
sewardj8badbaa2007-05-08 09:20:25 +00001186
weidendo374a48f2010-09-02 17:06:49 +00001187 /* First we need to figure out whether the side exit got
1188 inverted by the ir optimiser. To do that, figure out
1189 the next (fallthrough) instruction's address and the
1190 side exit address and see if they are the same. */
1191 nia = cia + (Addr64)isize;
1192 if (tyW == Ity_I32)
1193 nia &= 0xFFFFFFFFULL;
sewardj8badbaa2007-05-08 09:20:25 +00001194
weidendo374a48f2010-09-02 17:06:49 +00001195 /* Side exit address */
1196 dst = st->Ist.Exit.dst;
1197 if (tyW == Ity_I32) {
1198 tl_assert(dst->tag == Ico_U32);
1199 sea = (Addr64)(UInt)dst->Ico.U32;
1200 } else {
1201 tl_assert(tyW == Ity_I64);
1202 tl_assert(dst->tag == Ico_U64);
1203 sea = dst->Ico.U64;
1204 }
1205
1206 inverted = nia == sea;
1207
1208 /* Widen the guard expression. */
1209 addStmtToIRSB( cgs.sbOut,
1210 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1211 addStmtToIRSB( cgs.sbOut,
1212 IRStmt_WrTmp( guardW,
1213 IRExpr_Unop(widen,
1214 IRExpr_RdTmp(guard1))) );
1215 /* If the exit is inverted, invert the sense of the guard. */
1216 addStmtToIRSB(
1217 cgs.sbOut,
1218 IRStmt_WrTmp(
1219 guard,
1220 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1221 : IRExpr_RdTmp(guardW)
1222 ));
1223 /* And post the event. */
1224 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
sewardj8badbaa2007-05-08 09:20:25 +00001225 }
1226
sewardj5155dec2005-10-12 10:09:23 +00001227 /* We may never reach the next statement, so need to flush
1228 all outstanding transactions now. */
1229 flushEvents( &cgs );
1230 break;
sewardj8badbaa2007-05-08 09:20:25 +00001231 }
sewardj5155dec2005-10-12 10:09:23 +00001232
1233 default:
1234 tl_assert(0);
1235 break;
njnb3507ea2005-08-02 23:07:02 +00001236 }
njn6a3009b2005-03-20 00:20:06 +00001237
sewardj5155dec2005-10-12 10:09:23 +00001238 /* Copy the original statement */
sewardj0b9d74a2006-12-24 02:24:11 +00001239 addStmtToIRSB( cgs.sbOut, st );
njn6a3009b2005-03-20 00:20:06 +00001240
sewardj5155dec2005-10-12 10:09:23 +00001241 if (DEBUG_CG) {
1242 ppIRStmt(st);
1243 VG_(printf)("\n");
1244 }
1245 }
1246
sewardj8badbaa2007-05-08 09:20:25 +00001247 /* Deal with branches to unknown destinations. Except ignore ones
1248 which are function returns as we assume the return stack
1249 predictor never mispredicts. */
weidendo374a48f2010-09-02 17:06:49 +00001250 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
sewardj8badbaa2007-05-08 09:20:25 +00001251 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1252 switch (sbIn->next->tag) {
1253 case Iex_Const:
1254 break; /* boring - branch to known address */
1255 case Iex_RdTmp:
1256 /* looks like an indirect branch (branch to unknown) */
1257 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1258 break;
1259 default:
1260 /* shouldn't happen - if the incoming IR is properly
1261 flattened, should only have tmp and const cases to
1262 consider. */
1263 tl_assert(0);
1264 }
1265 }
1266
sewardj5155dec2005-10-12 10:09:23 +00001267 /* At the end of the bb. Flush outstandings. */
sewardj5155dec2005-10-12 10:09:23 +00001268 flushEvents( &cgs );
1269
sewardj5155dec2005-10-12 10:09:23 +00001270 /* done. stay sane ... */
sewardj0b9d74a2006-12-24 02:24:11 +00001271 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
sewardj5155dec2005-10-12 10:09:23 +00001272
1273 if (DEBUG_CG) {
1274 VG_(printf)( "goto {");
sewardj0b9d74a2006-12-24 02:24:11 +00001275 ppIRJumpKind(sbIn->jumpkind);
sewardj5155dec2005-10-12 10:09:23 +00001276 VG_(printf)( "} ");
sewardj0b9d74a2006-12-24 02:24:11 +00001277 ppIRExpr( sbIn->next );
sewardj5155dec2005-10-12 10:09:23 +00001278 VG_(printf)( "}\n");
1279 }
1280
sewardj0b9d74a2006-12-24 02:24:11 +00001281 return cgs.sbOut;
njn14d01ce2004-11-26 11:30:14 +00001282}
njn4f9c9342002-04-29 16:03:24 +00001283
1284/*------------------------------------------------------------*/
nethercoteb35a8b92004-09-11 16:45:27 +00001285/*--- Cache configuration ---*/
njn4f9c9342002-04-29 16:03:24 +00001286/*------------------------------------------------------------*/
1287
sewardjb5f6f512005-03-10 23:59:00 +00001288#define UNDEFINED_CACHE { -1, -1, -1 }
njn25e49d8e72002-09-23 09:36:25 +00001289
1290static cache_t clo_I1_cache = UNDEFINED_CACHE;
1291static cache_t clo_D1_cache = UNDEFINED_CACHE;
njn2d853a12010-10-06 22:46:31 +00001292static cache_t clo_LL_cache = UNDEFINED_CACHE;
njn25e49d8e72002-09-23 09:36:25 +00001293
njn4f9c9342002-04-29 16:03:24 +00001294/*------------------------------------------------------------*/
njn51d827b2005-05-09 01:02:08 +00001295/*--- cg_fini() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001296/*------------------------------------------------------------*/
1297
nethercote9313ac42004-07-06 21:54:20 +00001298// Total reads/writes/misses. Calculated during CC traversal at the end.
1299// All auto-zeroed.
sewardj8badbaa2007-05-08 09:20:25 +00001300static CacheCC Ir_total;
1301static CacheCC Dr_total;
1302static CacheCC Dw_total;
1303static BranchCC Bc_total;
1304static BranchCC Bi_total;
nethercote9313ac42004-07-06 21:54:20 +00001305
nethercote9313ac42004-07-06 21:54:20 +00001306static void fprint_CC_table_and_calc_totals(void)
1307{
njnd3bef4f2005-10-15 17:46:18 +00001308 Int i, fd;
sewardj92645592005-07-23 09:18:34 +00001309 SysRes sres;
floriandbb35842012-10-27 18:39:11 +00001310 HChar buf[512];
florian19f91bb2012-11-10 22:29:54 +00001311 HChar *currFile = NULL, *currFn = NULL;
njnd3bef4f2005-10-15 17:46:18 +00001312 LineCC* lineCC;
njn4f9c9342002-04-29 16:03:24 +00001313
njn7064fb22008-05-29 23:09:52 +00001314 // Setup output filename. Nb: it's important to do this now, ie. as late
1315 // as possible. If we do it at start-up and the program forks and the
1316 // output file format string contains a %p (pid) specifier, both the
1317 // parent and child will incorrectly write to the same file; this
1318 // happened in 3.3.0.
florian19f91bb2012-11-10 22:29:54 +00001319 HChar* cachegrind_out_file =
njn7064fb22008-05-29 23:09:52 +00001320 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1321
sewardj92645592005-07-23 09:18:34 +00001322 sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1323 VKI_S_IRUSR|VKI_S_IWUSR);
njncda2f0f2009-05-18 02:12:08 +00001324 if (sr_isError(sres)) {
nethercote9313ac42004-07-06 21:54:20 +00001325 // If the file can't be opened for whatever reason (conflict
1326 // between multiple cachegrinded processes?), give up now.
sewardjb2c985b2009-07-15 14:51:17 +00001327 VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1328 cachegrind_out_file );
1329 VG_(umsg)(" ... so simulation results will be missing.\n");
njn7064fb22008-05-29 23:09:52 +00001330 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001331 return;
sewardj92645592005-07-23 09:18:34 +00001332 } else {
njncda2f0f2009-05-18 02:12:08 +00001333 fd = sr_Res(sres);
njn7064fb22008-05-29 23:09:52 +00001334 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001335 }
njn4f9c9342002-04-29 16:03:24 +00001336
njn2d853a12010-10-06 22:46:31 +00001337 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
nethercote9313ac42004-07-06 21:54:20 +00001338 // the 2nd colon makes cg_annotate's output look nicer.
1339 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1340 "desc: D1 cache: %s\n"
njn2d853a12010-10-06 22:46:31 +00001341 "desc: LL cache: %s\n",
1342 I1.desc_line, D1.desc_line, LL.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001343 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001344
nethercote9313ac42004-07-06 21:54:20 +00001345 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001346 VG_(strcpy)(buf, "cmd:");
1347 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
sewardj45f4e7c2005-09-27 19:20:21 +00001348 if (VG_(args_the_exename)) {
1349 VG_(write)(fd, " ", 1);
1350 VG_(write)(fd, VG_(args_the_exename),
1351 VG_(strlen)( VG_(args_the_exename) ));
1352 }
sewardj14c7cc52007-02-25 15:08:24 +00001353 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1354 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1355 if (arg) {
sewardj45f4e7c2005-09-27 19:20:21 +00001356 VG_(write)(fd, " ", 1);
sewardj14c7cc52007-02-25 15:08:24 +00001357 VG_(write)(fd, arg, VG_(strlen)( arg ));
sewardj45f4e7c2005-09-27 19:20:21 +00001358 }
njn4f9c9342002-04-29 16:03:24 +00001359 }
nethercote9313ac42004-07-06 21:54:20 +00001360 // "events:" line
sewardj8badbaa2007-05-08 09:20:25 +00001361 if (clo_cache_sim && clo_branch_sim) {
njn2d853a12010-10-06 22:46:31 +00001362 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
sewardj8badbaa2007-05-08 09:20:25 +00001363 "Bc Bcm Bi Bim\n");
1364 }
1365 else if (clo_cache_sim && !clo_branch_sim) {
njn2d853a12010-10-06 22:46:31 +00001366 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
sewardj8badbaa2007-05-08 09:20:25 +00001367 "\n");
1368 }
1369 else if (!clo_cache_sim && clo_branch_sim) {
1370 VG_(sprintf)(buf, "\nevents: Ir "
1371 "Bc Bcm Bi Bim\n");
1372 }
njne90711c2010-09-27 01:04:20 +00001373 else {
1374 VG_(sprintf)(buf, "\nevents: Ir\n");
1375 }
sewardj8badbaa2007-05-08 09:20:25 +00001376
njn4f9c9342002-04-29 16:03:24 +00001377 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1378
njnd3bef4f2005-10-15 17:46:18 +00001379 // Traverse every lineCC
njne2a9ad32007-09-17 05:30:48 +00001380 VG_(OSetGen_ResetIter)(CC_table);
1381 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
njn4311fe62005-12-08 23:18:50 +00001382 Bool just_hit_a_new_file = False;
njnd3bef4f2005-10-15 17:46:18 +00001383 // If we've hit a new file, print a "fl=" line. Note that because
1384 // each string is stored exactly once in the string table, we can use
1385 // pointer comparison rather than strcmp() to test for equality, which
1386 // is good because most of the time the comparisons are equal and so
njn4311fe62005-12-08 23:18:50 +00001387 // the whole strings would have to be checked.
njnd3bef4f2005-10-15 17:46:18 +00001388 if ( lineCC->loc.file != currFile ) {
1389 currFile = lineCC->loc.file;
1390 VG_(sprintf)(buf, "fl=%s\n", currFile);
njn4f9c9342002-04-29 16:03:24 +00001391 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njnd3bef4f2005-10-15 17:46:18 +00001392 distinct_files++;
njn4311fe62005-12-08 23:18:50 +00001393 just_hit_a_new_file = True;
njn4f9c9342002-04-29 16:03:24 +00001394 }
njn4311fe62005-12-08 23:18:50 +00001395 // If we've hit a new function, print a "fn=" line. We know to do
1396 // this when the function name changes, and also every time we hit a
1397 // new file (in which case the new function name might be the same as
1398 // in the old file, hence the just_hit_a_new_file test).
1399 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
njnd3bef4f2005-10-15 17:46:18 +00001400 currFn = lineCC->loc.fn;
1401 VG_(sprintf)(buf, "fn=%s\n", currFn);
1402 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1403 distinct_fns++;
1404 }
1405
1406 // Print the LineCC
sewardj8badbaa2007-05-08 09:20:25 +00001407 if (clo_cache_sim && clo_branch_sim) {
1408 VG_(sprintf)(buf, "%u %llu %llu %llu"
1409 " %llu %llu %llu"
1410 " %llu %llu %llu"
1411 " %llu %llu %llu %llu\n",
1412 lineCC->loc.line,
njn2d853a12010-10-06 22:46:31 +00001413 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1414 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1415 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
sewardj8badbaa2007-05-08 09:20:25 +00001416 lineCC->Bc.b, lineCC->Bc.mp,
1417 lineCC->Bi.b, lineCC->Bi.mp);
1418 }
1419 else if (clo_cache_sim && !clo_branch_sim) {
1420 VG_(sprintf)(buf, "%u %llu %llu %llu"
1421 " %llu %llu %llu"
1422 " %llu %llu %llu\n",
1423 lineCC->loc.line,
njn2d853a12010-10-06 22:46:31 +00001424 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1425 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1426 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
sewardj8badbaa2007-05-08 09:20:25 +00001427 }
1428 else if (!clo_cache_sim && clo_branch_sim) {
1429 VG_(sprintf)(buf, "%u %llu"
1430 " %llu %llu %llu %llu\n",
1431 lineCC->loc.line,
1432 lineCC->Ir.a,
1433 lineCC->Bc.b, lineCC->Bc.mp,
1434 lineCC->Bi.b, lineCC->Bi.mp);
1435 }
njne90711c2010-09-27 01:04:20 +00001436 else {
1437 VG_(sprintf)(buf, "%u %llu\n",
1438 lineCC->loc.line,
1439 lineCC->Ir.a);
1440 }
sewardj8badbaa2007-05-08 09:20:25 +00001441
njnd3bef4f2005-10-15 17:46:18 +00001442 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1443
1444 // Update summary stats
1445 Ir_total.a += lineCC->Ir.a;
1446 Ir_total.m1 += lineCC->Ir.m1;
njn2d853a12010-10-06 22:46:31 +00001447 Ir_total.mL += lineCC->Ir.mL;
njnd3bef4f2005-10-15 17:46:18 +00001448 Dr_total.a += lineCC->Dr.a;
1449 Dr_total.m1 += lineCC->Dr.m1;
njn2d853a12010-10-06 22:46:31 +00001450 Dr_total.mL += lineCC->Dr.mL;
njnd3bef4f2005-10-15 17:46:18 +00001451 Dw_total.a += lineCC->Dw.a;
1452 Dw_total.m1 += lineCC->Dw.m1;
njn2d853a12010-10-06 22:46:31 +00001453 Dw_total.mL += lineCC->Dw.mL;
sewardj8badbaa2007-05-08 09:20:25 +00001454 Bc_total.b += lineCC->Bc.b;
1455 Bc_total.mp += lineCC->Bc.mp;
1456 Bi_total.b += lineCC->Bi.b;
1457 Bi_total.mp += lineCC->Bi.mp;
njnd3bef4f2005-10-15 17:46:18 +00001458
1459 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +00001460 }
1461
nethercote9313ac42004-07-06 21:54:20 +00001462 // Summary stats must come after rest of table, since we calculate them
sewardj8badbaa2007-05-08 09:20:25 +00001463 // during traversal. */
1464 if (clo_cache_sim && clo_branch_sim) {
1465 VG_(sprintf)(buf, "summary:"
1466 " %llu %llu %llu"
1467 " %llu %llu %llu"
1468 " %llu %llu %llu"
1469 " %llu %llu %llu %llu\n",
njn2d853a12010-10-06 22:46:31 +00001470 Ir_total.a, Ir_total.m1, Ir_total.mL,
1471 Dr_total.a, Dr_total.m1, Dr_total.mL,
1472 Dw_total.a, Dw_total.m1, Dw_total.mL,
sewardj8badbaa2007-05-08 09:20:25 +00001473 Bc_total.b, Bc_total.mp,
1474 Bi_total.b, Bi_total.mp);
1475 }
1476 else if (clo_cache_sim && !clo_branch_sim) {
1477 VG_(sprintf)(buf, "summary:"
1478 " %llu %llu %llu"
1479 " %llu %llu %llu"
1480 " %llu %llu %llu\n",
njn2d853a12010-10-06 22:46:31 +00001481 Ir_total.a, Ir_total.m1, Ir_total.mL,
1482 Dr_total.a, Dr_total.m1, Dr_total.mL,
1483 Dw_total.a, Dw_total.m1, Dw_total.mL);
sewardj8badbaa2007-05-08 09:20:25 +00001484 }
1485 else if (!clo_cache_sim && clo_branch_sim) {
1486 VG_(sprintf)(buf, "summary:"
1487 " %llu"
1488 " %llu %llu %llu %llu\n",
1489 Ir_total.a,
1490 Bc_total.b, Bc_total.mp,
1491 Bi_total.b, Bi_total.mp);
1492 }
njne90711c2010-09-27 01:04:20 +00001493 else {
1494 VG_(sprintf)(buf, "summary:"
1495 " %llu\n",
1496 Ir_total.a);
1497 }
sewardj8badbaa2007-05-08 09:20:25 +00001498
njn4f9c9342002-04-29 16:03:24 +00001499 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1500 VG_(close)(fd);
1501}
1502
njn607adfc2003-09-30 14:15:44 +00001503static UInt ULong_width(ULong n)
njn4f9c9342002-04-29 16:03:24 +00001504{
njn607adfc2003-09-30 14:15:44 +00001505 UInt w = 0;
1506 while (n > 0) {
1507 n = n / 10;
1508 w++;
njn4f9c9342002-04-29 16:03:24 +00001509 }
sewardj46c59b12005-11-01 02:20:19 +00001510 if (w == 0) w = 1;
njn607adfc2003-09-30 14:15:44 +00001511 return w + (w-1)/3; // add space for commas
njn4f9c9342002-04-29 16:03:24 +00001512}
1513
njn51d827b2005-05-09 01:02:08 +00001514static void cg_fini(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001515{
floriandbb35842012-10-27 18:39:11 +00001516 static HChar buf1[128], buf2[128], buf3[128], buf4[123];
florianee90c8a2012-10-21 02:39:42 +00001517 static HChar fmt[128];
njn607adfc2003-09-30 14:15:44 +00001518
sewardj8badbaa2007-05-08 09:20:25 +00001519 CacheCC D_total;
1520 BranchCC B_total;
njn2d853a12010-10-06 22:46:31 +00001521 ULong LL_total_m, LL_total_mr, LL_total_mw,
1522 LL_total, LL_total_r, LL_total_w;
njn4c245e52009-03-15 23:25:38 +00001523 Int l1, l2, l3;
njn4f9c9342002-04-29 16:03:24 +00001524
nethercote9313ac42004-07-06 21:54:20 +00001525 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001526
njn7cf0bd32002-06-08 13:36:03 +00001527 if (VG_(clo_verbosity) == 0)
1528 return;
1529
njnf76d27a2009-05-28 01:53:07 +00001530 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1531 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
njn4c245e52009-03-15 23:25:38 +00001532
njn4f9c9342002-04-29 16:03:24 +00001533 /* I cache results. Use the I_refs value to determine the first column
1534 * width. */
njn607adfc2003-09-30 14:15:44 +00001535 l1 = ULong_width(Ir_total.a);
njnf76d27a2009-05-28 01:53:07 +00001536 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1537 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
njn4f9c9342002-04-29 16:03:24 +00001538
njn607adfc2003-09-30 14:15:44 +00001539 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001540 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
njnd3bef4f2005-10-15 17:46:18 +00001541
sewardj8badbaa2007-05-08 09:20:25 +00001542 /* Always print this */
sewardjb2c985b2009-07-15 14:51:17 +00001543 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
njn4f9c9342002-04-29 16:03:24 +00001544
sewardj8badbaa2007-05-08 09:20:25 +00001545 /* If cache profiling is enabled, show D access numbers and all
1546 miss numbers */
1547 if (clo_cache_sim) {
sewardjb2c985b2009-07-15 14:51:17 +00001548 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
njn2d853a12010-10-06 22:46:31 +00001549 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);
njn4f9c9342002-04-29 16:03:24 +00001550
sewardj8badbaa2007-05-08 09:20:25 +00001551 if (0 == Ir_total.a) Ir_total.a = 1;
1552 VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
sewardjb2c985b2009-07-15 14:51:17 +00001553 VG_(umsg)("I1 miss rate: %s\n", buf1);
njn4f9c9342002-04-29 16:03:24 +00001554
njn2d853a12010-10-06 22:46:31 +00001555 VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
1556 VG_(umsg)("LLi miss rate: %s\n", buf1);
sewardjb2c985b2009-07-15 14:51:17 +00001557 VG_(umsg)("\n");
njnd3bef4f2005-10-15 17:46:18 +00001558
sewardj8badbaa2007-05-08 09:20:25 +00001559 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1560 * determine the width of columns 2 & 3. */
1561 D_total.a = Dr_total.a + Dw_total.a;
1562 D_total.m1 = Dr_total.m1 + Dw_total.m1;
njn2d853a12010-10-06 22:46:31 +00001563 D_total.mL = Dr_total.mL + Dw_total.mL;
njn4f9c9342002-04-29 16:03:24 +00001564
sewardj8badbaa2007-05-08 09:20:25 +00001565 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001566 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1567 l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001568
sewardjb2c985b2009-07-15 14:51:17 +00001569 VG_(umsg)(fmt, "D refs: ",
1570 D_total.a, Dr_total.a, Dw_total.a);
1571 VG_(umsg)(fmt, "D1 misses: ",
1572 D_total.m1, Dr_total.m1, Dw_total.m1);
njn2d853a12010-10-06 22:46:31 +00001573 VG_(umsg)(fmt, "LLd misses: ",
1574 D_total.mL, Dr_total.mL, Dw_total.mL);
njnd3bef4f2005-10-15 17:46:18 +00001575
sewardj8badbaa2007-05-08 09:20:25 +00001576 if (0 == D_total.a) D_total.a = 1;
1577 if (0 == Dr_total.a) Dr_total.a = 1;
1578 if (0 == Dw_total.a) Dw_total.a = 1;
1579 VG_(percentify)( D_total.m1, D_total.a, 1, l1+1, buf1);
1580 VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
1581 VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001582 VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
njn4f9c9342002-04-29 16:03:24 +00001583
njn2d853a12010-10-06 22:46:31 +00001584 VG_(percentify)( D_total.mL, D_total.a, 1, l1+1, buf1);
1585 VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
1586 VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
1587 VG_(umsg)("LLd miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001588 VG_(umsg)("\n");
njn1d021fa2002-05-02 13:56:34 +00001589
njn2d853a12010-10-06 22:46:31 +00001590 /* LL overall results */
njn1d021fa2002-05-02 13:56:34 +00001591
njn2d853a12010-10-06 22:46:31 +00001592 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1593 LL_total_r = Dr_total.m1 + Ir_total.m1;
1594 LL_total_w = Dw_total.m1;
1595 VG_(umsg)(fmt, "LL refs: ",
1596 LL_total, LL_total_r, LL_total_w);
njn4f9c9342002-04-29 16:03:24 +00001597
njn2d853a12010-10-06 22:46:31 +00001598 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
1599 LL_total_mr = Dr_total.mL + Ir_total.mL;
1600 LL_total_mw = Dw_total.mL;
1601 VG_(umsg)(fmt, "LL misses: ",
1602 LL_total_m, LL_total_mr, LL_total_mw);
njnd3bef4f2005-10-15 17:46:18 +00001603
njn2d853a12010-10-06 22:46:31 +00001604 VG_(percentify)(LL_total_m, (Ir_total.a + D_total.a), 1, l1+1, buf1);
1605 VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
1606 VG_(percentify)(LL_total_mw, Dw_total.a, 1, l3+1, buf3);
1607 VG_(umsg)("LL miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardj8badbaa2007-05-08 09:20:25 +00001608 }
1609
1610 /* If branch profiling is enabled, show branch overall results. */
1611 if (clo_branch_sim) {
1612 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001613 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1614 l1, l2, l3);
sewardj8badbaa2007-05-08 09:20:25 +00001615
1616 if (0 == Bc_total.b) Bc_total.b = 1;
1617 if (0 == Bi_total.b) Bi_total.b = 1;
1618 B_total.b = Bc_total.b + Bi_total.b;
1619 B_total.mp = Bc_total.mp + Bi_total.mp;
1620
sewardjb2c985b2009-07-15 14:51:17 +00001621 VG_(umsg)("\n");
1622 VG_(umsg)(fmt, "Branches: ",
1623 B_total.b, Bc_total.b, Bi_total.b);
sewardj8badbaa2007-05-08 09:20:25 +00001624
sewardjb2c985b2009-07-15 14:51:17 +00001625 VG_(umsg)(fmt, "Mispredicts: ",
1626 B_total.mp, Bc_total.mp, Bi_total.mp);
sewardj8badbaa2007-05-08 09:20:25 +00001627
1628 VG_(percentify)(B_total.mp, B_total.b, 1, l1+1, buf1);
1629 VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
1630 VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
1631
sewardjb2c985b2009-07-15 14:51:17 +00001632 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardj8badbaa2007-05-08 09:20:25 +00001633 }
njn4f9c9342002-04-29 16:03:24 +00001634
nethercote9313ac42004-07-06 21:54:20 +00001635 // Various stats
sewardj2d9e8742009-08-07 15:46:56 +00001636 if (VG_(clo_stats)) {
njn1baf7db2006-04-18 22:34:48 +00001637 Int debug_lookups = full_debugs + fn_debugs +
1638 file_line_debugs + no_debugs;
njnd3bef4f2005-10-15 17:46:18 +00001639
sewardjb2c985b2009-07-15 14:51:17 +00001640 VG_(dmsg)("\n");
weidendo6fc0de02012-10-30 00:28:29 +00001641 VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files);
1642 VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
1643 VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines);
1644 VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
1645 VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
sewardjb2c985b2009-07-15 14:51:17 +00001646 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
njn1baf7db2006-04-18 22:34:48 +00001647
1648 VG_(percentify)(full_debugs, debug_lookups, 1, 6, buf1);
1649 VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
1650 VG_(percentify)(fn_debugs, debug_lookups, 1, 6, buf3);
1651 VG_(percentify)(no_debugs, debug_lookups, 1, 6, buf4);
sewardjb2c985b2009-07-15 14:51:17 +00001652 VG_(dmsg)("cachegrind: with full info:%s (%d)\n",
1653 buf1, full_debugs);
1654 VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
1655 buf2, file_line_debugs);
1656 VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n",
1657 buf3, fn_debugs);
1658 VG_(dmsg)("cachegrind: with zero info:%s (%d)\n",
1659 buf4, no_debugs);
njn1baf7db2006-04-18 22:34:48 +00001660
sewardjb2c985b2009-07-15 14:51:17 +00001661 VG_(dmsg)("cachegrind: string table size: %lu\n",
1662 VG_(OSetGen_Size)(stringTable));
1663 VG_(dmsg)("cachegrind: CC table size: %lu\n",
1664 VG_(OSetGen_Size)(CC_table));
1665 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
1666 VG_(OSetGen_Size)(instrInfoTable));
njn4f9c9342002-04-29 16:03:24 +00001667 }
njn4f9c9342002-04-29 16:03:24 +00001668}
1669
/*--------------------------------------------------------------------*/
/*--- Discarding BB info ---*/
/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001673
sewardja3a29a52005-10-12 16:16:03 +00001674// Called when a translation is removed from the translation cache for
1675// any reason at all: to free up space, because the guest code was
1676// unmapped or modified, or for any arbitrary reason.
sewardj4ba057c2005-10-18 12:04:18 +00001677static
sewardj0b9d74a2006-12-24 02:24:11 +00001678void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
sewardj18d75132002-05-16 11:06:21 +00001679{
sewardj0b9d74a2006-12-24 02:24:11 +00001680 SB_info* sbInfo;
sewardj3a384b32006-01-22 01:12:51 +00001681 Addr orig_addr = (Addr)vge.base[0];
njn4294fd42002-06-05 14:41:10 +00001682
sewardj5155dec2005-10-12 10:09:23 +00001683 tl_assert(vge.n_used > 0);
1684
1685 if (DEBUG_CG)
sewardj4ba057c2005-10-18 12:04:18 +00001686 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1687 (void*)(Addr)orig_addr,
sewardj5155dec2005-10-12 10:09:23 +00001688 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
njn4294fd42002-06-05 14:41:10 +00001689
sewardj4ba057c2005-10-18 12:04:18 +00001690 // Get BB info, remove from table, free BB info. Simple! Note that we
1691 // use orig_addr, not the first instruction address in vge.
njne2a9ad32007-09-17 05:30:48 +00001692 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
sewardj0b9d74a2006-12-24 02:24:11 +00001693 tl_assert(NULL != sbInfo);
njne2a9ad32007-09-17 05:30:48 +00001694 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
sewardj18d75132002-05-16 11:06:21 +00001695}
1696
/*--------------------------------------------------------------------*/
/*--- Command line processing ---*/
/*--------------------------------------------------------------------*/
1700
florian19f91bb2012-11-10 22:29:54 +00001701static Bool cg_process_cmd_line_option(const HChar* arg)
njn25e49d8e72002-09-23 09:36:25 +00001702{
weidendo23642272011-09-06 19:08:31 +00001703 if (VG_(str_clo_cache_opt)(arg,
1704 &clo_I1_cache,
1705 &clo_D1_cache,
1706 &clo_LL_cache)) {}
njn83df0b62009-02-25 01:01:05 +00001707
1708 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1709 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1710 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
njn25e49d8e72002-09-23 09:36:25 +00001711 else
1712 return False;
1713
1714 return True;
1715}
1716
njn51d827b2005-05-09 01:02:08 +00001717static void cg_print_usage(void)
njn25e49d8e72002-09-23 09:36:25 +00001718{
weidendo23642272011-09-06 19:08:31 +00001719 VG_(print_cache_clo_opts)();
njn3e884182003-04-15 13:03:23 +00001720 VG_(printf)(
sewardj8badbaa2007-05-08 09:20:25 +00001721" --cache-sim=yes|no [yes] collect cache stats?\n"
1722" --branch-sim=yes|no [no] collect branch prediction stats?\n"
njn374a36d2007-11-23 01:41:32 +00001723" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
njn3e884182003-04-15 13:03:23 +00001724 );
1725}
1726
njn51d827b2005-05-09 01:02:08 +00001727static void cg_print_debug_usage(void)
njn3e884182003-04-15 13:03:23 +00001728{
1729 VG_(printf)(
1730" (none)\n"
1731 );
njn25e49d8e72002-09-23 09:36:25 +00001732}
1733
/*--------------------------------------------------------------------*/
/*--- Setup ---*/
/*--------------------------------------------------------------------*/
1737
sewardje1216cb2007-02-07 19:55:30 +00001738static void cg_post_clo_init(void); /* just below */
1739
njn51d827b2005-05-09 01:02:08 +00001740static void cg_pre_clo_init(void)
1741{
njn51d827b2005-05-09 01:02:08 +00001742 VG_(details_name) ("Cachegrind");
1743 VG_(details_version) (NULL);
sewardj8badbaa2007-05-08 09:20:25 +00001744 VG_(details_description) ("a cache and branch-prediction profiler");
njn51d827b2005-05-09 01:02:08 +00001745 VG_(details_copyright_author)(
sewardj03f8d3f2012-08-05 15:46:46 +00001746 "Copyright (C) 2002-2012, and GNU GPL'd, by Nicholas Nethercote et al.");
njn51d827b2005-05-09 01:02:08 +00001747 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardje8089302006-10-17 02:15:17 +00001748 VG_(details_avg_translation_sizeB) ( 500 );
njn51d827b2005-05-09 01:02:08 +00001749
philippe5b240c22012-08-14 22:28:31 +00001750 VG_(clo_vex_control).iropt_register_updates
1751 = VexRegUpdSpAtMemAccess; // overridable by the user.
njn51d827b2005-05-09 01:02:08 +00001752 VG_(basic_tool_funcs) (cg_post_clo_init,
1753 cg_instrument,
1754 cg_fini);
1755
sewardj0b9d74a2006-12-24 02:24:11 +00001756 VG_(needs_superblock_discards)(cg_discard_superblock_info);
njn51d827b2005-05-09 01:02:08 +00001757 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1758 cg_print_usage,
1759 cg_print_debug_usage);
sewardje1216cb2007-02-07 19:55:30 +00001760}
1761
1762static void cg_post_clo_init(void)
1763{
njn2d853a12010-10-06 22:46:31 +00001764 cache_t I1c, D1c, LLc;
njn51d827b2005-05-09 01:02:08 +00001765
njne2a9ad32007-09-17 05:30:48 +00001766 CC_table =
1767 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1768 cmp_CodeLoc_LineCC,
sewardj9c606bd2008-09-18 18:12:50 +00001769 VG_(malloc), "cg.main.cpci.1",
1770 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001771 instrInfoTable =
1772 VG_(OSetGen_Create)(/*keyOff*/0,
1773 NULL,
sewardj9c606bd2008-09-18 18:12:50 +00001774 VG_(malloc), "cg.main.cpci.2",
1775 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001776 stringTable =
1777 VG_(OSetGen_Create)(/*keyOff*/0,
1778 stringCmp,
sewardj9c606bd2008-09-18 18:12:50 +00001779 VG_(malloc), "cg.main.cpci.3",
1780 VG_(free));
sewardje1216cb2007-02-07 19:55:30 +00001781
weidendo23642272011-09-06 19:08:31 +00001782 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1783 &clo_I1_cache,
1784 &clo_D1_cache,
1785 &clo_LL_cache);
sewardje1216cb2007-02-07 19:55:30 +00001786
sewardj98763d52012-06-03 22:40:07 +00001787 // min_line_size is used to make sure that we never feed
1788 // accesses to the simulator straddling more than two
1789 // cache lines at any cache level
1790 min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1791 min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
1792
1793 Int largest_load_or_store_size
1794 = VG_(machine_get_size_of_largest_guest_register)();
1795 if (min_line_size < largest_load_or_store_size) {
1796 /* We can't continue, because the cache simulation might
1797 straddle more than 2 lines, and it will assert. So let's
1798 just stop before we start. */
1799 VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1800 (Int)min_line_size);
1801 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
1802 largest_load_or_store_size );
1803 VG_(umsg)(" but it is not. Exiting now.\n");
1804 VG_(exit)(1);
1805 }
1806
weidendoc1e94262012-10-05 23:58:17 +00001807 cachesim_initcaches(I1c, D1c, LLc);
njn51d827b2005-05-09 01:02:08 +00001808}
1809
sewardj45f4e7c2005-09-27 19:20:21 +00001810VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
fitzhardinge98abfc72003-12-16 02:05:15 +00001811
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +00001815