blob: bdfda33c99af34dbad57009dabd8a1441cf99119 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njn101e5722005-04-21 02:37:54 +00003/*--- Cachegrind: everything but the simulation itself. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
sewardj0f157dd2013-10-18 14:27:36 +000011 Copyright (C) 2002-2013 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000012 njn@valgrind.org
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
sewardj4cfea4f2006-10-14 19:26:10 +000033#include "pub_tool_vki.h"
njnea27e462005-05-31 02:38:09 +000034#include "pub_tool_debuginfo.h"
njn97405b22005-06-02 03:39:33 +000035#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000036#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +000037#include "pub_tool_libcfile.h"
njn36a20fa2005-06-03 03:08:39 +000038#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +000039#include "pub_tool_libcproc.h"
njnf536bbb2005-06-13 04:21:38 +000040#include "pub_tool_machine.h"
njn717cde52005-05-10 02:47:21 +000041#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000042#include "pub_tool_options.h"
njnd3bef4f2005-10-15 17:46:18 +000043#include "pub_tool_oset.h"
njn43b9a8a2005-05-10 04:37:01 +000044#include "pub_tool_tooliface.h"
sewardj14c7cc52007-02-25 15:08:24 +000045#include "pub_tool_xarray.h"
sewardj45f4e7c2005-09-27 19:20:21 +000046#include "pub_tool_clientstate.h"
sewardj5bb86822005-12-23 12:47:42 +000047#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
njn25e49d8e72002-09-23 09:36:25 +000048
nethercoteb35a8b92004-09-11 16:45:27 +000049#include "cg_arch.h"
nethercote27fc1da2004-01-04 16:56:57 +000050#include "cg_sim.c"
sewardj8badbaa2007-05-08 09:20:25 +000051#include "cg_branchpred.c"
njn4f9c9342002-04-29 16:03:24 +000052
njn25e49d8e72002-09-23 09:36:25 +000053/*------------------------------------------------------------*/
54/*--- Constants ---*/
55/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000056
/* Debug switch: set to 1 for very verbose debugging. */
#define DEBUG_CG 0

/* Fixed sizes used by this file. */
#define MIN_LINE_SIZE 16
#define FILE_LEN      VKI_PATH_MAX   /* capacity of file-name buffers */
#define FN_LEN        256            /* capacity of function-name buffers */
njn7cf0bd32002-06-08 13:36:03 +000063
64/*------------------------------------------------------------*/
sewardj8badbaa2007-05-08 09:20:25 +000065/*--- Options ---*/
66/*------------------------------------------------------------*/
67
njn374a36d2007-11-23 01:41:32 +000068static Bool clo_cache_sim = True; /* do cache simulation? */
69static Bool clo_branch_sim = False; /* do branch simulation? */
florian19f91bb2012-11-10 22:29:54 +000070static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
sewardj8badbaa2007-05-08 09:20:25 +000071
72/*------------------------------------------------------------*/
sewardj98763d52012-06-03 22:40:07 +000073/*--- Cachesim configuration ---*/
74/*------------------------------------------------------------*/
75
76static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
77
78/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000079/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000080/*------------------------------------------------------------*/
81
sewardj8badbaa2007-05-08 09:20:25 +000082typedef
83 struct {
84 ULong a; /* total # memory accesses of this kind */
85 ULong m1; /* misses in the first level cache */
njn2d853a12010-10-06 22:46:31 +000086 ULong mL; /* misses in the second level cache */
sewardj8badbaa2007-05-08 09:20:25 +000087 }
88 CacheCC;
89
90typedef
91 struct {
92 ULong b; /* total # branches of this kind */
93 ULong mp; /* number of branches mispredicted */
94 }
95 BranchCC;
njn4f9c9342002-04-29 16:03:24 +000096
nethercote9313ac42004-07-06 21:54:20 +000097//------------------------------------------------------------
98// Primary data structure #1: CC table
99// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
njnd3bef4f2005-10-15 17:46:18 +0000100// - an ordered set of CCs. CC indexing done by file/function/line (as
101// determined from the instrAddr).
nethercote9313ac42004-07-06 21:54:20 +0000102// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +0000103
njnd3bef4f2005-10-15 17:46:18 +0000104typedef struct {
florian19f91bb2012-11-10 22:29:54 +0000105 HChar* file;
106 HChar* fn;
107 Int line;
njnd3bef4f2005-10-15 17:46:18 +0000108}
109CodeLoc;
njn4f9c9342002-04-29 16:03:24 +0000110
sewardj8badbaa2007-05-08 09:20:25 +0000111typedef struct {
112 CodeLoc loc; /* Source location that these counts pertain to */
113 CacheCC Ir; /* Insn read counts */
114 CacheCC Dr; /* Data read counts */
115 CacheCC Dw; /* Data write/modify counts */
116 BranchCC Bc; /* Conditional branch counts */
117 BranchCC Bi; /* Indirect branch counts */
118} LineCC;
njn4f9c9342002-04-29 16:03:24 +0000119
njnd3bef4f2005-10-15 17:46:18 +0000120// First compare file, then fn, then line.
tom5a835d52007-12-30 12:28:26 +0000121static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
njnd3bef4f2005-10-15 17:46:18 +0000122{
njnafa12262005-12-24 03:10:56 +0000123 Word res;
florian3e798632012-11-24 19:41:54 +0000124 const CodeLoc* a = (const CodeLoc*)vloc;
125 const CodeLoc* b = &(((const LineCC*)vcc)->loc);
njn4f9c9342002-04-29 16:03:24 +0000126
njnd3bef4f2005-10-15 17:46:18 +0000127 res = VG_(strcmp)(a->file, b->file);
128 if (0 != res)
129 return res;
njn4f9c9342002-04-29 16:03:24 +0000130
njnd3bef4f2005-10-15 17:46:18 +0000131 res = VG_(strcmp)(a->fn, b->fn);
132 if (0 != res)
133 return res;
134
135 return a->line - b->line;
136}
137
138static OSet* CC_table;
njn4f9c9342002-04-29 16:03:24 +0000139
nethercote9313ac42004-07-06 21:54:20 +0000140//------------------------------------------------------------
njnd3bef4f2005-10-15 17:46:18 +0000141// Primary data structure #2: InstrInfo table
nethercote9313ac42004-07-06 21:54:20 +0000142// - Holds the cached info about each instr that is used for simulation.
sewardj0b9d74a2006-12-24 02:24:11 +0000143// - table(SB_start_addr, list(InstrInfo))
144// - For each SB, each InstrInfo in the list holds info about the
njnd3bef4f2005-10-15 17:46:18 +0000145// instruction (instrLen, instrAddr, etc), plus a pointer to its line
nethercote9313ac42004-07-06 21:54:20 +0000146// CC. This node is what's passed to the simulation function.
sewardj0b9d74a2006-12-24 02:24:11 +0000147// - When SBs are discarded the relevant list(instr_details) is freed.
nethercote9313ac42004-07-06 21:54:20 +0000148
njnd3bef4f2005-10-15 17:46:18 +0000149typedef struct _InstrInfo InstrInfo;
150struct _InstrInfo {
nethercoteca1f2dc2004-07-21 08:49:02 +0000151 Addr instr_addr;
njn6a3009b2005-03-20 00:20:06 +0000152 UChar instr_len;
njnd3bef4f2005-10-15 17:46:18 +0000153 LineCC* parent; // parent line-CC
nethercote9313ac42004-07-06 21:54:20 +0000154};
155
sewardj0b9d74a2006-12-24 02:24:11 +0000156typedef struct _SB_info SB_info;
157struct _SB_info {
158 Addr SB_addr; // key; MUST BE FIRST
njnd3bef4f2005-10-15 17:46:18 +0000159 Int n_instrs;
160 InstrInfo instrs[0];
nethercote9313ac42004-07-06 21:54:20 +0000161};
162
njnd3bef4f2005-10-15 17:46:18 +0000163static OSet* instrInfoTable;
164
165//------------------------------------------------------------
166// Secondary data structure: string table
167// - holds strings, avoiding dups
168// - used for filenames and function names, each of which will be
169// pointed to by one or more CCs.
170// - it also allows equality checks just by pointer comparison, which
171// is good when printing the output file at the end.
172
173static OSet* stringTable;
nethercote9313ac42004-07-06 21:54:20 +0000174
175//------------------------------------------------------------
176// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000177static Int distinct_files = 0;
178static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000179static Int distinct_lines = 0;
weidendo6fc0de02012-10-30 00:28:29 +0000180static Int distinct_instrsGen = 0;
181static Int distinct_instrsNoX = 0;
nethercote9313ac42004-07-06 21:54:20 +0000182
njnd3bef4f2005-10-15 17:46:18 +0000183static Int full_debugs = 0;
184static Int file_line_debugs = 0;
185static Int fn_debugs = 0;
186static Int no_debugs = 0;
njn4f9c9342002-04-29 16:03:24 +0000187
nethercote9313ac42004-07-06 21:54:20 +0000188/*------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +0000189/*--- String table operations ---*/
190/*------------------------------------------------------------*/
191
tom5a835d52007-12-30 12:28:26 +0000192static Word stringCmp( const void* key, const void* elem )
njnd3bef4f2005-10-15 17:46:18 +0000193{
florian3e798632012-11-24 19:41:54 +0000194 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
njnd3bef4f2005-10-15 17:46:18 +0000195}
196
197// Get a permanent string; either pull it out of the string table if it's
198// been encountered before, or dup it and put it into the string table.
florian19f91bb2012-11-10 22:29:54 +0000199static HChar* get_perm_string(HChar* s)
njnd3bef4f2005-10-15 17:46:18 +0000200{
florian19f91bb2012-11-10 22:29:54 +0000201 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
njnd3bef4f2005-10-15 17:46:18 +0000202 if (s_ptr) {
203 return *s_ptr;
204 } else {
florian19f91bb2012-11-10 22:29:54 +0000205 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
sewardj9c606bd2008-09-18 18:12:50 +0000206 *s_node = VG_(strdup)("cg.main.gps.1", s);
njne2a9ad32007-09-17 05:30:48 +0000207 VG_(OSetGen_Insert)(stringTable, s_node);
njnd3bef4f2005-10-15 17:46:18 +0000208 return *s_node;
209 }
210}
211
212/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000213/*--- CC table operations ---*/
214/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000215
florian19f91bb2012-11-10 22:29:54 +0000216static void get_debug_info(Addr instr_addr, HChar file[FILE_LEN],
217 HChar fn[FN_LEN], UInt* line)
njn4f9c9342002-04-29 16:03:24 +0000218{
florian19f91bb2012-11-10 22:29:54 +0000219 HChar dir[FILE_LEN];
njnf3b61d62007-09-17 00:41:07 +0000220 Bool found_dirname;
sewardj7cee6f92005-06-13 17:39:06 +0000221 Bool found_file_line = VG_(get_filename_linenum)(
222 instr_addr,
223 file, FILE_LEN,
njnf3b61d62007-09-17 00:41:07 +0000224 dir, FILE_LEN, &found_dirname,
sewardj7cee6f92005-06-13 17:39:06 +0000225 line
226 );
nethercote9313ac42004-07-06 21:54:20 +0000227 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000228
nethercote9313ac42004-07-06 21:54:20 +0000229 if (!found_file_line) {
230 VG_(strcpy)(file, "???");
231 *line = 0;
232 }
233 if (!found_fn) {
234 VG_(strcpy)(fn, "???");
235 }
njnf3b61d62007-09-17 00:41:07 +0000236
237 if (found_dirname) {
238 // +1 for the '/'.
239 tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
240 VG_(strcat)(dir, "/"); // Append '/'
241 VG_(strcat)(dir, file); // Append file to dir
242 VG_(strcpy)(file, dir); // Move dir+file to file
243 }
244
nethercote9313ac42004-07-06 21:54:20 +0000245 if (found_file_line) {
njnd3bef4f2005-10-15 17:46:18 +0000246 if (found_fn) full_debugs++;
247 else file_line_debugs++;
nethercote9313ac42004-07-06 21:54:20 +0000248 } else {
njnd3bef4f2005-10-15 17:46:18 +0000249 if (found_fn) fn_debugs++;
250 else no_debugs++;
njn4f9c9342002-04-29 16:03:24 +0000251 }
252}
253
nethercote9313ac42004-07-06 21:54:20 +0000254// Do a three step traversal: by file, then fn, then line.
njnd3bef4f2005-10-15 17:46:18 +0000255// Returns a pointer to the line CC, creates a new one if necessary.
256static LineCC* get_lineCC(Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000257{
florian19f91bb2012-11-10 22:29:54 +0000258 HChar file[FILE_LEN], fn[FN_LEN];
259 UInt line;
njnd3bef4f2005-10-15 17:46:18 +0000260 CodeLoc loc;
261 LineCC* lineCC;
nethercote9313ac42004-07-06 21:54:20 +0000262
njn6a3009b2005-03-20 00:20:06 +0000263 get_debug_info(origAddr, file, fn, &line);
nethercote9313ac42004-07-06 21:54:20 +0000264
njnd3bef4f2005-10-15 17:46:18 +0000265 loc.file = file;
266 loc.fn = fn;
267 loc.line = line;
njn4f9c9342002-04-29 16:03:24 +0000268
njne2a9ad32007-09-17 05:30:48 +0000269 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
njnd3bef4f2005-10-15 17:46:18 +0000270 if (!lineCC) {
271 // Allocate and zero a new node.
njne2a9ad32007-09-17 05:30:48 +0000272 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
njnd3bef4f2005-10-15 17:46:18 +0000273 lineCC->loc.file = get_perm_string(loc.file);
274 lineCC->loc.fn = get_perm_string(loc.fn);
275 lineCC->loc.line = loc.line;
njn0a8db5c2007-04-02 03:11:41 +0000276 lineCC->Ir.a = 0;
277 lineCC->Ir.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000278 lineCC->Ir.mL = 0;
njn0a8db5c2007-04-02 03:11:41 +0000279 lineCC->Dr.a = 0;
280 lineCC->Dr.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000281 lineCC->Dr.mL = 0;
njn0a8db5c2007-04-02 03:11:41 +0000282 lineCC->Dw.a = 0;
283 lineCC->Dw.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000284 lineCC->Dw.mL = 0;
sewardj8badbaa2007-05-08 09:20:25 +0000285 lineCC->Bc.b = 0;
286 lineCC->Bc.mp = 0;
287 lineCC->Bi.b = 0;
288 lineCC->Bi.mp = 0;
njne2a9ad32007-09-17 05:30:48 +0000289 VG_(OSetGen_Insert)(CC_table, lineCC);
njn4f9c9342002-04-29 16:03:24 +0000290 }
nethercote9313ac42004-07-06 21:54:20 +0000291
njnd3bef4f2005-10-15 17:46:18 +0000292 return lineCC;
njn4f9c9342002-04-29 16:03:24 +0000293}
294
295/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000296/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000297/*------------------------------------------------------------*/
298
weidendo6fc0de02012-10-30 00:28:29 +0000299/* A common case for an instruction read event is that the
300 * bytes read belong to the same cache line in both L1I and LL
301 * (if cache line sizes of L1 and LL are the same).
302 * As this can be detected at instrumentation time, and results
 303 * in faster simulation, special-casing is beneficial.
304 *
 305 * Abbreviations used in var/function names:
306 * IrNoX - instruction read does not cross cache lines
307 * IrGen - generic instruction read; not detected as IrNoX
308 * Ir - not known / not important whether it is an IrNoX
309 */
310
njnc52b9322010-09-27 02:20:38 +0000311// Only used with --cache-sim=no.
312static VG_REGPARM(1)
weidendo6fc0de02012-10-30 00:28:29 +0000313void log_1Ir(InstrInfo* n)
njnc52b9322010-09-27 02:20:38 +0000314{
315 n->parent->Ir.a++;
316}
317
318// Only used with --cache-sim=no.
319static VG_REGPARM(2)
weidendo6fc0de02012-10-30 00:28:29 +0000320void log_2Ir(InstrInfo* n, InstrInfo* n2)
njnc52b9322010-09-27 02:20:38 +0000321{
322 n->parent->Ir.a++;
323 n2->parent->Ir.a++;
324}
325
326// Only used with --cache-sim=no.
327static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000328void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
njnc52b9322010-09-27 02:20:38 +0000329{
330 n->parent->Ir.a++;
331 n2->parent->Ir.a++;
332 n3->parent->Ir.a++;
333}
334
weidendo6fc0de02012-10-30 00:28:29 +0000335// Generic case for instruction reads: may cross cache lines.
336// All other Ir handlers expect IrNoX instruction reads.
njnaf839f52005-06-23 03:27:57 +0000337static VG_REGPARM(1)
weidendo6fc0de02012-10-30 00:28:29 +0000338void log_1IrGen_0D_cache_access(InstrInfo* n)
njn25e49d8e72002-09-23 09:36:25 +0000339{
weidendo6fc0de02012-10-30 00:28:29 +0000340 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000341 // n, n->instr_addr, n->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000342 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
343 &n->parent->Ir.m1, &n->parent->Ir.mL);
344 n->parent->Ir.a++;
345}
346
347static VG_REGPARM(1)
348void log_1IrNoX_0D_cache_access(InstrInfo* n)
349{
350 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
351 // n, n->instr_addr, n->instr_len);
352 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
353 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000354 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000355}
356
njnaf839f52005-06-23 03:27:57 +0000357static VG_REGPARM(2)
weidendo6fc0de02012-10-30 00:28:29 +0000358void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
njn25e49d8e72002-09-23 09:36:25 +0000359{
weidendo6fc0de02012-10-30 00:28:29 +0000360 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
361 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000362 // n, n->instr_addr, n->instr_len,
363 // n2, n2->instr_addr, n2->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000364 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
365 &n->parent->Ir.m1, &n->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000366 n->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000367 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
368 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000369 n2->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000370}
371
372static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000373void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
sewardj5155dec2005-10-12 10:09:23 +0000374{
weidendo6fc0de02012-10-30 00:28:29 +0000375 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
376 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
377 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000378 // n, n->instr_addr, n->instr_len,
379 // n2, n2->instr_addr, n2->instr_len,
380 // n3, n3->instr_addr, n3->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000381 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
382 &n->parent->Ir.m1, &n->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000383 n->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000384 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
385 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000386 n2->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000387 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
388 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000389 n3->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000390}
391
392static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000393void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000394{
weidendo6fc0de02012-10-30 00:28:29 +0000395 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
sewardj5155dec2005-10-12 10:09:23 +0000396 // " daddr=0x%010lx, dsize=%lu\n",
397 // n, n->instr_addr, n->instr_len, data_addr, data_size);
weidendo6fc0de02012-10-30 00:28:29 +0000398 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
399 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000400 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000401
sewardj5155dec2005-10-12 10:09:23 +0000402 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000403 &n->parent->Dr.m1, &n->parent->Dr.mL);
nethercote9313ac42004-07-06 21:54:20 +0000404 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000405}
406
sewardj5155dec2005-10-12 10:09:23 +0000407static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000408void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000409{
weidendo6fc0de02012-10-30 00:28:29 +0000410 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
sewardj5155dec2005-10-12 10:09:23 +0000411 // " daddr=0x%010lx, dsize=%lu\n",
412 // n, n->instr_addr, n->instr_len, data_addr, data_size);
weidendo6fc0de02012-10-30 00:28:29 +0000413 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
414 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000415 n->parent->Ir.a++;
416
sewardj5155dec2005-10-12 10:09:23 +0000417 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000418 &n->parent->Dw.m1, &n->parent->Dw.mL);
nethercote9313ac42004-07-06 21:54:20 +0000419 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000420}
421
sewardjcafe5052013-01-17 14:24:35 +0000422/* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
423 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
424 you change them, you must change addEvent_D_guarded too. */
njnaf839f52005-06-23 03:27:57 +0000425static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000426void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000427{
weidendo6fc0de02012-10-30 00:28:29 +0000428 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000429 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000430 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000431 &n->parent->Dr.m1, &n->parent->Dr.mL);
nethercote9313ac42004-07-06 21:54:20 +0000432 n->parent->Dr.a++;
sewardj5155dec2005-10-12 10:09:23 +0000433}
434
sewardjcafe5052013-01-17 14:24:35 +0000435/* See comment on log_0Ir_1Dr_cache_access. */
sewardj5155dec2005-10-12 10:09:23 +0000436static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000437void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000438{
weidendo6fc0de02012-10-30 00:28:29 +0000439 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000440 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000441 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000442 &n->parent->Dw.m1, &n->parent->Dw.mL);
nethercote9313ac42004-07-06 21:54:20 +0000443 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000444}
445
sewardj8badbaa2007-05-08 09:20:25 +0000446/* For branches, we consult two different predictors, one which
447 predicts taken/untaken for conditional branches, and the other
448 which predicts the branch target address for indirect branches
449 (jump-to-register style ones). */
450
451static VG_REGPARM(2)
452void log_cond_branch(InstrInfo* n, Word taken)
453{
454 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
455 // n, taken);
456 n->parent->Bc.b++;
457 n->parent->Bc.mp
458 += (1 & do_cond_branch_predict(n->instr_addr, taken));
459}
460
461static VG_REGPARM(2)
462void log_ind_branch(InstrInfo* n, UWord actual_dst)
463{
464 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
465 // n, actual_dst);
466 n->parent->Bi.b++;
467 n->parent->Bi.mp
468 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
469}
470
471
nethercote9313ac42004-07-06 21:54:20 +0000472/*------------------------------------------------------------*/
sewardj5155dec2005-10-12 10:09:23 +0000473/*--- Instrumentation types and structures ---*/
474/*------------------------------------------------------------*/
475
476/* Maintain an ordered list of memory events which are outstanding, in
477 the sense that no IR has yet been generated to do the relevant
478 helper calls. The BB is scanned top to bottom and memory events
479 are added to the end of the list, merging with the most recent
480 notified event where possible (Dw immediately following Dr and
481 having the same size and EA can be merged).
482
483 This merging is done so that for architectures which have
484 load-op-store instructions (x86, amd64), the insn is treated as if
485 it makes just one memory reference (a modify), rather than two (a
486 read followed by a write at the same address).
487
488 At various points the list will need to be flushed, that is, IR
489 generated from it. That must happen before any possible exit from
490 the block (the end, or an IRStmt_Exit). Flushing also takes place
491 when there is no space to add a new event.
492
493 If we require the simulation statistics to be up to date with
494 respect to possible memory exceptions, then the list would have to
495 be flushed before each memory reference. That would however lose
496 performance by inhibiting event-merging during flushing.
497
498 Flushing the list consists of walking it start to end and emitting
499 instrumentation IR for each event, in the order in which they
500 appear. It may be possible to emit a single call for two adjacent
501 events in order to reduce the number of helper function calls made.
502 For example, it could well be profitable to handle two adjacent Ir
503 events with a single helper call. */
504
505typedef
506 IRExpr
507 IRAtom;
508
/* Kinds of event that can sit in the outstanding-event list. */
typedef enum {
   Ev_IrNoX,   // Instruction read not crossing cache lines
   Ev_IrGen,   // Generic Ir, not being detected as IrNoX
   Ev_Dr,      // Data read
   Ev_Dw,      // Data write
   Ev_Dm,      // Data modify (read then write)
   Ev_Bc,      // branch conditional
   Ev_Bi       // branch indirect (to unknown destination)
} EventTag;
sewardj5155dec2005-10-12 10:09:23 +0000520
521typedef
522 struct {
sewardj8badbaa2007-05-08 09:20:25 +0000523 EventTag tag;
524 InstrInfo* inode;
525 union {
526 struct {
weidendo6fc0de02012-10-30 00:28:29 +0000527 } IrGen;
528 struct {
529 } IrNoX;
sewardj8badbaa2007-05-08 09:20:25 +0000530 struct {
531 IRAtom* ea;
532 Int szB;
533 } Dr;
534 struct {
535 IRAtom* ea;
536 Int szB;
537 } Dw;
538 struct {
539 IRAtom* ea;
540 Int szB;
541 } Dm;
542 struct {
543 IRAtom* taken; /* :: Ity_I1 */
544 } Bc;
545 struct {
546 IRAtom* dst;
547 } Bi;
548 } Ev;
sewardj5155dec2005-10-12 10:09:23 +0000549 }
550 Event;
551
sewardj8badbaa2007-05-08 09:20:25 +0000552static void init_Event ( Event* ev ) {
553 VG_(memset)(ev, 0, sizeof(Event));
554}
555
556static IRAtom* get_Event_dea ( Event* ev ) {
557 switch (ev->tag) {
558 case Ev_Dr: return ev->Ev.Dr.ea;
559 case Ev_Dw: return ev->Ev.Dw.ea;
560 case Ev_Dm: return ev->Ev.Dm.ea;
561 default: tl_assert(0);
562 }
563}
564
565static Int get_Event_dszB ( Event* ev ) {
566 switch (ev->tag) {
567 case Ev_Dr: return ev->Ev.Dr.szB;
568 case Ev_Dw: return ev->Ev.Dw.szB;
569 case Ev_Dm: return ev->Ev.Dm.szB;
570 default: tl_assert(0);
571 }
572}
573
574
/* Maximum number of unnotified events held at once.  The value is
   arbitrary: a larger number allows more event merging to occur, but
   potentially induces more spilling by extending the live ranges of
   address temporaries. */
#define N_EVENTS 16
580
581
582/* A struct which holds all the running state during instrumentation.
583 Mostly to avoid passing loads of parameters everywhere. */
584typedef
585 struct {
586 /* The current outstanding-memory-event list. */
587 Event events[N_EVENTS];
588 Int events_used;
589
njnd3bef4f2005-10-15 17:46:18 +0000590 /* The array of InstrInfo bins for the BB. */
sewardj0b9d74a2006-12-24 02:24:11 +0000591 SB_info* sbInfo;
sewardj5155dec2005-10-12 10:09:23 +0000592
njnd3bef4f2005-10-15 17:46:18 +0000593 /* Number InstrInfo bins 'used' so far. */
sewardj0b9d74a2006-12-24 02:24:11 +0000594 Int sbInfo_i;
sewardj5155dec2005-10-12 10:09:23 +0000595
sewardj0b9d74a2006-12-24 02:24:11 +0000596 /* The output SB being constructed. */
597 IRSB* sbOut;
sewardj5155dec2005-10-12 10:09:23 +0000598 }
599 CgState;
600
601
sewardj5155dec2005-10-12 10:09:23 +0000602/*------------------------------------------------------------*/
603/*--- Instrumentation main ---*/
nethercote9313ac42004-07-06 21:54:20 +0000604/*------------------------------------------------------------*/
605
sewardj4ba057c2005-10-18 12:04:18 +0000606// Note that origAddr is the real origAddr, not the address of the first
607// instruction in the block (they can be different due to redirection).
nethercote564b2b02004-08-07 15:54:53 +0000608static
sewardj0b9d74a2006-12-24 02:24:11 +0000609SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000610{
njn4bd67b52005-08-11 00:47:10 +0000611 Int i, n_instrs;
612 IRStmt* st;
sewardj0b9d74a2006-12-24 02:24:11 +0000613 SB_info* sbInfo;
njnd3bef4f2005-10-15 17:46:18 +0000614
sewardj0b9d74a2006-12-24 02:24:11 +0000615 // Count number of original instrs in SB
njn6a3009b2005-03-20 00:20:06 +0000616 n_instrs = 0;
sewardj0b9d74a2006-12-24 02:24:11 +0000617 for (i = 0; i < sbIn->stmts_used; i++) {
618 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +0000619 if (Ist_IMark == st->tag) n_instrs++;
nethercote9313ac42004-07-06 21:54:20 +0000620 }
621
njnf7d26092005-10-12 16:45:17 +0000622 // Check that we don't have an entry for this BB in the instr-info table.
623 // If this assertion fails, there has been some screwup: some
624 // translations must have been discarded but Cachegrind hasn't discarded
625 // the corresponding entries in the instr-info table.
njne2a9ad32007-09-17 05:30:48 +0000626 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
sewardj0b9d74a2006-12-24 02:24:11 +0000627 tl_assert(NULL == sbInfo);
sewardja3a29a52005-10-12 16:16:03 +0000628
njnd3bef4f2005-10-15 17:46:18 +0000629 // BB never translated before (at this address, at least; could have
630 // been unloaded and then reloaded elsewhere in memory)
njne2a9ad32007-09-17 05:30:48 +0000631 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
sewardj0b9d74a2006-12-24 02:24:11 +0000632 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
633 sbInfo->SB_addr = origAddr;
634 sbInfo->n_instrs = n_instrs;
njne2a9ad32007-09-17 05:30:48 +0000635 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
sewardja3a29a52005-10-12 16:16:03 +0000636
sewardj0b9d74a2006-12-24 02:24:11 +0000637 return sbInfo;
nethercote9313ac42004-07-06 21:54:20 +0000638}
njn6a3009b2005-03-20 00:20:06 +0000639
nethercote9313ac42004-07-06 21:54:20 +0000640
sewardj5155dec2005-10-12 10:09:23 +0000641static void showEvent ( Event* ev )
nethercote9313ac42004-07-06 21:54:20 +0000642{
sewardj8badbaa2007-05-08 09:20:25 +0000643 switch (ev->tag) {
weidendo6fc0de02012-10-30 00:28:29 +0000644 case Ev_IrGen:
645 VG_(printf)("IrGen %p\n", ev->inode);
646 break;
647 case Ev_IrNoX:
648 VG_(printf)("IrNoX %p\n", ev->inode);
sewardj5155dec2005-10-12 10:09:23 +0000649 break;
sewardj8badbaa2007-05-08 09:20:25 +0000650 case Ev_Dr:
651 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
652 ppIRExpr(ev->Ev.Dr.ea);
sewardj5155dec2005-10-12 10:09:23 +0000653 VG_(printf)("\n");
654 break;
sewardj8badbaa2007-05-08 09:20:25 +0000655 case Ev_Dw:
656 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
657 ppIRExpr(ev->Ev.Dw.ea);
sewardj5155dec2005-10-12 10:09:23 +0000658 VG_(printf)("\n");
659 break;
sewardj8badbaa2007-05-08 09:20:25 +0000660 case Ev_Dm:
661 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
662 ppIRExpr(ev->Ev.Dm.ea);
663 VG_(printf)("\n");
664 break;
665 case Ev_Bc:
666 VG_(printf)("Bc %p GA=", ev->inode);
667 ppIRExpr(ev->Ev.Bc.taken);
668 VG_(printf)("\n");
669 break;
670 case Ev_Bi:
671 VG_(printf)("Bi %p DST=", ev->inode);
672 ppIRExpr(ev->Ev.Bi.dst);
sewardj5155dec2005-10-12 10:09:23 +0000673 VG_(printf)("\n");
674 break;
675 default:
676 tl_assert(0);
677 break;
678 }
njn6a3009b2005-03-20 00:20:06 +0000679}
680
njnfd9f6222005-10-16 00:17:37 +0000681// Reserve and initialise an InstrInfo for the first mention of a new insn.
682static
683InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
njn6a3009b2005-03-20 00:20:06 +0000684{
njnd3bef4f2005-10-15 17:46:18 +0000685 InstrInfo* i_node;
sewardj0b9d74a2006-12-24 02:24:11 +0000686 tl_assert(cgs->sbInfo_i >= 0);
687 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
688 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
njnfd9f6222005-10-16 00:17:37 +0000689 i_node->instr_addr = instr_addr;
690 i_node->instr_len = instr_len;
691 i_node->parent = get_lineCC(instr_addr);
sewardj0b9d74a2006-12-24 02:24:11 +0000692 cgs->sbInfo_i++;
sewardj5155dec2005-10-12 10:09:23 +0000693 return i_node;
694}
sewardj17a56bf2005-03-21 01:35:02 +0000695
sewardj17a56bf2005-03-21 01:35:02 +0000696
sewardj5155dec2005-10-12 10:09:23 +0000697/* Generate code for all outstanding memory events, and mark the queue
698 empty. Code is generated into cgs->bbOut, and this activity
sewardj0b9d74a2006-12-24 02:24:11 +0000699 'consumes' slots in cgs->sbInfo. */
njn6a3009b2005-03-20 00:20:06 +0000700
sewardj5155dec2005-10-12 10:09:23 +0000701static void flushEvents ( CgState* cgs )
702{
njnd3bef4f2005-10-15 17:46:18 +0000703 Int i, regparms;
florianee90c8a2012-10-21 02:39:42 +0000704 const HChar* helperName;
njnd3bef4f2005-10-15 17:46:18 +0000705 void* helperAddr;
706 IRExpr** argv;
707 IRExpr* i_node_expr;
njnd3bef4f2005-10-15 17:46:18 +0000708 IRDirty* di;
njnc285dca2005-10-15 22:07:28 +0000709 Event* ev;
710 Event* ev2;
711 Event* ev3;
njn6a3009b2005-03-20 00:20:06 +0000712
sewardj5155dec2005-10-12 10:09:23 +0000713 i = 0;
714 while (i < cgs->events_used) {
njn6a3009b2005-03-20 00:20:06 +0000715
sewardj5155dec2005-10-12 10:09:23 +0000716 helperName = NULL;
717 helperAddr = NULL;
718 argv = NULL;
719 regparms = 0;
720
721 /* generate IR to notify event i and possibly the ones
722 immediately following it. */
723 tl_assert(i >= 0 && i < cgs->events_used);
njnc285dca2005-10-15 22:07:28 +0000724
725 ev = &cgs->events[i];
726 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
727 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
728
sewardj5155dec2005-10-12 10:09:23 +0000729 if (DEBUG_CG) {
730 VG_(printf)(" flush ");
njnc285dca2005-10-15 22:07:28 +0000731 showEvent( ev );
njn4f9c9342002-04-29 16:03:24 +0000732 }
sewardj5155dec2005-10-12 10:09:23 +0000733
njnfd9f6222005-10-16 00:17:37 +0000734 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
sewardj5155dec2005-10-12 10:09:23 +0000735
736 /* Decide on helper fn to call and args to pass it, and advance
737 i appropriately. */
sewardj8badbaa2007-05-08 09:20:25 +0000738 switch (ev->tag) {
weidendo6fc0de02012-10-30 00:28:29 +0000739 case Ev_IrNoX:
740 /* Merge an IrNoX with a following Dr/Dm. */
sewardj8badbaa2007-05-08 09:20:25 +0000741 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
742 /* Why is this true? It's because we're merging an Ir
743 with a following Dr or Dm. The Ir derives from the
744 instruction's IMark and the Dr/Dm from data
745 references which follow it. In short it holds
746 because each insn starts with an IMark, hence an
747 Ev_Ir, and so these Dr/Dm must pertain to the
748 immediately preceding Ir. Same applies to analogous
749 assertions in the subsequent cases. */
njnfd9f6222005-10-16 00:17:37 +0000750 tl_assert(ev2->inode == ev->inode);
weidendo6fc0de02012-10-30 00:28:29 +0000751 helperName = "log_1IrNoX_1Dr_cache_access";
752 helperAddr = &log_1IrNoX_1Dr_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000753 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000754 get_Event_dea(ev2),
755 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000756 regparms = 3;
757 i += 2;
758 }
weidendo6fc0de02012-10-30 00:28:29 +0000759 /* Merge an IrNoX with a following Dw. */
sewardj5155dec2005-10-12 10:09:23 +0000760 else
sewardj8badbaa2007-05-08 09:20:25 +0000761 if (ev2 && ev2->tag == Ev_Dw) {
njnfd9f6222005-10-16 00:17:37 +0000762 tl_assert(ev2->inode == ev->inode);
weidendo6fc0de02012-10-30 00:28:29 +0000763 helperName = "log_1IrNoX_1Dw_cache_access";
764 helperAddr = &log_1IrNoX_1Dw_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000765 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000766 get_Event_dea(ev2),
767 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000768 regparms = 3;
769 i += 2;
770 }
weidendo6fc0de02012-10-30 00:28:29 +0000771 /* Merge an IrNoX with two following IrNoX's. */
sewardj5155dec2005-10-12 10:09:23 +0000772 else
weidendo6fc0de02012-10-30 00:28:29 +0000773 if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
njnc285dca2005-10-15 22:07:28 +0000774 {
njnc52b9322010-09-27 02:20:38 +0000775 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000776 helperName = "log_3IrNoX_0D_cache_access";
777 helperAddr = &log_3IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000778 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000779 helperName = "log_3Ir";
780 helperAddr = &log_3Ir;
njnc52b9322010-09-27 02:20:38 +0000781 }
njnfd9f6222005-10-16 00:17:37 +0000782 argv = mkIRExprVec_3( i_node_expr,
783 mkIRExpr_HWord( (HWord)ev2->inode ),
784 mkIRExpr_HWord( (HWord)ev3->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000785 regparms = 3;
786 i += 3;
787 }
weidendo6fc0de02012-10-30 00:28:29 +0000788 /* Merge an IrNoX with one following IrNoX. */
sewardj5155dec2005-10-12 10:09:23 +0000789 else
weidendo6fc0de02012-10-30 00:28:29 +0000790 if (ev2 && ev2->tag == Ev_IrNoX) {
njnc52b9322010-09-27 02:20:38 +0000791 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000792 helperName = "log_2IrNoX_0D_cache_access";
793 helperAddr = &log_2IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000794 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000795 helperName = "log_2Ir";
796 helperAddr = &log_2Ir;
njnc52b9322010-09-27 02:20:38 +0000797 }
njnfd9f6222005-10-16 00:17:37 +0000798 argv = mkIRExprVec_2( i_node_expr,
799 mkIRExpr_HWord( (HWord)ev2->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000800 regparms = 2;
801 i += 2;
802 }
803 /* No merging possible; emit as-is. */
804 else {
njnc52b9322010-09-27 02:20:38 +0000805 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000806 helperName = "log_1IrNoX_0D_cache_access";
807 helperAddr = &log_1IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000808 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000809 helperName = "log_1Ir";
810 helperAddr = &log_1Ir;
njnc52b9322010-09-27 02:20:38 +0000811 }
sewardj5155dec2005-10-12 10:09:23 +0000812 argv = mkIRExprVec_1( i_node_expr );
813 regparms = 1;
814 i++;
815 }
816 break;
weidendo6fc0de02012-10-30 00:28:29 +0000817 case Ev_IrGen:
818 if (clo_cache_sim) {
819 helperName = "log_1IrGen_0D_cache_access";
820 helperAddr = &log_1IrGen_0D_cache_access;
821 } else {
822 helperName = "log_1Ir";
823 helperAddr = &log_1Ir;
824 }
825 argv = mkIRExprVec_1( i_node_expr );
826 regparms = 1;
827 i++;
828 break;
sewardj8badbaa2007-05-08 09:20:25 +0000829 case Ev_Dr:
830 case Ev_Dm:
831 /* Data read or modify */
weidendo6fc0de02012-10-30 00:28:29 +0000832 helperName = "log_0Ir_1Dr_cache_access";
833 helperAddr = &log_0Ir_1Dr_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000834 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000835 get_Event_dea(ev),
836 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000837 regparms = 3;
838 i++;
839 break;
sewardj8badbaa2007-05-08 09:20:25 +0000840 case Ev_Dw:
841 /* Data write */
weidendo6fc0de02012-10-30 00:28:29 +0000842 helperName = "log_0Ir_1Dw_cache_access";
843 helperAddr = &log_0Ir_1Dw_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000844 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000845 get_Event_dea(ev),
846 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000847 regparms = 3;
848 i++;
849 break;
sewardj8badbaa2007-05-08 09:20:25 +0000850 case Ev_Bc:
851 /* Conditional branch */
852 helperName = "log_cond_branch";
853 helperAddr = &log_cond_branch;
854 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
855 regparms = 2;
856 i++;
857 break;
858 case Ev_Bi:
859 /* Branch to an unknown destination */
860 helperName = "log_ind_branch";
861 helperAddr = &log_ind_branch;
862 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
863 regparms = 2;
864 i++;
865 break;
sewardj5155dec2005-10-12 10:09:23 +0000866 default:
867 tl_assert(0);
868 }
869
870 /* Add the helper. */
871 tl_assert(helperName);
872 tl_assert(helperAddr);
873 tl_assert(argv);
sewardj5bb86822005-12-23 12:47:42 +0000874 di = unsafeIRDirty_0_N( regparms,
875 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
876 argv );
sewardj0b9d74a2006-12-24 02:24:11 +0000877 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
njn4f9c9342002-04-29 16:03:24 +0000878 }
879
sewardj5155dec2005-10-12 10:09:23 +0000880 cgs->events_used = 0;
njn4f9c9342002-04-29 16:03:24 +0000881}
njn14d01ce2004-11-26 11:30:14 +0000882
njnfd9f6222005-10-16 00:17:37 +0000883static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
sewardj5155dec2005-10-12 10:09:23 +0000884{
885 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000886 if (cgs->events_used == N_EVENTS)
887 flushEvents(cgs);
888 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
889 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000890 init_Event(evt);
njnfd9f6222005-10-16 00:17:37 +0000891 evt->inode = inode;
weidendo6fc0de02012-10-30 00:28:29 +0000892 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
893 evt->tag = Ev_IrNoX;
894 distinct_instrsNoX++;
895 } else {
896 evt->tag = Ev_IrGen;
897 distinct_instrsGen++;
898 }
sewardj5155dec2005-10-12 10:09:23 +0000899 cgs->events_used++;
900}
901
njnfd9f6222005-10-16 00:17:37 +0000902static
903void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
sewardj5155dec2005-10-12 10:09:23 +0000904{
njnfd9f6222005-10-16 00:17:37 +0000905 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000906 tl_assert(isIRAtom(ea));
sewardj98763d52012-06-03 22:40:07 +0000907 tl_assert(datasize >= 1 && datasize <= min_line_size);
sewardj8badbaa2007-05-08 09:20:25 +0000908 if (!clo_cache_sim)
909 return;
njnfd9f6222005-10-16 00:17:37 +0000910 if (cgs->events_used == N_EVENTS)
911 flushEvents(cgs);
912 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
913 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000914 init_Event(evt);
915 evt->tag = Ev_Dr;
916 evt->inode = inode;
917 evt->Ev.Dr.szB = datasize;
918 evt->Ev.Dr.ea = ea;
njnfd9f6222005-10-16 00:17:37 +0000919 cgs->events_used++;
920}
sewardj5155dec2005-10-12 10:09:23 +0000921
njnfd9f6222005-10-16 00:17:37 +0000922static
923void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
924{
925 Event* lastEvt;
926 Event* evt;
927
928 tl_assert(isIRAtom(ea));
sewardj98763d52012-06-03 22:40:07 +0000929 tl_assert(datasize >= 1 && datasize <= min_line_size);
njnfd9f6222005-10-16 00:17:37 +0000930
sewardj8badbaa2007-05-08 09:20:25 +0000931 if (!clo_cache_sim)
932 return;
933
njnfd9f6222005-10-16 00:17:37 +0000934 /* Is it possible to merge this write with the preceding read? */
935 lastEvt = &cgs->events[cgs->events_used-1];
sewardj5155dec2005-10-12 10:09:23 +0000936 if (cgs->events_used > 0
sewardjcafe5052013-01-17 14:24:35 +0000937 && lastEvt->tag == Ev_Dr
938 && lastEvt->Ev.Dr.szB == datasize
939 && lastEvt->inode == inode
940 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
njnfd9f6222005-10-16 00:17:37 +0000941 {
sewardj8badbaa2007-05-08 09:20:25 +0000942 lastEvt->tag = Ev_Dm;
sewardj5155dec2005-10-12 10:09:23 +0000943 return;
944 }
945
946 /* No. Add as normal. */
947 if (cgs->events_used == N_EVENTS)
948 flushEvents(cgs);
949 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
njnfd9f6222005-10-16 00:17:37 +0000950 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000951 init_Event(evt);
952 evt->tag = Ev_Dw;
953 evt->inode = inode;
954 evt->Ev.Dw.szB = datasize;
955 evt->Ev.Dw.ea = ea;
956 cgs->events_used++;
957}
958
959static
sewardjcafe5052013-01-17 14:24:35 +0000960void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
961 Int datasize, IRAtom* ea, IRAtom* guard,
962 Bool isWrite )
963{
964 tl_assert(isIRAtom(ea));
965 tl_assert(guard);
966 tl_assert(isIRAtom(guard));
967 tl_assert(datasize >= 1 && datasize <= min_line_size);
968
969 if (!clo_cache_sim)
970 return;
971
972 /* Adding guarded memory actions and merging them with the existing
973 queue is too complex. Simply flush the queue and add this
974 action immediately. Since guarded loads and stores are pretty
975 rare, this is not thought likely to cause any noticeable
976 performance loss as a result of the loss of event-merging
977 opportunities. */
978 tl_assert(cgs->events_used >= 0);
979 flushEvents(cgs);
980 tl_assert(cgs->events_used == 0);
981 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
982 IRExpr* i_node_expr;
983 const HChar* helperName;
984 void* helperAddr;
985 IRExpr** argv;
986 Int regparms;
987 IRDirty* di;
988 i_node_expr = mkIRExpr_HWord( (HWord)inode );
989 helperName = isWrite ? "log_0Ir_1Dw_cache_access"
990 : "log_0Ir_1Dr_cache_access";
991 helperAddr = isWrite ? &log_0Ir_1Dw_cache_access
992 : &log_0Ir_1Dr_cache_access;
993 argv = mkIRExprVec_3( i_node_expr,
994 ea, mkIRExpr_HWord( datasize ) );
995 regparms = 3;
996 di = unsafeIRDirty_0_N(
997 regparms,
998 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
999 argv );
1000 di->guard = guard;
1001 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
1002}
1003
1004
1005static
sewardj8badbaa2007-05-08 09:20:25 +00001006void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
1007{
1008 Event* evt;
1009 tl_assert(isIRAtom(guard));
1010 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1011 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1012 if (!clo_branch_sim)
1013 return;
1014 if (cgs->events_used == N_EVENTS)
1015 flushEvents(cgs);
1016 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1017 evt = &cgs->events[cgs->events_used];
1018 init_Event(evt);
1019 evt->tag = Ev_Bc;
1020 evt->inode = inode;
1021 evt->Ev.Bc.taken = guard;
1022 cgs->events_used++;
1023}
1024
1025static
1026void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1027{
1028 Event* evt;
1029 tl_assert(isIRAtom(whereTo));
1030 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1031 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1032 if (!clo_branch_sim)
1033 return;
1034 if (cgs->events_used == N_EVENTS)
1035 flushEvents(cgs);
1036 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1037 evt = &cgs->events[cgs->events_used];
1038 init_Event(evt);
1039 evt->tag = Ev_Bi;
1040 evt->inode = inode;
1041 evt->Ev.Bi.dst = whereTo;
sewardj5155dec2005-10-12 10:09:23 +00001042 cgs->events_used++;
1043}
1044
1045////////////////////////////////////////////////////////////
1046
1047
sewardj4ba057c2005-10-18 12:04:18 +00001048static
sewardj0b9d74a2006-12-24 02:24:11 +00001049IRSB* cg_instrument ( VgCallbackClosure* closure,
1050 IRSB* sbIn,
sewardj461df9c2006-01-17 02:06:39 +00001051 VexGuestLayout* layout,
1052 VexGuestExtents* vge,
florianca503be2012-10-07 21:59:42 +00001053 VexArchInfo* archinfo_host,
sewardj4ba057c2005-10-18 12:04:18 +00001054 IRType gWordTy, IRType hWordTy )
njn14d01ce2004-11-26 11:30:14 +00001055{
njnfd9f6222005-10-16 00:17:37 +00001056 Int i, isize;
sewardj5155dec2005-10-12 10:09:23 +00001057 IRStmt* st;
1058 Addr64 cia; /* address of current insn */
1059 CgState cgs;
sewardj0b9d74a2006-12-24 02:24:11 +00001060 IRTypeEnv* tyenv = sbIn->tyenv;
njnfd9f6222005-10-16 00:17:37 +00001061 InstrInfo* curr_inode = NULL;
sewardj5155dec2005-10-12 10:09:23 +00001062
sewardjd54babf2005-03-21 00:55:49 +00001063 if (gWordTy != hWordTy) {
1064 /* We don't currently support this case. */
1065 VG_(tool_panic)("host/guest word size mismatch");
1066 }
1067
sewardj0b9d74a2006-12-24 02:24:11 +00001068 // Set up new SB
1069 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
njn6a3009b2005-03-20 00:20:06 +00001070
sewardja9f538c2005-10-23 12:06:55 +00001071 // Copy verbatim any IR preamble preceding the first IMark
njn6a3009b2005-03-20 00:20:06 +00001072 i = 0;
sewardj0b9d74a2006-12-24 02:24:11 +00001073 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1074 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
sewardja9f538c2005-10-23 12:06:55 +00001075 i++;
1076 }
1077
1078 // Get the first statement, and initial cia from it
sewardj0b9d74a2006-12-24 02:24:11 +00001079 tl_assert(sbIn->stmts_used > 0);
1080 tl_assert(i < sbIn->stmts_used);
1081 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +00001082 tl_assert(Ist_IMark == st->tag);
sewardj8badbaa2007-05-08 09:20:25 +00001083
1084 cia = st->Ist.IMark.addr;
1085 isize = st->Ist.IMark.len;
1086 // If Vex fails to decode an instruction, the size will be zero.
1087 // Pretend otherwise.
1088 if (isize == 0) isize = VG_MIN_INSTR_SZB;
njn6a3009b2005-03-20 00:20:06 +00001089
sewardj5155dec2005-10-12 10:09:23 +00001090 // Set up running state and get block info
sewardj3a384b32006-01-22 01:12:51 +00001091 tl_assert(closure->readdr == vge->base[0]);
sewardj5155dec2005-10-12 10:09:23 +00001092 cgs.events_used = 0;
sewardj0b9d74a2006-12-24 02:24:11 +00001093 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
1094 cgs.sbInfo_i = 0;
njn6a3009b2005-03-20 00:20:06 +00001095
sewardj5155dec2005-10-12 10:09:23 +00001096 if (DEBUG_CG)
1097 VG_(printf)("\n\n---------- cg_instrument ----------\n");
njn6a3009b2005-03-20 00:20:06 +00001098
njnfd9f6222005-10-16 00:17:37 +00001099 // Traverse the block, initialising inodes, adding events and flushing as
1100 // necessary.
sewardj0b9d74a2006-12-24 02:24:11 +00001101 for (/*use current i*/; i < sbIn->stmts_used; i++) {
njn6a3009b2005-03-20 00:20:06 +00001102
sewardj0b9d74a2006-12-24 02:24:11 +00001103 st = sbIn->stmts[i];
sewardj5155dec2005-10-12 10:09:23 +00001104 tl_assert(isFlatIRStmt(st));
njnb3507ea2005-08-02 23:07:02 +00001105
sewardj5155dec2005-10-12 10:09:23 +00001106 switch (st->tag) {
1107 case Ist_NoOp:
1108 case Ist_AbiHint:
1109 case Ist_Put:
1110 case Ist_PutI:
sewardj72d75132007-11-09 23:06:35 +00001111 case Ist_MBE:
sewardj5155dec2005-10-12 10:09:23 +00001112 break;
njn20677cc2005-08-12 23:47:51 +00001113
sewardj5155dec2005-10-12 10:09:23 +00001114 case Ist_IMark:
njnfd9f6222005-10-16 00:17:37 +00001115 cia = st->Ist.IMark.addr;
1116 isize = st->Ist.IMark.len;
1117
1118 // If Vex fails to decode an instruction, the size will be zero.
1119 // Pretend otherwise.
1120 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1121
njna5ad9ba2005-11-10 15:20:37 +00001122 // Sanity-check size.
1123 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1124 || VG_CLREQ_SZB == isize );
njnfd9f6222005-10-16 00:17:37 +00001125
1126 // Get space for and init the inode, record it as the current one.
1127 // Subsequent Dr/Dw/Dm events from the same instruction will
1128 // also use it.
1129 curr_inode = setup_InstrInfo(&cgs, cia, isize);
1130
1131 addEvent_Ir( &cgs, curr_inode );
sewardj5155dec2005-10-12 10:09:23 +00001132 break;
1133
sewardj0b9d74a2006-12-24 02:24:11 +00001134 case Ist_WrTmp: {
1135 IRExpr* data = st->Ist.WrTmp.data;
sewardj5155dec2005-10-12 10:09:23 +00001136 if (data->tag == Iex_Load) {
1137 IRExpr* aexpr = data->Iex.Load.addr;
sewardj5155dec2005-10-12 10:09:23 +00001138 // Note also, endianness info is ignored. I guess
1139 // that's not interesting.
njnfd9f6222005-10-16 00:17:37 +00001140 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1141 aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001142 }
1143 break;
njnb3507ea2005-08-02 23:07:02 +00001144 }
1145
sewardj5155dec2005-10-12 10:09:23 +00001146 case Ist_Store: {
1147 IRExpr* data = st->Ist.Store.data;
1148 IRExpr* aexpr = st->Ist.Store.addr;
njnfd9f6222005-10-16 00:17:37 +00001149 addEvent_Dw( &cgs, curr_inode,
1150 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001151 break;
1152 }
njnb3507ea2005-08-02 23:07:02 +00001153
sewardjcafe5052013-01-17 14:24:35 +00001154 case Ist_StoreG: {
1155 IRStoreG* sg = st->Ist.StoreG.details;
1156 IRExpr* data = sg->data;
1157 IRExpr* addr = sg->addr;
1158 IRType type = typeOfIRExpr(tyenv, data);
1159 tl_assert(type != Ity_INVALID);
1160 addEvent_D_guarded( &cgs, curr_inode,
1161 sizeofIRType(type), addr, sg->guard,
1162 True/*isWrite*/ );
1163 break;
1164 }
1165
1166 case Ist_LoadG: {
1167 IRLoadG* lg = st->Ist.LoadG.details;
1168 IRType type = Ity_INVALID; /* loaded type */
1169 IRType typeWide = Ity_INVALID; /* after implicit widening */
1170 IRExpr* addr = lg->addr;
1171 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1172 tl_assert(type != Ity_INVALID);
1173 addEvent_D_guarded( &cgs, curr_inode,
1174 sizeofIRType(type), addr, lg->guard,
1175 False/*!isWrite*/ );
1176 break;
1177 }
1178
sewardj5155dec2005-10-12 10:09:23 +00001179 case Ist_Dirty: {
1180 Int dataSize;
1181 IRDirty* d = st->Ist.Dirty.details;
1182 if (d->mFx != Ifx_None) {
njnfd9f6222005-10-16 00:17:37 +00001183 /* This dirty helper accesses memory. Collect the details. */
sewardj5155dec2005-10-12 10:09:23 +00001184 tl_assert(d->mAddr != NULL);
1185 tl_assert(d->mSize != 0);
1186 dataSize = d->mSize;
1187 // Large (eg. 28B, 108B, 512B on x86) data-sized
1188 // instructions will be done inaccurately, but they're
1189 // very rare and this avoids errors from hitting more
1190 // than two cache lines in the simulation.
sewardj98763d52012-06-03 22:40:07 +00001191 if (dataSize > min_line_size)
1192 dataSize = min_line_size;
sewardj5155dec2005-10-12 10:09:23 +00001193 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001194 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001195 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001196 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001197 } else {
1198 tl_assert(d->mAddr == NULL);
1199 tl_assert(d->mSize == 0);
1200 }
1201 break;
1202 }
njn6a3009b2005-03-20 00:20:06 +00001203
sewardj1c0ce7a2009-07-01 08:10:49 +00001204 case Ist_CAS: {
1205 /* We treat it as a read and a write of the location. I
1206 think that is the same behaviour as it was before IRCAS
1207 was introduced, since prior to that point, the Vex
1208 front ends would translate a lock-prefixed instruction
1209 into a (normal) read followed by a (normal) write. */
1210 Int dataSize;
1211 IRCAS* cas = st->Ist.CAS.details;
1212 tl_assert(cas->addr != NULL);
1213 tl_assert(cas->dataLo != NULL);
1214 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1215 if (cas->dataHi != NULL)
1216 dataSize *= 2; /* since it's a doubleword-CAS */
1217 /* I don't think this can ever happen, but play safe. */
sewardj98763d52012-06-03 22:40:07 +00001218 if (dataSize > min_line_size)
1219 dataSize = min_line_size;
sewardj1c0ce7a2009-07-01 08:10:49 +00001220 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1221 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1222 break;
1223 }
1224
sewardjdb5907d2009-11-26 17:20:21 +00001225 case Ist_LLSC: {
1226 IRType dataTy;
1227 if (st->Ist.LLSC.storedata == NULL) {
1228 /* LL */
1229 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1230 addEvent_Dr( &cgs, curr_inode,
1231 sizeofIRType(dataTy), st->Ist.LLSC.addr );
weidendod4053322012-11-26 18:16:58 +00001232 /* flush events before LL, should help SC to succeed */
1233 flushEvents( &cgs );
sewardjdb5907d2009-11-26 17:20:21 +00001234 } else {
1235 /* SC */
1236 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1237 addEvent_Dw( &cgs, curr_inode,
1238 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1239 }
1240 break;
1241 }
1242
sewardj8badbaa2007-05-08 09:20:25 +00001243 case Ist_Exit: {
weidendo374a48f2010-09-02 17:06:49 +00001244 // call branch predictor only if this is a branch in guest code
1245 if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1246 (st->Ist.Exit.jk == Ijk_Call) ||
1247 (st->Ist.Exit.jk == Ijk_Ret) )
1248 {
1249 /* Stuff to widen the guard expression to a host word, so
1250 we can pass it to the branch predictor simulation
1251 functions easily. */
1252 Bool inverted;
1253 Addr64 nia, sea;
1254 IRConst* dst;
1255 IRType tyW = hWordTy;
1256 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1257 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1258 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1259 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1260 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1261 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1262 : IRExpr_Const(IRConst_U64(1));
sewardj8badbaa2007-05-08 09:20:25 +00001263
weidendo374a48f2010-09-02 17:06:49 +00001264 /* First we need to figure out whether the side exit got
1265 inverted by the ir optimiser. To do that, figure out
1266 the next (fallthrough) instruction's address and the
1267 side exit address and see if they are the same. */
1268 nia = cia + (Addr64)isize;
1269 if (tyW == Ity_I32)
1270 nia &= 0xFFFFFFFFULL;
sewardj8badbaa2007-05-08 09:20:25 +00001271
weidendo374a48f2010-09-02 17:06:49 +00001272 /* Side exit address */
1273 dst = st->Ist.Exit.dst;
1274 if (tyW == Ity_I32) {
1275 tl_assert(dst->tag == Ico_U32);
1276 sea = (Addr64)(UInt)dst->Ico.U32;
1277 } else {
1278 tl_assert(tyW == Ity_I64);
1279 tl_assert(dst->tag == Ico_U64);
1280 sea = dst->Ico.U64;
1281 }
1282
1283 inverted = nia == sea;
1284
1285 /* Widen the guard expression. */
1286 addStmtToIRSB( cgs.sbOut,
1287 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1288 addStmtToIRSB( cgs.sbOut,
1289 IRStmt_WrTmp( guardW,
1290 IRExpr_Unop(widen,
1291 IRExpr_RdTmp(guard1))) );
1292 /* If the exit is inverted, invert the sense of the guard. */
1293 addStmtToIRSB(
1294 cgs.sbOut,
1295 IRStmt_WrTmp(
1296 guard,
1297 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1298 : IRExpr_RdTmp(guardW)
1299 ));
1300 /* And post the event. */
1301 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
sewardj8badbaa2007-05-08 09:20:25 +00001302 }
1303
sewardj5155dec2005-10-12 10:09:23 +00001304 /* We may never reach the next statement, so need to flush
1305 all outstanding transactions now. */
1306 flushEvents( &cgs );
1307 break;
sewardj8badbaa2007-05-08 09:20:25 +00001308 }
sewardj5155dec2005-10-12 10:09:23 +00001309
1310 default:
sewardjcafe5052013-01-17 14:24:35 +00001311 ppIRStmt(st);
sewardj5155dec2005-10-12 10:09:23 +00001312 tl_assert(0);
1313 break;
njnb3507ea2005-08-02 23:07:02 +00001314 }
njn6a3009b2005-03-20 00:20:06 +00001315
sewardj5155dec2005-10-12 10:09:23 +00001316 /* Copy the original statement */
sewardj0b9d74a2006-12-24 02:24:11 +00001317 addStmtToIRSB( cgs.sbOut, st );
njn6a3009b2005-03-20 00:20:06 +00001318
sewardj5155dec2005-10-12 10:09:23 +00001319 if (DEBUG_CG) {
1320 ppIRStmt(st);
1321 VG_(printf)("\n");
1322 }
1323 }
1324
sewardj8badbaa2007-05-08 09:20:25 +00001325 /* Deal with branches to unknown destinations. Except ignore ones
1326 which are function returns as we assume the return stack
1327 predictor never mispredicts. */
weidendo374a48f2010-09-02 17:06:49 +00001328 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
sewardj8badbaa2007-05-08 09:20:25 +00001329 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1330 switch (sbIn->next->tag) {
1331 case Iex_Const:
1332 break; /* boring - branch to known address */
1333 case Iex_RdTmp:
1334 /* looks like an indirect branch (branch to unknown) */
1335 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1336 break;
1337 default:
1338 /* shouldn't happen - if the incoming IR is properly
1339 flattened, should only have tmp and const cases to
1340 consider. */
1341 tl_assert(0);
1342 }
1343 }
1344
sewardj5155dec2005-10-12 10:09:23 +00001345 /* At the end of the bb. Flush outstandings. */
sewardj5155dec2005-10-12 10:09:23 +00001346 flushEvents( &cgs );
1347
sewardj5155dec2005-10-12 10:09:23 +00001348 /* done. stay sane ... */
sewardj0b9d74a2006-12-24 02:24:11 +00001349 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
sewardj5155dec2005-10-12 10:09:23 +00001350
1351 if (DEBUG_CG) {
1352 VG_(printf)( "goto {");
sewardj0b9d74a2006-12-24 02:24:11 +00001353 ppIRJumpKind(sbIn->jumpkind);
sewardj5155dec2005-10-12 10:09:23 +00001354 VG_(printf)( "} ");
sewardj0b9d74a2006-12-24 02:24:11 +00001355 ppIRExpr( sbIn->next );
sewardj5155dec2005-10-12 10:09:23 +00001356 VG_(printf)( "}\n");
1357 }
1358
sewardj0b9d74a2006-12-24 02:24:11 +00001359 return cgs.sbOut;
njn14d01ce2004-11-26 11:30:14 +00001360}
njn4f9c9342002-04-29 16:03:24 +00001361
1362/*------------------------------------------------------------*/
nethercoteb35a8b92004-09-11 16:45:27 +00001363/*--- Cache configuration ---*/
njn4f9c9342002-04-29 16:03:24 +00001364/*------------------------------------------------------------*/
1365
/* Initialiser for a cache_t in which all three values are -1.
   NOTE(review): presumably a sentinel meaning "not specified by the
   user" -- confirm against the code that reads these variables. */
#define UNDEFINED_CACHE { -1, -1, -1 }

/* Configurations for the I1, D1 and LL caches.  Each starts out as
   UNDEFINED_CACHE. */
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
njn25e49d8e72002-09-23 09:36:25 +00001371
njn4f9c9342002-04-29 16:03:24 +00001372/*------------------------------------------------------------*/
njn51d827b2005-05-09 01:02:08 +00001373/*--- cg_fini() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001374/*------------------------------------------------------------*/
1375
// Total reads/writes/misses.  Calculated during CC traversal at the end.
// All auto-zeroed (they are file-scope statics with no initialiser).
static CacheCC Ir_total;   // totals for Ir events (presumably instruction reads -- confirm)
static CacheCC Dr_total;   // totals for data reads
static CacheCC Dw_total;   // totals for data writes
static BranchCC Bc_total;  // totals for conditional branches
static BranchCC Bi_total;  // totals for branches to unknown destinations
nethercote9313ac42004-07-06 21:54:20 +00001383
nethercote9313ac42004-07-06 21:54:20 +00001384static void fprint_CC_table_and_calc_totals(void)
1385{
njnd3bef4f2005-10-15 17:46:18 +00001386 Int i, fd;
sewardj92645592005-07-23 09:18:34 +00001387 SysRes sres;
floriandbb35842012-10-27 18:39:11 +00001388 HChar buf[512];
florian19f91bb2012-11-10 22:29:54 +00001389 HChar *currFile = NULL, *currFn = NULL;
njnd3bef4f2005-10-15 17:46:18 +00001390 LineCC* lineCC;
njn4f9c9342002-04-29 16:03:24 +00001391
njn7064fb22008-05-29 23:09:52 +00001392 // Setup output filename. Nb: it's important to do this now, ie. as late
1393 // as possible. If we do it at start-up and the program forks and the
1394 // output file format string contains a %p (pid) specifier, both the
1395 // parent and child will incorrectly write to the same file; this
1396 // happened in 3.3.0.
florian19f91bb2012-11-10 22:29:54 +00001397 HChar* cachegrind_out_file =
njn7064fb22008-05-29 23:09:52 +00001398 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1399
sewardj92645592005-07-23 09:18:34 +00001400 sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1401 VKI_S_IRUSR|VKI_S_IWUSR);
njncda2f0f2009-05-18 02:12:08 +00001402 if (sr_isError(sres)) {
nethercote9313ac42004-07-06 21:54:20 +00001403 // If the file can't be opened for whatever reason (conflict
1404 // between multiple cachegrinded processes?), give up now.
sewardjb2c985b2009-07-15 14:51:17 +00001405 VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1406 cachegrind_out_file );
1407 VG_(umsg)(" ... so simulation results will be missing.\n");
njn7064fb22008-05-29 23:09:52 +00001408 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001409 return;
sewardj92645592005-07-23 09:18:34 +00001410 } else {
njncda2f0f2009-05-18 02:12:08 +00001411 fd = sr_Res(sres);
njn7064fb22008-05-29 23:09:52 +00001412 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001413 }
njn4f9c9342002-04-29 16:03:24 +00001414
njn2d853a12010-10-06 22:46:31 +00001415 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
nethercote9313ac42004-07-06 21:54:20 +00001416 // the 2nd colon makes cg_annotate's output look nicer.
1417 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1418 "desc: D1 cache: %s\n"
njn2d853a12010-10-06 22:46:31 +00001419 "desc: LL cache: %s\n",
1420 I1.desc_line, D1.desc_line, LL.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001421 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001422
nethercote9313ac42004-07-06 21:54:20 +00001423 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001424 VG_(strcpy)(buf, "cmd:");
1425 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
sewardj45f4e7c2005-09-27 19:20:21 +00001426 if (VG_(args_the_exename)) {
1427 VG_(write)(fd, " ", 1);
1428 VG_(write)(fd, VG_(args_the_exename),
1429 VG_(strlen)( VG_(args_the_exename) ));
1430 }
sewardj14c7cc52007-02-25 15:08:24 +00001431 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1432 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1433 if (arg) {
sewardj45f4e7c2005-09-27 19:20:21 +00001434 VG_(write)(fd, " ", 1);
sewardj14c7cc52007-02-25 15:08:24 +00001435 VG_(write)(fd, arg, VG_(strlen)( arg ));
sewardj45f4e7c2005-09-27 19:20:21 +00001436 }
njn4f9c9342002-04-29 16:03:24 +00001437 }
nethercote9313ac42004-07-06 21:54:20 +00001438 // "events:" line
sewardj8badbaa2007-05-08 09:20:25 +00001439 if (clo_cache_sim && clo_branch_sim) {
njn2d853a12010-10-06 22:46:31 +00001440 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
sewardj8badbaa2007-05-08 09:20:25 +00001441 "Bc Bcm Bi Bim\n");
1442 }
1443 else if (clo_cache_sim && !clo_branch_sim) {
njn2d853a12010-10-06 22:46:31 +00001444 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
sewardj8badbaa2007-05-08 09:20:25 +00001445 "\n");
1446 }
1447 else if (!clo_cache_sim && clo_branch_sim) {
1448 VG_(sprintf)(buf, "\nevents: Ir "
1449 "Bc Bcm Bi Bim\n");
1450 }
njne90711c2010-09-27 01:04:20 +00001451 else {
1452 VG_(sprintf)(buf, "\nevents: Ir\n");
1453 }
sewardj8badbaa2007-05-08 09:20:25 +00001454
njn4f9c9342002-04-29 16:03:24 +00001455 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1456
njnd3bef4f2005-10-15 17:46:18 +00001457 // Traverse every lineCC
njne2a9ad32007-09-17 05:30:48 +00001458 VG_(OSetGen_ResetIter)(CC_table);
1459 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
njn4311fe62005-12-08 23:18:50 +00001460 Bool just_hit_a_new_file = False;
njnd3bef4f2005-10-15 17:46:18 +00001461 // If we've hit a new file, print a "fl=" line. Note that because
1462 // each string is stored exactly once in the string table, we can use
1463 // pointer comparison rather than strcmp() to test for equality, which
1464 // is good because most of the time the comparisons are equal and so
njn4311fe62005-12-08 23:18:50 +00001465 // the whole strings would have to be checked.
njnd3bef4f2005-10-15 17:46:18 +00001466 if ( lineCC->loc.file != currFile ) {
1467 currFile = lineCC->loc.file;
1468 VG_(sprintf)(buf, "fl=%s\n", currFile);
njn4f9c9342002-04-29 16:03:24 +00001469 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njnd3bef4f2005-10-15 17:46:18 +00001470 distinct_files++;
njn4311fe62005-12-08 23:18:50 +00001471 just_hit_a_new_file = True;
njn4f9c9342002-04-29 16:03:24 +00001472 }
njn4311fe62005-12-08 23:18:50 +00001473 // If we've hit a new function, print a "fn=" line. We know to do
1474 // this when the function name changes, and also every time we hit a
1475 // new file (in which case the new function name might be the same as
1476 // in the old file, hence the just_hit_a_new_file test).
1477 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
njnd3bef4f2005-10-15 17:46:18 +00001478 currFn = lineCC->loc.fn;
1479 VG_(sprintf)(buf, "fn=%s\n", currFn);
1480 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1481 distinct_fns++;
1482 }
1483
1484 // Print the LineCC
sewardj8badbaa2007-05-08 09:20:25 +00001485 if (clo_cache_sim && clo_branch_sim) {
1486 VG_(sprintf)(buf, "%u %llu %llu %llu"
1487 " %llu %llu %llu"
1488 " %llu %llu %llu"
1489 " %llu %llu %llu %llu\n",
1490 lineCC->loc.line,
njn2d853a12010-10-06 22:46:31 +00001491 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1492 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1493 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
sewardj8badbaa2007-05-08 09:20:25 +00001494 lineCC->Bc.b, lineCC->Bc.mp,
1495 lineCC->Bi.b, lineCC->Bi.mp);
1496 }
1497 else if (clo_cache_sim && !clo_branch_sim) {
1498 VG_(sprintf)(buf, "%u %llu %llu %llu"
1499 " %llu %llu %llu"
1500 " %llu %llu %llu\n",
1501 lineCC->loc.line,
njn2d853a12010-10-06 22:46:31 +00001502 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1503 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1504 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
sewardj8badbaa2007-05-08 09:20:25 +00001505 }
1506 else if (!clo_cache_sim && clo_branch_sim) {
1507 VG_(sprintf)(buf, "%u %llu"
1508 " %llu %llu %llu %llu\n",
1509 lineCC->loc.line,
1510 lineCC->Ir.a,
1511 lineCC->Bc.b, lineCC->Bc.mp,
1512 lineCC->Bi.b, lineCC->Bi.mp);
1513 }
njne90711c2010-09-27 01:04:20 +00001514 else {
1515 VG_(sprintf)(buf, "%u %llu\n",
1516 lineCC->loc.line,
1517 lineCC->Ir.a);
1518 }
sewardj8badbaa2007-05-08 09:20:25 +00001519
njnd3bef4f2005-10-15 17:46:18 +00001520 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1521
1522 // Update summary stats
1523 Ir_total.a += lineCC->Ir.a;
1524 Ir_total.m1 += lineCC->Ir.m1;
njn2d853a12010-10-06 22:46:31 +00001525 Ir_total.mL += lineCC->Ir.mL;
njnd3bef4f2005-10-15 17:46:18 +00001526 Dr_total.a += lineCC->Dr.a;
1527 Dr_total.m1 += lineCC->Dr.m1;
njn2d853a12010-10-06 22:46:31 +00001528 Dr_total.mL += lineCC->Dr.mL;
njnd3bef4f2005-10-15 17:46:18 +00001529 Dw_total.a += lineCC->Dw.a;
1530 Dw_total.m1 += lineCC->Dw.m1;
njn2d853a12010-10-06 22:46:31 +00001531 Dw_total.mL += lineCC->Dw.mL;
sewardj8badbaa2007-05-08 09:20:25 +00001532 Bc_total.b += lineCC->Bc.b;
1533 Bc_total.mp += lineCC->Bc.mp;
1534 Bi_total.b += lineCC->Bi.b;
1535 Bi_total.mp += lineCC->Bi.mp;
njnd3bef4f2005-10-15 17:46:18 +00001536
1537 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +00001538 }
1539
nethercote9313ac42004-07-06 21:54:20 +00001540 // Summary stats must come after rest of table, since we calculate them
sewardj8badbaa2007-05-08 09:20:25 +00001541 // during traversal. */
1542 if (clo_cache_sim && clo_branch_sim) {
1543 VG_(sprintf)(buf, "summary:"
1544 " %llu %llu %llu"
1545 " %llu %llu %llu"
1546 " %llu %llu %llu"
1547 " %llu %llu %llu %llu\n",
njn2d853a12010-10-06 22:46:31 +00001548 Ir_total.a, Ir_total.m1, Ir_total.mL,
1549 Dr_total.a, Dr_total.m1, Dr_total.mL,
1550 Dw_total.a, Dw_total.m1, Dw_total.mL,
sewardj8badbaa2007-05-08 09:20:25 +00001551 Bc_total.b, Bc_total.mp,
1552 Bi_total.b, Bi_total.mp);
1553 }
1554 else if (clo_cache_sim && !clo_branch_sim) {
1555 VG_(sprintf)(buf, "summary:"
1556 " %llu %llu %llu"
1557 " %llu %llu %llu"
1558 " %llu %llu %llu\n",
njn2d853a12010-10-06 22:46:31 +00001559 Ir_total.a, Ir_total.m1, Ir_total.mL,
1560 Dr_total.a, Dr_total.m1, Dr_total.mL,
1561 Dw_total.a, Dw_total.m1, Dw_total.mL);
sewardj8badbaa2007-05-08 09:20:25 +00001562 }
1563 else if (!clo_cache_sim && clo_branch_sim) {
1564 VG_(sprintf)(buf, "summary:"
1565 " %llu"
1566 " %llu %llu %llu %llu\n",
1567 Ir_total.a,
1568 Bc_total.b, Bc_total.mp,
1569 Bi_total.b, Bi_total.mp);
1570 }
njne90711c2010-09-27 01:04:20 +00001571 else {
1572 VG_(sprintf)(buf, "summary:"
1573 " %llu\n",
1574 Ir_total.a);
1575 }
sewardj8badbaa2007-05-08 09:20:25 +00001576
njn4f9c9342002-04-29 16:03:24 +00001577 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1578 VG_(close)(fd);
1579}
1580
// Number of characters needed to print 'n' in decimal with a comma between
// every group of three digits (e.g. 1000 -> "1,000" -> 5).  Zero counts as
// one digit.
static UInt ULong_width(ULong n)
{
   UInt digits = 0;

   // A do/while guarantees at least one digit, which covers n == 0.
   do {
      digits++;
      n /= 10;
   } while (n != 0);

   return digits + (digits - 1) / 3;   // one comma per complete group of 3
}
1591
njn51d827b2005-05-09 01:02:08 +00001592static void cg_fini(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001593{
floriandbb35842012-10-27 18:39:11 +00001594 static HChar buf1[128], buf2[128], buf3[128], buf4[123];
florianee90c8a2012-10-21 02:39:42 +00001595 static HChar fmt[128];
njn607adfc2003-09-30 14:15:44 +00001596
sewardj8badbaa2007-05-08 09:20:25 +00001597 CacheCC D_total;
1598 BranchCC B_total;
njn2d853a12010-10-06 22:46:31 +00001599 ULong LL_total_m, LL_total_mr, LL_total_mw,
1600 LL_total, LL_total_r, LL_total_w;
njn4c245e52009-03-15 23:25:38 +00001601 Int l1, l2, l3;
njn4f9c9342002-04-29 16:03:24 +00001602
nethercote9313ac42004-07-06 21:54:20 +00001603 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001604
njn7cf0bd32002-06-08 13:36:03 +00001605 if (VG_(clo_verbosity) == 0)
1606 return;
1607
njnf76d27a2009-05-28 01:53:07 +00001608 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1609 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
njn4c245e52009-03-15 23:25:38 +00001610
njn4f9c9342002-04-29 16:03:24 +00001611 /* I cache results. Use the I_refs value to determine the first column
1612 * width. */
njn607adfc2003-09-30 14:15:44 +00001613 l1 = ULong_width(Ir_total.a);
njnf76d27a2009-05-28 01:53:07 +00001614 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1615 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
njn4f9c9342002-04-29 16:03:24 +00001616
njn607adfc2003-09-30 14:15:44 +00001617 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001618 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
njnd3bef4f2005-10-15 17:46:18 +00001619
sewardj8badbaa2007-05-08 09:20:25 +00001620 /* Always print this */
sewardjb2c985b2009-07-15 14:51:17 +00001621 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
njn4f9c9342002-04-29 16:03:24 +00001622
sewardj8badbaa2007-05-08 09:20:25 +00001623 /* If cache profiling is enabled, show D access numbers and all
1624 miss numbers */
1625 if (clo_cache_sim) {
sewardjb2c985b2009-07-15 14:51:17 +00001626 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
njn2d853a12010-10-06 22:46:31 +00001627 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);
njn4f9c9342002-04-29 16:03:24 +00001628
sewardj8badbaa2007-05-08 09:20:25 +00001629 if (0 == Ir_total.a) Ir_total.a = 1;
1630 VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
sewardjb2c985b2009-07-15 14:51:17 +00001631 VG_(umsg)("I1 miss rate: %s\n", buf1);
njn4f9c9342002-04-29 16:03:24 +00001632
njn2d853a12010-10-06 22:46:31 +00001633 VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
1634 VG_(umsg)("LLi miss rate: %s\n", buf1);
sewardjb2c985b2009-07-15 14:51:17 +00001635 VG_(umsg)("\n");
njnd3bef4f2005-10-15 17:46:18 +00001636
sewardj8badbaa2007-05-08 09:20:25 +00001637 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1638 * determine the width of columns 2 & 3. */
1639 D_total.a = Dr_total.a + Dw_total.a;
1640 D_total.m1 = Dr_total.m1 + Dw_total.m1;
njn2d853a12010-10-06 22:46:31 +00001641 D_total.mL = Dr_total.mL + Dw_total.mL;
njn4f9c9342002-04-29 16:03:24 +00001642
sewardj8badbaa2007-05-08 09:20:25 +00001643 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001644 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1645 l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001646
sewardjb2c985b2009-07-15 14:51:17 +00001647 VG_(umsg)(fmt, "D refs: ",
1648 D_total.a, Dr_total.a, Dw_total.a);
1649 VG_(umsg)(fmt, "D1 misses: ",
1650 D_total.m1, Dr_total.m1, Dw_total.m1);
njn2d853a12010-10-06 22:46:31 +00001651 VG_(umsg)(fmt, "LLd misses: ",
1652 D_total.mL, Dr_total.mL, Dw_total.mL);
njnd3bef4f2005-10-15 17:46:18 +00001653
sewardj8badbaa2007-05-08 09:20:25 +00001654 if (0 == D_total.a) D_total.a = 1;
1655 if (0 == Dr_total.a) Dr_total.a = 1;
1656 if (0 == Dw_total.a) Dw_total.a = 1;
1657 VG_(percentify)( D_total.m1, D_total.a, 1, l1+1, buf1);
1658 VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
1659 VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001660 VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
njn4f9c9342002-04-29 16:03:24 +00001661
njn2d853a12010-10-06 22:46:31 +00001662 VG_(percentify)( D_total.mL, D_total.a, 1, l1+1, buf1);
1663 VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
1664 VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
1665 VG_(umsg)("LLd miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001666 VG_(umsg)("\n");
njn1d021fa2002-05-02 13:56:34 +00001667
njn2d853a12010-10-06 22:46:31 +00001668 /* LL overall results */
njn1d021fa2002-05-02 13:56:34 +00001669
njn2d853a12010-10-06 22:46:31 +00001670 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1671 LL_total_r = Dr_total.m1 + Ir_total.m1;
1672 LL_total_w = Dw_total.m1;
1673 VG_(umsg)(fmt, "LL refs: ",
1674 LL_total, LL_total_r, LL_total_w);
njn4f9c9342002-04-29 16:03:24 +00001675
njn2d853a12010-10-06 22:46:31 +00001676 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
1677 LL_total_mr = Dr_total.mL + Ir_total.mL;
1678 LL_total_mw = Dw_total.mL;
1679 VG_(umsg)(fmt, "LL misses: ",
1680 LL_total_m, LL_total_mr, LL_total_mw);
njnd3bef4f2005-10-15 17:46:18 +00001681
njn2d853a12010-10-06 22:46:31 +00001682 VG_(percentify)(LL_total_m, (Ir_total.a + D_total.a), 1, l1+1, buf1);
1683 VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
1684 VG_(percentify)(LL_total_mw, Dw_total.a, 1, l3+1, buf3);
1685 VG_(umsg)("LL miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardj8badbaa2007-05-08 09:20:25 +00001686 }
1687
1688 /* If branch profiling is enabled, show branch overall results. */
1689 if (clo_branch_sim) {
1690 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001691 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1692 l1, l2, l3);
sewardj8badbaa2007-05-08 09:20:25 +00001693
1694 if (0 == Bc_total.b) Bc_total.b = 1;
1695 if (0 == Bi_total.b) Bi_total.b = 1;
1696 B_total.b = Bc_total.b + Bi_total.b;
1697 B_total.mp = Bc_total.mp + Bi_total.mp;
1698
sewardjb2c985b2009-07-15 14:51:17 +00001699 VG_(umsg)("\n");
1700 VG_(umsg)(fmt, "Branches: ",
1701 B_total.b, Bc_total.b, Bi_total.b);
sewardj8badbaa2007-05-08 09:20:25 +00001702
sewardjb2c985b2009-07-15 14:51:17 +00001703 VG_(umsg)(fmt, "Mispredicts: ",
1704 B_total.mp, Bc_total.mp, Bi_total.mp);
sewardj8badbaa2007-05-08 09:20:25 +00001705
1706 VG_(percentify)(B_total.mp, B_total.b, 1, l1+1, buf1);
1707 VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
1708 VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
1709
sewardjb2c985b2009-07-15 14:51:17 +00001710 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardj8badbaa2007-05-08 09:20:25 +00001711 }
njn4f9c9342002-04-29 16:03:24 +00001712
nethercote9313ac42004-07-06 21:54:20 +00001713 // Various stats
sewardj2d9e8742009-08-07 15:46:56 +00001714 if (VG_(clo_stats)) {
njn1baf7db2006-04-18 22:34:48 +00001715 Int debug_lookups = full_debugs + fn_debugs +
1716 file_line_debugs + no_debugs;
njnd3bef4f2005-10-15 17:46:18 +00001717
sewardjb2c985b2009-07-15 14:51:17 +00001718 VG_(dmsg)("\n");
weidendo6fc0de02012-10-30 00:28:29 +00001719 VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files);
1720 VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
1721 VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines);
1722 VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
1723 VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
sewardjb2c985b2009-07-15 14:51:17 +00001724 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
njn1baf7db2006-04-18 22:34:48 +00001725
1726 VG_(percentify)(full_debugs, debug_lookups, 1, 6, buf1);
1727 VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
1728 VG_(percentify)(fn_debugs, debug_lookups, 1, 6, buf3);
1729 VG_(percentify)(no_debugs, debug_lookups, 1, 6, buf4);
sewardjb2c985b2009-07-15 14:51:17 +00001730 VG_(dmsg)("cachegrind: with full info:%s (%d)\n",
1731 buf1, full_debugs);
1732 VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
1733 buf2, file_line_debugs);
1734 VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n",
1735 buf3, fn_debugs);
1736 VG_(dmsg)("cachegrind: with zero info:%s (%d)\n",
1737 buf4, no_debugs);
njn1baf7db2006-04-18 22:34:48 +00001738
sewardjb2c985b2009-07-15 14:51:17 +00001739 VG_(dmsg)("cachegrind: string table size: %lu\n",
1740 VG_(OSetGen_Size)(stringTable));
1741 VG_(dmsg)("cachegrind: CC table size: %lu\n",
1742 VG_(OSetGen_Size)(CC_table));
1743 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
1744 VG_(OSetGen_Size)(instrInfoTable));
njn4f9c9342002-04-29 16:03:24 +00001745 }
njn4f9c9342002-04-29 16:03:24 +00001746}
1747
nethercote9313ac42004-07-06 21:54:20 +00001748/*--------------------------------------------------------------------*/
1749/*--- Discarding BB info ---*/
1750/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001751
sewardja3a29a52005-10-12 16:16:03 +00001752// Called when a translation is removed from the translation cache for
1753// any reason at all: to free up space, because the guest code was
1754// unmapped or modified, or for any arbitrary reason.
sewardj4ba057c2005-10-18 12:04:18 +00001755static
sewardj0b9d74a2006-12-24 02:24:11 +00001756void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
sewardj18d75132002-05-16 11:06:21 +00001757{
sewardj0b9d74a2006-12-24 02:24:11 +00001758 SB_info* sbInfo;
sewardj3a384b32006-01-22 01:12:51 +00001759 Addr orig_addr = (Addr)vge.base[0];
njn4294fd42002-06-05 14:41:10 +00001760
sewardj5155dec2005-10-12 10:09:23 +00001761 tl_assert(vge.n_used > 0);
1762
1763 if (DEBUG_CG)
sewardj4ba057c2005-10-18 12:04:18 +00001764 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1765 (void*)(Addr)orig_addr,
sewardj5155dec2005-10-12 10:09:23 +00001766 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
njn4294fd42002-06-05 14:41:10 +00001767
sewardj4ba057c2005-10-18 12:04:18 +00001768 // Get BB info, remove from table, free BB info. Simple! Note that we
1769 // use orig_addr, not the first instruction address in vge.
njne2a9ad32007-09-17 05:30:48 +00001770 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
sewardj0b9d74a2006-12-24 02:24:11 +00001771 tl_assert(NULL != sbInfo);
njne2a9ad32007-09-17 05:30:48 +00001772 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
sewardj18d75132002-05-16 11:06:21 +00001773}
1774
1775/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001776/*--- Command line processing ---*/
1777/*--------------------------------------------------------------------*/
1778
florian19f91bb2012-11-10 22:29:54 +00001779static Bool cg_process_cmd_line_option(const HChar* arg)
njn25e49d8e72002-09-23 09:36:25 +00001780{
weidendo23642272011-09-06 19:08:31 +00001781 if (VG_(str_clo_cache_opt)(arg,
1782 &clo_I1_cache,
1783 &clo_D1_cache,
1784 &clo_LL_cache)) {}
njn83df0b62009-02-25 01:01:05 +00001785
1786 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1787 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1788 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
njn25e49d8e72002-09-23 09:36:25 +00001789 else
1790 return False;
1791
1792 return True;
1793}
1794
njn51d827b2005-05-09 01:02:08 +00001795static void cg_print_usage(void)
njn25e49d8e72002-09-23 09:36:25 +00001796{
weidendo23642272011-09-06 19:08:31 +00001797 VG_(print_cache_clo_opts)();
njn3e884182003-04-15 13:03:23 +00001798 VG_(printf)(
sewardj8badbaa2007-05-08 09:20:25 +00001799" --cache-sim=yes|no [yes] collect cache stats?\n"
1800" --branch-sim=yes|no [no] collect branch prediction stats?\n"
njn374a36d2007-11-23 01:41:32 +00001801" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
njn3e884182003-04-15 13:03:23 +00001802 );
1803}
1804
njn51d827b2005-05-09 01:02:08 +00001805static void cg_print_debug_usage(void)
njn3e884182003-04-15 13:03:23 +00001806{
1807 VG_(printf)(
1808" (none)\n"
1809 );
njn25e49d8e72002-09-23 09:36:25 +00001810}
1811
1812/*--------------------------------------------------------------------*/
1813/*--- Setup ---*/
1814/*--------------------------------------------------------------------*/
1815
sewardje1216cb2007-02-07 19:55:30 +00001816static void cg_post_clo_init(void); /* just below */
1817
njn51d827b2005-05-09 01:02:08 +00001818static void cg_pre_clo_init(void)
1819{
njn51d827b2005-05-09 01:02:08 +00001820 VG_(details_name) ("Cachegrind");
1821 VG_(details_version) (NULL);
sewardj8badbaa2007-05-08 09:20:25 +00001822 VG_(details_description) ("a cache and branch-prediction profiler");
njn51d827b2005-05-09 01:02:08 +00001823 VG_(details_copyright_author)(
sewardj0f157dd2013-10-18 14:27:36 +00001824 "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.");
njn51d827b2005-05-09 01:02:08 +00001825 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardje8089302006-10-17 02:15:17 +00001826 VG_(details_avg_translation_sizeB) ( 500 );
njn51d827b2005-05-09 01:02:08 +00001827
philippe5b240c22012-08-14 22:28:31 +00001828 VG_(clo_vex_control).iropt_register_updates
1829 = VexRegUpdSpAtMemAccess; // overridable by the user.
njn51d827b2005-05-09 01:02:08 +00001830 VG_(basic_tool_funcs) (cg_post_clo_init,
1831 cg_instrument,
1832 cg_fini);
1833
sewardj0b9d74a2006-12-24 02:24:11 +00001834 VG_(needs_superblock_discards)(cg_discard_superblock_info);
njn51d827b2005-05-09 01:02:08 +00001835 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1836 cg_print_usage,
1837 cg_print_debug_usage);
sewardje1216cb2007-02-07 19:55:30 +00001838}
1839
1840static void cg_post_clo_init(void)
1841{
njn2d853a12010-10-06 22:46:31 +00001842 cache_t I1c, D1c, LLc;
njn51d827b2005-05-09 01:02:08 +00001843
njne2a9ad32007-09-17 05:30:48 +00001844 CC_table =
1845 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1846 cmp_CodeLoc_LineCC,
sewardj9c606bd2008-09-18 18:12:50 +00001847 VG_(malloc), "cg.main.cpci.1",
1848 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001849 instrInfoTable =
1850 VG_(OSetGen_Create)(/*keyOff*/0,
1851 NULL,
sewardj9c606bd2008-09-18 18:12:50 +00001852 VG_(malloc), "cg.main.cpci.2",
1853 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001854 stringTable =
1855 VG_(OSetGen_Create)(/*keyOff*/0,
1856 stringCmp,
sewardj9c606bd2008-09-18 18:12:50 +00001857 VG_(malloc), "cg.main.cpci.3",
1858 VG_(free));
sewardje1216cb2007-02-07 19:55:30 +00001859
weidendo23642272011-09-06 19:08:31 +00001860 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1861 &clo_I1_cache,
1862 &clo_D1_cache,
1863 &clo_LL_cache);
sewardje1216cb2007-02-07 19:55:30 +00001864
sewardj98763d52012-06-03 22:40:07 +00001865 // min_line_size is used to make sure that we never feed
1866 // accesses to the simulator straddling more than two
1867 // cache lines at any cache level
1868 min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1869 min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
1870
1871 Int largest_load_or_store_size
1872 = VG_(machine_get_size_of_largest_guest_register)();
1873 if (min_line_size < largest_load_or_store_size) {
1874 /* We can't continue, because the cache simulation might
1875 straddle more than 2 lines, and it will assert. So let's
1876 just stop before we start. */
1877 VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1878 (Int)min_line_size);
1879 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
1880 largest_load_or_store_size );
1881 VG_(umsg)(" but it is not. Exiting now.\n");
1882 VG_(exit)(1);
1883 }
1884
weidendoc1e94262012-10-05 23:58:17 +00001885 cachesim_initcaches(I1c, D1c, LLc);
njn51d827b2005-05-09 01:02:08 +00001886}
1887
sewardj45f4e7c2005-09-27 19:20:21 +00001888VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
fitzhardinge98abfc72003-12-16 02:05:15 +00001889
njn25e49d8e72002-09-23 09:36:25 +00001890/*--------------------------------------------------------------------*/
njnf69f9452005-07-03 17:53:11 +00001891/*--- end ---*/
sewardj18d75132002-05-16 11:06:21 +00001892/*--------------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +00001893