blob: 5d44667f02cfa3b1d87516bd640dde0f6347e1c3 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njn101e5722005-04-21 02:37:54 +00003/*--- Cachegrind: everything but the simulation itself. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
Elliott Hughesed398002017-06-21 14:41:24 -070011 Copyright (C) 2002-2017 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000012 njn@valgrind.org
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njnea27e462005-05-31 02:38:09 +000033#include "pub_tool_debuginfo.h"
njn97405b22005-06-02 03:39:33 +000034#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +000036#include "pub_tool_libcfile.h"
njn36a20fa2005-06-03 03:08:39 +000037#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +000038#include "pub_tool_libcproc.h"
njn717cde52005-05-10 02:47:21 +000039#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000040#include "pub_tool_options.h"
njnd3bef4f2005-10-15 17:46:18 +000041#include "pub_tool_oset.h"
njn43b9a8a2005-05-10 04:37:01 +000042#include "pub_tool_tooliface.h"
sewardj14c7cc52007-02-25 15:08:24 +000043#include "pub_tool_xarray.h"
sewardj45f4e7c2005-09-27 19:20:21 +000044#include "pub_tool_clientstate.h"
sewardj5bb86822005-12-23 12:47:42 +000045#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
njn25e49d8e72002-09-23 09:36:25 +000046
nethercoteb35a8b92004-09-11 16:45:27 +000047#include "cg_arch.h"
nethercote27fc1da2004-01-04 16:56:57 +000048#include "cg_sim.c"
sewardj8badbaa2007-05-08 09:20:25 +000049#include "cg_branchpred.c"
njn4f9c9342002-04-29 16:03:24 +000050
njn25e49d8e72002-09-23 09:36:25 +000051/*------------------------------------------------------------*/
52/*--- Constants ---*/
53/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000054
/* Debug switch: set to 1 for very verbose debugging. */
#define DEBUG_CG 0
57
njn7cf0bd32002-06-08 13:36:03 +000058/*------------------------------------------------------------*/
sewardj8badbaa2007-05-08 09:20:25 +000059/*--- Options ---*/
60/*------------------------------------------------------------*/
61
njn374a36d2007-11-23 01:41:32 +000062static Bool clo_cache_sim = True; /* do cache simulation? */
63static Bool clo_branch_sim = False; /* do branch simulation? */
florian19f91bb2012-11-10 22:29:54 +000064static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
sewardj8badbaa2007-05-08 09:20:25 +000065
66/*------------------------------------------------------------*/
sewardj98763d52012-06-03 22:40:07 +000067/*--- Cachesim configuration ---*/
68/*------------------------------------------------------------*/
69
70static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
71
72/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000073/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000074/*------------------------------------------------------------*/
75
sewardj8badbaa2007-05-08 09:20:25 +000076typedef
77 struct {
78 ULong a; /* total # memory accesses of this kind */
79 ULong m1; /* misses in the first level cache */
njn2d853a12010-10-06 22:46:31 +000080 ULong mL; /* misses in the second level cache */
sewardj8badbaa2007-05-08 09:20:25 +000081 }
82 CacheCC;
83
84typedef
85 struct {
86 ULong b; /* total # branches of this kind */
87 ULong mp; /* number of branches mispredicted */
88 }
89 BranchCC;
njn4f9c9342002-04-29 16:03:24 +000090
nethercote9313ac42004-07-06 21:54:20 +000091//------------------------------------------------------------
92// Primary data structure #1: CC table
93// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
njnd3bef4f2005-10-15 17:46:18 +000094// - an ordered set of CCs. CC indexing done by file/function/line (as
95// determined from the instrAddr).
nethercote9313ac42004-07-06 21:54:20 +000096// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +000097
njnd3bef4f2005-10-15 17:46:18 +000098typedef struct {
florian19f91bb2012-11-10 22:29:54 +000099 HChar* file;
florian46cc0452014-10-25 19:20:38 +0000100 const HChar* fn;
florian19f91bb2012-11-10 22:29:54 +0000101 Int line;
njnd3bef4f2005-10-15 17:46:18 +0000102}
103CodeLoc;
njn4f9c9342002-04-29 16:03:24 +0000104
sewardj8badbaa2007-05-08 09:20:25 +0000105typedef struct {
106 CodeLoc loc; /* Source location that these counts pertain to */
107 CacheCC Ir; /* Insn read counts */
108 CacheCC Dr; /* Data read counts */
109 CacheCC Dw; /* Data write/modify counts */
110 BranchCC Bc; /* Conditional branch counts */
111 BranchCC Bi; /* Indirect branch counts */
112} LineCC;
njn4f9c9342002-04-29 16:03:24 +0000113
njnd3bef4f2005-10-15 17:46:18 +0000114// First compare file, then fn, then line.
tom5a835d52007-12-30 12:28:26 +0000115static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
njnd3bef4f2005-10-15 17:46:18 +0000116{
njnafa12262005-12-24 03:10:56 +0000117 Word res;
florian3e798632012-11-24 19:41:54 +0000118 const CodeLoc* a = (const CodeLoc*)vloc;
119 const CodeLoc* b = &(((const LineCC*)vcc)->loc);
njn4f9c9342002-04-29 16:03:24 +0000120
njnd3bef4f2005-10-15 17:46:18 +0000121 res = VG_(strcmp)(a->file, b->file);
122 if (0 != res)
123 return res;
njn4f9c9342002-04-29 16:03:24 +0000124
njnd3bef4f2005-10-15 17:46:18 +0000125 res = VG_(strcmp)(a->fn, b->fn);
126 if (0 != res)
127 return res;
128
129 return a->line - b->line;
130}
131
132static OSet* CC_table;
njn4f9c9342002-04-29 16:03:24 +0000133
nethercote9313ac42004-07-06 21:54:20 +0000134//------------------------------------------------------------
njnd3bef4f2005-10-15 17:46:18 +0000135// Primary data structure #2: InstrInfo table
nethercote9313ac42004-07-06 21:54:20 +0000136// - Holds the cached info about each instr that is used for simulation.
sewardj0b9d74a2006-12-24 02:24:11 +0000137// - table(SB_start_addr, list(InstrInfo))
138// - For each SB, each InstrInfo in the list holds info about the
njnd3bef4f2005-10-15 17:46:18 +0000139// instruction (instrLen, instrAddr, etc), plus a pointer to its line
nethercote9313ac42004-07-06 21:54:20 +0000140// CC. This node is what's passed to the simulation function.
sewardj0b9d74a2006-12-24 02:24:11 +0000141// - When SBs are discarded the relevant list(instr_details) is freed.
nethercote9313ac42004-07-06 21:54:20 +0000142
njnd3bef4f2005-10-15 17:46:18 +0000143typedef struct _InstrInfo InstrInfo;
144struct _InstrInfo {
nethercoteca1f2dc2004-07-21 08:49:02 +0000145 Addr instr_addr;
njn6a3009b2005-03-20 00:20:06 +0000146 UChar instr_len;
njnd3bef4f2005-10-15 17:46:18 +0000147 LineCC* parent; // parent line-CC
nethercote9313ac42004-07-06 21:54:20 +0000148};
149
sewardj0b9d74a2006-12-24 02:24:11 +0000150typedef struct _SB_info SB_info;
151struct _SB_info {
152 Addr SB_addr; // key; MUST BE FIRST
njnd3bef4f2005-10-15 17:46:18 +0000153 Int n_instrs;
154 InstrInfo instrs[0];
nethercote9313ac42004-07-06 21:54:20 +0000155};
156
njnd3bef4f2005-10-15 17:46:18 +0000157static OSet* instrInfoTable;
158
159//------------------------------------------------------------
160// Secondary data structure: string table
161// - holds strings, avoiding dups
162// - used for filenames and function names, each of which will be
163// pointed to by one or more CCs.
164// - it also allows equality checks just by pointer comparison, which
165// is good when printing the output file at the end.
166
167static OSet* stringTable;
nethercote9313ac42004-07-06 21:54:20 +0000168
169//------------------------------------------------------------
170// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000171static Int distinct_files = 0;
172static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000173static Int distinct_lines = 0;
weidendo6fc0de02012-10-30 00:28:29 +0000174static Int distinct_instrsGen = 0;
175static Int distinct_instrsNoX = 0;
nethercote9313ac42004-07-06 21:54:20 +0000176
njnd3bef4f2005-10-15 17:46:18 +0000177static Int full_debugs = 0;
178static Int file_line_debugs = 0;
179static Int fn_debugs = 0;
180static Int no_debugs = 0;
njn4f9c9342002-04-29 16:03:24 +0000181
nethercote9313ac42004-07-06 21:54:20 +0000182/*------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +0000183/*--- String table operations ---*/
184/*------------------------------------------------------------*/
185
tom5a835d52007-12-30 12:28:26 +0000186static Word stringCmp( const void* key, const void* elem )
njnd3bef4f2005-10-15 17:46:18 +0000187{
florian3e798632012-11-24 19:41:54 +0000188 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
njnd3bef4f2005-10-15 17:46:18 +0000189}
190
191// Get a permanent string; either pull it out of the string table if it's
192// been encountered before, or dup it and put it into the string table.
florian46cc0452014-10-25 19:20:38 +0000193static HChar* get_perm_string(const HChar* s)
njnd3bef4f2005-10-15 17:46:18 +0000194{
florian19f91bb2012-11-10 22:29:54 +0000195 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
njnd3bef4f2005-10-15 17:46:18 +0000196 if (s_ptr) {
197 return *s_ptr;
198 } else {
florian19f91bb2012-11-10 22:29:54 +0000199 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
sewardj9c606bd2008-09-18 18:12:50 +0000200 *s_node = VG_(strdup)("cg.main.gps.1", s);
njne2a9ad32007-09-17 05:30:48 +0000201 VG_(OSetGen_Insert)(stringTable, s_node);
njnd3bef4f2005-10-15 17:46:18 +0000202 return *s_node;
203 }
204}
205
206/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000207/*--- CC table operations ---*/
208/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000209
florian10ef7252014-10-27 12:06:35 +0000210static void get_debug_info(Addr instr_addr, const HChar **dir,
211 const HChar **file, const HChar **fn, UInt* line)
njn4f9c9342002-04-29 16:03:24 +0000212{
sewardj7cee6f92005-06-13 17:39:06 +0000213 Bool found_file_line = VG_(get_filename_linenum)(
214 instr_addr,
florianf4384f42014-12-16 20:55:58 +0000215 file, dir,
sewardj7cee6f92005-06-13 17:39:06 +0000216 line
217 );
florian46cc0452014-10-25 19:20:38 +0000218 Bool found_fn = VG_(get_fnname)(instr_addr, fn);
njn4f9c9342002-04-29 16:03:24 +0000219
nethercote9313ac42004-07-06 21:54:20 +0000220 if (!found_file_line) {
florian10ef7252014-10-27 12:06:35 +0000221 *file = "???";
nethercote9313ac42004-07-06 21:54:20 +0000222 *line = 0;
223 }
224 if (!found_fn) {
florian46cc0452014-10-25 19:20:38 +0000225 *fn = "???";
nethercote9313ac42004-07-06 21:54:20 +0000226 }
njnf3b61d62007-09-17 00:41:07 +0000227
nethercote9313ac42004-07-06 21:54:20 +0000228 if (found_file_line) {
njnd3bef4f2005-10-15 17:46:18 +0000229 if (found_fn) full_debugs++;
230 else file_line_debugs++;
nethercote9313ac42004-07-06 21:54:20 +0000231 } else {
njnd3bef4f2005-10-15 17:46:18 +0000232 if (found_fn) fn_debugs++;
233 else no_debugs++;
njn4f9c9342002-04-29 16:03:24 +0000234 }
235}
236
nethercote9313ac42004-07-06 21:54:20 +0000237// Do a three step traversal: by file, then fn, then line.
njnd3bef4f2005-10-15 17:46:18 +0000238// Returns a pointer to the line CC, creates a new one if necessary.
239static LineCC* get_lineCC(Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000240{
florian10ef7252014-10-27 12:06:35 +0000241 const HChar *fn, *file, *dir;
florian19f91bb2012-11-10 22:29:54 +0000242 UInt line;
njnd3bef4f2005-10-15 17:46:18 +0000243 CodeLoc loc;
244 LineCC* lineCC;
nethercote9313ac42004-07-06 21:54:20 +0000245
florian10ef7252014-10-27 12:06:35 +0000246 get_debug_info(origAddr, &dir, &file, &fn, &line);
nethercote9313ac42004-07-06 21:54:20 +0000247
florian6c67c502014-10-26 17:12:12 +0000248 // Form an absolute pathname if a directory is available
249 HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
250
251 if (dir[0]) {
252 VG_(sprintf)(absfile, "%s/%s", dir, file);
253 } else {
254 VG_(sprintf)(absfile, "%s", file);
255 }
256
257 loc.file = absfile;
njnd3bef4f2005-10-15 17:46:18 +0000258 loc.fn = fn;
259 loc.line = line;
njn4f9c9342002-04-29 16:03:24 +0000260
njne2a9ad32007-09-17 05:30:48 +0000261 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
njnd3bef4f2005-10-15 17:46:18 +0000262 if (!lineCC) {
263 // Allocate and zero a new node.
njne2a9ad32007-09-17 05:30:48 +0000264 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
njnd3bef4f2005-10-15 17:46:18 +0000265 lineCC->loc.file = get_perm_string(loc.file);
266 lineCC->loc.fn = get_perm_string(loc.fn);
267 lineCC->loc.line = loc.line;
njn0a8db5c2007-04-02 03:11:41 +0000268 lineCC->Ir.a = 0;
269 lineCC->Ir.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000270 lineCC->Ir.mL = 0;
njn0a8db5c2007-04-02 03:11:41 +0000271 lineCC->Dr.a = 0;
272 lineCC->Dr.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000273 lineCC->Dr.mL = 0;
njn0a8db5c2007-04-02 03:11:41 +0000274 lineCC->Dw.a = 0;
275 lineCC->Dw.m1 = 0;
njn2d853a12010-10-06 22:46:31 +0000276 lineCC->Dw.mL = 0;
sewardj8badbaa2007-05-08 09:20:25 +0000277 lineCC->Bc.b = 0;
278 lineCC->Bc.mp = 0;
279 lineCC->Bi.b = 0;
280 lineCC->Bi.mp = 0;
njne2a9ad32007-09-17 05:30:48 +0000281 VG_(OSetGen_Insert)(CC_table, lineCC);
njn4f9c9342002-04-29 16:03:24 +0000282 }
nethercote9313ac42004-07-06 21:54:20 +0000283
njnd3bef4f2005-10-15 17:46:18 +0000284 return lineCC;
njn4f9c9342002-04-29 16:03:24 +0000285}
286
287/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000288/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000289/*------------------------------------------------------------*/
290
/* A common case for an instruction read event is that the
 * bytes read belong to the same cache line in both L1I and LL
 * (if cache line sizes of L1 and LL are the same).
 * As this can be detected at instrumentation time, and results
 * in faster simulation, special-casing is beneficial.
 *
 * Abbreviations used in var/function names:
 *  IrNoX - instruction read does not cross cache lines
 *  IrGen - generic instruction read; not detected as IrNoX
 *  Ir    - not known / not important whether it is an IrNoX
 */
302
njnc52b9322010-09-27 02:20:38 +0000303// Only used with --cache-sim=no.
304static VG_REGPARM(1)
weidendo6fc0de02012-10-30 00:28:29 +0000305void log_1Ir(InstrInfo* n)
njnc52b9322010-09-27 02:20:38 +0000306{
307 n->parent->Ir.a++;
308}
309
310// Only used with --cache-sim=no.
311static VG_REGPARM(2)
weidendo6fc0de02012-10-30 00:28:29 +0000312void log_2Ir(InstrInfo* n, InstrInfo* n2)
njnc52b9322010-09-27 02:20:38 +0000313{
314 n->parent->Ir.a++;
315 n2->parent->Ir.a++;
316}
317
318// Only used with --cache-sim=no.
319static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000320void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
njnc52b9322010-09-27 02:20:38 +0000321{
322 n->parent->Ir.a++;
323 n2->parent->Ir.a++;
324 n3->parent->Ir.a++;
325}
326
weidendo6fc0de02012-10-30 00:28:29 +0000327// Generic case for instruction reads: may cross cache lines.
328// All other Ir handlers expect IrNoX instruction reads.
njnaf839f52005-06-23 03:27:57 +0000329static VG_REGPARM(1)
weidendo6fc0de02012-10-30 00:28:29 +0000330void log_1IrGen_0D_cache_access(InstrInfo* n)
njn25e49d8e72002-09-23 09:36:25 +0000331{
weidendo6fc0de02012-10-30 00:28:29 +0000332 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000333 // n, n->instr_addr, n->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000334 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
335 &n->parent->Ir.m1, &n->parent->Ir.mL);
336 n->parent->Ir.a++;
337}
338
339static VG_REGPARM(1)
340void log_1IrNoX_0D_cache_access(InstrInfo* n)
341{
342 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
343 // n, n->instr_addr, n->instr_len);
344 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
345 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000346 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000347}
348
njnaf839f52005-06-23 03:27:57 +0000349static VG_REGPARM(2)
weidendo6fc0de02012-10-30 00:28:29 +0000350void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
njn25e49d8e72002-09-23 09:36:25 +0000351{
weidendo6fc0de02012-10-30 00:28:29 +0000352 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
353 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000354 // n, n->instr_addr, n->instr_len,
355 // n2, n2->instr_addr, n2->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000356 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
357 &n->parent->Ir.m1, &n->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000358 n->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000359 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
360 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000361 n2->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000362}
363
364static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000365void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
sewardj5155dec2005-10-12 10:09:23 +0000366{
weidendo6fc0de02012-10-30 00:28:29 +0000367 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
368 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
369 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000370 // n, n->instr_addr, n->instr_len,
371 // n2, n2->instr_addr, n2->instr_len,
372 // n3, n3->instr_addr, n3->instr_len);
weidendo6fc0de02012-10-30 00:28:29 +0000373 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
374 &n->parent->Ir.m1, &n->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000375 n->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000376 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
377 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000378 n2->parent->Ir.a++;
weidendo6fc0de02012-10-30 00:28:29 +0000379 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
380 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
sewardj5155dec2005-10-12 10:09:23 +0000381 n3->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000382}
383
384static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000385void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000386{
weidendo6fc0de02012-10-30 00:28:29 +0000387 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
sewardj5155dec2005-10-12 10:09:23 +0000388 // " daddr=0x%010lx, dsize=%lu\n",
389 // n, n->instr_addr, n->instr_len, data_addr, data_size);
weidendo6fc0de02012-10-30 00:28:29 +0000390 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
391 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000392 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000393
sewardj5155dec2005-10-12 10:09:23 +0000394 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000395 &n->parent->Dr.m1, &n->parent->Dr.mL);
nethercote9313ac42004-07-06 21:54:20 +0000396 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000397}
398
sewardj5155dec2005-10-12 10:09:23 +0000399static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000400void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000401{
weidendo6fc0de02012-10-30 00:28:29 +0000402 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
sewardj5155dec2005-10-12 10:09:23 +0000403 // " daddr=0x%010lx, dsize=%lu\n",
404 // n, n->instr_addr, n->instr_len, data_addr, data_size);
weidendo6fc0de02012-10-30 00:28:29 +0000405 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
406 &n->parent->Ir.m1, &n->parent->Ir.mL);
nethercote9313ac42004-07-06 21:54:20 +0000407 n->parent->Ir.a++;
408
sewardj5155dec2005-10-12 10:09:23 +0000409 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000410 &n->parent->Dw.m1, &n->parent->Dw.mL);
nethercote9313ac42004-07-06 21:54:20 +0000411 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000412}
413
sewardjcafe5052013-01-17 14:24:35 +0000414/* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
415 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
416 you change them, you must change addEvent_D_guarded too. */
njnaf839f52005-06-23 03:27:57 +0000417static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000418void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000419{
weidendo6fc0de02012-10-30 00:28:29 +0000420 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000421 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000422 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000423 &n->parent->Dr.m1, &n->parent->Dr.mL);
nethercote9313ac42004-07-06 21:54:20 +0000424 n->parent->Dr.a++;
sewardj5155dec2005-10-12 10:09:23 +0000425}
426
sewardjcafe5052013-01-17 14:24:35 +0000427/* See comment on log_0Ir_1Dr_cache_access. */
sewardj5155dec2005-10-12 10:09:23 +0000428static VG_REGPARM(3)
weidendo6fc0de02012-10-30 00:28:29 +0000429void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000430{
weidendo6fc0de02012-10-30 00:28:29 +0000431 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
sewardj5155dec2005-10-12 10:09:23 +0000432 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000433 cachesim_D1_doref(data_addr, data_size,
njn2d853a12010-10-06 22:46:31 +0000434 &n->parent->Dw.m1, &n->parent->Dw.mL);
nethercote9313ac42004-07-06 21:54:20 +0000435 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000436}
437
sewardj8badbaa2007-05-08 09:20:25 +0000438/* For branches, we consult two different predictors, one which
439 predicts taken/untaken for conditional branches, and the other
440 which predicts the branch target address for indirect branches
441 (jump-to-register style ones). */
442
443static VG_REGPARM(2)
444void log_cond_branch(InstrInfo* n, Word taken)
445{
446 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
447 // n, taken);
448 n->parent->Bc.b++;
449 n->parent->Bc.mp
450 += (1 & do_cond_branch_predict(n->instr_addr, taken));
451}
452
453static VG_REGPARM(2)
454void log_ind_branch(InstrInfo* n, UWord actual_dst)
455{
456 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
457 // n, actual_dst);
458 n->parent->Bi.b++;
459 n->parent->Bi.mp
460 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
461}
462
463
nethercote9313ac42004-07-06 21:54:20 +0000464/*------------------------------------------------------------*/
sewardj5155dec2005-10-12 10:09:23 +0000465/*--- Instrumentation types and structures ---*/
466/*------------------------------------------------------------*/
467
468/* Maintain an ordered list of memory events which are outstanding, in
469 the sense that no IR has yet been generated to do the relevant
470 helper calls. The BB is scanned top to bottom and memory events
471 are added to the end of the list, merging with the most recent
472 notified event where possible (Dw immediately following Dr and
473 having the same size and EA can be merged).
474
475 This merging is done so that for architectures which have
476 load-op-store instructions (x86, amd64), the insn is treated as if
477 it makes just one memory reference (a modify), rather than two (a
478 read followed by a write at the same address).
479
480 At various points the list will need to be flushed, that is, IR
481 generated from it. That must happen before any possible exit from
482 the block (the end, or an IRStmt_Exit). Flushing also takes place
483 when there is no space to add a new event.
484
485 If we require the simulation statistics to be up to date with
486 respect to possible memory exceptions, then the list would have to
487 be flushed before each memory reference. That would however lose
488 performance by inhibiting event-merging during flushing.
489
490 Flushing the list consists of walking it start to end and emitting
491 instrumentation IR for each event, in the order in which they
492 appear. It may be possible to emit a single call for two adjacent
493 events in order to reduce the number of helper function calls made.
494 For example, it could well be profitable to handle two adjacent Ir
495 events with a single helper call. */
496
497typedef
498 IRExpr
499 IRAtom;
500
/* The kinds of event that can sit in the outstanding-event list. */
typedef enum {
   Ev_IrNoX,   // Instruction read not crossing cache lines
   Ev_IrGen,   // Generic Ir, not being detected as IrNoX
   Ev_Dr,      // Data read
   Ev_Dw,      // Data write
   Ev_Dm,      // Data modify (read then write)
   Ev_Bc,      // branch conditional
   Ev_Bi       // branch indirect (to unknown destination)
} EventTag;
sewardj5155dec2005-10-12 10:09:23 +0000512
513typedef
514 struct {
sewardj8badbaa2007-05-08 09:20:25 +0000515 EventTag tag;
516 InstrInfo* inode;
517 union {
518 struct {
weidendo6fc0de02012-10-30 00:28:29 +0000519 } IrGen;
520 struct {
521 } IrNoX;
sewardj8badbaa2007-05-08 09:20:25 +0000522 struct {
523 IRAtom* ea;
524 Int szB;
525 } Dr;
526 struct {
527 IRAtom* ea;
528 Int szB;
529 } Dw;
530 struct {
531 IRAtom* ea;
532 Int szB;
533 } Dm;
534 struct {
535 IRAtom* taken; /* :: Ity_I1 */
536 } Bc;
537 struct {
538 IRAtom* dst;
539 } Bi;
540 } Ev;
sewardj5155dec2005-10-12 10:09:23 +0000541 }
542 Event;
543
sewardj8badbaa2007-05-08 09:20:25 +0000544static void init_Event ( Event* ev ) {
545 VG_(memset)(ev, 0, sizeof(Event));
546}
547
548static IRAtom* get_Event_dea ( Event* ev ) {
549 switch (ev->tag) {
550 case Ev_Dr: return ev->Ev.Dr.ea;
551 case Ev_Dw: return ev->Ev.Dw.ea;
552 case Ev_Dm: return ev->Ev.Dm.ea;
553 default: tl_assert(0);
554 }
555}
556
557static Int get_Event_dszB ( Event* ev ) {
558 switch (ev->tag) {
559 case Ev_Dr: return ev->Ev.Dr.szB;
560 case Ev_Dw: return ev->Ev.Dw.szB;
561 case Ev_Dm: return ev->Ev.Dm.szB;
562 default: tl_assert(0);
563 }
564}
565
566
/* Maximum number of unnotified events held at once.  The value is
   arbitrary: a larger number allows more event merging to occur, but
   potentially induces more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16
572
573
574/* A struct which holds all the running state during instrumentation.
575 Mostly to avoid passing loads of parameters everywhere. */
576typedef
577 struct {
578 /* The current outstanding-memory-event list. */
579 Event events[N_EVENTS];
580 Int events_used;
581
njnd3bef4f2005-10-15 17:46:18 +0000582 /* The array of InstrInfo bins for the BB. */
sewardj0b9d74a2006-12-24 02:24:11 +0000583 SB_info* sbInfo;
sewardj5155dec2005-10-12 10:09:23 +0000584
njnd3bef4f2005-10-15 17:46:18 +0000585 /* Number InstrInfo bins 'used' so far. */
sewardj0b9d74a2006-12-24 02:24:11 +0000586 Int sbInfo_i;
sewardj5155dec2005-10-12 10:09:23 +0000587
sewardj0b9d74a2006-12-24 02:24:11 +0000588 /* The output SB being constructed. */
589 IRSB* sbOut;
sewardj5155dec2005-10-12 10:09:23 +0000590 }
591 CgState;
592
593
sewardj5155dec2005-10-12 10:09:23 +0000594/*------------------------------------------------------------*/
595/*--- Instrumentation main ---*/
nethercote9313ac42004-07-06 21:54:20 +0000596/*------------------------------------------------------------*/
597
sewardj4ba057c2005-10-18 12:04:18 +0000598// Note that origAddr is the real origAddr, not the address of the first
599// instruction in the block (they can be different due to redirection).
nethercote564b2b02004-08-07 15:54:53 +0000600static
sewardj0b9d74a2006-12-24 02:24:11 +0000601SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000602{
njn4bd67b52005-08-11 00:47:10 +0000603 Int i, n_instrs;
604 IRStmt* st;
sewardj0b9d74a2006-12-24 02:24:11 +0000605 SB_info* sbInfo;
njnd3bef4f2005-10-15 17:46:18 +0000606
sewardj0b9d74a2006-12-24 02:24:11 +0000607 // Count number of original instrs in SB
njn6a3009b2005-03-20 00:20:06 +0000608 n_instrs = 0;
sewardj0b9d74a2006-12-24 02:24:11 +0000609 for (i = 0; i < sbIn->stmts_used; i++) {
610 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +0000611 if (Ist_IMark == st->tag) n_instrs++;
nethercote9313ac42004-07-06 21:54:20 +0000612 }
613
njnf7d26092005-10-12 16:45:17 +0000614 // Check that we don't have an entry for this BB in the instr-info table.
615 // If this assertion fails, there has been some screwup: some
616 // translations must have been discarded but Cachegrind hasn't discarded
617 // the corresponding entries in the instr-info table.
njne2a9ad32007-09-17 05:30:48 +0000618 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
sewardj0b9d74a2006-12-24 02:24:11 +0000619 tl_assert(NULL == sbInfo);
sewardja3a29a52005-10-12 16:16:03 +0000620
njnd3bef4f2005-10-15 17:46:18 +0000621 // BB never translated before (at this address, at least; could have
622 // been unloaded and then reloaded elsewhere in memory)
njne2a9ad32007-09-17 05:30:48 +0000623 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
sewardj0b9d74a2006-12-24 02:24:11 +0000624 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
625 sbInfo->SB_addr = origAddr;
626 sbInfo->n_instrs = n_instrs;
njne2a9ad32007-09-17 05:30:48 +0000627 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
sewardja3a29a52005-10-12 16:16:03 +0000628
sewardj0b9d74a2006-12-24 02:24:11 +0000629 return sbInfo;
nethercote9313ac42004-07-06 21:54:20 +0000630}
njn6a3009b2005-03-20 00:20:06 +0000631
nethercote9313ac42004-07-06 21:54:20 +0000632
sewardj5155dec2005-10-12 10:09:23 +0000633static void showEvent ( Event* ev )
nethercote9313ac42004-07-06 21:54:20 +0000634{
sewardj8badbaa2007-05-08 09:20:25 +0000635 switch (ev->tag) {
weidendo6fc0de02012-10-30 00:28:29 +0000636 case Ev_IrGen:
637 VG_(printf)("IrGen %p\n", ev->inode);
638 break;
639 case Ev_IrNoX:
640 VG_(printf)("IrNoX %p\n", ev->inode);
sewardj5155dec2005-10-12 10:09:23 +0000641 break;
sewardj8badbaa2007-05-08 09:20:25 +0000642 case Ev_Dr:
643 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
644 ppIRExpr(ev->Ev.Dr.ea);
sewardj5155dec2005-10-12 10:09:23 +0000645 VG_(printf)("\n");
646 break;
sewardj8badbaa2007-05-08 09:20:25 +0000647 case Ev_Dw:
648 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
649 ppIRExpr(ev->Ev.Dw.ea);
sewardj5155dec2005-10-12 10:09:23 +0000650 VG_(printf)("\n");
651 break;
sewardj8badbaa2007-05-08 09:20:25 +0000652 case Ev_Dm:
653 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
654 ppIRExpr(ev->Ev.Dm.ea);
655 VG_(printf)("\n");
656 break;
657 case Ev_Bc:
658 VG_(printf)("Bc %p GA=", ev->inode);
659 ppIRExpr(ev->Ev.Bc.taken);
660 VG_(printf)("\n");
661 break;
662 case Ev_Bi:
663 VG_(printf)("Bi %p DST=", ev->inode);
664 ppIRExpr(ev->Ev.Bi.dst);
sewardj5155dec2005-10-12 10:09:23 +0000665 VG_(printf)("\n");
666 break;
667 default:
668 tl_assert(0);
669 break;
670 }
njn6a3009b2005-03-20 00:20:06 +0000671}
672
njnfd9f6222005-10-16 00:17:37 +0000673// Reserve and initialise an InstrInfo for the first mention of a new insn.
674static
675InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
njn6a3009b2005-03-20 00:20:06 +0000676{
njnd3bef4f2005-10-15 17:46:18 +0000677 InstrInfo* i_node;
sewardj0b9d74a2006-12-24 02:24:11 +0000678 tl_assert(cgs->sbInfo_i >= 0);
679 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
680 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
njnfd9f6222005-10-16 00:17:37 +0000681 i_node->instr_addr = instr_addr;
682 i_node->instr_len = instr_len;
683 i_node->parent = get_lineCC(instr_addr);
sewardj0b9d74a2006-12-24 02:24:11 +0000684 cgs->sbInfo_i++;
sewardj5155dec2005-10-12 10:09:23 +0000685 return i_node;
686}
sewardj17a56bf2005-03-21 01:35:02 +0000687
sewardj17a56bf2005-03-21 01:35:02 +0000688
sewardj5155dec2005-10-12 10:09:23 +0000689/* Generate code for all outstanding memory events, and mark the queue
690 empty. Code is generated into cgs->bbOut, and this activity
sewardj0b9d74a2006-12-24 02:24:11 +0000691 'consumes' slots in cgs->sbInfo. */
njn6a3009b2005-03-20 00:20:06 +0000692
sewardj5155dec2005-10-12 10:09:23 +0000693static void flushEvents ( CgState* cgs )
694{
njnd3bef4f2005-10-15 17:46:18 +0000695 Int i, regparms;
florianee90c8a2012-10-21 02:39:42 +0000696 const HChar* helperName;
njnd3bef4f2005-10-15 17:46:18 +0000697 void* helperAddr;
698 IRExpr** argv;
699 IRExpr* i_node_expr;
njnd3bef4f2005-10-15 17:46:18 +0000700 IRDirty* di;
njnc285dca2005-10-15 22:07:28 +0000701 Event* ev;
702 Event* ev2;
703 Event* ev3;
njn6a3009b2005-03-20 00:20:06 +0000704
sewardj5155dec2005-10-12 10:09:23 +0000705 i = 0;
706 while (i < cgs->events_used) {
njn6a3009b2005-03-20 00:20:06 +0000707
sewardj5155dec2005-10-12 10:09:23 +0000708 helperName = NULL;
709 helperAddr = NULL;
710 argv = NULL;
711 regparms = 0;
712
713 /* generate IR to notify event i and possibly the ones
714 immediately following it. */
715 tl_assert(i >= 0 && i < cgs->events_used);
njnc285dca2005-10-15 22:07:28 +0000716
717 ev = &cgs->events[i];
718 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
719 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
720
sewardj5155dec2005-10-12 10:09:23 +0000721 if (DEBUG_CG) {
722 VG_(printf)(" flush ");
njnc285dca2005-10-15 22:07:28 +0000723 showEvent( ev );
njn4f9c9342002-04-29 16:03:24 +0000724 }
sewardj5155dec2005-10-12 10:09:23 +0000725
njnfd9f6222005-10-16 00:17:37 +0000726 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
sewardj5155dec2005-10-12 10:09:23 +0000727
728 /* Decide on helper fn to call and args to pass it, and advance
729 i appropriately. */
sewardj8badbaa2007-05-08 09:20:25 +0000730 switch (ev->tag) {
weidendo6fc0de02012-10-30 00:28:29 +0000731 case Ev_IrNoX:
732 /* Merge an IrNoX with a following Dr/Dm. */
sewardj8badbaa2007-05-08 09:20:25 +0000733 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
734 /* Why is this true? It's because we're merging an Ir
735 with a following Dr or Dm. The Ir derives from the
736 instruction's IMark and the Dr/Dm from data
737 references which follow it. In short it holds
738 because each insn starts with an IMark, hence an
739 Ev_Ir, and so these Dr/Dm must pertain to the
740 immediately preceding Ir. Same applies to analogous
741 assertions in the subsequent cases. */
njnfd9f6222005-10-16 00:17:37 +0000742 tl_assert(ev2->inode == ev->inode);
weidendo6fc0de02012-10-30 00:28:29 +0000743 helperName = "log_1IrNoX_1Dr_cache_access";
744 helperAddr = &log_1IrNoX_1Dr_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000745 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000746 get_Event_dea(ev2),
747 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000748 regparms = 3;
749 i += 2;
750 }
weidendo6fc0de02012-10-30 00:28:29 +0000751 /* Merge an IrNoX with a following Dw. */
sewardj5155dec2005-10-12 10:09:23 +0000752 else
sewardj8badbaa2007-05-08 09:20:25 +0000753 if (ev2 && ev2->tag == Ev_Dw) {
njnfd9f6222005-10-16 00:17:37 +0000754 tl_assert(ev2->inode == ev->inode);
weidendo6fc0de02012-10-30 00:28:29 +0000755 helperName = "log_1IrNoX_1Dw_cache_access";
756 helperAddr = &log_1IrNoX_1Dw_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000757 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000758 get_Event_dea(ev2),
759 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000760 regparms = 3;
761 i += 2;
762 }
weidendo6fc0de02012-10-30 00:28:29 +0000763 /* Merge an IrNoX with two following IrNoX's. */
sewardj5155dec2005-10-12 10:09:23 +0000764 else
weidendo6fc0de02012-10-30 00:28:29 +0000765 if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
njnc285dca2005-10-15 22:07:28 +0000766 {
njnc52b9322010-09-27 02:20:38 +0000767 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000768 helperName = "log_3IrNoX_0D_cache_access";
769 helperAddr = &log_3IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000770 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000771 helperName = "log_3Ir";
772 helperAddr = &log_3Ir;
njnc52b9322010-09-27 02:20:38 +0000773 }
njnfd9f6222005-10-16 00:17:37 +0000774 argv = mkIRExprVec_3( i_node_expr,
775 mkIRExpr_HWord( (HWord)ev2->inode ),
776 mkIRExpr_HWord( (HWord)ev3->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000777 regparms = 3;
778 i += 3;
779 }
weidendo6fc0de02012-10-30 00:28:29 +0000780 /* Merge an IrNoX with one following IrNoX. */
sewardj5155dec2005-10-12 10:09:23 +0000781 else
weidendo6fc0de02012-10-30 00:28:29 +0000782 if (ev2 && ev2->tag == Ev_IrNoX) {
njnc52b9322010-09-27 02:20:38 +0000783 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000784 helperName = "log_2IrNoX_0D_cache_access";
785 helperAddr = &log_2IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000786 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000787 helperName = "log_2Ir";
788 helperAddr = &log_2Ir;
njnc52b9322010-09-27 02:20:38 +0000789 }
njnfd9f6222005-10-16 00:17:37 +0000790 argv = mkIRExprVec_2( i_node_expr,
791 mkIRExpr_HWord( (HWord)ev2->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000792 regparms = 2;
793 i += 2;
794 }
795 /* No merging possible; emit as-is. */
796 else {
njnc52b9322010-09-27 02:20:38 +0000797 if (clo_cache_sim) {
weidendo6fc0de02012-10-30 00:28:29 +0000798 helperName = "log_1IrNoX_0D_cache_access";
799 helperAddr = &log_1IrNoX_0D_cache_access;
njnc52b9322010-09-27 02:20:38 +0000800 } else {
weidendo6fc0de02012-10-30 00:28:29 +0000801 helperName = "log_1Ir";
802 helperAddr = &log_1Ir;
njnc52b9322010-09-27 02:20:38 +0000803 }
sewardj5155dec2005-10-12 10:09:23 +0000804 argv = mkIRExprVec_1( i_node_expr );
805 regparms = 1;
806 i++;
807 }
808 break;
weidendo6fc0de02012-10-30 00:28:29 +0000809 case Ev_IrGen:
810 if (clo_cache_sim) {
811 helperName = "log_1IrGen_0D_cache_access";
812 helperAddr = &log_1IrGen_0D_cache_access;
813 } else {
814 helperName = "log_1Ir";
815 helperAddr = &log_1Ir;
816 }
817 argv = mkIRExprVec_1( i_node_expr );
818 regparms = 1;
819 i++;
820 break;
sewardj8badbaa2007-05-08 09:20:25 +0000821 case Ev_Dr:
822 case Ev_Dm:
823 /* Data read or modify */
weidendo6fc0de02012-10-30 00:28:29 +0000824 helperName = "log_0Ir_1Dr_cache_access";
825 helperAddr = &log_0Ir_1Dr_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000826 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000827 get_Event_dea(ev),
828 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000829 regparms = 3;
830 i++;
831 break;
sewardj8badbaa2007-05-08 09:20:25 +0000832 case Ev_Dw:
833 /* Data write */
weidendo6fc0de02012-10-30 00:28:29 +0000834 helperName = "log_0Ir_1Dw_cache_access";
835 helperAddr = &log_0Ir_1Dw_cache_access;
sewardj5155dec2005-10-12 10:09:23 +0000836 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000837 get_Event_dea(ev),
838 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000839 regparms = 3;
840 i++;
841 break;
sewardj8badbaa2007-05-08 09:20:25 +0000842 case Ev_Bc:
843 /* Conditional branch */
844 helperName = "log_cond_branch";
845 helperAddr = &log_cond_branch;
846 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
847 regparms = 2;
848 i++;
849 break;
850 case Ev_Bi:
851 /* Branch to an unknown destination */
852 helperName = "log_ind_branch";
853 helperAddr = &log_ind_branch;
854 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
855 regparms = 2;
856 i++;
857 break;
sewardj5155dec2005-10-12 10:09:23 +0000858 default:
859 tl_assert(0);
860 }
861
862 /* Add the helper. */
863 tl_assert(helperName);
864 tl_assert(helperAddr);
865 tl_assert(argv);
sewardj5bb86822005-12-23 12:47:42 +0000866 di = unsafeIRDirty_0_N( regparms,
867 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
868 argv );
sewardj0b9d74a2006-12-24 02:24:11 +0000869 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
njn4f9c9342002-04-29 16:03:24 +0000870 }
871
sewardj5155dec2005-10-12 10:09:23 +0000872 cgs->events_used = 0;
njn4f9c9342002-04-29 16:03:24 +0000873}
njn14d01ce2004-11-26 11:30:14 +0000874
njnfd9f6222005-10-16 00:17:37 +0000875static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
sewardj5155dec2005-10-12 10:09:23 +0000876{
877 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000878 if (cgs->events_used == N_EVENTS)
879 flushEvents(cgs);
880 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
881 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000882 init_Event(evt);
njnfd9f6222005-10-16 00:17:37 +0000883 evt->inode = inode;
weidendo6fc0de02012-10-30 00:28:29 +0000884 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
885 evt->tag = Ev_IrNoX;
886 distinct_instrsNoX++;
887 } else {
888 evt->tag = Ev_IrGen;
889 distinct_instrsGen++;
890 }
sewardj5155dec2005-10-12 10:09:23 +0000891 cgs->events_used++;
892}
893
njnfd9f6222005-10-16 00:17:37 +0000894static
895void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
sewardj5155dec2005-10-12 10:09:23 +0000896{
njnfd9f6222005-10-16 00:17:37 +0000897 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000898 tl_assert(isIRAtom(ea));
sewardj98763d52012-06-03 22:40:07 +0000899 tl_assert(datasize >= 1 && datasize <= min_line_size);
sewardj8badbaa2007-05-08 09:20:25 +0000900 if (!clo_cache_sim)
901 return;
njnfd9f6222005-10-16 00:17:37 +0000902 if (cgs->events_used == N_EVENTS)
903 flushEvents(cgs);
904 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
905 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000906 init_Event(evt);
907 evt->tag = Ev_Dr;
908 evt->inode = inode;
909 evt->Ev.Dr.szB = datasize;
910 evt->Ev.Dr.ea = ea;
njnfd9f6222005-10-16 00:17:37 +0000911 cgs->events_used++;
912}
sewardj5155dec2005-10-12 10:09:23 +0000913
njnfd9f6222005-10-16 00:17:37 +0000914static
915void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
916{
njnfd9f6222005-10-16 00:17:37 +0000917 Event* evt;
918
919 tl_assert(isIRAtom(ea));
sewardj98763d52012-06-03 22:40:07 +0000920 tl_assert(datasize >= 1 && datasize <= min_line_size);
njnfd9f6222005-10-16 00:17:37 +0000921
sewardj8badbaa2007-05-08 09:20:25 +0000922 if (!clo_cache_sim)
923 return;
924
njnfd9f6222005-10-16 00:17:37 +0000925 /* Is it possible to merge this write with the preceding read? */
Elliott Hughesa0664b92017-04-18 17:46:52 -0700926 if (cgs->events_used > 0) {
927 Event* lastEvt = &cgs->events[cgs->events_used-1];
928 if ( lastEvt->tag == Ev_Dr
929 && lastEvt->Ev.Dr.szB == datasize
930 && lastEvt->inode == inode
931 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
932 {
933 lastEvt->tag = Ev_Dm;
934 return;
935 }
sewardj5155dec2005-10-12 10:09:23 +0000936 }
937
938 /* No. Add as normal. */
939 if (cgs->events_used == N_EVENTS)
940 flushEvents(cgs);
941 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
njnfd9f6222005-10-16 00:17:37 +0000942 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000943 init_Event(evt);
944 evt->tag = Ev_Dw;
945 evt->inode = inode;
946 evt->Ev.Dw.szB = datasize;
947 evt->Ev.Dw.ea = ea;
948 cgs->events_used++;
949}
950
951static
sewardjcafe5052013-01-17 14:24:35 +0000952void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
953 Int datasize, IRAtom* ea, IRAtom* guard,
954 Bool isWrite )
955{
956 tl_assert(isIRAtom(ea));
957 tl_assert(guard);
958 tl_assert(isIRAtom(guard));
959 tl_assert(datasize >= 1 && datasize <= min_line_size);
960
961 if (!clo_cache_sim)
962 return;
963
964 /* Adding guarded memory actions and merging them with the existing
965 queue is too complex. Simply flush the queue and add this
966 action immediately. Since guarded loads and stores are pretty
967 rare, this is not thought likely to cause any noticeable
968 performance loss as a result of the loss of event-merging
969 opportunities. */
970 tl_assert(cgs->events_used >= 0);
971 flushEvents(cgs);
972 tl_assert(cgs->events_used == 0);
973 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
974 IRExpr* i_node_expr;
975 const HChar* helperName;
976 void* helperAddr;
977 IRExpr** argv;
978 Int regparms;
979 IRDirty* di;
980 i_node_expr = mkIRExpr_HWord( (HWord)inode );
981 helperName = isWrite ? "log_0Ir_1Dw_cache_access"
982 : "log_0Ir_1Dr_cache_access";
983 helperAddr = isWrite ? &log_0Ir_1Dw_cache_access
984 : &log_0Ir_1Dr_cache_access;
985 argv = mkIRExprVec_3( i_node_expr,
986 ea, mkIRExpr_HWord( datasize ) );
987 regparms = 3;
988 di = unsafeIRDirty_0_N(
989 regparms,
990 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
991 argv );
992 di->guard = guard;
993 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
994}
995
996
997static
sewardj8badbaa2007-05-08 09:20:25 +0000998void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
999{
1000 Event* evt;
1001 tl_assert(isIRAtom(guard));
1002 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1003 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1004 if (!clo_branch_sim)
1005 return;
1006 if (cgs->events_used == N_EVENTS)
1007 flushEvents(cgs);
1008 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1009 evt = &cgs->events[cgs->events_used];
1010 init_Event(evt);
1011 evt->tag = Ev_Bc;
1012 evt->inode = inode;
1013 evt->Ev.Bc.taken = guard;
1014 cgs->events_used++;
1015}
1016
1017static
1018void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1019{
1020 Event* evt;
1021 tl_assert(isIRAtom(whereTo));
1022 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1023 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1024 if (!clo_branch_sim)
1025 return;
1026 if (cgs->events_used == N_EVENTS)
1027 flushEvents(cgs);
1028 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1029 evt = &cgs->events[cgs->events_used];
1030 init_Event(evt);
1031 evt->tag = Ev_Bi;
1032 evt->inode = inode;
1033 evt->Ev.Bi.dst = whereTo;
sewardj5155dec2005-10-12 10:09:23 +00001034 cgs->events_used++;
1035}
1036
1037////////////////////////////////////////////////////////////
1038
1039
sewardj4ba057c2005-10-18 12:04:18 +00001040static
sewardj0b9d74a2006-12-24 02:24:11 +00001041IRSB* cg_instrument ( VgCallbackClosure* closure,
1042 IRSB* sbIn,
florian3c0c9472014-09-24 12:06:55 +00001043 const VexGuestLayout* layout,
1044 const VexGuestExtents* vge,
1045 const VexArchInfo* archinfo_host,
sewardj4ba057c2005-10-18 12:04:18 +00001046 IRType gWordTy, IRType hWordTy )
njn14d01ce2004-11-26 11:30:14 +00001047{
florianf466eef2015-01-02 17:32:40 +00001048 Int i;
1049 UInt isize;
sewardj5155dec2005-10-12 10:09:23 +00001050 IRStmt* st;
florianf466eef2015-01-02 17:32:40 +00001051 Addr cia; /* address of current insn */
sewardj5155dec2005-10-12 10:09:23 +00001052 CgState cgs;
sewardj0b9d74a2006-12-24 02:24:11 +00001053 IRTypeEnv* tyenv = sbIn->tyenv;
njnfd9f6222005-10-16 00:17:37 +00001054 InstrInfo* curr_inode = NULL;
sewardj5155dec2005-10-12 10:09:23 +00001055
sewardjd54babf2005-03-21 00:55:49 +00001056 if (gWordTy != hWordTy) {
1057 /* We don't currently support this case. */
1058 VG_(tool_panic)("host/guest word size mismatch");
1059 }
1060
sewardj0b9d74a2006-12-24 02:24:11 +00001061 // Set up new SB
1062 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
njn6a3009b2005-03-20 00:20:06 +00001063
sewardja9f538c2005-10-23 12:06:55 +00001064 // Copy verbatim any IR preamble preceding the first IMark
njn6a3009b2005-03-20 00:20:06 +00001065 i = 0;
sewardj0b9d74a2006-12-24 02:24:11 +00001066 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1067 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
sewardja9f538c2005-10-23 12:06:55 +00001068 i++;
1069 }
1070
1071 // Get the first statement, and initial cia from it
sewardj0b9d74a2006-12-24 02:24:11 +00001072 tl_assert(sbIn->stmts_used > 0);
1073 tl_assert(i < sbIn->stmts_used);
1074 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +00001075 tl_assert(Ist_IMark == st->tag);
sewardj8badbaa2007-05-08 09:20:25 +00001076
1077 cia = st->Ist.IMark.addr;
1078 isize = st->Ist.IMark.len;
1079 // If Vex fails to decode an instruction, the size will be zero.
1080 // Pretend otherwise.
1081 if (isize == 0) isize = VG_MIN_INSTR_SZB;
njn6a3009b2005-03-20 00:20:06 +00001082
sewardj5155dec2005-10-12 10:09:23 +00001083 // Set up running state and get block info
sewardj3a384b32006-01-22 01:12:51 +00001084 tl_assert(closure->readdr == vge->base[0]);
sewardj5155dec2005-10-12 10:09:23 +00001085 cgs.events_used = 0;
sewardj0b9d74a2006-12-24 02:24:11 +00001086 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
1087 cgs.sbInfo_i = 0;
njn6a3009b2005-03-20 00:20:06 +00001088
sewardj5155dec2005-10-12 10:09:23 +00001089 if (DEBUG_CG)
1090 VG_(printf)("\n\n---------- cg_instrument ----------\n");
njn6a3009b2005-03-20 00:20:06 +00001091
njnfd9f6222005-10-16 00:17:37 +00001092 // Traverse the block, initialising inodes, adding events and flushing as
1093 // necessary.
sewardj0b9d74a2006-12-24 02:24:11 +00001094 for (/*use current i*/; i < sbIn->stmts_used; i++) {
njn6a3009b2005-03-20 00:20:06 +00001095
sewardj0b9d74a2006-12-24 02:24:11 +00001096 st = sbIn->stmts[i];
sewardj5155dec2005-10-12 10:09:23 +00001097 tl_assert(isFlatIRStmt(st));
njnb3507ea2005-08-02 23:07:02 +00001098
sewardj5155dec2005-10-12 10:09:23 +00001099 switch (st->tag) {
1100 case Ist_NoOp:
1101 case Ist_AbiHint:
1102 case Ist_Put:
1103 case Ist_PutI:
sewardj72d75132007-11-09 23:06:35 +00001104 case Ist_MBE:
sewardj5155dec2005-10-12 10:09:23 +00001105 break;
njn20677cc2005-08-12 23:47:51 +00001106
sewardj5155dec2005-10-12 10:09:23 +00001107 case Ist_IMark:
njnfd9f6222005-10-16 00:17:37 +00001108 cia = st->Ist.IMark.addr;
1109 isize = st->Ist.IMark.len;
1110
1111 // If Vex fails to decode an instruction, the size will be zero.
1112 // Pretend otherwise.
1113 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1114
njna5ad9ba2005-11-10 15:20:37 +00001115 // Sanity-check size.
1116 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1117 || VG_CLREQ_SZB == isize );
njnfd9f6222005-10-16 00:17:37 +00001118
1119 // Get space for and init the inode, record it as the current one.
1120 // Subsequent Dr/Dw/Dm events from the same instruction will
1121 // also use it.
1122 curr_inode = setup_InstrInfo(&cgs, cia, isize);
1123
1124 addEvent_Ir( &cgs, curr_inode );
sewardj5155dec2005-10-12 10:09:23 +00001125 break;
1126
sewardj0b9d74a2006-12-24 02:24:11 +00001127 case Ist_WrTmp: {
1128 IRExpr* data = st->Ist.WrTmp.data;
sewardj5155dec2005-10-12 10:09:23 +00001129 if (data->tag == Iex_Load) {
1130 IRExpr* aexpr = data->Iex.Load.addr;
sewardj5155dec2005-10-12 10:09:23 +00001131 // Note also, endianness info is ignored. I guess
1132 // that's not interesting.
njnfd9f6222005-10-16 00:17:37 +00001133 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1134 aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001135 }
1136 break;
njnb3507ea2005-08-02 23:07:02 +00001137 }
1138
sewardj5155dec2005-10-12 10:09:23 +00001139 case Ist_Store: {
1140 IRExpr* data = st->Ist.Store.data;
1141 IRExpr* aexpr = st->Ist.Store.addr;
njnfd9f6222005-10-16 00:17:37 +00001142 addEvent_Dw( &cgs, curr_inode,
1143 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001144 break;
1145 }
njnb3507ea2005-08-02 23:07:02 +00001146
sewardjcafe5052013-01-17 14:24:35 +00001147 case Ist_StoreG: {
1148 IRStoreG* sg = st->Ist.StoreG.details;
1149 IRExpr* data = sg->data;
1150 IRExpr* addr = sg->addr;
1151 IRType type = typeOfIRExpr(tyenv, data);
1152 tl_assert(type != Ity_INVALID);
1153 addEvent_D_guarded( &cgs, curr_inode,
1154 sizeofIRType(type), addr, sg->guard,
1155 True/*isWrite*/ );
1156 break;
1157 }
1158
1159 case Ist_LoadG: {
1160 IRLoadG* lg = st->Ist.LoadG.details;
1161 IRType type = Ity_INVALID; /* loaded type */
1162 IRType typeWide = Ity_INVALID; /* after implicit widening */
1163 IRExpr* addr = lg->addr;
1164 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1165 tl_assert(type != Ity_INVALID);
1166 addEvent_D_guarded( &cgs, curr_inode,
1167 sizeofIRType(type), addr, lg->guard,
1168 False/*!isWrite*/ );
1169 break;
1170 }
1171
sewardj5155dec2005-10-12 10:09:23 +00001172 case Ist_Dirty: {
1173 Int dataSize;
1174 IRDirty* d = st->Ist.Dirty.details;
1175 if (d->mFx != Ifx_None) {
njnfd9f6222005-10-16 00:17:37 +00001176 /* This dirty helper accesses memory. Collect the details. */
sewardj5155dec2005-10-12 10:09:23 +00001177 tl_assert(d->mAddr != NULL);
1178 tl_assert(d->mSize != 0);
1179 dataSize = d->mSize;
1180 // Large (eg. 28B, 108B, 512B on x86) data-sized
1181 // instructions will be done inaccurately, but they're
1182 // very rare and this avoids errors from hitting more
1183 // than two cache lines in the simulation.
sewardj98763d52012-06-03 22:40:07 +00001184 if (dataSize > min_line_size)
1185 dataSize = min_line_size;
sewardj5155dec2005-10-12 10:09:23 +00001186 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001187 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001188 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001189 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001190 } else {
1191 tl_assert(d->mAddr == NULL);
1192 tl_assert(d->mSize == 0);
1193 }
1194 break;
1195 }
njn6a3009b2005-03-20 00:20:06 +00001196
sewardj1c0ce7a2009-07-01 08:10:49 +00001197 case Ist_CAS: {
1198 /* We treat it as a read and a write of the location. I
1199 think that is the same behaviour as it was before IRCAS
1200 was introduced, since prior to that point, the Vex
1201 front ends would translate a lock-prefixed instruction
1202 into a (normal) read followed by a (normal) write. */
1203 Int dataSize;
1204 IRCAS* cas = st->Ist.CAS.details;
1205 tl_assert(cas->addr != NULL);
1206 tl_assert(cas->dataLo != NULL);
1207 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1208 if (cas->dataHi != NULL)
1209 dataSize *= 2; /* since it's a doubleword-CAS */
1210 /* I don't think this can ever happen, but play safe. */
sewardj98763d52012-06-03 22:40:07 +00001211 if (dataSize > min_line_size)
1212 dataSize = min_line_size;
sewardj1c0ce7a2009-07-01 08:10:49 +00001213 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1214 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1215 break;
1216 }
1217
sewardjdb5907d2009-11-26 17:20:21 +00001218 case Ist_LLSC: {
1219 IRType dataTy;
1220 if (st->Ist.LLSC.storedata == NULL) {
1221 /* LL */
1222 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1223 addEvent_Dr( &cgs, curr_inode,
1224 sizeofIRType(dataTy), st->Ist.LLSC.addr );
weidendod4053322012-11-26 18:16:58 +00001225 /* flush events before LL, should help SC to succeed */
1226 flushEvents( &cgs );
sewardjdb5907d2009-11-26 17:20:21 +00001227 } else {
1228 /* SC */
1229 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1230 addEvent_Dw( &cgs, curr_inode,
1231 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1232 }
1233 break;
1234 }
1235
sewardj8badbaa2007-05-08 09:20:25 +00001236 case Ist_Exit: {
weidendo374a48f2010-09-02 17:06:49 +00001237 // call branch predictor only if this is a branch in guest code
1238 if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1239 (st->Ist.Exit.jk == Ijk_Call) ||
1240 (st->Ist.Exit.jk == Ijk_Ret) )
1241 {
1242 /* Stuff to widen the guard expression to a host word, so
1243 we can pass it to the branch predictor simulation
1244 functions easily. */
1245 Bool inverted;
florianf466eef2015-01-02 17:32:40 +00001246 Addr nia, sea;
weidendo374a48f2010-09-02 17:06:49 +00001247 IRConst* dst;
1248 IRType tyW = hWordTy;
1249 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1250 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1251 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1252 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1253 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1254 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1255 : IRExpr_Const(IRConst_U64(1));
sewardj8badbaa2007-05-08 09:20:25 +00001256
weidendo374a48f2010-09-02 17:06:49 +00001257 /* First we need to figure out whether the side exit got
1258 inverted by the ir optimiser. To do that, figure out
1259 the next (fallthrough) instruction's address and the
1260 side exit address and see if they are the same. */
florianf466eef2015-01-02 17:32:40 +00001261 nia = cia + isize;
sewardj8badbaa2007-05-08 09:20:25 +00001262
weidendo374a48f2010-09-02 17:06:49 +00001263 /* Side exit address */
1264 dst = st->Ist.Exit.dst;
1265 if (tyW == Ity_I32) {
1266 tl_assert(dst->tag == Ico_U32);
florianf466eef2015-01-02 17:32:40 +00001267 sea = dst->Ico.U32;
weidendo374a48f2010-09-02 17:06:49 +00001268 } else {
1269 tl_assert(tyW == Ity_I64);
1270 tl_assert(dst->tag == Ico_U64);
1271 sea = dst->Ico.U64;
1272 }
1273
1274 inverted = nia == sea;
1275
1276 /* Widen the guard expression. */
1277 addStmtToIRSB( cgs.sbOut,
1278 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1279 addStmtToIRSB( cgs.sbOut,
1280 IRStmt_WrTmp( guardW,
1281 IRExpr_Unop(widen,
1282 IRExpr_RdTmp(guard1))) );
1283 /* If the exit is inverted, invert the sense of the guard. */
1284 addStmtToIRSB(
1285 cgs.sbOut,
1286 IRStmt_WrTmp(
1287 guard,
1288 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1289 : IRExpr_RdTmp(guardW)
1290 ));
1291 /* And post the event. */
1292 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
sewardj8badbaa2007-05-08 09:20:25 +00001293 }
1294
sewardj5155dec2005-10-12 10:09:23 +00001295 /* We may never reach the next statement, so need to flush
1296 all outstanding transactions now. */
1297 flushEvents( &cgs );
1298 break;
sewardj8badbaa2007-05-08 09:20:25 +00001299 }
sewardj5155dec2005-10-12 10:09:23 +00001300
1301 default:
sewardjcafe5052013-01-17 14:24:35 +00001302 ppIRStmt(st);
sewardj5155dec2005-10-12 10:09:23 +00001303 tl_assert(0);
1304 break;
njnb3507ea2005-08-02 23:07:02 +00001305 }
njn6a3009b2005-03-20 00:20:06 +00001306
sewardj5155dec2005-10-12 10:09:23 +00001307 /* Copy the original statement */
sewardj0b9d74a2006-12-24 02:24:11 +00001308 addStmtToIRSB( cgs.sbOut, st );
njn6a3009b2005-03-20 00:20:06 +00001309
sewardj5155dec2005-10-12 10:09:23 +00001310 if (DEBUG_CG) {
1311 ppIRStmt(st);
1312 VG_(printf)("\n");
1313 }
1314 }
1315
sewardj8badbaa2007-05-08 09:20:25 +00001316 /* Deal with branches to unknown destinations. Except ignore ones
1317 which are function returns as we assume the return stack
1318 predictor never mispredicts. */
weidendo374a48f2010-09-02 17:06:49 +00001319 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
sewardj8badbaa2007-05-08 09:20:25 +00001320 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1321 switch (sbIn->next->tag) {
1322 case Iex_Const:
1323 break; /* boring - branch to known address */
1324 case Iex_RdTmp:
1325 /* looks like an indirect branch (branch to unknown) */
1326 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1327 break;
1328 default:
1329 /* shouldn't happen - if the incoming IR is properly
1330 flattened, should only have tmp and const cases to
1331 consider. */
1332 tl_assert(0);
1333 }
1334 }
1335
sewardj5155dec2005-10-12 10:09:23 +00001336 /* At the end of the bb. Flush outstandings. */
sewardj5155dec2005-10-12 10:09:23 +00001337 flushEvents( &cgs );
1338
sewardj5155dec2005-10-12 10:09:23 +00001339 /* done. stay sane ... */
sewardj0b9d74a2006-12-24 02:24:11 +00001340 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
sewardj5155dec2005-10-12 10:09:23 +00001341
1342 if (DEBUG_CG) {
1343 VG_(printf)( "goto {");
sewardj0b9d74a2006-12-24 02:24:11 +00001344 ppIRJumpKind(sbIn->jumpkind);
sewardj5155dec2005-10-12 10:09:23 +00001345 VG_(printf)( "} ");
sewardj0b9d74a2006-12-24 02:24:11 +00001346 ppIRExpr( sbIn->next );
sewardj5155dec2005-10-12 10:09:23 +00001347 VG_(printf)( "}\n");
1348 }
1349
sewardj0b9d74a2006-12-24 02:24:11 +00001350 return cgs.sbOut;
njn14d01ce2004-11-26 11:30:14 +00001351}
njn4f9c9342002-04-29 16:03:24 +00001352
1353/*------------------------------------------------------------*/
nethercoteb35a8b92004-09-11 16:45:27 +00001354/*--- Cache configuration ---*/
njn4f9c9342002-04-29 16:03:24 +00001355/*------------------------------------------------------------*/
1356
njn25e49d8e72002-09-23 09:36:25 +00001357static cache_t clo_I1_cache = UNDEFINED_CACHE;
1358static cache_t clo_D1_cache = UNDEFINED_CACHE;
njn2d853a12010-10-06 22:46:31 +00001359static cache_t clo_LL_cache = UNDEFINED_CACHE;
njn25e49d8e72002-09-23 09:36:25 +00001360
njn4f9c9342002-04-29 16:03:24 +00001361/*------------------------------------------------------------*/
njn51d827b2005-05-09 01:02:08 +00001362/*--- cg_fini() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001363/*------------------------------------------------------------*/
1364
nethercote9313ac42004-07-06 21:54:20 +00001365// Total reads/writes/misses. Calculated during CC traversal at the end.
1366// All auto-zeroed.
sewardj8badbaa2007-05-08 09:20:25 +00001367static CacheCC Ir_total;
1368static CacheCC Dr_total;
1369static CacheCC Dw_total;
1370static BranchCC Bc_total;
1371static BranchCC Bi_total;
nethercote9313ac42004-07-06 21:54:20 +00001372
nethercote9313ac42004-07-06 21:54:20 +00001373static void fprint_CC_table_and_calc_totals(void)
1374{
florian12d2eb52014-10-30 22:17:56 +00001375 Int i;
1376 VgFile *fp;
florian46cc0452014-10-25 19:20:38 +00001377 HChar *currFile = NULL;
1378 const HChar *currFn = NULL;
njnd3bef4f2005-10-15 17:46:18 +00001379 LineCC* lineCC;
njn4f9c9342002-04-29 16:03:24 +00001380
njn7064fb22008-05-29 23:09:52 +00001381 // Setup output filename. Nb: it's important to do this now, ie. as late
1382 // as possible. If we do it at start-up and the program forks and the
1383 // output file format string contains a %p (pid) specifier, both the
1384 // parent and child will incorrectly write to the same file; this
1385 // happened in 3.3.0.
florian19f91bb2012-11-10 22:29:54 +00001386 HChar* cachegrind_out_file =
njn7064fb22008-05-29 23:09:52 +00001387 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1388
florian12d2eb52014-10-30 22:17:56 +00001389 fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1390 VKI_S_IRUSR|VKI_S_IWUSR);
1391 if (fp == NULL) {
nethercote9313ac42004-07-06 21:54:20 +00001392 // If the file can't be opened for whatever reason (conflict
1393 // between multiple cachegrinded processes?), give up now.
sewardjb2c985b2009-07-15 14:51:17 +00001394 VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1395 cachegrind_out_file );
1396 VG_(umsg)(" ... so simulation results will be missing.\n");
njn7064fb22008-05-29 23:09:52 +00001397 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001398 return;
sewardj92645592005-07-23 09:18:34 +00001399 } else {
njn7064fb22008-05-29 23:09:52 +00001400 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001401 }
njn4f9c9342002-04-29 16:03:24 +00001402
njn2d853a12010-10-06 22:46:31 +00001403 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
nethercote9313ac42004-07-06 21:54:20 +00001404 // the 2nd colon makes cg_annotate's output look nicer.
florian12d2eb52014-10-30 22:17:56 +00001405 VG_(fprintf)(fp, "desc: I1 cache: %s\n"
nethercote9313ac42004-07-06 21:54:20 +00001406 "desc: D1 cache: %s\n"
njn2d853a12010-10-06 22:46:31 +00001407 "desc: LL cache: %s\n",
1408 I1.desc_line, D1.desc_line, LL.desc_line);
njn4f9c9342002-04-29 16:03:24 +00001409
nethercote9313ac42004-07-06 21:54:20 +00001410 // "cmd:" line
florian12d2eb52014-10-30 22:17:56 +00001411 VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
sewardj14c7cc52007-02-25 15:08:24 +00001412 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1413 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
florian12d2eb52014-10-30 22:17:56 +00001414 VG_(fprintf)(fp, " %s", arg);
njn4f9c9342002-04-29 16:03:24 +00001415 }
nethercote9313ac42004-07-06 21:54:20 +00001416 // "events:" line
sewardj8badbaa2007-05-08 09:20:25 +00001417 if (clo_cache_sim && clo_branch_sim) {
florian12d2eb52014-10-30 22:17:56 +00001418 VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
sewardj8badbaa2007-05-08 09:20:25 +00001419 "Bc Bcm Bi Bim\n");
1420 }
1421 else if (clo_cache_sim && !clo_branch_sim) {
florian12d2eb52014-10-30 22:17:56 +00001422 VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
sewardj8badbaa2007-05-08 09:20:25 +00001423 "\n");
1424 }
1425 else if (!clo_cache_sim && clo_branch_sim) {
florian12d2eb52014-10-30 22:17:56 +00001426 VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
sewardj8badbaa2007-05-08 09:20:25 +00001427 }
njne90711c2010-09-27 01:04:20 +00001428 else {
florian12d2eb52014-10-30 22:17:56 +00001429 VG_(fprintf)(fp, "\nevents: Ir\n");
njne90711c2010-09-27 01:04:20 +00001430 }
sewardj8badbaa2007-05-08 09:20:25 +00001431
njnd3bef4f2005-10-15 17:46:18 +00001432 // Traverse every lineCC
njne2a9ad32007-09-17 05:30:48 +00001433 VG_(OSetGen_ResetIter)(CC_table);
1434 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
njn4311fe62005-12-08 23:18:50 +00001435 Bool just_hit_a_new_file = False;
njnd3bef4f2005-10-15 17:46:18 +00001436 // If we've hit a new file, print a "fl=" line. Note that because
1437 // each string is stored exactly once in the string table, we can use
1438 // pointer comparison rather than strcmp() to test for equality, which
1439 // is good because most of the time the comparisons are equal and so
njn4311fe62005-12-08 23:18:50 +00001440 // the whole strings would have to be checked.
njnd3bef4f2005-10-15 17:46:18 +00001441 if ( lineCC->loc.file != currFile ) {
1442 currFile = lineCC->loc.file;
florian12d2eb52014-10-30 22:17:56 +00001443 VG_(fprintf)(fp, "fl=%s\n", currFile);
njnd3bef4f2005-10-15 17:46:18 +00001444 distinct_files++;
njn4311fe62005-12-08 23:18:50 +00001445 just_hit_a_new_file = True;
njn4f9c9342002-04-29 16:03:24 +00001446 }
njn4311fe62005-12-08 23:18:50 +00001447 // If we've hit a new function, print a "fn=" line. We know to do
1448 // this when the function name changes, and also every time we hit a
1449 // new file (in which case the new function name might be the same as
1450 // in the old file, hence the just_hit_a_new_file test).
1451 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
njnd3bef4f2005-10-15 17:46:18 +00001452 currFn = lineCC->loc.fn;
florian12d2eb52014-10-30 22:17:56 +00001453 VG_(fprintf)(fp, "fn=%s\n", currFn);
njnd3bef4f2005-10-15 17:46:18 +00001454 distinct_fns++;
1455 }
1456
1457 // Print the LineCC
sewardj8badbaa2007-05-08 09:20:25 +00001458 if (clo_cache_sim && clo_branch_sim) {
florian686870c2015-08-05 13:46:58 +00001459 VG_(fprintf)(fp, "%d %llu %llu %llu"
sewardj8badbaa2007-05-08 09:20:25 +00001460 " %llu %llu %llu"
1461 " %llu %llu %llu"
1462 " %llu %llu %llu %llu\n",
1463 lineCC->loc.line,
njn2d853a12010-10-06 22:46:31 +00001464 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1465 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1466 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
sewardj8badbaa2007-05-08 09:20:25 +00001467 lineCC->Bc.b, lineCC->Bc.mp,
1468 lineCC->Bi.b, lineCC->Bi.mp);
1469 }
1470 else if (clo_cache_sim && !clo_branch_sim) {
florian686870c2015-08-05 13:46:58 +00001471 VG_(fprintf)(fp, "%d %llu %llu %llu"
sewardj8badbaa2007-05-08 09:20:25 +00001472 " %llu %llu %llu"
1473 " %llu %llu %llu\n",
1474 lineCC->loc.line,
njn2d853a12010-10-06 22:46:31 +00001475 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1476 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1477 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
sewardj8badbaa2007-05-08 09:20:25 +00001478 }
1479 else if (!clo_cache_sim && clo_branch_sim) {
florian686870c2015-08-05 13:46:58 +00001480 VG_(fprintf)(fp, "%d %llu"
sewardj8badbaa2007-05-08 09:20:25 +00001481 " %llu %llu %llu %llu\n",
1482 lineCC->loc.line,
1483 lineCC->Ir.a,
1484 lineCC->Bc.b, lineCC->Bc.mp,
1485 lineCC->Bi.b, lineCC->Bi.mp);
1486 }
njne90711c2010-09-27 01:04:20 +00001487 else {
florian686870c2015-08-05 13:46:58 +00001488 VG_(fprintf)(fp, "%d %llu\n",
njne90711c2010-09-27 01:04:20 +00001489 lineCC->loc.line,
1490 lineCC->Ir.a);
1491 }
sewardj8badbaa2007-05-08 09:20:25 +00001492
njnd3bef4f2005-10-15 17:46:18 +00001493 // Update summary stats
1494 Ir_total.a += lineCC->Ir.a;
1495 Ir_total.m1 += lineCC->Ir.m1;
njn2d853a12010-10-06 22:46:31 +00001496 Ir_total.mL += lineCC->Ir.mL;
njnd3bef4f2005-10-15 17:46:18 +00001497 Dr_total.a += lineCC->Dr.a;
1498 Dr_total.m1 += lineCC->Dr.m1;
njn2d853a12010-10-06 22:46:31 +00001499 Dr_total.mL += lineCC->Dr.mL;
njnd3bef4f2005-10-15 17:46:18 +00001500 Dw_total.a += lineCC->Dw.a;
1501 Dw_total.m1 += lineCC->Dw.m1;
njn2d853a12010-10-06 22:46:31 +00001502 Dw_total.mL += lineCC->Dw.mL;
sewardj8badbaa2007-05-08 09:20:25 +00001503 Bc_total.b += lineCC->Bc.b;
1504 Bc_total.mp += lineCC->Bc.mp;
1505 Bi_total.b += lineCC->Bi.b;
1506 Bi_total.mp += lineCC->Bi.mp;
njnd3bef4f2005-10-15 17:46:18 +00001507
1508 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +00001509 }
1510
nethercote9313ac42004-07-06 21:54:20 +00001511 // Summary stats must come after rest of table, since we calculate them
sewardj8badbaa2007-05-08 09:20:25 +00001512 // during traversal. */
1513 if (clo_cache_sim && clo_branch_sim) {
florian12d2eb52014-10-30 22:17:56 +00001514 VG_(fprintf)(fp, "summary:"
sewardj8badbaa2007-05-08 09:20:25 +00001515 " %llu %llu %llu"
1516 " %llu %llu %llu"
1517 " %llu %llu %llu"
1518 " %llu %llu %llu %llu\n",
njn2d853a12010-10-06 22:46:31 +00001519 Ir_total.a, Ir_total.m1, Ir_total.mL,
1520 Dr_total.a, Dr_total.m1, Dr_total.mL,
1521 Dw_total.a, Dw_total.m1, Dw_total.mL,
sewardj8badbaa2007-05-08 09:20:25 +00001522 Bc_total.b, Bc_total.mp,
1523 Bi_total.b, Bi_total.mp);
1524 }
1525 else if (clo_cache_sim && !clo_branch_sim) {
florian12d2eb52014-10-30 22:17:56 +00001526 VG_(fprintf)(fp, "summary:"
sewardj8badbaa2007-05-08 09:20:25 +00001527 " %llu %llu %llu"
1528 " %llu %llu %llu"
1529 " %llu %llu %llu\n",
njn2d853a12010-10-06 22:46:31 +00001530 Ir_total.a, Ir_total.m1, Ir_total.mL,
1531 Dr_total.a, Dr_total.m1, Dr_total.mL,
1532 Dw_total.a, Dw_total.m1, Dw_total.mL);
sewardj8badbaa2007-05-08 09:20:25 +00001533 }
1534 else if (!clo_cache_sim && clo_branch_sim) {
florian12d2eb52014-10-30 22:17:56 +00001535 VG_(fprintf)(fp, "summary:"
sewardj8badbaa2007-05-08 09:20:25 +00001536 " %llu"
1537 " %llu %llu %llu %llu\n",
1538 Ir_total.a,
1539 Bc_total.b, Bc_total.mp,
1540 Bi_total.b, Bi_total.mp);
1541 }
njne90711c2010-09-27 01:04:20 +00001542 else {
florian12d2eb52014-10-30 22:17:56 +00001543 VG_(fprintf)(fp, "summary:"
njne90711c2010-09-27 01:04:20 +00001544 " %llu\n",
1545 Ir_total.a);
1546 }
sewardj8badbaa2007-05-08 09:20:25 +00001547
florian12d2eb52014-10-30 22:17:56 +00001548 VG_(fclose)(fp);
njn4f9c9342002-04-29 16:03:24 +00001549}
1550
njn607adfc2003-09-30 14:15:44 +00001551static UInt ULong_width(ULong n)
njn4f9c9342002-04-29 16:03:24 +00001552{
njn607adfc2003-09-30 14:15:44 +00001553 UInt w = 0;
1554 while (n > 0) {
1555 n = n / 10;
1556 w++;
njn4f9c9342002-04-29 16:03:24 +00001557 }
sewardj46c59b12005-11-01 02:20:19 +00001558 if (w == 0) w = 1;
njn607adfc2003-09-30 14:15:44 +00001559 return w + (w-1)/3; // add space for commas
njn4f9c9342002-04-29 16:03:24 +00001560}
1561
njn51d827b2005-05-09 01:02:08 +00001562static void cg_fini(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001563{
florian12d2eb52014-10-30 22:17:56 +00001564 static HChar fmt[128]; // OK; large enough
njn607adfc2003-09-30 14:15:44 +00001565
sewardj8badbaa2007-05-08 09:20:25 +00001566 CacheCC D_total;
1567 BranchCC B_total;
njn2d853a12010-10-06 22:46:31 +00001568 ULong LL_total_m, LL_total_mr, LL_total_mw,
1569 LL_total, LL_total_r, LL_total_w;
njn4c245e52009-03-15 23:25:38 +00001570 Int l1, l2, l3;
njn4f9c9342002-04-29 16:03:24 +00001571
nethercote9313ac42004-07-06 21:54:20 +00001572 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001573
njn7cf0bd32002-06-08 13:36:03 +00001574 if (VG_(clo_verbosity) == 0)
1575 return;
1576
njnf76d27a2009-05-28 01:53:07 +00001577 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1578 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
njn4c245e52009-03-15 23:25:38 +00001579
njn4f9c9342002-04-29 16:03:24 +00001580 /* I cache results. Use the I_refs value to determine the first column
1581 * width. */
njn607adfc2003-09-30 14:15:44 +00001582 l1 = ULong_width(Ir_total.a);
njnf76d27a2009-05-28 01:53:07 +00001583 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1584 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
njn4f9c9342002-04-29 16:03:24 +00001585
njn607adfc2003-09-30 14:15:44 +00001586 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001587 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
njnd3bef4f2005-10-15 17:46:18 +00001588
sewardj8badbaa2007-05-08 09:20:25 +00001589 /* Always print this */
sewardjb2c985b2009-07-15 14:51:17 +00001590 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
njn4f9c9342002-04-29 16:03:24 +00001591
sewardj8badbaa2007-05-08 09:20:25 +00001592 /* If cache profiling is enabled, show D access numbers and all
1593 miss numbers */
1594 if (clo_cache_sim) {
sewardjb2c985b2009-07-15 14:51:17 +00001595 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
njn2d853a12010-10-06 22:46:31 +00001596 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);
njn4f9c9342002-04-29 16:03:24 +00001597
sewardj8badbaa2007-05-08 09:20:25 +00001598 if (0 == Ir_total.a) Ir_total.a = 1;
florian227a1ec2014-12-12 19:32:10 +00001599 VG_(umsg)("I1 miss rate: %*.2f%%\n", l1,
1600 Ir_total.m1 * 100.0 / Ir_total.a);
1601 VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
1602 Ir_total.mL * 100.0 / Ir_total.a);
sewardjb2c985b2009-07-15 14:51:17 +00001603 VG_(umsg)("\n");
njnd3bef4f2005-10-15 17:46:18 +00001604
sewardj8badbaa2007-05-08 09:20:25 +00001605 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1606 * determine the width of columns 2 & 3. */
1607 D_total.a = Dr_total.a + Dw_total.a;
1608 D_total.m1 = Dr_total.m1 + Dw_total.m1;
njn2d853a12010-10-06 22:46:31 +00001609 D_total.mL = Dr_total.mL + Dw_total.mL;
njn4f9c9342002-04-29 16:03:24 +00001610
sewardj8badbaa2007-05-08 09:20:25 +00001611 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001612 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1613 l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001614
sewardjb2c985b2009-07-15 14:51:17 +00001615 VG_(umsg)(fmt, "D refs: ",
1616 D_total.a, Dr_total.a, Dw_total.a);
1617 VG_(umsg)(fmt, "D1 misses: ",
1618 D_total.m1, Dr_total.m1, Dw_total.m1);
njn2d853a12010-10-06 22:46:31 +00001619 VG_(umsg)(fmt, "LLd misses: ",
1620 D_total.mL, Dr_total.mL, Dw_total.mL);
njnd3bef4f2005-10-15 17:46:18 +00001621
sewardj8badbaa2007-05-08 09:20:25 +00001622 if (0 == D_total.a) D_total.a = 1;
1623 if (0 == Dr_total.a) Dr_total.a = 1;
1624 if (0 == Dw_total.a) Dw_total.a = 1;
florian227a1ec2014-12-12 19:32:10 +00001625 VG_(umsg)("D1 miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1626 l1, D_total.m1 * 100.0 / D_total.a,
1627 l2, Dr_total.m1 * 100.0 / Dr_total.a,
1628 l3, Dw_total.m1 * 100.0 / Dw_total.a);
1629 VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1630 l1, D_total.mL * 100.0 / D_total.a,
1631 l2, Dr_total.mL * 100.0 / Dr_total.a,
1632 l3, Dw_total.mL * 100.0 / Dw_total.a);
sewardjb2c985b2009-07-15 14:51:17 +00001633 VG_(umsg)("\n");
njn1d021fa2002-05-02 13:56:34 +00001634
njn2d853a12010-10-06 22:46:31 +00001635 /* LL overall results */
njn1d021fa2002-05-02 13:56:34 +00001636
njn2d853a12010-10-06 22:46:31 +00001637 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1638 LL_total_r = Dr_total.m1 + Ir_total.m1;
1639 LL_total_w = Dw_total.m1;
1640 VG_(umsg)(fmt, "LL refs: ",
1641 LL_total, LL_total_r, LL_total_w);
njn4f9c9342002-04-29 16:03:24 +00001642
njn2d853a12010-10-06 22:46:31 +00001643 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
1644 LL_total_mr = Dr_total.mL + Ir_total.mL;
1645 LL_total_mw = Dw_total.mL;
1646 VG_(umsg)(fmt, "LL misses: ",
1647 LL_total_m, LL_total_mr, LL_total_mw);
njnd3bef4f2005-10-15 17:46:18 +00001648
florian227a1ec2014-12-12 19:32:10 +00001649 VG_(umsg)("LL miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1650 l1, LL_total_m * 100.0 / (Ir_total.a + D_total.a),
1651 l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
1652 l3, LL_total_mw * 100.0 / Dw_total.a);
sewardj8badbaa2007-05-08 09:20:25 +00001653 }
1654
1655 /* If branch profiling is enabled, show branch overall results. */
1656 if (clo_branch_sim) {
1657 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001658 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1659 l1, l2, l3);
sewardj8badbaa2007-05-08 09:20:25 +00001660
1661 if (0 == Bc_total.b) Bc_total.b = 1;
1662 if (0 == Bi_total.b) Bi_total.b = 1;
1663 B_total.b = Bc_total.b + Bi_total.b;
1664 B_total.mp = Bc_total.mp + Bi_total.mp;
1665
sewardjb2c985b2009-07-15 14:51:17 +00001666 VG_(umsg)("\n");
1667 VG_(umsg)(fmt, "Branches: ",
1668 B_total.b, Bc_total.b, Bi_total.b);
sewardj8badbaa2007-05-08 09:20:25 +00001669
sewardjb2c985b2009-07-15 14:51:17 +00001670 VG_(umsg)(fmt, "Mispredicts: ",
1671 B_total.mp, Bc_total.mp, Bi_total.mp);
sewardj8badbaa2007-05-08 09:20:25 +00001672
florian227a1ec2014-12-12 19:32:10 +00001673 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1674 l1, B_total.mp * 100.0 / B_total.b,
1675 l2, Bc_total.mp * 100.0 / Bc_total.b,
1676 l3, Bi_total.mp * 100.0 / Bi_total.b);
sewardj8badbaa2007-05-08 09:20:25 +00001677 }
njn4f9c9342002-04-29 16:03:24 +00001678
nethercote9313ac42004-07-06 21:54:20 +00001679 // Various stats
sewardj2d9e8742009-08-07 15:46:56 +00001680 if (VG_(clo_stats)) {
njn1baf7db2006-04-18 22:34:48 +00001681 Int debug_lookups = full_debugs + fn_debugs +
1682 file_line_debugs + no_debugs;
njnd3bef4f2005-10-15 17:46:18 +00001683
sewardjb2c985b2009-07-15 14:51:17 +00001684 VG_(dmsg)("\n");
weidendo6fc0de02012-10-30 00:28:29 +00001685 VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files);
1686 VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
1687 VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines);
1688 VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
1689 VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
sewardjb2c985b2009-07-15 14:51:17 +00001690 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
njn1baf7db2006-04-18 22:34:48 +00001691
florian227a1ec2014-12-12 19:32:10 +00001692 VG_(dmsg)("cachegrind: with full info:%6.1f%% (%d)\n",
1693 full_debugs * 100.0 / debug_lookups, full_debugs);
1694 VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
1695 file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
1696 VG_(dmsg)("cachegrind: with fn name info:%6.1f%% (%d)\n",
1697 fn_debugs * 100.0 / debug_lookups, fn_debugs);
1698 VG_(dmsg)("cachegrind: with zero info:%6.1f%% (%d)\n",
1699 no_debugs * 100.0 / debug_lookups, no_debugs);
njn1baf7db2006-04-18 22:34:48 +00001700
florian47755db2015-08-05 12:09:55 +00001701 VG_(dmsg)("cachegrind: string table size: %u\n",
sewardjb2c985b2009-07-15 14:51:17 +00001702 VG_(OSetGen_Size)(stringTable));
florian47755db2015-08-05 12:09:55 +00001703 VG_(dmsg)("cachegrind: CC table size: %u\n",
sewardjb2c985b2009-07-15 14:51:17 +00001704 VG_(OSetGen_Size)(CC_table));
florian47755db2015-08-05 12:09:55 +00001705 VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
sewardjb2c985b2009-07-15 14:51:17 +00001706 VG_(OSetGen_Size)(instrInfoTable));
njn4f9c9342002-04-29 16:03:24 +00001707 }
njn4f9c9342002-04-29 16:03:24 +00001708}
1709
nethercote9313ac42004-07-06 21:54:20 +00001710/*--------------------------------------------------------------------*/
1711/*--- Discarding BB info ---*/
1712/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001713
sewardja3a29a52005-10-12 16:16:03 +00001714// Called when a translation is removed from the translation cache for
1715// any reason at all: to free up space, because the guest code was
1716// unmapped or modified, or for any arbitrary reason.
sewardj4ba057c2005-10-18 12:04:18 +00001717static
florianddd61ff2015-01-04 17:20:45 +00001718void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
sewardj18d75132002-05-16 11:06:21 +00001719{
sewardj0b9d74a2006-12-24 02:24:11 +00001720 SB_info* sbInfo;
florianddd61ff2015-01-04 17:20:45 +00001721 Addr orig_addr = vge.base[0];
njn4294fd42002-06-05 14:41:10 +00001722
sewardj5155dec2005-10-12 10:09:23 +00001723 tl_assert(vge.n_used > 0);
1724
1725 if (DEBUG_CG)
sewardj4ba057c2005-10-18 12:04:18 +00001726 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
florianddd61ff2015-01-04 17:20:45 +00001727 (void*)orig_addr,
1728 (void*)vge.base[0], (ULong)vge.len[0]);
njn4294fd42002-06-05 14:41:10 +00001729
sewardj4ba057c2005-10-18 12:04:18 +00001730 // Get BB info, remove from table, free BB info. Simple! Note that we
1731 // use orig_addr, not the first instruction address in vge.
njne2a9ad32007-09-17 05:30:48 +00001732 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
sewardj0b9d74a2006-12-24 02:24:11 +00001733 tl_assert(NULL != sbInfo);
njne2a9ad32007-09-17 05:30:48 +00001734 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
sewardj18d75132002-05-16 11:06:21 +00001735}
1736
1737/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001738/*--- Command line processing ---*/
1739/*--------------------------------------------------------------------*/
1740
florian19f91bb2012-11-10 22:29:54 +00001741static Bool cg_process_cmd_line_option(const HChar* arg)
njn25e49d8e72002-09-23 09:36:25 +00001742{
weidendo23642272011-09-06 19:08:31 +00001743 if (VG_(str_clo_cache_opt)(arg,
1744 &clo_I1_cache,
1745 &clo_D1_cache,
1746 &clo_LL_cache)) {}
njn83df0b62009-02-25 01:01:05 +00001747
1748 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1749 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1750 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
njn25e49d8e72002-09-23 09:36:25 +00001751 else
1752 return False;
1753
1754 return True;
1755}
1756
njn51d827b2005-05-09 01:02:08 +00001757static void cg_print_usage(void)
njn25e49d8e72002-09-23 09:36:25 +00001758{
weidendo23642272011-09-06 19:08:31 +00001759 VG_(print_cache_clo_opts)();
njn3e884182003-04-15 13:03:23 +00001760 VG_(printf)(
sewardj8badbaa2007-05-08 09:20:25 +00001761" --cache-sim=yes|no [yes] collect cache stats?\n"
1762" --branch-sim=yes|no [no] collect branch prediction stats?\n"
njn374a36d2007-11-23 01:41:32 +00001763" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
njn3e884182003-04-15 13:03:23 +00001764 );
1765}
1766
njn51d827b2005-05-09 01:02:08 +00001767static void cg_print_debug_usage(void)
njn3e884182003-04-15 13:03:23 +00001768{
1769 VG_(printf)(
1770" (none)\n"
1771 );
njn25e49d8e72002-09-23 09:36:25 +00001772}
1773
1774/*--------------------------------------------------------------------*/
1775/*--- Setup ---*/
1776/*--------------------------------------------------------------------*/
1777
sewardje1216cb2007-02-07 19:55:30 +00001778static void cg_post_clo_init(void); /* just below */
1779
njn51d827b2005-05-09 01:02:08 +00001780static void cg_pre_clo_init(void)
1781{
njn51d827b2005-05-09 01:02:08 +00001782 VG_(details_name) ("Cachegrind");
1783 VG_(details_version) (NULL);
sewardj8badbaa2007-05-08 09:20:25 +00001784 VG_(details_description) ("a cache and branch-prediction profiler");
njn51d827b2005-05-09 01:02:08 +00001785 VG_(details_copyright_author)(
Elliott Hughesed398002017-06-21 14:41:24 -07001786 "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.");
njn51d827b2005-05-09 01:02:08 +00001787 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardje8089302006-10-17 02:15:17 +00001788 VG_(details_avg_translation_sizeB) ( 500 );
njn51d827b2005-05-09 01:02:08 +00001789
sewardj8d47a612015-02-05 12:59:46 +00001790 VG_(clo_vex_control).iropt_register_updates_default
1791 = VG_(clo_px_file_backed)
philippe5b240c22012-08-14 22:28:31 +00001792 = VexRegUpdSpAtMemAccess; // overridable by the user.
sewardj8d47a612015-02-05 12:59:46 +00001793
njn51d827b2005-05-09 01:02:08 +00001794 VG_(basic_tool_funcs) (cg_post_clo_init,
1795 cg_instrument,
1796 cg_fini);
1797
sewardj0b9d74a2006-12-24 02:24:11 +00001798 VG_(needs_superblock_discards)(cg_discard_superblock_info);
njn51d827b2005-05-09 01:02:08 +00001799 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1800 cg_print_usage,
1801 cg_print_debug_usage);
sewardje1216cb2007-02-07 19:55:30 +00001802}
1803
1804static void cg_post_clo_init(void)
1805{
njn2d853a12010-10-06 22:46:31 +00001806 cache_t I1c, D1c, LLc;
njn51d827b2005-05-09 01:02:08 +00001807
njne2a9ad32007-09-17 05:30:48 +00001808 CC_table =
1809 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1810 cmp_CodeLoc_LineCC,
sewardj9c606bd2008-09-18 18:12:50 +00001811 VG_(malloc), "cg.main.cpci.1",
1812 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001813 instrInfoTable =
1814 VG_(OSetGen_Create)(/*keyOff*/0,
1815 NULL,
sewardj9c606bd2008-09-18 18:12:50 +00001816 VG_(malloc), "cg.main.cpci.2",
1817 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001818 stringTable =
1819 VG_(OSetGen_Create)(/*keyOff*/0,
1820 stringCmp,
sewardj9c606bd2008-09-18 18:12:50 +00001821 VG_(malloc), "cg.main.cpci.3",
1822 VG_(free));
sewardje1216cb2007-02-07 19:55:30 +00001823
weidendo23642272011-09-06 19:08:31 +00001824 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1825 &clo_I1_cache,
1826 &clo_D1_cache,
1827 &clo_LL_cache);
sewardje1216cb2007-02-07 19:55:30 +00001828
sewardj98763d52012-06-03 22:40:07 +00001829 // min_line_size is used to make sure that we never feed
1830 // accesses to the simulator straddling more than two
1831 // cache lines at any cache level
1832 min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1833 min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
1834
1835 Int largest_load_or_store_size
1836 = VG_(machine_get_size_of_largest_guest_register)();
1837 if (min_line_size < largest_load_or_store_size) {
1838 /* We can't continue, because the cache simulation might
1839 straddle more than 2 lines, and it will assert. So let's
1840 just stop before we start. */
1841 VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1842 (Int)min_line_size);
1843 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
1844 largest_load_or_store_size );
1845 VG_(umsg)(" but it is not. Exiting now.\n");
1846 VG_(exit)(1);
1847 }
1848
weidendoc1e94262012-10-05 23:58:17 +00001849 cachesim_initcaches(I1c, D1c, LLc);
njn51d827b2005-05-09 01:02:08 +00001850}
1851
sewardj45f4e7c2005-09-27 19:20:21 +00001852VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
fitzhardinge98abfc72003-12-16 02:05:15 +00001853
njn25e49d8e72002-09-23 09:36:25 +00001854/*--------------------------------------------------------------------*/
njnf69f9452005-07-03 17:53:11 +00001855/*--- end ---*/
sewardj18d75132002-05-16 11:06:21 +00001856/*--------------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +00001857