blob: 690d5d0da5284b0e55b62a1f9acb29783eec7f37 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njn101e5722005-04-21 02:37:54 +00003/*--- Cachegrind: everything but the simulation itself. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2002-2010 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000012 njn@valgrind.org
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
sewardj4cfea4f2006-10-14 19:26:10 +000033#include "pub_tool_vki.h"
njnea27e462005-05-31 02:38:09 +000034#include "pub_tool_debuginfo.h"
njn97405b22005-06-02 03:39:33 +000035#include "pub_tool_libcbase.h"
njn132bfcc2005-06-04 19:16:06 +000036#include "pub_tool_libcassert.h"
njneb8896b2005-06-04 20:03:55 +000037#include "pub_tool_libcfile.h"
njn36a20fa2005-06-03 03:08:39 +000038#include "pub_tool_libcprint.h"
njnf39e9a32005-06-12 02:43:17 +000039#include "pub_tool_libcproc.h"
njnf536bbb2005-06-13 04:21:38 +000040#include "pub_tool_machine.h"
njn717cde52005-05-10 02:47:21 +000041#include "pub_tool_mallocfree.h"
njn20242342005-05-16 23:31:24 +000042#include "pub_tool_options.h"
njnd3bef4f2005-10-15 17:46:18 +000043#include "pub_tool_oset.h"
njn43b9a8a2005-05-10 04:37:01 +000044#include "pub_tool_tooliface.h"
sewardj14c7cc52007-02-25 15:08:24 +000045#include "pub_tool_xarray.h"
sewardj45f4e7c2005-09-27 19:20:21 +000046#include "pub_tool_clientstate.h"
sewardj5bb86822005-12-23 12:47:42 +000047#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
njn25e49d8e72002-09-23 09:36:25 +000048
nethercoteb35a8b92004-09-11 16:45:27 +000049#include "cg_arch.h"
nethercote27fc1da2004-01-04 16:56:57 +000050#include "cg_sim.c"
sewardj8badbaa2007-05-08 09:20:25 +000051#include "cg_branchpred.c"
njn4f9c9342002-04-29 16:03:24 +000052
njn25e49d8e72002-09-23 09:36:25 +000053/*------------------------------------------------------------*/
54/*--- Constants ---*/
55/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000056
/* Debug switch: when set to 1, very verbose debugging output is
   enabled (e.g. each event is printed as it is flushed). */
#define DEBUG_CG        0

/* NOTE(review): MIN_LINE_SIZE is not referenced in this part of the
   file; presumably the smallest cache line size accepted -- confirm. */
#define MIN_LINE_SIZE   16

/* Buffer sizes used when fetching debug info: file/directory names and
   function names respectively. */
#define FILE_LEN        VKI_PATH_MAX
#define FN_LEN          256
njn7cf0bd32002-06-08 13:36:03 +000063
64/*------------------------------------------------------------*/
sewardj8badbaa2007-05-08 09:20:25 +000065/*--- Options ---*/
66/*------------------------------------------------------------*/
67
njn374a36d2007-11-23 01:41:32 +000068static Bool clo_cache_sim = True; /* do cache simulation? */
69static Bool clo_branch_sim = False; /* do branch simulation? */
70static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
sewardj8badbaa2007-05-08 09:20:25 +000071
72/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000073/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000074/*------------------------------------------------------------*/
75
sewardj8badbaa2007-05-08 09:20:25 +000076typedef
77 struct {
78 ULong a; /* total # memory accesses of this kind */
79 ULong m1; /* misses in the first level cache */
80 ULong m2; /* misses in the second level cache */
81 }
82 CacheCC;
83
84typedef
85 struct {
86 ULong b; /* total # branches of this kind */
87 ULong mp; /* number of branches mispredicted */
88 }
89 BranchCC;
njn4f9c9342002-04-29 16:03:24 +000090
nethercote9313ac42004-07-06 21:54:20 +000091//------------------------------------------------------------
92// Primary data structure #1: CC table
93// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
njnd3bef4f2005-10-15 17:46:18 +000094// - an ordered set of CCs. CC indexing done by file/function/line (as
95// determined from the instrAddr).
nethercote9313ac42004-07-06 21:54:20 +000096// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +000097
njnd3bef4f2005-10-15 17:46:18 +000098typedef struct {
99 Char* file;
100 Char* fn;
101 Int line;
102}
103CodeLoc;
njn4f9c9342002-04-29 16:03:24 +0000104
sewardj8badbaa2007-05-08 09:20:25 +0000105typedef struct {
106 CodeLoc loc; /* Source location that these counts pertain to */
107 CacheCC Ir; /* Insn read counts */
108 CacheCC Dr; /* Data read counts */
109 CacheCC Dw; /* Data write/modify counts */
110 BranchCC Bc; /* Conditional branch counts */
111 BranchCC Bi; /* Indirect branch counts */
112} LineCC;
njn4f9c9342002-04-29 16:03:24 +0000113
njnd3bef4f2005-10-15 17:46:18 +0000114// First compare file, then fn, then line.
tom5a835d52007-12-30 12:28:26 +0000115static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
njnd3bef4f2005-10-15 17:46:18 +0000116{
njnafa12262005-12-24 03:10:56 +0000117 Word res;
njnd3bef4f2005-10-15 17:46:18 +0000118 CodeLoc* a = (CodeLoc*)vloc;
119 CodeLoc* b = &(((LineCC*)vcc)->loc);
njn4f9c9342002-04-29 16:03:24 +0000120
njnd3bef4f2005-10-15 17:46:18 +0000121 res = VG_(strcmp)(a->file, b->file);
122 if (0 != res)
123 return res;
njn4f9c9342002-04-29 16:03:24 +0000124
njnd3bef4f2005-10-15 17:46:18 +0000125 res = VG_(strcmp)(a->fn, b->fn);
126 if (0 != res)
127 return res;
128
129 return a->line - b->line;
130}
131
132static OSet* CC_table;
njn4f9c9342002-04-29 16:03:24 +0000133
nethercote9313ac42004-07-06 21:54:20 +0000134//------------------------------------------------------------
njnd3bef4f2005-10-15 17:46:18 +0000135// Primary data structure #2: InstrInfo table
nethercote9313ac42004-07-06 21:54:20 +0000136// - Holds the cached info about each instr that is used for simulation.
sewardj0b9d74a2006-12-24 02:24:11 +0000137// - table(SB_start_addr, list(InstrInfo))
138// - For each SB, each InstrInfo in the list holds info about the
njnd3bef4f2005-10-15 17:46:18 +0000139// instruction (instrLen, instrAddr, etc), plus a pointer to its line
nethercote9313ac42004-07-06 21:54:20 +0000140// CC. This node is what's passed to the simulation function.
sewardj0b9d74a2006-12-24 02:24:11 +0000141// - When SBs are discarded the relevant list(instr_details) is freed.
nethercote9313ac42004-07-06 21:54:20 +0000142
njnd3bef4f2005-10-15 17:46:18 +0000143typedef struct _InstrInfo InstrInfo;
144struct _InstrInfo {
nethercoteca1f2dc2004-07-21 08:49:02 +0000145 Addr instr_addr;
njn6a3009b2005-03-20 00:20:06 +0000146 UChar instr_len;
njnd3bef4f2005-10-15 17:46:18 +0000147 LineCC* parent; // parent line-CC
nethercote9313ac42004-07-06 21:54:20 +0000148};
149
sewardj0b9d74a2006-12-24 02:24:11 +0000150typedef struct _SB_info SB_info;
151struct _SB_info {
152 Addr SB_addr; // key; MUST BE FIRST
njnd3bef4f2005-10-15 17:46:18 +0000153 Int n_instrs;
154 InstrInfo instrs[0];
nethercote9313ac42004-07-06 21:54:20 +0000155};
156
njnd3bef4f2005-10-15 17:46:18 +0000157static OSet* instrInfoTable;
158
159//------------------------------------------------------------
160// Secondary data structure: string table
161// - holds strings, avoiding dups
162// - used for filenames and function names, each of which will be
163// pointed to by one or more CCs.
164// - it also allows equality checks just by pointer comparison, which
165// is good when printing the output file at the end.
166
167static OSet* stringTable;
nethercote9313ac42004-07-06 21:54:20 +0000168
169//------------------------------------------------------------
170// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000171static Int distinct_files = 0;
172static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000173static Int distinct_lines = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000174static Int distinct_instrs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000175
njnd3bef4f2005-10-15 17:46:18 +0000176static Int full_debugs = 0;
177static Int file_line_debugs = 0;
178static Int fn_debugs = 0;
179static Int no_debugs = 0;
njn4f9c9342002-04-29 16:03:24 +0000180
nethercote9313ac42004-07-06 21:54:20 +0000181/*------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +0000182/*--- String table operations ---*/
183/*------------------------------------------------------------*/
184
tom5a835d52007-12-30 12:28:26 +0000185static Word stringCmp( const void* key, const void* elem )
njnd3bef4f2005-10-15 17:46:18 +0000186{
187 return VG_(strcmp)(*(Char**)key, *(Char**)elem);
188}
189
190// Get a permanent string; either pull it out of the string table if it's
191// been encountered before, or dup it and put it into the string table.
192static Char* get_perm_string(Char* s)
193{
njne2a9ad32007-09-17 05:30:48 +0000194 Char** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
njnd3bef4f2005-10-15 17:46:18 +0000195 if (s_ptr) {
196 return *s_ptr;
197 } else {
njne2a9ad32007-09-17 05:30:48 +0000198 Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
sewardj9c606bd2008-09-18 18:12:50 +0000199 *s_node = VG_(strdup)("cg.main.gps.1", s);
njne2a9ad32007-09-17 05:30:48 +0000200 VG_(OSetGen_Insert)(stringTable, s_node);
njnd3bef4f2005-10-15 17:46:18 +0000201 return *s_node;
202 }
203}
204
205/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000206/*--- CC table operations ---*/
207/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000208
nethercote9313ac42004-07-06 21:54:20 +0000209static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
210 Char fn[FN_LEN], Int* line)
njn4f9c9342002-04-29 16:03:24 +0000211{
njnf3b61d62007-09-17 00:41:07 +0000212 Char dir[FILE_LEN];
213 Bool found_dirname;
sewardj7cee6f92005-06-13 17:39:06 +0000214 Bool found_file_line = VG_(get_filename_linenum)(
215 instr_addr,
216 file, FILE_LEN,
njnf3b61d62007-09-17 00:41:07 +0000217 dir, FILE_LEN, &found_dirname,
sewardj7cee6f92005-06-13 17:39:06 +0000218 line
219 );
nethercote9313ac42004-07-06 21:54:20 +0000220 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000221
nethercote9313ac42004-07-06 21:54:20 +0000222 if (!found_file_line) {
223 VG_(strcpy)(file, "???");
224 *line = 0;
225 }
226 if (!found_fn) {
227 VG_(strcpy)(fn, "???");
228 }
njnf3b61d62007-09-17 00:41:07 +0000229
230 if (found_dirname) {
231 // +1 for the '/'.
232 tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
233 VG_(strcat)(dir, "/"); // Append '/'
234 VG_(strcat)(dir, file); // Append file to dir
235 VG_(strcpy)(file, dir); // Move dir+file to file
236 }
237
nethercote9313ac42004-07-06 21:54:20 +0000238 if (found_file_line) {
njnd3bef4f2005-10-15 17:46:18 +0000239 if (found_fn) full_debugs++;
240 else file_line_debugs++;
nethercote9313ac42004-07-06 21:54:20 +0000241 } else {
njnd3bef4f2005-10-15 17:46:18 +0000242 if (found_fn) fn_debugs++;
243 else no_debugs++;
njn4f9c9342002-04-29 16:03:24 +0000244 }
245}
246
nethercote9313ac42004-07-06 21:54:20 +0000247// Do a three step traversal: by file, then fn, then line.
njnd3bef4f2005-10-15 17:46:18 +0000248// Returns a pointer to the line CC, creates a new one if necessary.
249static LineCC* get_lineCC(Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000250{
nethercote9313ac42004-07-06 21:54:20 +0000251 Char file[FILE_LEN], fn[FN_LEN];
252 Int line;
njnd3bef4f2005-10-15 17:46:18 +0000253 CodeLoc loc;
254 LineCC* lineCC;
nethercote9313ac42004-07-06 21:54:20 +0000255
njn6a3009b2005-03-20 00:20:06 +0000256 get_debug_info(origAddr, file, fn, &line);
nethercote9313ac42004-07-06 21:54:20 +0000257
njnd3bef4f2005-10-15 17:46:18 +0000258 loc.file = file;
259 loc.fn = fn;
260 loc.line = line;
njn4f9c9342002-04-29 16:03:24 +0000261
njne2a9ad32007-09-17 05:30:48 +0000262 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
njnd3bef4f2005-10-15 17:46:18 +0000263 if (!lineCC) {
264 // Allocate and zero a new node.
njne2a9ad32007-09-17 05:30:48 +0000265 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
njnd3bef4f2005-10-15 17:46:18 +0000266 lineCC->loc.file = get_perm_string(loc.file);
267 lineCC->loc.fn = get_perm_string(loc.fn);
268 lineCC->loc.line = loc.line;
njn0a8db5c2007-04-02 03:11:41 +0000269 lineCC->Ir.a = 0;
270 lineCC->Ir.m1 = 0;
271 lineCC->Ir.m2 = 0;
272 lineCC->Dr.a = 0;
273 lineCC->Dr.m1 = 0;
274 lineCC->Dr.m2 = 0;
275 lineCC->Dw.a = 0;
276 lineCC->Dw.m1 = 0;
277 lineCC->Dw.m2 = 0;
sewardj8badbaa2007-05-08 09:20:25 +0000278 lineCC->Bc.b = 0;
279 lineCC->Bc.mp = 0;
280 lineCC->Bi.b = 0;
281 lineCC->Bi.mp = 0;
njne2a9ad32007-09-17 05:30:48 +0000282 VG_(OSetGen_Insert)(CC_table, lineCC);
njn4f9c9342002-04-29 16:03:24 +0000283 }
nethercote9313ac42004-07-06 21:54:20 +0000284
njnd3bef4f2005-10-15 17:46:18 +0000285 return lineCC;
njn4f9c9342002-04-29 16:03:24 +0000286}
287
288/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000289/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000290/*------------------------------------------------------------*/
291
njnaf839f52005-06-23 03:27:57 +0000292static VG_REGPARM(1)
njnd3bef4f2005-10-15 17:46:18 +0000293void log_1I_0D_cache_access(InstrInfo* n)
njn25e49d8e72002-09-23 09:36:25 +0000294{
sewardj5155dec2005-10-12 10:09:23 +0000295 //VG_(printf)("1I_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
296 // n, n->instr_addr, n->instr_len);
njn6a3009b2005-03-20 00:20:06 +0000297 cachesim_I1_doref(n->instr_addr, n->instr_len,
nethercote9313ac42004-07-06 21:54:20 +0000298 &n->parent->Ir.m1, &n->parent->Ir.m2);
299 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000300}
301
njnaf839f52005-06-23 03:27:57 +0000302static VG_REGPARM(2)
njnd3bef4f2005-10-15 17:46:18 +0000303void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
njn25e49d8e72002-09-23 09:36:25 +0000304{
sewardj5155dec2005-10-12 10:09:23 +0000305 //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
306 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
307 // n, n->instr_addr, n->instr_len,
308 // n2, n2->instr_addr, n2->instr_len);
sewardj5155dec2005-10-12 10:09:23 +0000309 cachesim_I1_doref(n->instr_addr, n->instr_len,
310 &n->parent->Ir.m1, &n->parent->Ir.m2);
311 n->parent->Ir.a++;
312 cachesim_I1_doref(n2->instr_addr, n2->instr_len,
313 &n2->parent->Ir.m1, &n2->parent->Ir.m2);
314 n2->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000315}
316
317static VG_REGPARM(3)
njnd3bef4f2005-10-15 17:46:18 +0000318void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
sewardj5155dec2005-10-12 10:09:23 +0000319{
320 //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
321 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
322 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
323 // n, n->instr_addr, n->instr_len,
324 // n2, n2->instr_addr, n2->instr_len,
325 // n3, n3->instr_addr, n3->instr_len);
sewardj5155dec2005-10-12 10:09:23 +0000326 cachesim_I1_doref(n->instr_addr, n->instr_len,
327 &n->parent->Ir.m1, &n->parent->Ir.m2);
328 n->parent->Ir.a++;
329 cachesim_I1_doref(n2->instr_addr, n2->instr_len,
330 &n2->parent->Ir.m1, &n2->parent->Ir.m2);
331 n2->parent->Ir.a++;
332 cachesim_I1_doref(n3->instr_addr, n3->instr_len,
333 &n3->parent->Ir.m1, &n3->parent->Ir.m2);
334 n3->parent->Ir.a++;
sewardj5155dec2005-10-12 10:09:23 +0000335}
336
337static VG_REGPARM(3)
njnd3bef4f2005-10-15 17:46:18 +0000338void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000339{
340 //VG_(printf)("1I_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
341 // " daddr=0x%010lx, dsize=%lu\n",
342 // n, n->instr_addr, n->instr_len, data_addr, data_size);
njn6a3009b2005-03-20 00:20:06 +0000343 cachesim_I1_doref(n->instr_addr, n->instr_len,
nethercote9313ac42004-07-06 21:54:20 +0000344 &n->parent->Ir.m1, &n->parent->Ir.m2);
345 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000346
sewardj5155dec2005-10-12 10:09:23 +0000347 cachesim_D1_doref(data_addr, data_size,
nethercote9313ac42004-07-06 21:54:20 +0000348 &n->parent->Dr.m1, &n->parent->Dr.m2);
349 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000350}
351
sewardj5155dec2005-10-12 10:09:23 +0000352static VG_REGPARM(3)
njnd3bef4f2005-10-15 17:46:18 +0000353void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000354{
sewardj5155dec2005-10-12 10:09:23 +0000355 //VG_(printf)("1I_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
356 // " daddr=0x%010lx, dsize=%lu\n",
357 // n, n->instr_addr, n->instr_len, data_addr, data_size);
njn6a3009b2005-03-20 00:20:06 +0000358 cachesim_I1_doref(n->instr_addr, n->instr_len,
nethercote9313ac42004-07-06 21:54:20 +0000359 &n->parent->Ir.m1, &n->parent->Ir.m2);
360 n->parent->Ir.a++;
361
sewardj5155dec2005-10-12 10:09:23 +0000362 cachesim_D1_doref(data_addr, data_size,
nethercote9313ac42004-07-06 21:54:20 +0000363 &n->parent->Dw.m1, &n->parent->Dw.m2);
364 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000365}
366
njnaf839f52005-06-23 03:27:57 +0000367static VG_REGPARM(3)
njnd3bef4f2005-10-15 17:46:18 +0000368void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
njn25e49d8e72002-09-23 09:36:25 +0000369{
sewardj5155dec2005-10-12 10:09:23 +0000370 //VG_(printf)("0I_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
371 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000372 cachesim_D1_doref(data_addr, data_size,
nethercote9313ac42004-07-06 21:54:20 +0000373 &n->parent->Dr.m1, &n->parent->Dr.m2);
374 n->parent->Dr.a++;
sewardj5155dec2005-10-12 10:09:23 +0000375}
376
377static VG_REGPARM(3)
njnd3bef4f2005-10-15 17:46:18 +0000378void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
sewardj5155dec2005-10-12 10:09:23 +0000379{
380 //VG_(printf)("0I_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
381 // n, data_addr, data_size);
sewardj5155dec2005-10-12 10:09:23 +0000382 cachesim_D1_doref(data_addr, data_size,
nethercote9313ac42004-07-06 21:54:20 +0000383 &n->parent->Dw.m1, &n->parent->Dw.m2);
384 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000385}
386
sewardj8badbaa2007-05-08 09:20:25 +0000387/* For branches, we consult two different predictors, one which
388 predicts taken/untaken for conditional branches, and the other
389 which predicts the branch target address for indirect branches
390 (jump-to-register style ones). */
391
392static VG_REGPARM(2)
393void log_cond_branch(InstrInfo* n, Word taken)
394{
395 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
396 // n, taken);
397 n->parent->Bc.b++;
398 n->parent->Bc.mp
399 += (1 & do_cond_branch_predict(n->instr_addr, taken));
400}
401
402static VG_REGPARM(2)
403void log_ind_branch(InstrInfo* n, UWord actual_dst)
404{
405 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
406 // n, actual_dst);
407 n->parent->Bi.b++;
408 n->parent->Bi.mp
409 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
410}
411
412
nethercote9313ac42004-07-06 21:54:20 +0000413/*------------------------------------------------------------*/
sewardj5155dec2005-10-12 10:09:23 +0000414/*--- Instrumentation types and structures ---*/
415/*------------------------------------------------------------*/
416
417/* Maintain an ordered list of memory events which are outstanding, in
418 the sense that no IR has yet been generated to do the relevant
419 helper calls. The BB is scanned top to bottom and memory events
420 are added to the end of the list, merging with the most recent
421 notified event where possible (Dw immediately following Dr and
422 having the same size and EA can be merged).
423
424 This merging is done so that for architectures which have
425 load-op-store instructions (x86, amd64), the insn is treated as if
426 it makes just one memory reference (a modify), rather than two (a
427 read followed by a write at the same address).
428
429 At various points the list will need to be flushed, that is, IR
430 generated from it. That must happen before any possible exit from
431 the block (the end, or an IRStmt_Exit). Flushing also takes place
432 when there is no space to add a new event.
433
434 If we require the simulation statistics to be up to date with
435 respect to possible memory exceptions, then the list would have to
436 be flushed before each memory reference. That would however lose
437 performance by inhibiting event-merging during flushing.
438
439 Flushing the list consists of walking it start to end and emitting
440 instrumentation IR for each event, in the order in which they
441 appear. It may be possible to emit a single call for two adjacent
442 events in order to reduce the number of helper function calls made.
443 For example, it could well be profitable to handle two adjacent Ir
444 events with a single helper call. */
445
446typedef
447 IRExpr
448 IRAtom;
449
/* The kinds of event that can be queued during instrumentation. */
typedef enum {
   Ev_Ir,   // instruction read
   Ev_Dr,   // data read
   Ev_Dw,   // data write
   Ev_Dm,   // data modify (a read followed by a write)
   Ev_Bc,   // conditional branch
   Ev_Bi    // indirect branch (destination not known in advance)
} EventTag;
sewardj5155dec2005-10-12 10:09:23 +0000460
461typedef
462 struct {
sewardj8badbaa2007-05-08 09:20:25 +0000463 EventTag tag;
464 InstrInfo* inode;
465 union {
466 struct {
467 } Ir;
468 struct {
469 IRAtom* ea;
470 Int szB;
471 } Dr;
472 struct {
473 IRAtom* ea;
474 Int szB;
475 } Dw;
476 struct {
477 IRAtom* ea;
478 Int szB;
479 } Dm;
480 struct {
481 IRAtom* taken; /* :: Ity_I1 */
482 } Bc;
483 struct {
484 IRAtom* dst;
485 } Bi;
486 } Ev;
sewardj5155dec2005-10-12 10:09:23 +0000487 }
488 Event;
489
sewardj8badbaa2007-05-08 09:20:25 +0000490static void init_Event ( Event* ev ) {
491 VG_(memset)(ev, 0, sizeof(Event));
492}
493
494static IRAtom* get_Event_dea ( Event* ev ) {
495 switch (ev->tag) {
496 case Ev_Dr: return ev->Ev.Dr.ea;
497 case Ev_Dw: return ev->Ev.Dw.ea;
498 case Ev_Dm: return ev->Ev.Dm.ea;
499 default: tl_assert(0);
500 }
501}
502
503static Int get_Event_dszB ( Event* ev ) {
504 switch (ev->tag) {
505 case Ev_Dr: return ev->Ev.Dr.szB;
506 case Ev_Dw: return ev->Ev.Dw.szB;
507 case Ev_Dm: return ev->Ev.Dm.szB;
508 default: tl_assert(0);
509 }
510}
511
512
/* Capacity of the queue of events that have not yet been notified.  The
   value is arbitrary: a bigger queue gives more opportunities for
   merging events, but may cause more spilling because the live ranges
   of address temporaries get longer. */
#define N_EVENTS 16
518
519
520/* A struct which holds all the running state during instrumentation.
521 Mostly to avoid passing loads of parameters everywhere. */
522typedef
523 struct {
524 /* The current outstanding-memory-event list. */
525 Event events[N_EVENTS];
526 Int events_used;
527
njnd3bef4f2005-10-15 17:46:18 +0000528 /* The array of InstrInfo bins for the BB. */
sewardj0b9d74a2006-12-24 02:24:11 +0000529 SB_info* sbInfo;
sewardj5155dec2005-10-12 10:09:23 +0000530
njnd3bef4f2005-10-15 17:46:18 +0000531 /* Number InstrInfo bins 'used' so far. */
sewardj0b9d74a2006-12-24 02:24:11 +0000532 Int sbInfo_i;
sewardj5155dec2005-10-12 10:09:23 +0000533
sewardj0b9d74a2006-12-24 02:24:11 +0000534 /* The output SB being constructed. */
535 IRSB* sbOut;
sewardj5155dec2005-10-12 10:09:23 +0000536 }
537 CgState;
538
539
sewardj5155dec2005-10-12 10:09:23 +0000540/*------------------------------------------------------------*/
541/*--- Instrumentation main ---*/
nethercote9313ac42004-07-06 21:54:20 +0000542/*------------------------------------------------------------*/
543
sewardj4ba057c2005-10-18 12:04:18 +0000544// Note that origAddr is the real origAddr, not the address of the first
545// instruction in the block (they can be different due to redirection).
nethercote564b2b02004-08-07 15:54:53 +0000546static
sewardj0b9d74a2006-12-24 02:24:11 +0000547SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
nethercote9313ac42004-07-06 21:54:20 +0000548{
njn4bd67b52005-08-11 00:47:10 +0000549 Int i, n_instrs;
550 IRStmt* st;
sewardj0b9d74a2006-12-24 02:24:11 +0000551 SB_info* sbInfo;
njnd3bef4f2005-10-15 17:46:18 +0000552
sewardj0b9d74a2006-12-24 02:24:11 +0000553 // Count number of original instrs in SB
njn6a3009b2005-03-20 00:20:06 +0000554 n_instrs = 0;
sewardj0b9d74a2006-12-24 02:24:11 +0000555 for (i = 0; i < sbIn->stmts_used; i++) {
556 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +0000557 if (Ist_IMark == st->tag) n_instrs++;
nethercote9313ac42004-07-06 21:54:20 +0000558 }
559
njnf7d26092005-10-12 16:45:17 +0000560 // Check that we don't have an entry for this BB in the instr-info table.
561 // If this assertion fails, there has been some screwup: some
562 // translations must have been discarded but Cachegrind hasn't discarded
563 // the corresponding entries in the instr-info table.
njne2a9ad32007-09-17 05:30:48 +0000564 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
sewardj0b9d74a2006-12-24 02:24:11 +0000565 tl_assert(NULL == sbInfo);
sewardja3a29a52005-10-12 16:16:03 +0000566
njnd3bef4f2005-10-15 17:46:18 +0000567 // BB never translated before (at this address, at least; could have
568 // been unloaded and then reloaded elsewhere in memory)
njne2a9ad32007-09-17 05:30:48 +0000569 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
sewardj0b9d74a2006-12-24 02:24:11 +0000570 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
571 sbInfo->SB_addr = origAddr;
572 sbInfo->n_instrs = n_instrs;
njne2a9ad32007-09-17 05:30:48 +0000573 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
sewardja3a29a52005-10-12 16:16:03 +0000574 distinct_instrs++;
575
sewardj0b9d74a2006-12-24 02:24:11 +0000576 return sbInfo;
nethercote9313ac42004-07-06 21:54:20 +0000577}
njn6a3009b2005-03-20 00:20:06 +0000578
nethercote9313ac42004-07-06 21:54:20 +0000579
sewardj5155dec2005-10-12 10:09:23 +0000580static void showEvent ( Event* ev )
nethercote9313ac42004-07-06 21:54:20 +0000581{
sewardj8badbaa2007-05-08 09:20:25 +0000582 switch (ev->tag) {
583 case Ev_Ir:
njnfd9f6222005-10-16 00:17:37 +0000584 VG_(printf)("Ir %p\n", ev->inode);
sewardj5155dec2005-10-12 10:09:23 +0000585 break;
sewardj8badbaa2007-05-08 09:20:25 +0000586 case Ev_Dr:
587 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
588 ppIRExpr(ev->Ev.Dr.ea);
sewardj5155dec2005-10-12 10:09:23 +0000589 VG_(printf)("\n");
590 break;
sewardj8badbaa2007-05-08 09:20:25 +0000591 case Ev_Dw:
592 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
593 ppIRExpr(ev->Ev.Dw.ea);
sewardj5155dec2005-10-12 10:09:23 +0000594 VG_(printf)("\n");
595 break;
sewardj8badbaa2007-05-08 09:20:25 +0000596 case Ev_Dm:
597 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
598 ppIRExpr(ev->Ev.Dm.ea);
599 VG_(printf)("\n");
600 break;
601 case Ev_Bc:
602 VG_(printf)("Bc %p GA=", ev->inode);
603 ppIRExpr(ev->Ev.Bc.taken);
604 VG_(printf)("\n");
605 break;
606 case Ev_Bi:
607 VG_(printf)("Bi %p DST=", ev->inode);
608 ppIRExpr(ev->Ev.Bi.dst);
sewardj5155dec2005-10-12 10:09:23 +0000609 VG_(printf)("\n");
610 break;
611 default:
612 tl_assert(0);
613 break;
614 }
njn6a3009b2005-03-20 00:20:06 +0000615}
616
njnfd9f6222005-10-16 00:17:37 +0000617// Reserve and initialise an InstrInfo for the first mention of a new insn.
618static
619InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
njn6a3009b2005-03-20 00:20:06 +0000620{
njnd3bef4f2005-10-15 17:46:18 +0000621 InstrInfo* i_node;
sewardj0b9d74a2006-12-24 02:24:11 +0000622 tl_assert(cgs->sbInfo_i >= 0);
623 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
624 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
njnfd9f6222005-10-16 00:17:37 +0000625 i_node->instr_addr = instr_addr;
626 i_node->instr_len = instr_len;
627 i_node->parent = get_lineCC(instr_addr);
sewardj0b9d74a2006-12-24 02:24:11 +0000628 cgs->sbInfo_i++;
sewardj5155dec2005-10-12 10:09:23 +0000629 return i_node;
630}
sewardj17a56bf2005-03-21 01:35:02 +0000631
sewardj17a56bf2005-03-21 01:35:02 +0000632
/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->sbInfo. */
njn6a3009b2005-03-20 00:20:06 +0000636
sewardj5155dec2005-10-12 10:09:23 +0000637static void flushEvents ( CgState* cgs )
638{
njnd3bef4f2005-10-15 17:46:18 +0000639 Int i, regparms;
640 Char* helperName;
641 void* helperAddr;
642 IRExpr** argv;
643 IRExpr* i_node_expr;
njnd3bef4f2005-10-15 17:46:18 +0000644 IRDirty* di;
njnc285dca2005-10-15 22:07:28 +0000645 Event* ev;
646 Event* ev2;
647 Event* ev3;
njn6a3009b2005-03-20 00:20:06 +0000648
sewardj5155dec2005-10-12 10:09:23 +0000649 i = 0;
650 while (i < cgs->events_used) {
njn6a3009b2005-03-20 00:20:06 +0000651
sewardj5155dec2005-10-12 10:09:23 +0000652 helperName = NULL;
653 helperAddr = NULL;
654 argv = NULL;
655 regparms = 0;
656
657 /* generate IR to notify event i and possibly the ones
658 immediately following it. */
659 tl_assert(i >= 0 && i < cgs->events_used);
njnc285dca2005-10-15 22:07:28 +0000660
661 ev = &cgs->events[i];
662 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
663 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
664
sewardj5155dec2005-10-12 10:09:23 +0000665 if (DEBUG_CG) {
666 VG_(printf)(" flush ");
njnc285dca2005-10-15 22:07:28 +0000667 showEvent( ev );
njn4f9c9342002-04-29 16:03:24 +0000668 }
sewardj5155dec2005-10-12 10:09:23 +0000669
njnfd9f6222005-10-16 00:17:37 +0000670 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
sewardj5155dec2005-10-12 10:09:23 +0000671
672 /* Decide on helper fn to call and args to pass it, and advance
673 i appropriately. */
sewardj8badbaa2007-05-08 09:20:25 +0000674 switch (ev->tag) {
675 case Ev_Ir:
676 /* Merge an Ir with a following Dr/Dm. */
677 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
678 /* Why is this true? It's because we're merging an Ir
679 with a following Dr or Dm. The Ir derives from the
680 instruction's IMark and the Dr/Dm from data
681 references which follow it. In short it holds
682 because each insn starts with an IMark, hence an
683 Ev_Ir, and so these Dr/Dm must pertain to the
684 immediately preceding Ir. Same applies to analogous
685 assertions in the subsequent cases. */
njnfd9f6222005-10-16 00:17:37 +0000686 tl_assert(ev2->inode == ev->inode);
sewardj5155dec2005-10-12 10:09:23 +0000687 helperName = "log_1I_1Dr_cache_access";
688 helperAddr = &log_1I_1Dr_cache_access;
689 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000690 get_Event_dea(ev2),
691 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000692 regparms = 3;
693 i += 2;
694 }
sewardj8badbaa2007-05-08 09:20:25 +0000695 /* Merge an Ir with a following Dw. */
sewardj5155dec2005-10-12 10:09:23 +0000696 else
sewardj8badbaa2007-05-08 09:20:25 +0000697 if (ev2 && ev2->tag == Ev_Dw) {
njnfd9f6222005-10-16 00:17:37 +0000698 tl_assert(ev2->inode == ev->inode);
sewardj5155dec2005-10-12 10:09:23 +0000699 helperName = "log_1I_1Dw_cache_access";
700 helperAddr = &log_1I_1Dw_cache_access;
701 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000702 get_Event_dea(ev2),
703 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
sewardj5155dec2005-10-12 10:09:23 +0000704 regparms = 3;
705 i += 2;
706 }
sewardj8badbaa2007-05-08 09:20:25 +0000707 /* Merge an Ir with two following Irs. */
sewardj5155dec2005-10-12 10:09:23 +0000708 else
sewardj8badbaa2007-05-08 09:20:25 +0000709 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
njnc285dca2005-10-15 22:07:28 +0000710 {
sewardj5155dec2005-10-12 10:09:23 +0000711 helperName = "log_3I_0D_cache_access";
712 helperAddr = &log_3I_0D_cache_access;
njnfd9f6222005-10-16 00:17:37 +0000713 argv = mkIRExprVec_3( i_node_expr,
714 mkIRExpr_HWord( (HWord)ev2->inode ),
715 mkIRExpr_HWord( (HWord)ev3->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000716 regparms = 3;
717 i += 3;
718 }
sewardj8badbaa2007-05-08 09:20:25 +0000719 /* Merge an Ir with one following Ir. */
sewardj5155dec2005-10-12 10:09:23 +0000720 else
sewardj8badbaa2007-05-08 09:20:25 +0000721 if (ev2 && ev2->tag == Ev_Ir) {
sewardj5155dec2005-10-12 10:09:23 +0000722 helperName = "log_2I_0D_cache_access";
723 helperAddr = &log_2I_0D_cache_access;
njnfd9f6222005-10-16 00:17:37 +0000724 argv = mkIRExprVec_2( i_node_expr,
725 mkIRExpr_HWord( (HWord)ev2->inode ) );
sewardj5155dec2005-10-12 10:09:23 +0000726 regparms = 2;
727 i += 2;
728 }
729 /* No merging possible; emit as-is. */
730 else {
731 helperName = "log_1I_0D_cache_access";
732 helperAddr = &log_1I_0D_cache_access;
733 argv = mkIRExprVec_1( i_node_expr );
734 regparms = 1;
735 i++;
736 }
737 break;
sewardj8badbaa2007-05-08 09:20:25 +0000738 case Ev_Dr:
739 case Ev_Dm:
740 /* Data read or modify */
sewardj5155dec2005-10-12 10:09:23 +0000741 helperName = "log_0I_1Dr_cache_access";
742 helperAddr = &log_0I_1Dr_cache_access;
743 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000744 get_Event_dea(ev),
745 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000746 regparms = 3;
747 i++;
748 break;
sewardj8badbaa2007-05-08 09:20:25 +0000749 case Ev_Dw:
750 /* Data write */
sewardj5155dec2005-10-12 10:09:23 +0000751 helperName = "log_0I_1Dw_cache_access";
752 helperAddr = &log_0I_1Dw_cache_access;
753 argv = mkIRExprVec_3( i_node_expr,
sewardj8badbaa2007-05-08 09:20:25 +0000754 get_Event_dea(ev),
755 mkIRExpr_HWord( get_Event_dszB(ev) ) );
sewardj5155dec2005-10-12 10:09:23 +0000756 regparms = 3;
757 i++;
758 break;
sewardj8badbaa2007-05-08 09:20:25 +0000759 case Ev_Bc:
760 /* Conditional branch */
761 helperName = "log_cond_branch";
762 helperAddr = &log_cond_branch;
763 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
764 regparms = 2;
765 i++;
766 break;
767 case Ev_Bi:
768 /* Branch to an unknown destination */
769 helperName = "log_ind_branch";
770 helperAddr = &log_ind_branch;
771 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
772 regparms = 2;
773 i++;
774 break;
sewardj5155dec2005-10-12 10:09:23 +0000775 default:
776 tl_assert(0);
777 }
778
779 /* Add the helper. */
780 tl_assert(helperName);
781 tl_assert(helperAddr);
782 tl_assert(argv);
sewardj5bb86822005-12-23 12:47:42 +0000783 di = unsafeIRDirty_0_N( regparms,
784 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
785 argv );
sewardj0b9d74a2006-12-24 02:24:11 +0000786 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
njn4f9c9342002-04-29 16:03:24 +0000787 }
788
sewardj5155dec2005-10-12 10:09:23 +0000789 cgs->events_used = 0;
njn4f9c9342002-04-29 16:03:24 +0000790}
njn14d01ce2004-11-26 11:30:14 +0000791
njnfd9f6222005-10-16 00:17:37 +0000792static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
sewardj5155dec2005-10-12 10:09:23 +0000793{
794 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000795 if (cgs->events_used == N_EVENTS)
796 flushEvents(cgs);
797 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
798 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000799 init_Event(evt);
800 evt->tag = Ev_Ir;
njnfd9f6222005-10-16 00:17:37 +0000801 evt->inode = inode;
sewardj5155dec2005-10-12 10:09:23 +0000802 cgs->events_used++;
803}
804
njnfd9f6222005-10-16 00:17:37 +0000805static
806void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
sewardj5155dec2005-10-12 10:09:23 +0000807{
njnfd9f6222005-10-16 00:17:37 +0000808 Event* evt;
sewardj5155dec2005-10-12 10:09:23 +0000809 tl_assert(isIRAtom(ea));
njnfd9f6222005-10-16 00:17:37 +0000810 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
sewardj8badbaa2007-05-08 09:20:25 +0000811 if (!clo_cache_sim)
812 return;
njnfd9f6222005-10-16 00:17:37 +0000813 if (cgs->events_used == N_EVENTS)
814 flushEvents(cgs);
815 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
816 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000817 init_Event(evt);
818 evt->tag = Ev_Dr;
819 evt->inode = inode;
820 evt->Ev.Dr.szB = datasize;
821 evt->Ev.Dr.ea = ea;
njnfd9f6222005-10-16 00:17:37 +0000822 cgs->events_used++;
823}
sewardj5155dec2005-10-12 10:09:23 +0000824
njnfd9f6222005-10-16 00:17:37 +0000825static
826void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
827{
828 Event* lastEvt;
829 Event* evt;
830
831 tl_assert(isIRAtom(ea));
832 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
833
sewardj8badbaa2007-05-08 09:20:25 +0000834 if (!clo_cache_sim)
835 return;
836
njnfd9f6222005-10-16 00:17:37 +0000837 /* Is it possible to merge this write with the preceding read? */
838 lastEvt = &cgs->events[cgs->events_used-1];
sewardj5155dec2005-10-12 10:09:23 +0000839 if (cgs->events_used > 0
sewardj8badbaa2007-05-08 09:20:25 +0000840 && lastEvt->tag == Ev_Dr
841 && lastEvt->Ev.Dr.szB == datasize
842 && lastEvt->inode == inode
843 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
njnfd9f6222005-10-16 00:17:37 +0000844 {
sewardj8badbaa2007-05-08 09:20:25 +0000845 lastEvt->tag = Ev_Dm;
sewardj5155dec2005-10-12 10:09:23 +0000846 return;
847 }
848
849 /* No. Add as normal. */
850 if (cgs->events_used == N_EVENTS)
851 flushEvents(cgs);
852 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
njnfd9f6222005-10-16 00:17:37 +0000853 evt = &cgs->events[cgs->events_used];
sewardj8badbaa2007-05-08 09:20:25 +0000854 init_Event(evt);
855 evt->tag = Ev_Dw;
856 evt->inode = inode;
857 evt->Ev.Dw.szB = datasize;
858 evt->Ev.Dw.ea = ea;
859 cgs->events_used++;
860}
861
862static
863void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
864{
865 Event* evt;
866 tl_assert(isIRAtom(guard));
867 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
868 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
869 if (!clo_branch_sim)
870 return;
871 if (cgs->events_used == N_EVENTS)
872 flushEvents(cgs);
873 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
874 evt = &cgs->events[cgs->events_used];
875 init_Event(evt);
876 evt->tag = Ev_Bc;
877 evt->inode = inode;
878 evt->Ev.Bc.taken = guard;
879 cgs->events_used++;
880}
881
882static
883void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
884{
885 Event* evt;
886 tl_assert(isIRAtom(whereTo));
887 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
888 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
889 if (!clo_branch_sim)
890 return;
891 if (cgs->events_used == N_EVENTS)
892 flushEvents(cgs);
893 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
894 evt = &cgs->events[cgs->events_used];
895 init_Event(evt);
896 evt->tag = Ev_Bi;
897 evt->inode = inode;
898 evt->Ev.Bi.dst = whereTo;
sewardj5155dec2005-10-12 10:09:23 +0000899 cgs->events_used++;
900}
901
902////////////////////////////////////////////////////////////
903
904
sewardj4ba057c2005-10-18 12:04:18 +0000905static
sewardj0b9d74a2006-12-24 02:24:11 +0000906IRSB* cg_instrument ( VgCallbackClosure* closure,
907 IRSB* sbIn,
sewardj461df9c2006-01-17 02:06:39 +0000908 VexGuestLayout* layout,
909 VexGuestExtents* vge,
sewardj4ba057c2005-10-18 12:04:18 +0000910 IRType gWordTy, IRType hWordTy )
njn14d01ce2004-11-26 11:30:14 +0000911{
njnfd9f6222005-10-16 00:17:37 +0000912 Int i, isize;
sewardj5155dec2005-10-12 10:09:23 +0000913 IRStmt* st;
914 Addr64 cia; /* address of current insn */
915 CgState cgs;
sewardj0b9d74a2006-12-24 02:24:11 +0000916 IRTypeEnv* tyenv = sbIn->tyenv;
njnfd9f6222005-10-16 00:17:37 +0000917 InstrInfo* curr_inode = NULL;
sewardj5155dec2005-10-12 10:09:23 +0000918
sewardjd54babf2005-03-21 00:55:49 +0000919 if (gWordTy != hWordTy) {
920 /* We don't currently support this case. */
921 VG_(tool_panic)("host/guest word size mismatch");
922 }
923
sewardj0b9d74a2006-12-24 02:24:11 +0000924 // Set up new SB
925 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
njn6a3009b2005-03-20 00:20:06 +0000926
sewardja9f538c2005-10-23 12:06:55 +0000927 // Copy verbatim any IR preamble preceding the first IMark
njn6a3009b2005-03-20 00:20:06 +0000928 i = 0;
sewardj0b9d74a2006-12-24 02:24:11 +0000929 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
930 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
sewardja9f538c2005-10-23 12:06:55 +0000931 i++;
932 }
933
934 // Get the first statement, and initial cia from it
sewardj0b9d74a2006-12-24 02:24:11 +0000935 tl_assert(sbIn->stmts_used > 0);
936 tl_assert(i < sbIn->stmts_used);
937 st = sbIn->stmts[i];
njn6a3009b2005-03-20 00:20:06 +0000938 tl_assert(Ist_IMark == st->tag);
sewardj8badbaa2007-05-08 09:20:25 +0000939
940 cia = st->Ist.IMark.addr;
941 isize = st->Ist.IMark.len;
942 // If Vex fails to decode an instruction, the size will be zero.
943 // Pretend otherwise.
944 if (isize == 0) isize = VG_MIN_INSTR_SZB;
njn6a3009b2005-03-20 00:20:06 +0000945
sewardj5155dec2005-10-12 10:09:23 +0000946 // Set up running state and get block info
sewardj3a384b32006-01-22 01:12:51 +0000947 tl_assert(closure->readdr == vge->base[0]);
sewardj5155dec2005-10-12 10:09:23 +0000948 cgs.events_used = 0;
sewardj0b9d74a2006-12-24 02:24:11 +0000949 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
950 cgs.sbInfo_i = 0;
njn6a3009b2005-03-20 00:20:06 +0000951
sewardj5155dec2005-10-12 10:09:23 +0000952 if (DEBUG_CG)
953 VG_(printf)("\n\n---------- cg_instrument ----------\n");
njn6a3009b2005-03-20 00:20:06 +0000954
njnfd9f6222005-10-16 00:17:37 +0000955 // Traverse the block, initialising inodes, adding events and flushing as
956 // necessary.
sewardj0b9d74a2006-12-24 02:24:11 +0000957 for (/*use current i*/; i < sbIn->stmts_used; i++) {
njn6a3009b2005-03-20 00:20:06 +0000958
sewardj0b9d74a2006-12-24 02:24:11 +0000959 st = sbIn->stmts[i];
sewardj5155dec2005-10-12 10:09:23 +0000960 tl_assert(isFlatIRStmt(st));
njnb3507ea2005-08-02 23:07:02 +0000961
sewardj5155dec2005-10-12 10:09:23 +0000962 switch (st->tag) {
963 case Ist_NoOp:
964 case Ist_AbiHint:
965 case Ist_Put:
966 case Ist_PutI:
sewardj72d75132007-11-09 23:06:35 +0000967 case Ist_MBE:
sewardj5155dec2005-10-12 10:09:23 +0000968 break;
njn20677cc2005-08-12 23:47:51 +0000969
sewardj5155dec2005-10-12 10:09:23 +0000970 case Ist_IMark:
njnfd9f6222005-10-16 00:17:37 +0000971 cia = st->Ist.IMark.addr;
972 isize = st->Ist.IMark.len;
973
974 // If Vex fails to decode an instruction, the size will be zero.
975 // Pretend otherwise.
976 if (isize == 0) isize = VG_MIN_INSTR_SZB;
977
njna5ad9ba2005-11-10 15:20:37 +0000978 // Sanity-check size.
979 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
980 || VG_CLREQ_SZB == isize );
njnfd9f6222005-10-16 00:17:37 +0000981
982 // Get space for and init the inode, record it as the current one.
983 // Subsequent Dr/Dw/Dm events from the same instruction will
984 // also use it.
985 curr_inode = setup_InstrInfo(&cgs, cia, isize);
986
987 addEvent_Ir( &cgs, curr_inode );
sewardj5155dec2005-10-12 10:09:23 +0000988 break;
989
sewardj0b9d74a2006-12-24 02:24:11 +0000990 case Ist_WrTmp: {
991 IRExpr* data = st->Ist.WrTmp.data;
sewardj5155dec2005-10-12 10:09:23 +0000992 if (data->tag == Iex_Load) {
993 IRExpr* aexpr = data->Iex.Load.addr;
sewardj5155dec2005-10-12 10:09:23 +0000994 // Note also, endianness info is ignored. I guess
995 // that's not interesting.
njnfd9f6222005-10-16 00:17:37 +0000996 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
997 aexpr );
sewardj5155dec2005-10-12 10:09:23 +0000998 }
999 break;
njnb3507ea2005-08-02 23:07:02 +00001000 }
1001
sewardj5155dec2005-10-12 10:09:23 +00001002 case Ist_Store: {
1003 IRExpr* data = st->Ist.Store.data;
1004 IRExpr* aexpr = st->Ist.Store.addr;
njnfd9f6222005-10-16 00:17:37 +00001005 addEvent_Dw( &cgs, curr_inode,
1006 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
sewardj5155dec2005-10-12 10:09:23 +00001007 break;
1008 }
njnb3507ea2005-08-02 23:07:02 +00001009
sewardj5155dec2005-10-12 10:09:23 +00001010 case Ist_Dirty: {
1011 Int dataSize;
1012 IRDirty* d = st->Ist.Dirty.details;
1013 if (d->mFx != Ifx_None) {
njnfd9f6222005-10-16 00:17:37 +00001014 /* This dirty helper accesses memory. Collect the details. */
sewardj5155dec2005-10-12 10:09:23 +00001015 tl_assert(d->mAddr != NULL);
1016 tl_assert(d->mSize != 0);
1017 dataSize = d->mSize;
1018 // Large (eg. 28B, 108B, 512B on x86) data-sized
1019 // instructions will be done inaccurately, but they're
1020 // very rare and this avoids errors from hitting more
1021 // than two cache lines in the simulation.
1022 if (dataSize > MIN_LINE_SIZE)
1023 dataSize = MIN_LINE_SIZE;
1024 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001025 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001026 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
njnfd9f6222005-10-16 00:17:37 +00001027 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
sewardj5155dec2005-10-12 10:09:23 +00001028 } else {
1029 tl_assert(d->mAddr == NULL);
1030 tl_assert(d->mSize == 0);
1031 }
1032 break;
1033 }
njn6a3009b2005-03-20 00:20:06 +00001034
sewardj1c0ce7a2009-07-01 08:10:49 +00001035 case Ist_CAS: {
1036 /* We treat it as a read and a write of the location. I
1037 think that is the same behaviour as it was before IRCAS
1038 was introduced, since prior to that point, the Vex
1039 front ends would translate a lock-prefixed instruction
1040 into a (normal) read followed by a (normal) write. */
1041 Int dataSize;
1042 IRCAS* cas = st->Ist.CAS.details;
1043 tl_assert(cas->addr != NULL);
1044 tl_assert(cas->dataLo != NULL);
1045 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1046 if (cas->dataHi != NULL)
1047 dataSize *= 2; /* since it's a doubleword-CAS */
1048 /* I don't think this can ever happen, but play safe. */
1049 if (dataSize > MIN_LINE_SIZE)
1050 dataSize = MIN_LINE_SIZE;
1051 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1052 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1053 break;
1054 }
1055
sewardjdb5907d2009-11-26 17:20:21 +00001056 case Ist_LLSC: {
1057 IRType dataTy;
1058 if (st->Ist.LLSC.storedata == NULL) {
1059 /* LL */
1060 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1061 addEvent_Dr( &cgs, curr_inode,
1062 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1063 } else {
1064 /* SC */
1065 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1066 addEvent_Dw( &cgs, curr_inode,
1067 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1068 }
1069 break;
1070 }
1071
sewardj8badbaa2007-05-08 09:20:25 +00001072 case Ist_Exit: {
weidendo374a48f2010-09-02 17:06:49 +00001073 // call branch predictor only if this is a branch in guest code
1074 if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1075 (st->Ist.Exit.jk == Ijk_Call) ||
1076 (st->Ist.Exit.jk == Ijk_Ret) )
1077 {
1078 /* Stuff to widen the guard expression to a host word, so
1079 we can pass it to the branch predictor simulation
1080 functions easily. */
1081 Bool inverted;
1082 Addr64 nia, sea;
1083 IRConst* dst;
1084 IRType tyW = hWordTy;
1085 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1086 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1087 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1088 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1089 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1090 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1091 : IRExpr_Const(IRConst_U64(1));
sewardj8badbaa2007-05-08 09:20:25 +00001092
weidendo374a48f2010-09-02 17:06:49 +00001093 /* First we need to figure out whether the side exit got
1094 inverted by the ir optimiser. To do that, figure out
1095 the next (fallthrough) instruction's address and the
1096 side exit address and see if they are the same. */
1097 nia = cia + (Addr64)isize;
1098 if (tyW == Ity_I32)
1099 nia &= 0xFFFFFFFFULL;
sewardj8badbaa2007-05-08 09:20:25 +00001100
weidendo374a48f2010-09-02 17:06:49 +00001101 /* Side exit address */
1102 dst = st->Ist.Exit.dst;
1103 if (tyW == Ity_I32) {
1104 tl_assert(dst->tag == Ico_U32);
1105 sea = (Addr64)(UInt)dst->Ico.U32;
1106 } else {
1107 tl_assert(tyW == Ity_I64);
1108 tl_assert(dst->tag == Ico_U64);
1109 sea = dst->Ico.U64;
1110 }
1111
1112 inverted = nia == sea;
1113
1114 /* Widen the guard expression. */
1115 addStmtToIRSB( cgs.sbOut,
1116 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1117 addStmtToIRSB( cgs.sbOut,
1118 IRStmt_WrTmp( guardW,
1119 IRExpr_Unop(widen,
1120 IRExpr_RdTmp(guard1))) );
1121 /* If the exit is inverted, invert the sense of the guard. */
1122 addStmtToIRSB(
1123 cgs.sbOut,
1124 IRStmt_WrTmp(
1125 guard,
1126 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1127 : IRExpr_RdTmp(guardW)
1128 ));
1129 /* And post the event. */
1130 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
sewardj8badbaa2007-05-08 09:20:25 +00001131 }
1132
sewardj5155dec2005-10-12 10:09:23 +00001133 /* We may never reach the next statement, so need to flush
1134 all outstanding transactions now. */
1135 flushEvents( &cgs );
1136 break;
sewardj8badbaa2007-05-08 09:20:25 +00001137 }
sewardj5155dec2005-10-12 10:09:23 +00001138
1139 default:
1140 tl_assert(0);
1141 break;
njnb3507ea2005-08-02 23:07:02 +00001142 }
njn6a3009b2005-03-20 00:20:06 +00001143
sewardj5155dec2005-10-12 10:09:23 +00001144 /* Copy the original statement */
sewardj0b9d74a2006-12-24 02:24:11 +00001145 addStmtToIRSB( cgs.sbOut, st );
njn6a3009b2005-03-20 00:20:06 +00001146
sewardj5155dec2005-10-12 10:09:23 +00001147 if (DEBUG_CG) {
1148 ppIRStmt(st);
1149 VG_(printf)("\n");
1150 }
1151 }
1152
sewardj8badbaa2007-05-08 09:20:25 +00001153 /* Deal with branches to unknown destinations. Except ignore ones
1154 which are function returns as we assume the return stack
1155 predictor never mispredicts. */
weidendo374a48f2010-09-02 17:06:49 +00001156 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
sewardj8badbaa2007-05-08 09:20:25 +00001157 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1158 switch (sbIn->next->tag) {
1159 case Iex_Const:
1160 break; /* boring - branch to known address */
1161 case Iex_RdTmp:
1162 /* looks like an indirect branch (branch to unknown) */
1163 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1164 break;
1165 default:
1166 /* shouldn't happen - if the incoming IR is properly
1167 flattened, should only have tmp and const cases to
1168 consider. */
1169 tl_assert(0);
1170 }
1171 }
1172
sewardj5155dec2005-10-12 10:09:23 +00001173 /* At the end of the bb. Flush outstandings. */
sewardj5155dec2005-10-12 10:09:23 +00001174 flushEvents( &cgs );
1175
sewardj5155dec2005-10-12 10:09:23 +00001176 /* done. stay sane ... */
sewardj0b9d74a2006-12-24 02:24:11 +00001177 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
sewardj5155dec2005-10-12 10:09:23 +00001178
1179 if (DEBUG_CG) {
1180 VG_(printf)( "goto {");
sewardj0b9d74a2006-12-24 02:24:11 +00001181 ppIRJumpKind(sbIn->jumpkind);
sewardj5155dec2005-10-12 10:09:23 +00001182 VG_(printf)( "} ");
sewardj0b9d74a2006-12-24 02:24:11 +00001183 ppIRExpr( sbIn->next );
sewardj5155dec2005-10-12 10:09:23 +00001184 VG_(printf)( "}\n");
1185 }
1186
sewardj0b9d74a2006-12-24 02:24:11 +00001187 return cgs.sbOut;
njn14d01ce2004-11-26 11:30:14 +00001188}
njn4f9c9342002-04-29 16:03:24 +00001189
1190/*------------------------------------------------------------*/
nethercoteb35a8b92004-09-11 16:45:27 +00001191/*--- Cache configuration ---*/
njn4f9c9342002-04-29 16:03:24 +00001192/*------------------------------------------------------------*/
1193
sewardjb5f6f512005-03-10 23:59:00 +00001194#define UNDEFINED_CACHE { -1, -1, -1 }
njn25e49d8e72002-09-23 09:36:25 +00001195
1196static cache_t clo_I1_cache = UNDEFINED_CACHE;
1197static cache_t clo_D1_cache = UNDEFINED_CACHE;
1198static cache_t clo_L2_cache = UNDEFINED_CACHE;
1199
njnb1cc5d62010-07-06 04:05:23 +00001200// Checks cache config is ok. Returns NULL if ok, or a pointer to an error
1201// string otherwise.
1202static Char* check_cache(cache_t* cache)
njn7cf0bd32002-06-08 13:36:03 +00001203{
njnb1cc5d62010-07-06 04:05:23 +00001204 // Simulator requires set count to be a power of two.
1205 if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
1206 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
1207 {
1208 return "Cache set count is not a power of two.\n";
njn7cf0bd32002-06-08 13:36:03 +00001209 }
1210
njnb1cc5d62010-07-06 04:05:23 +00001211 // Simulator requires line size to be a power of two.
sewardj07133bf2002-06-13 10:25:56 +00001212 if (-1 == VG_(log2)(cache->line_size)) {
njnb1cc5d62010-07-06 04:05:23 +00001213 return "Cache line size is not a power of two.\n";
njn7cf0bd32002-06-08 13:36:03 +00001214 }
1215
njn6a3009b2005-03-20 00:20:06 +00001216 // Then check line size >= 16 -- any smaller and a single instruction could
1217 // straddle three cache lines, which breaks a simulation assertion and is
1218 // stupid anyway.
njn7cf0bd32002-06-08 13:36:03 +00001219 if (cache->line_size < MIN_LINE_SIZE) {
njnb1cc5d62010-07-06 04:05:23 +00001220 return "Cache line size is too small.\n";
njn7cf0bd32002-06-08 13:36:03 +00001221 }
1222
1223 /* Then check cache size > line size (causes seg faults if not). */
1224 if (cache->size <= cache->line_size) {
njnb1cc5d62010-07-06 04:05:23 +00001225 return "Cache size <= line size.\n";
njn7cf0bd32002-06-08 13:36:03 +00001226 }
1227
1228 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1229 if (cache->assoc > (cache->size / cache->line_size)) {
njnb1cc5d62010-07-06 04:05:23 +00001230 return "Cache associativity > (size / line size).\n";
njn7cf0bd32002-06-08 13:36:03 +00001231 }
njnb1cc5d62010-07-06 04:05:23 +00001232
1233 return NULL;
njn7cf0bd32002-06-08 13:36:03 +00001234}
1235
sewardj07133bf2002-06-13 10:25:56 +00001236static
nethercoteb35a8b92004-09-11 16:45:27 +00001237void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001238{
nethercote9313ac42004-07-06 21:54:20 +00001239#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1240
njnb1cc5d62010-07-06 04:05:23 +00001241 Char* checkRes;
nethercote9313ac42004-07-06 21:54:20 +00001242
nethercoteb35a8b92004-09-11 16:45:27 +00001243 // Count how many were defined on the command line.
njnb1cc5d62010-07-06 04:05:23 +00001244 Bool all_caches_clo_defined =
1245 (DEFINED(clo_I1_cache) &&
1246 DEFINED(clo_D1_cache) &&
1247 DEFINED(clo_L2_cache));
njn7cf0bd32002-06-08 13:36:03 +00001248
njna1d1a642004-11-26 18:36:02 +00001249 // Set the cache config (using auto-detection, if supported by the
njnb1cc5d62010-07-06 04:05:23 +00001250 // architecture).
1251 VG_(configure_caches)( I1c, D1c, L2c, all_caches_clo_defined );
sewardjb1a77a42002-07-13 13:31:20 +00001252
njnb1cc5d62010-07-06 04:05:23 +00001253 // Check the default/auto-detected values.
1254 checkRes = check_cache(I1c); tl_assert(!checkRes);
1255 checkRes = check_cache(D1c); tl_assert(!checkRes);
1256 checkRes = check_cache(L2c); tl_assert(!checkRes);
1257
1258 // Then replace with any defined on the command line. (Already checked in
1259 // parse_cache_opt().)
nethercoteb35a8b92004-09-11 16:45:27 +00001260 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
1261 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
1262 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
njn7cf0bd32002-06-08 13:36:03 +00001263
njn6f74a7e2009-03-12 00:06:45 +00001264 if (VG_(clo_verbosity) >= 2) {
sewardjb2c985b2009-07-15 14:51:17 +00001265 VG_(umsg)("Cache configuration used:\n");
1266 VG_(umsg)(" I1: %dB, %d-way, %dB lines\n",
1267 I1c->size, I1c->assoc, I1c->line_size);
1268 VG_(umsg)(" D1: %dB, %d-way, %dB lines\n",
1269 D1c->size, D1c->assoc, D1c->line_size);
1270 VG_(umsg)(" L2: %dB, %d-way, %dB lines\n",
1271 L2c->size, L2c->assoc, L2c->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001272 }
nethercote9313ac42004-07-06 21:54:20 +00001273#undef CMD_LINE_DEFINED
njn7cf0bd32002-06-08 13:36:03 +00001274}
1275
njn4f9c9342002-04-29 16:03:24 +00001276/*------------------------------------------------------------*/
njn51d827b2005-05-09 01:02:08 +00001277/*--- cg_fini() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001278/*------------------------------------------------------------*/
1279
nethercote9313ac42004-07-06 21:54:20 +00001280// Total reads/writes/misses. Calculated during CC traversal at the end.
1281// All auto-zeroed.
sewardj8badbaa2007-05-08 09:20:25 +00001282static CacheCC Ir_total;
1283static CacheCC Dr_total;
1284static CacheCC Dw_total;
1285static BranchCC Bc_total;
1286static BranchCC Bi_total;
nethercote9313ac42004-07-06 21:54:20 +00001287
nethercote9313ac42004-07-06 21:54:20 +00001288static void fprint_CC_table_and_calc_totals(void)
1289{
njnd3bef4f2005-10-15 17:46:18 +00001290 Int i, fd;
sewardj92645592005-07-23 09:18:34 +00001291 SysRes sres;
njnd3bef4f2005-10-15 17:46:18 +00001292 Char buf[512], *currFile = NULL, *currFn = NULL;
1293 LineCC* lineCC;
njn4f9c9342002-04-29 16:03:24 +00001294
njn7064fb22008-05-29 23:09:52 +00001295 // Setup output filename. Nb: it's important to do this now, ie. as late
1296 // as possible. If we do it at start-up and the program forks and the
1297 // output file format string contains a %p (pid) specifier, both the
1298 // parent and child will incorrectly write to the same file; this
1299 // happened in 3.3.0.
1300 Char* cachegrind_out_file =
1301 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1302
sewardj92645592005-07-23 09:18:34 +00001303 sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1304 VKI_S_IRUSR|VKI_S_IWUSR);
njncda2f0f2009-05-18 02:12:08 +00001305 if (sr_isError(sres)) {
nethercote9313ac42004-07-06 21:54:20 +00001306 // If the file can't be opened for whatever reason (conflict
1307 // between multiple cachegrinded processes?), give up now.
sewardjb2c985b2009-07-15 14:51:17 +00001308 VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1309 cachegrind_out_file );
1310 VG_(umsg)(" ... so simulation results will be missing.\n");
njn7064fb22008-05-29 23:09:52 +00001311 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001312 return;
sewardj92645592005-07-23 09:18:34 +00001313 } else {
njncda2f0f2009-05-18 02:12:08 +00001314 fd = sr_Res(sres);
njn7064fb22008-05-29 23:09:52 +00001315 VG_(free)(cachegrind_out_file);
sewardj0744b6c2002-12-11 00:45:42 +00001316 }
njn4f9c9342002-04-29 16:03:24 +00001317
nethercote9313ac42004-07-06 21:54:20 +00001318 // "desc:" lines (giving I1/D1/L2 cache configuration). The spaces after
1319 // the 2nd colon makes cg_annotate's output look nicer.
1320 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1321 "desc: D1 cache: %s\n"
1322 "desc: L2 cache: %s\n",
1323 I1.desc_line, D1.desc_line, L2.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001324 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001325
nethercote9313ac42004-07-06 21:54:20 +00001326 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001327 VG_(strcpy)(buf, "cmd:");
1328 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
sewardj45f4e7c2005-09-27 19:20:21 +00001329 if (VG_(args_the_exename)) {
1330 VG_(write)(fd, " ", 1);
1331 VG_(write)(fd, VG_(args_the_exename),
1332 VG_(strlen)( VG_(args_the_exename) ));
1333 }
sewardj14c7cc52007-02-25 15:08:24 +00001334 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1335 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1336 if (arg) {
sewardj45f4e7c2005-09-27 19:20:21 +00001337 VG_(write)(fd, " ", 1);
sewardj14c7cc52007-02-25 15:08:24 +00001338 VG_(write)(fd, arg, VG_(strlen)( arg ));
sewardj45f4e7c2005-09-27 19:20:21 +00001339 }
njn4f9c9342002-04-29 16:03:24 +00001340 }
nethercote9313ac42004-07-06 21:54:20 +00001341 // "events:" line
sewardj8badbaa2007-05-08 09:20:25 +00001342 if (clo_cache_sim && clo_branch_sim) {
1343 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
1344 "Bc Bcm Bi Bim\n");
1345 }
1346 else if (clo_cache_sim && !clo_branch_sim) {
1347 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw "
1348 "\n");
1349 }
1350 else if (!clo_cache_sim && clo_branch_sim) {
1351 VG_(sprintf)(buf, "\nevents: Ir "
1352 "Bc Bcm Bi Bim\n");
1353 }
njne90711c2010-09-27 01:04:20 +00001354 else {
1355 VG_(sprintf)(buf, "\nevents: Ir\n");
1356 }
sewardj8badbaa2007-05-08 09:20:25 +00001357
njn4f9c9342002-04-29 16:03:24 +00001358 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1359
njnd3bef4f2005-10-15 17:46:18 +00001360 // Traverse every lineCC
njne2a9ad32007-09-17 05:30:48 +00001361 VG_(OSetGen_ResetIter)(CC_table);
1362 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
njn4311fe62005-12-08 23:18:50 +00001363 Bool just_hit_a_new_file = False;
njnd3bef4f2005-10-15 17:46:18 +00001364 // If we've hit a new file, print a "fl=" line. Note that because
1365 // each string is stored exactly once in the string table, we can use
1366 // pointer comparison rather than strcmp() to test for equality, which
1367 // is good because most of the time the comparisons are equal and so
njn4311fe62005-12-08 23:18:50 +00001368 // the whole strings would have to be checked.
njnd3bef4f2005-10-15 17:46:18 +00001369 if ( lineCC->loc.file != currFile ) {
1370 currFile = lineCC->loc.file;
1371 VG_(sprintf)(buf, "fl=%s\n", currFile);
njn4f9c9342002-04-29 16:03:24 +00001372 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njnd3bef4f2005-10-15 17:46:18 +00001373 distinct_files++;
njn4311fe62005-12-08 23:18:50 +00001374 just_hit_a_new_file = True;
njn4f9c9342002-04-29 16:03:24 +00001375 }
njn4311fe62005-12-08 23:18:50 +00001376 // If we've hit a new function, print a "fn=" line. We know to do
1377 // this when the function name changes, and also every time we hit a
1378 // new file (in which case the new function name might be the same as
1379 // in the old file, hence the just_hit_a_new_file test).
1380 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
njnd3bef4f2005-10-15 17:46:18 +00001381 currFn = lineCC->loc.fn;
1382 VG_(sprintf)(buf, "fn=%s\n", currFn);
1383 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1384 distinct_fns++;
1385 }
1386
1387 // Print the LineCC
sewardj8badbaa2007-05-08 09:20:25 +00001388 if (clo_cache_sim && clo_branch_sim) {
1389 VG_(sprintf)(buf, "%u %llu %llu %llu"
1390 " %llu %llu %llu"
1391 " %llu %llu %llu"
1392 " %llu %llu %llu %llu\n",
1393 lineCC->loc.line,
1394 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2,
1395 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
1396 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2,
1397 lineCC->Bc.b, lineCC->Bc.mp,
1398 lineCC->Bi.b, lineCC->Bi.mp);
1399 }
1400 else if (clo_cache_sim && !clo_branch_sim) {
1401 VG_(sprintf)(buf, "%u %llu %llu %llu"
1402 " %llu %llu %llu"
1403 " %llu %llu %llu\n",
1404 lineCC->loc.line,
1405 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.m2,
1406 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.m2,
1407 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.m2);
1408 }
1409 else if (!clo_cache_sim && clo_branch_sim) {
1410 VG_(sprintf)(buf, "%u %llu"
1411 " %llu %llu %llu %llu\n",
1412 lineCC->loc.line,
1413 lineCC->Ir.a,
1414 lineCC->Bc.b, lineCC->Bc.mp,
1415 lineCC->Bi.b, lineCC->Bi.mp);
1416 }
njne90711c2010-09-27 01:04:20 +00001417 else {
1418 VG_(sprintf)(buf, "%u %llu\n",
1419 lineCC->loc.line,
1420 lineCC->Ir.a);
1421 }
sewardj8badbaa2007-05-08 09:20:25 +00001422
njnd3bef4f2005-10-15 17:46:18 +00001423 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1424
1425 // Update summary stats
1426 Ir_total.a += lineCC->Ir.a;
1427 Ir_total.m1 += lineCC->Ir.m1;
1428 Ir_total.m2 += lineCC->Ir.m2;
1429 Dr_total.a += lineCC->Dr.a;
1430 Dr_total.m1 += lineCC->Dr.m1;
1431 Dr_total.m2 += lineCC->Dr.m2;
1432 Dw_total.a += lineCC->Dw.a;
1433 Dw_total.m1 += lineCC->Dw.m1;
1434 Dw_total.m2 += lineCC->Dw.m2;
sewardj8badbaa2007-05-08 09:20:25 +00001435 Bc_total.b += lineCC->Bc.b;
1436 Bc_total.mp += lineCC->Bc.mp;
1437 Bi_total.b += lineCC->Bi.b;
1438 Bi_total.mp += lineCC->Bi.mp;
njnd3bef4f2005-10-15 17:46:18 +00001439
1440 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +00001441 }
1442
nethercote9313ac42004-07-06 21:54:20 +00001443 // Summary stats must come after rest of table, since we calculate them
sewardj8badbaa2007-05-08 09:20:25 +00001444 // during traversal. */
1445 if (clo_cache_sim && clo_branch_sim) {
1446 VG_(sprintf)(buf, "summary:"
1447 " %llu %llu %llu"
1448 " %llu %llu %llu"
1449 " %llu %llu %llu"
1450 " %llu %llu %llu %llu\n",
1451 Ir_total.a, Ir_total.m1, Ir_total.m2,
1452 Dr_total.a, Dr_total.m1, Dr_total.m2,
1453 Dw_total.a, Dw_total.m1, Dw_total.m2,
1454 Bc_total.b, Bc_total.mp,
1455 Bi_total.b, Bi_total.mp);
1456 }
1457 else if (clo_cache_sim && !clo_branch_sim) {
1458 VG_(sprintf)(buf, "summary:"
1459 " %llu %llu %llu"
1460 " %llu %llu %llu"
1461 " %llu %llu %llu\n",
1462 Ir_total.a, Ir_total.m1, Ir_total.m2,
1463 Dr_total.a, Dr_total.m1, Dr_total.m2,
1464 Dw_total.a, Dw_total.m1, Dw_total.m2);
1465 }
1466 else if (!clo_cache_sim && clo_branch_sim) {
1467 VG_(sprintf)(buf, "summary:"
1468 " %llu"
1469 " %llu %llu %llu %llu\n",
1470 Ir_total.a,
1471 Bc_total.b, Bc_total.mp,
1472 Bi_total.b, Bi_total.mp);
1473 }
njne90711c2010-09-27 01:04:20 +00001474 else {
1475 VG_(sprintf)(buf, "summary:"
1476 " %llu\n",
1477 Ir_total.a);
1478 }
sewardj8badbaa2007-05-08 09:20:25 +00001479
njn4f9c9342002-04-29 16:03:24 +00001480 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1481 VG_(close)(fd);
1482}
1483
njn607adfc2003-09-30 14:15:44 +00001484static UInt ULong_width(ULong n)
njn4f9c9342002-04-29 16:03:24 +00001485{
njn607adfc2003-09-30 14:15:44 +00001486 UInt w = 0;
1487 while (n > 0) {
1488 n = n / 10;
1489 w++;
njn4f9c9342002-04-29 16:03:24 +00001490 }
sewardj46c59b12005-11-01 02:20:19 +00001491 if (w == 0) w = 1;
njn607adfc2003-09-30 14:15:44 +00001492 return w + (w-1)/3; // add space for commas
njn4f9c9342002-04-29 16:03:24 +00001493}
1494
njn51d827b2005-05-09 01:02:08 +00001495static void cg_fini(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001496{
njn1baf7db2006-04-18 22:34:48 +00001497 static Char buf1[128], buf2[128], buf3[128], buf4[123], fmt[128];
njn607adfc2003-09-30 14:15:44 +00001498
sewardj8badbaa2007-05-08 09:20:25 +00001499 CacheCC D_total;
1500 BranchCC B_total;
njn1d021fa2002-05-02 13:56:34 +00001501 ULong L2_total_m, L2_total_mr, L2_total_mw,
1502 L2_total, L2_total_r, L2_total_w;
njn4c245e52009-03-15 23:25:38 +00001503 Int l1, l2, l3;
njn4f9c9342002-04-29 16:03:24 +00001504
nethercote9313ac42004-07-06 21:54:20 +00001505 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001506
njn7cf0bd32002-06-08 13:36:03 +00001507 if (VG_(clo_verbosity) == 0)
1508 return;
1509
njnf76d27a2009-05-28 01:53:07 +00001510 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1511 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
njn4c245e52009-03-15 23:25:38 +00001512
njn4f9c9342002-04-29 16:03:24 +00001513 /* I cache results. Use the I_refs value to determine the first column
1514 * width. */
njn607adfc2003-09-30 14:15:44 +00001515 l1 = ULong_width(Ir_total.a);
njnf76d27a2009-05-28 01:53:07 +00001516 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1517 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
njn4f9c9342002-04-29 16:03:24 +00001518
njn607adfc2003-09-30 14:15:44 +00001519 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001520 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
njnd3bef4f2005-10-15 17:46:18 +00001521
sewardj8badbaa2007-05-08 09:20:25 +00001522 /* Always print this */
sewardjb2c985b2009-07-15 14:51:17 +00001523 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
njn4f9c9342002-04-29 16:03:24 +00001524
sewardj8badbaa2007-05-08 09:20:25 +00001525 /* If cache profiling is enabled, show D access numbers and all
1526 miss numbers */
1527 if (clo_cache_sim) {
sewardjb2c985b2009-07-15 14:51:17 +00001528 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
1529 VG_(umsg)(fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001530
sewardj8badbaa2007-05-08 09:20:25 +00001531 if (0 == Ir_total.a) Ir_total.a = 1;
1532 VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
sewardjb2c985b2009-07-15 14:51:17 +00001533 VG_(umsg)("I1 miss rate: %s\n", buf1);
njn4f9c9342002-04-29 16:03:24 +00001534
sewardj8badbaa2007-05-08 09:20:25 +00001535 VG_(percentify)(Ir_total.m2, Ir_total.a, 2, l1+1, buf1);
sewardjb2c985b2009-07-15 14:51:17 +00001536 VG_(umsg)("L2i miss rate: %s\n", buf1);
1537 VG_(umsg)("\n");
njnd3bef4f2005-10-15 17:46:18 +00001538
sewardj8badbaa2007-05-08 09:20:25 +00001539 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1540 * determine the width of columns 2 & 3. */
1541 D_total.a = Dr_total.a + Dw_total.a;
1542 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1543 D_total.m2 = Dr_total.m2 + Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001544
sewardj8badbaa2007-05-08 09:20:25 +00001545 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001546 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1547 l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001548
sewardjb2c985b2009-07-15 14:51:17 +00001549 VG_(umsg)(fmt, "D refs: ",
1550 D_total.a, Dr_total.a, Dw_total.a);
1551 VG_(umsg)(fmt, "D1 misses: ",
1552 D_total.m1, Dr_total.m1, Dw_total.m1);
1553 VG_(umsg)(fmt, "L2d misses: ",
1554 D_total.m2, Dr_total.m2, Dw_total.m2);
njnd3bef4f2005-10-15 17:46:18 +00001555
sewardj8badbaa2007-05-08 09:20:25 +00001556 if (0 == D_total.a) D_total.a = 1;
1557 if (0 == Dr_total.a) Dr_total.a = 1;
1558 if (0 == Dw_total.a) Dw_total.a = 1;
1559 VG_(percentify)( D_total.m1, D_total.a, 1, l1+1, buf1);
1560 VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
1561 VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001562 VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
njn4f9c9342002-04-29 16:03:24 +00001563
sewardj8badbaa2007-05-08 09:20:25 +00001564 VG_(percentify)( D_total.m2, D_total.a, 1, l1+1, buf1);
1565 VG_(percentify)(Dr_total.m2, Dr_total.a, 1, l2+1, buf2);
1566 VG_(percentify)(Dw_total.m2, Dw_total.a, 1, l3+1, buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001567 VG_(umsg)("L2d miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1568 VG_(umsg)("\n");
njn1d021fa2002-05-02 13:56:34 +00001569
sewardj8badbaa2007-05-08 09:20:25 +00001570 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001571
sewardj8badbaa2007-05-08 09:20:25 +00001572 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1573 L2_total_r = Dr_total.m1 + Ir_total.m1;
1574 L2_total_w = Dw_total.m1;
sewardjb2c985b2009-07-15 14:51:17 +00001575 VG_(umsg)(fmt, "L2 refs: ",
1576 L2_total, L2_total_r, L2_total_w);
njn4f9c9342002-04-29 16:03:24 +00001577
sewardj8badbaa2007-05-08 09:20:25 +00001578 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1579 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1580 L2_total_mw = Dw_total.m2;
sewardjb2c985b2009-07-15 14:51:17 +00001581 VG_(umsg)(fmt, "L2 misses: ",
1582 L2_total_m, L2_total_mr, L2_total_mw);
njnd3bef4f2005-10-15 17:46:18 +00001583
sewardj8badbaa2007-05-08 09:20:25 +00001584 VG_(percentify)(L2_total_m, (Ir_total.a + D_total.a), 1, l1+1, buf1);
1585 VG_(percentify)(L2_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
1586 VG_(percentify)(L2_total_mw, Dw_total.a, 1, l3+1, buf3);
sewardjb2c985b2009-07-15 14:51:17 +00001587 VG_(umsg)("L2 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardj8badbaa2007-05-08 09:20:25 +00001588 }
1589
1590 /* If branch profiling is enabled, show branch overall results. */
1591 if (clo_branch_sim) {
1592 /* Make format string, getting width right for numbers */
sewardjb2c985b2009-07-15 14:51:17 +00001593 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1594 l1, l2, l3);
sewardj8badbaa2007-05-08 09:20:25 +00001595
1596 if (0 == Bc_total.b) Bc_total.b = 1;
1597 if (0 == Bi_total.b) Bi_total.b = 1;
1598 B_total.b = Bc_total.b + Bi_total.b;
1599 B_total.mp = Bc_total.mp + Bi_total.mp;
1600
sewardjb2c985b2009-07-15 14:51:17 +00001601 VG_(umsg)("\n");
1602 VG_(umsg)(fmt, "Branches: ",
1603 B_total.b, Bc_total.b, Bi_total.b);
sewardj8badbaa2007-05-08 09:20:25 +00001604
sewardjb2c985b2009-07-15 14:51:17 +00001605 VG_(umsg)(fmt, "Mispredicts: ",
1606 B_total.mp, Bc_total.mp, Bi_total.mp);
sewardj8badbaa2007-05-08 09:20:25 +00001607
1608 VG_(percentify)(B_total.mp, B_total.b, 1, l1+1, buf1);
1609 VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
1610 VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
1611
sewardjb2c985b2009-07-15 14:51:17 +00001612 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
sewardj8badbaa2007-05-08 09:20:25 +00001613 }
njn4f9c9342002-04-29 16:03:24 +00001614
nethercote9313ac42004-07-06 21:54:20 +00001615 // Various stats
sewardj2d9e8742009-08-07 15:46:56 +00001616 if (VG_(clo_stats)) {
njn1baf7db2006-04-18 22:34:48 +00001617 Int debug_lookups = full_debugs + fn_debugs +
1618 file_line_debugs + no_debugs;
njnd3bef4f2005-10-15 17:46:18 +00001619
sewardjb2c985b2009-07-15 14:51:17 +00001620 VG_(dmsg)("\n");
1621 VG_(dmsg)("cachegrind: distinct files: %d\n", distinct_files);
1622 VG_(dmsg)("cachegrind: distinct fns: %d\n", distinct_fns);
1623 VG_(dmsg)("cachegrind: distinct lines: %d\n", distinct_lines);
1624 VG_(dmsg)("cachegrind: distinct instrs:%d\n", distinct_instrs);
1625 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
njn1baf7db2006-04-18 22:34:48 +00001626
1627 VG_(percentify)(full_debugs, debug_lookups, 1, 6, buf1);
1628 VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
1629 VG_(percentify)(fn_debugs, debug_lookups, 1, 6, buf3);
1630 VG_(percentify)(no_debugs, debug_lookups, 1, 6, buf4);
sewardjb2c985b2009-07-15 14:51:17 +00001631 VG_(dmsg)("cachegrind: with full info:%s (%d)\n",
1632 buf1, full_debugs);
1633 VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
1634 buf2, file_line_debugs);
1635 VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n",
1636 buf3, fn_debugs);
1637 VG_(dmsg)("cachegrind: with zero info:%s (%d)\n",
1638 buf4, no_debugs);
njn1baf7db2006-04-18 22:34:48 +00001639
sewardjb2c985b2009-07-15 14:51:17 +00001640 VG_(dmsg)("cachegrind: string table size: %lu\n",
1641 VG_(OSetGen_Size)(stringTable));
1642 VG_(dmsg)("cachegrind: CC table size: %lu\n",
1643 VG_(OSetGen_Size)(CC_table));
1644 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
1645 VG_(OSetGen_Size)(instrInfoTable));
njn4f9c9342002-04-29 16:03:24 +00001646 }
njn4f9c9342002-04-29 16:03:24 +00001647}
1648
nethercote9313ac42004-07-06 21:54:20 +00001649/*--------------------------------------------------------------------*/
1650/*--- Discarding BB info ---*/
1651/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001652
sewardja3a29a52005-10-12 16:16:03 +00001653// Called when a translation is removed from the translation cache for
1654// any reason at all: to free up space, because the guest code was
1655// unmapped or modified, or for any arbitrary reason.
sewardj4ba057c2005-10-18 12:04:18 +00001656static
sewardj0b9d74a2006-12-24 02:24:11 +00001657void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
sewardj18d75132002-05-16 11:06:21 +00001658{
sewardj0b9d74a2006-12-24 02:24:11 +00001659 SB_info* sbInfo;
sewardj3a384b32006-01-22 01:12:51 +00001660 Addr orig_addr = (Addr)vge.base[0];
njn4294fd42002-06-05 14:41:10 +00001661
sewardj5155dec2005-10-12 10:09:23 +00001662 tl_assert(vge.n_used > 0);
1663
1664 if (DEBUG_CG)
sewardj4ba057c2005-10-18 12:04:18 +00001665 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1666 (void*)(Addr)orig_addr,
sewardj5155dec2005-10-12 10:09:23 +00001667 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
njn4294fd42002-06-05 14:41:10 +00001668
sewardj4ba057c2005-10-18 12:04:18 +00001669 // Get BB info, remove from table, free BB info. Simple! Note that we
1670 // use orig_addr, not the first instruction address in vge.
njne2a9ad32007-09-17 05:30:48 +00001671 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
sewardj0b9d74a2006-12-24 02:24:11 +00001672 tl_assert(NULL != sbInfo);
njne2a9ad32007-09-17 05:30:48 +00001673 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
sewardj18d75132002-05-16 11:06:21 +00001674}
1675
1676/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001677/*--- Command line processing ---*/
1678/*--------------------------------------------------------------------*/
1679
njnb1cc5d62010-07-06 04:05:23 +00001680static void parse_cache_opt ( cache_t* cache, Char* opt, Char* optval )
njn25e49d8e72002-09-23 09:36:25 +00001681{
njn83df0b62009-02-25 01:01:05 +00001682 Long i1, i2, i3;
1683 Char* endptr;
njnb1cc5d62010-07-06 04:05:23 +00001684 Char* checkRes;
njn25e49d8e72002-09-23 09:36:25 +00001685
njn83df0b62009-02-25 01:01:05 +00001686 // Option argument looks like "65536,2,64". Extract them.
njnb1cc5d62010-07-06 04:05:23 +00001687 i1 = VG_(strtoll10)(optval, &endptr); if (*endptr != ',') goto bad;
njn83df0b62009-02-25 01:01:05 +00001688 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
1689 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
njn25e49d8e72002-09-23 09:36:25 +00001690
njn83df0b62009-02-25 01:01:05 +00001691 // Check for overflow.
1692 cache->size = (Int)i1;
1693 cache->assoc = (Int)i2;
1694 cache->line_size = (Int)i3;
1695 if (cache->size != i1) goto overflow;
1696 if (cache->assoc != i2) goto overflow;
1697 if (cache->line_size != i3) goto overflow;
njn25e49d8e72002-09-23 09:36:25 +00001698
njnb1cc5d62010-07-06 04:05:23 +00001699 checkRes = check_cache(cache);
1700 if (checkRes) {
1701 VG_(fmsg)("%s", checkRes);
1702 goto bad;
1703 }
1704
njn25e49d8e72002-09-23 09:36:25 +00001705 return;
1706
1707 bad:
njnb1cc5d62010-07-06 04:05:23 +00001708 VG_(fmsg_bad_option)(opt, "");
1709
1710 overflow:
1711 VG_(fmsg_bad_option)(opt,
1712 "One of the cache parameters was too large and overflowed.\n");
njn25e49d8e72002-09-23 09:36:25 +00001713}
1714
njn51d827b2005-05-09 01:02:08 +00001715static Bool cg_process_cmd_line_option(Char* arg)
njn25e49d8e72002-09-23 09:36:25 +00001716{
njn83df0b62009-02-25 01:01:05 +00001717 Char* tmp_str;
1718
nethercote9313ac42004-07-06 21:54:20 +00001719 // 5 is length of "--I1="
njn83df0b62009-02-25 01:01:05 +00001720 if VG_STR_CLO(arg, "--I1", tmp_str)
njnb1cc5d62010-07-06 04:05:23 +00001721 parse_cache_opt(&clo_I1_cache, arg, tmp_str);
njn83df0b62009-02-25 01:01:05 +00001722 else if VG_STR_CLO(arg, "--D1", tmp_str)
njnb1cc5d62010-07-06 04:05:23 +00001723 parse_cache_opt(&clo_D1_cache, arg, tmp_str);
njn83df0b62009-02-25 01:01:05 +00001724 else if VG_STR_CLO(arg, "--L2", tmp_str)
njnb1cc5d62010-07-06 04:05:23 +00001725 parse_cache_opt(&clo_L2_cache, arg, tmp_str);
njn83df0b62009-02-25 01:01:05 +00001726
1727 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1728 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1729 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
njn25e49d8e72002-09-23 09:36:25 +00001730 else
1731 return False;
1732
1733 return True;
1734}
1735
njn51d827b2005-05-09 01:02:08 +00001736static void cg_print_usage(void)
njn25e49d8e72002-09-23 09:36:25 +00001737{
njn3e884182003-04-15 13:03:23 +00001738 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001739" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1740" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001741" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
sewardj8badbaa2007-05-08 09:20:25 +00001742" --cache-sim=yes|no [yes] collect cache stats?\n"
1743" --branch-sim=yes|no [no] collect branch prediction stats?\n"
njn374a36d2007-11-23 01:41:32 +00001744" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
njn3e884182003-04-15 13:03:23 +00001745 );
1746}
1747
njn51d827b2005-05-09 01:02:08 +00001748static void cg_print_debug_usage(void)
njn3e884182003-04-15 13:03:23 +00001749{
1750 VG_(printf)(
1751" (none)\n"
1752 );
njn25e49d8e72002-09-23 09:36:25 +00001753}
1754
1755/*--------------------------------------------------------------------*/
1756/*--- Setup ---*/
1757/*--------------------------------------------------------------------*/
1758
sewardje1216cb2007-02-07 19:55:30 +00001759static void cg_post_clo_init(void); /* just below */
1760
njn51d827b2005-05-09 01:02:08 +00001761static void cg_pre_clo_init(void)
1762{
njn51d827b2005-05-09 01:02:08 +00001763 VG_(details_name) ("Cachegrind");
1764 VG_(details_version) (NULL);
sewardj8badbaa2007-05-08 09:20:25 +00001765 VG_(details_description) ("a cache and branch-prediction profiler");
njn51d827b2005-05-09 01:02:08 +00001766 VG_(details_copyright_author)(
sewardj9eecbbb2010-05-03 21:37:12 +00001767 "Copyright (C) 2002-2010, and GNU GPL'd, by Nicholas Nethercote et al.");
njn51d827b2005-05-09 01:02:08 +00001768 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardje8089302006-10-17 02:15:17 +00001769 VG_(details_avg_translation_sizeB) ( 500 );
njn51d827b2005-05-09 01:02:08 +00001770
1771 VG_(basic_tool_funcs) (cg_post_clo_init,
1772 cg_instrument,
1773 cg_fini);
1774
sewardj0b9d74a2006-12-24 02:24:11 +00001775 VG_(needs_superblock_discards)(cg_discard_superblock_info);
njn51d827b2005-05-09 01:02:08 +00001776 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1777 cg_print_usage,
1778 cg_print_debug_usage);
sewardje1216cb2007-02-07 19:55:30 +00001779}
1780
1781static void cg_post_clo_init(void)
1782{
sewardje1216cb2007-02-07 19:55:30 +00001783 cache_t I1c, D1c, L2c;
njn51d827b2005-05-09 01:02:08 +00001784
njne2a9ad32007-09-17 05:30:48 +00001785 CC_table =
1786 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1787 cmp_CodeLoc_LineCC,
sewardj9c606bd2008-09-18 18:12:50 +00001788 VG_(malloc), "cg.main.cpci.1",
1789 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001790 instrInfoTable =
1791 VG_(OSetGen_Create)(/*keyOff*/0,
1792 NULL,
sewardj9c606bd2008-09-18 18:12:50 +00001793 VG_(malloc), "cg.main.cpci.2",
1794 VG_(free));
njne2a9ad32007-09-17 05:30:48 +00001795 stringTable =
1796 VG_(OSetGen_Create)(/*keyOff*/0,
1797 stringCmp,
sewardj9c606bd2008-09-18 18:12:50 +00001798 VG_(malloc), "cg.main.cpci.3",
1799 VG_(free));
sewardje1216cb2007-02-07 19:55:30 +00001800
1801 configure_caches(&I1c, &D1c, &L2c);
1802
1803 cachesim_I1_initcache(I1c);
1804 cachesim_D1_initcache(D1c);
1805 cachesim_L2_initcache(L2c);
njn51d827b2005-05-09 01:02:08 +00001806}
1807
sewardj45f4e7c2005-09-27 19:20:21 +00001808VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
fitzhardinge98abfc72003-12-16 02:05:15 +00001809
njn25e49d8e72002-09-23 09:36:25 +00001810/*--------------------------------------------------------------------*/
njnf69f9452005-07-03 17:53:11 +00001811/*--- end ---*/
sewardj18d75132002-05-16 11:06:21 +00001812/*--------------------------------------------------------------------*/
njnd3bef4f2005-10-15 17:46:18 +00001813