blob: e2bd63b662f560db074695d9ef55f5aa1417e662 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
/*--------------------------------------------------------------------*/
/*--- Cachegrind: everything but the simulation itself.            ---*/
/*---                                                    cg_main.c ---*/
/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
nethercotebb1c9912004-01-04 16:43:23 +000011 Copyright (C) 2002-2004 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000012 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
nethercote46063202004-09-02 08:51:43 +000032#include "tool.h"
njn25e49d8e72002-09-23 09:36:25 +000033//#include "vg_profile.c"
34
/* For cache simulation: the geometry of one cache.
   Field order matters -- this file builds values positionally, as in
   (cache_t) { size, assoc, line_size }. */
typedef struct {
   int size;         /* bytes */
   int assoc;        /* associativity -- presumably the number of ways */
   int line_size;    /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000041
nethercote27fc1da2004-01-04 16:56:57 +000042#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000043
njn25e49d8e72002-09-23 09:36:25 +000044/*------------------------------------------------------------*/
45/*--- Constants ---*/
46/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000047
// Data sizes larger than this are clamped to it before simulation
// (see SK_(instrument)).
#define MIN_LINE_SIZE   16
// Capacity of the buffers that receive a source file name.
#define FILE_LEN        256
// Capacity of the buffers that receive a function name.
#define FN_LEN          256
njn7cf0bd32002-06-08 13:36:03 +000051
52/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000053/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000054/*------------------------------------------------------------*/
55
njn25e49d8e72002-09-23 09:36:25 +000056typedef
57 enum {
nethercote9313ac42004-07-06 21:54:20 +000058 VgpGetLineCC = VgpFini+1,
njn25e49d8e72002-09-23 09:36:25 +000059 VgpCacheSimulate,
60 VgpCacheResults
61 }
nethercote7cc9c232004-01-21 15:08:04 +000062 VgpToolCC;
sewardj07133bf2002-06-13 10:25:56 +000063
njn4f9c9342002-04-29 16:03:24 +000064/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000065/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000066/*------------------------------------------------------------*/
67
68typedef struct _CC CC;
69struct _CC {
70 ULong a;
71 ULong m1;
72 ULong m2;
73};
74
nethercote9313ac42004-07-06 21:54:20 +000075//------------------------------------------------------------
76// Primary data structure #1: CC table
77// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
78// - hash(file, hash(fn, hash(line+CC)))
79// - Each hash table is separately chained.
80// - The array sizes below work fairly well for Konqueror.
81// - Lookups done by instr_addr, which is converted immediately to a source
82// location.
83// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +000084
// Bucket counts for the three levels of the CC table.
#define N_FILE_ENTRIES        251    // CC_table[]         (files)
#define N_FN_ENTRIES          53     // fileCC.fns[]       (functions per file)
#define N_LINE_ENTRIES        37     // fnCC.lines[]       (lines per function)
njn4f9c9342002-04-29 16:03:24 +000088
nethercote9313ac42004-07-06 21:54:20 +000089typedef struct _lineCC lineCC;
90struct _lineCC {
91 Int line;
92 CC Ir;
93 CC Dr;
94 CC Dw;
95 lineCC* next;
njn4f9c9342002-04-29 16:03:24 +000096};
97
nethercote9313ac42004-07-06 21:54:20 +000098typedef struct _fnCC fnCC;
99struct _fnCC {
100 Char* fn;
101 fnCC* next;
102 lineCC* lines[N_LINE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000103};
104
nethercote9313ac42004-07-06 21:54:20 +0000105typedef struct _fileCC fileCC;
106struct _fileCC {
107 Char* file;
108 fileCC* next;
109 fnCC* fns[N_FN_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000110};
111
nethercote9313ac42004-07-06 21:54:20 +0000112// Top level of CC table. Auto-zeroed.
113static fileCC *CC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000114
//------------------------------------------------------------
// Primary data structure #2: Instr-info table
// - Holds the cached info about each instr that is used for simulation.
// - table(BB_start_addr, list(instr_info))
// - For each BB, each instr_info in the list holds info about the
//   instruction (instr_size, instr_addr, etc), plus a pointer to its line
//   CC.  This node is what's passed to the simulation function.
// - When BBs are discarded the relevant list(instr_info) is freed.
123
124typedef struct _instr_info instr_info;
125struct _instr_info {
nethercoteca1f2dc2004-07-21 08:49:02 +0000126 Addr instr_addr;
127 UChar instr_size;
128 UChar data_size;
129 lineCC* parent; // parent line-CC
nethercote9313ac42004-07-06 21:54:20 +0000130};
131
132typedef struct _BB_info BB_info;
133struct _BB_info {
134 BB_info* next; // next field
135 Addr BB_addr; // key
136 Int n_instrs;
137 instr_info instrs[0];
138};
139
140VgHashTable instr_info_table; // hash(Addr, BB_info)
141
142//------------------------------------------------------------
143// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000144static Int distinct_files = 0;
145static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000146static Int distinct_lines = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000147static Int distinct_instrs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000148
sewardj4f29ddf2002-05-03 22:29:04 +0000149static Int full_debug_BBs = 0;
150static Int file_line_debug_BBs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000151static Int fn_debug_BBs = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000152static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000153
sewardj4f29ddf2002-05-03 22:29:04 +0000154static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000155
nethercote9313ac42004-07-06 21:54:20 +0000156/*------------------------------------------------------------*/
157/*--- CC table operations ---*/
158/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000159
nethercote9313ac42004-07-06 21:54:20 +0000160static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
161 Char fn[FN_LEN], Int* line)
njn4f9c9342002-04-29 16:03:24 +0000162{
nethercote9313ac42004-07-06 21:54:20 +0000163 Bool found_file_line = VG_(get_filename_linenum)(instr_addr, file,
164 FILE_LEN, line);
165 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000166
nethercote9313ac42004-07-06 21:54:20 +0000167 if (!found_file_line) {
168 VG_(strcpy)(file, "???");
169 *line = 0;
170 }
171 if (!found_fn) {
172 VG_(strcpy)(fn, "???");
173 }
174 if (found_file_line) {
175 if (found_fn) full_debug_BBs++;
176 else file_line_debug_BBs++;
177 } else {
178 if (found_fn) fn_debug_BBs++;
179 else no_debug_BBs++;
njn4f9c9342002-04-29 16:03:24 +0000180 }
181}
182
njn4f9c9342002-04-29 16:03:24 +0000183static UInt hash(Char *s, UInt table_size)
184{
nethercote9313ac42004-07-06 21:54:20 +0000185 const int hash_constant = 256;
186 int hash_value = 0;
187 for ( ; *s; s++)
188 hash_value = (hash_constant * hash_value + *s) % table_size;
189 return hash_value;
njn4f9c9342002-04-29 16:03:24 +0000190}
191
nethercote9313ac42004-07-06 21:54:20 +0000192static __inline__
193fileCC* new_fileCC(Char filename[], fileCC* next)
nethercote09d853e2004-01-21 16:12:55 +0000194{
nethercote9313ac42004-07-06 21:54:20 +0000195 // Using calloc() zeroes the fns[] array
196 fileCC* cc = VG_(calloc)(1, sizeof(fileCC));
197 cc->file = VG_(strdup)(filename);
198 cc->next = next;
199 return cc;
nethercote09d853e2004-01-21 16:12:55 +0000200}
201
nethercote9313ac42004-07-06 21:54:20 +0000202static __inline__
203fnCC* new_fnCC(Char fn[], fnCC* next)
njn4f9c9342002-04-29 16:03:24 +0000204{
nethercote9313ac42004-07-06 21:54:20 +0000205 // Using calloc() zeroes the lines[] array
206 fnCC* cc = VG_(calloc)(1, sizeof(fnCC));
207 cc->fn = VG_(strdup)(fn);
208 cc->next = next;
209 return cc;
210}
njn4f9c9342002-04-29 16:03:24 +0000211
nethercote9313ac42004-07-06 21:54:20 +0000212static __inline__
213lineCC* new_lineCC(Int line, lineCC* next)
214{
215 // Using calloc() zeroes the Ir/Dr/Dw CCs and the instrs[] array
216 lineCC* cc = VG_(calloc)(1, sizeof(lineCC));
217 cc->line = line;
218 cc->next = next;
219 return cc;
220}
njn4f9c9342002-04-29 16:03:24 +0000221
nethercote9313ac42004-07-06 21:54:20 +0000222static __inline__
223instr_info* new_instr_info(Addr instr_addr, lineCC* parent, instr_info* next)
224{
225 // Using calloc() zeroes instr_size and data_size
226 instr_info* ii = VG_(calloc)(1, sizeof(instr_info));
227 ii->instr_addr = instr_addr;
228 ii->parent = parent;
229 return ii;
230}
231
232// Do a three step traversal: by file, then fn, then line.
233// In all cases prepends new nodes to their chain. Returns a pointer to the
234// line node, creates a new one if necessary.
235static lineCC* get_lineCC(Addr orig_addr)
236{
237 fileCC *curr_fileCC;
238 fnCC *curr_fnCC;
239 lineCC *curr_lineCC;
240 Char file[FILE_LEN], fn[FN_LEN];
241 Int line;
242 UInt file_hash, fn_hash, line_hash;
243
244 get_debug_info(orig_addr, file, fn, &line);
245
246 VGP_PUSHCC(VgpGetLineCC);
247
248 // level 1
249 file_hash = hash(file, N_FILE_ENTRIES);
250 curr_fileCC = CC_table[file_hash];
251 while (NULL != curr_fileCC && !VG_STREQ(file, curr_fileCC->file)) {
252 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +0000253 }
nethercote9313ac42004-07-06 21:54:20 +0000254 if (NULL == curr_fileCC) {
255 CC_table[file_hash] = curr_fileCC =
256 new_fileCC(file, CC_table[file_hash]);
njn4f9c9342002-04-29 16:03:24 +0000257 distinct_files++;
258 }
259
nethercote9313ac42004-07-06 21:54:20 +0000260 // level 2
261 fn_hash = hash(fn, N_FN_ENTRIES);
262 curr_fnCC = curr_fileCC->fns[fn_hash];
263 while (NULL != curr_fnCC && !VG_STREQ(fn, curr_fnCC->fn)) {
264 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +0000265 }
nethercote9313ac42004-07-06 21:54:20 +0000266 if (NULL == curr_fnCC) {
267 curr_fileCC->fns[fn_hash] = curr_fnCC =
268 new_fnCC(fn, curr_fileCC->fns[fn_hash]);
njn4f9c9342002-04-29 16:03:24 +0000269 distinct_fns++;
270 }
271
nethercote9313ac42004-07-06 21:54:20 +0000272 // level 3
273 line_hash = line % N_LINE_ENTRIES;
274 curr_lineCC = curr_fnCC->lines[line_hash];
275 while (NULL != curr_lineCC && line != curr_lineCC->line) {
276 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +0000277 }
nethercote9313ac42004-07-06 21:54:20 +0000278 if (NULL == curr_lineCC) {
279 curr_fnCC->lines[line_hash] = curr_lineCC =
280 new_lineCC(line, curr_fnCC->lines[line_hash]);
281 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +0000282 }
nethercote9313ac42004-07-06 21:54:20 +0000283
284 VGP_POPCC(VgpGetLineCC);
285 return curr_lineCC;
njn4f9c9342002-04-29 16:03:24 +0000286}
287
288/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000289/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000290/*------------------------------------------------------------*/
291
nethercoteeec46302004-08-23 15:06:23 +0000292static REGPARM(1)
nethercote9313ac42004-07-06 21:54:20 +0000293void log_1I_0D_cache_access(instr_info* n)
njn25e49d8e72002-09-23 09:36:25 +0000294{
295 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000296 // n, n->instr_addr, n->instr_size)
njn25e49d8e72002-09-23 09:36:25 +0000297 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000298 cachesim_I1_doref(n->instr_addr, n->instr_size,
299 &n->parent->Ir.m1, &n->parent->Ir.m2);
300 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000301 VGP_POPCC(VgpCacheSimulate);
302}
303
nethercoteeec46302004-08-23 15:06:23 +0000304static REGPARM(2)
nethercote9313ac42004-07-06 21:54:20 +0000305void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000306{
nethercote9313ac42004-07-06 21:54:20 +0000307 //VG_(printf)("1I_1Dr: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
308 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000309 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000310 cachesim_I1_doref(n->instr_addr, n->instr_size,
311 &n->parent->Ir.m1, &n->parent->Ir.m2);
312 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000313
nethercote9313ac42004-07-06 21:54:20 +0000314 cachesim_D1_doref(data_addr, n->data_size,
315 &n->parent->Dr.m1, &n->parent->Dr.m2);
316 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000317 VGP_POPCC(VgpCacheSimulate);
318}
319
nethercoteeec46302004-08-23 15:06:23 +0000320static REGPARM(2)
nethercote9313ac42004-07-06 21:54:20 +0000321void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000322{
nethercote9313ac42004-07-06 21:54:20 +0000323 //VG_(printf)("1I_1Dw: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
324 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000325 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000326 cachesim_I1_doref(n->instr_addr, n->instr_size,
327 &n->parent->Ir.m1, &n->parent->Ir.m2);
328 n->parent->Ir.a++;
329
330 cachesim_D1_doref(data_addr, n->data_size,
331 &n->parent->Dw.m1, &n->parent->Dw.m2);
332 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000333 VGP_POPCC(VgpCacheSimulate);
334}
335
nethercoteeec46302004-08-23 15:06:23 +0000336static REGPARM(3)
nethercote9313ac42004-07-06 21:54:20 +0000337void log_1I_2D_cache_access(instr_info* n, Addr data_addr1, Addr data_addr2)
njn25e49d8e72002-09-23 09:36:25 +0000338{
339 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000340 // n, n->instr_addr, n->instr_size, data_addr1, data_addr2, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000341 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000342 cachesim_I1_doref(n->instr_addr, n->instr_size,
343 &n->parent->Ir.m1, &n->parent->Ir.m2);
344 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000345
nethercote9313ac42004-07-06 21:54:20 +0000346 cachesim_D1_doref(data_addr1, n->data_size,
347 &n->parent->Dr.m1, &n->parent->Dr.m2);
348 n->parent->Dr.a++;
349 cachesim_D1_doref(data_addr2, n->data_size,
350 &n->parent->Dw.m1, &n->parent->Dw.m2);
351 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000352 VGP_POPCC(VgpCacheSimulate);
353}
354
nethercote9313ac42004-07-06 21:54:20 +0000355/*------------------------------------------------------------*/
356/*--- Instrumentation ---*/
357/*------------------------------------------------------------*/
358
nethercote564b2b02004-08-07 15:54:53 +0000359static
nethercote9313ac42004-07-06 21:54:20 +0000360BB_info* get_BB_info(UCodeBlock* cb_in, Addr orig_addr, Bool* bb_seen_before)
361{
362 Int i, n_instrs;
363 UInstr* u_in;
364 BB_info* bb_info;
365 VgHashNode** dummy;
366
367 // Count number of x86 instrs in BB
368 n_instrs = 1; // start at 1 because last x86 instr has no INCEIP
369 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
370 u_in = VG_(get_instr)(cb_in, i);
371 if (INCEIP == u_in->opcode) n_instrs++;
372 }
373
374 // Get the BB_info
375 bb_info = (BB_info*)VG_(HT_get_node)(instr_info_table, orig_addr, &dummy);
376 *bb_seen_before = ( NULL == bb_info ? False : True );
377 if (*bb_seen_before) {
378 // BB must have been translated before, but flushed from the TT
379 sk_assert(bb_info->n_instrs == n_instrs );
380 BB_retranslations++;
381 } else {
382 // BB never translated before (at this address, at least; could have
383 // been unloaded and then reloaded elsewhere in memory)
384 bb_info =
385 VG_(calloc)(1, sizeof(BB_info) + n_instrs*sizeof(instr_info));
386 bb_info->BB_addr = orig_addr;
387 bb_info->n_instrs = n_instrs;
388 VG_(HT_add_node)( instr_info_table, (VgHashNode*)bb_info );
389 distinct_instrs++;
390 }
391 return bb_info;
392}
393
nethercote564b2b02004-08-07 15:54:53 +0000394static
nethercote9313ac42004-07-06 21:54:20 +0000395void do_details( instr_info* n, Bool bb_seen_before,
396 Addr instr_addr, Int instr_size, Int data_size )
397{
398 lineCC* parent = get_lineCC(instr_addr);
399 if (bb_seen_before) {
400 sk_assert( n->instr_addr == instr_addr );
401 sk_assert( n->instr_size == instr_size );
402 sk_assert( n->data_size == data_size );
403 // Don't assert that (n->parent == parent)... it's conceivable that
404 // the debug info might change; the other asserts should be enough to
405 // detect anything strange.
406 } else {
407 n->instr_addr = instr_addr;
408 n->instr_size = instr_size;
409 n->data_size = data_size;
410 n->parent = parent;
411 }
412}
413
nethercote564b2b02004-08-07 15:54:53 +0000414static Bool is_valid_data_size(Int data_size)
nethercote9313ac42004-07-06 21:54:20 +0000415{
416 return (4 == data_size || 2 == data_size || 1 == data_size ||
417 8 == data_size || 10 == data_size || MIN_LINE_SIZE == data_size);
418}
419
420// Instrumentation for the end of each x86 instruction.
nethercote564b2b02004-08-07 15:54:53 +0000421static
nethercote9313ac42004-07-06 21:54:20 +0000422void end_of_x86_instr(UCodeBlock* cb, instr_info* i_node, Bool bb_seen_before,
423 UInt instr_addr, UInt instr_size, UInt data_size,
424 Int t_read, Int t_read_addr,
425 Int t_write, Int t_write_addr)
426{
427 Addr helper;
428 Int argc;
429 Int t_CC_addr,
430 t_data_addr1 = INVALID_TEMPREG,
431 t_data_addr2 = INVALID_TEMPREG;
432
nethercotefbfc1082004-09-04 15:28:37 +0000433 sk_assert(instr_size >= MIN_INSTR_SIZE &&
434 instr_size <= MAX_INSTR_SIZE);
nethercote9313ac42004-07-06 21:54:20 +0000435
436#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
437#define INV(qqt) (INVALID_TEMPREG == (qqt))
438
439 // Work out what kind of x86 instruction it is
440 if (!IS_(read) && !IS_(write)) {
441 sk_assert( 0 == data_size );
442 sk_assert(INV(t_read) && INV(t_write));
443 helper = (Addr) & log_1I_0D_cache_access;
444 argc = 1;
445
446 } else if (IS_(read) && !IS_(write)) {
447 sk_assert( is_valid_data_size(data_size) );
448 sk_assert(!INV(t_read) && INV(t_write));
449 helper = (Addr) & log_1I_1Dr_cache_access;
450 argc = 2;
451 t_data_addr1 = t_read_addr;
452
453 } else if (!IS_(read) && IS_(write)) {
454 sk_assert( is_valid_data_size(data_size) );
455 sk_assert(INV(t_read) && !INV(t_write));
456 helper = (Addr) & log_1I_1Dw_cache_access;
457 argc = 2;
458 t_data_addr1 = t_write_addr;
459
460 } else {
461 sk_assert(IS_(read) && IS_(write));
462 sk_assert( is_valid_data_size(data_size) );
463 sk_assert(!INV(t_read) && !INV(t_write));
464 if (t_read == t_write) {
465 helper = (Addr) & log_1I_1Dr_cache_access;
466 argc = 2;
467 t_data_addr1 = t_read_addr;
468 } else {
469 helper = (Addr) & log_1I_2D_cache_access;
470 argc = 3;
471 t_data_addr1 = t_read_addr;
472 t_data_addr2 = t_write_addr;
473 }
474 }
475#undef IS_
nethercotef5b74662004-07-06 22:46:41 +0000476#undef INV
477
nethercote9313ac42004-07-06 21:54:20 +0000478 // Setup 1st arg: CC addr
479 do_details( i_node, bb_seen_before, instr_addr, instr_size, data_size );
480 t_CC_addr = newTemp(cb);
481 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
482 uLiteral(cb, (Addr)i_node);
483
484 // Call the helper
485 if (1 == argc)
486 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
487 else if (2 == argc)
488 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
489 TempReg, t_data_addr1);
490 else if (3 == argc)
491 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
492 TempReg, t_data_addr1,
493 TempReg, t_data_addr2);
494 else
495 VG_(skin_panic)("argc... not 1 or 2 or 3?");
496
497 uCCall(cb, helper, argc, argc, False);
498}
499
njn25e49d8e72002-09-23 09:36:25 +0000500UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
501{
njn4f9c9342002-04-29 16:03:24 +0000502 UCodeBlock* cb;
njn4f9c9342002-04-29 16:03:24 +0000503 UInstr* u_in;
nethercote9313ac42004-07-06 21:54:20 +0000504 Int i, bb_info_i;
505 BB_info* bb_info;
506 Bool bb_seen_before = False;
507 Int t_read_addr, t_write_addr, t_read, t_write;
njn25e49d8e72002-09-23 09:36:25 +0000508 Addr x86_instr_addr = orig_addr;
nethercote9313ac42004-07-06 21:54:20 +0000509 UInt x86_instr_size, data_size = 0;
510 Bool instrumented_Jcc = False;
njn4f9c9342002-04-29 16:03:24 +0000511
nethercote9313ac42004-07-06 21:54:20 +0000512 bb_info = get_BB_info(cb_in, orig_addr, &bb_seen_before);
513 bb_info_i = 0;
njn4f9c9342002-04-29 16:03:24 +0000514
njn810086f2002-11-14 12:42:47 +0000515 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000516
nethercote9313ac42004-07-06 21:54:20 +0000517 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000518
njn810086f2002-11-14 12:42:47 +0000519 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
520 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000521
nethercote9313ac42004-07-06 21:54:20 +0000522 // We want to instrument each x86 instruction with a call to the
523 // appropriate simulation function, which depends on whether the
524 // instruction does memory data reads/writes. x86 instructions can
525 // end in three ways, and this is how they are instrumented:
526 //
527 // 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
528 // 2. UCode, JMP --> UCode, Instrumentation, JMP
529 // 3. UCode, Jcc, JMP --> UCode, Instrumentation, Jcc, JMP
530 //
531 // The last UInstr in a BB is always a JMP. Jccs, when they appear,
532 // are always second last. This is checked with assertions.
533 // Instrumentation must go before any jumps. (JIFZ is the exception;
534 // if a JIFZ succeeds, no simulation is done for the instruction.)
535 //
536 // x86 instruction sizes are obtained from INCEIPs (for case 1) or
537 // from .extra4b field of the final JMP (for case 2 & 3).
538
539 if (instrumented_Jcc) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000540
541 switch (u_in->opcode) {
njn4f9c9342002-04-29 16:03:24 +0000542
nethercote9313ac42004-07-06 21:54:20 +0000543 // For memory-ref instrs, copy the data_addr into a temporary to be
544 // passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000545 case LOAD:
nethercote9313ac42004-07-06 21:54:20 +0000546 case SSE3ag_MemRd_RegWr:
njn25e49d8e72002-09-23 09:36:25 +0000547 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000548 t_read_addr = newTemp(cb);
549 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
550 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000551 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000552 break;
553
554 case FPU_R:
nethercote9313ac42004-07-06 21:54:20 +0000555 case MMX2_MemRd:
njn25e49d8e72002-09-23 09:36:25 +0000556 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000557 t_read_addr = newTemp(cb);
558 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
nethercote9313ac42004-07-06 21:54:20 +0000559 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000560 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000561 break;
thughes96b466a2004-03-15 16:43:58 +0000562 break;
563
564 case MMX2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000565 case SSE2a_MemRd:
566 case SSE2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000567 case SSE3a_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000568 case SSE3a1_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000569 t_read = u_in->val3;
570 t_read_addr = newTemp(cb);
571 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
572 data_size = u_in->size;
573 VG_(copy_UInstr)(cb, u_in);
574 break;
575
nethercote9313ac42004-07-06 21:54:20 +0000576 // Note that we must set t_write_addr even for mod instructions;
577 // That's how the code above determines whether it does a write.
578 // Without it, it would think a mod instruction is a read.
579 // As for the MOV, if it's a mod instruction it's redundant, but it's
580 // not expensive and mod instructions are rare anyway. */
njn4f9c9342002-04-29 16:03:24 +0000581 case STORE:
582 case FPU_W:
nethercote9313ac42004-07-06 21:54:20 +0000583 case MMX2_MemWr:
njn25e49d8e72002-09-23 09:36:25 +0000584 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000585 t_write_addr = newTemp(cb);
586 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000587 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000588 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000589 break;
590
njn21f805d2003-08-25 16:15:40 +0000591 case SSE2a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000592 case SSE3a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000593 t_write = u_in->val3;
594 t_write_addr = newTemp(cb);
595 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000596 data_size = u_in->size;
njn21f805d2003-08-25 16:15:40 +0000597 VG_(copy_UInstr)(cb, u_in);
598 break;
njn25e49d8e72002-09-23 09:36:25 +0000599
nethercote9313ac42004-07-06 21:54:20 +0000600 // INCEIP: insert instrumentation
njn25e49d8e72002-09-23 09:36:25 +0000601 case INCEIP:
602 x86_instr_size = u_in->val1;
603 goto instrument_x86_instr;
604
nethercote9313ac42004-07-06 21:54:20 +0000605 // JMP: insert instrumentation if the first JMP
njn25e49d8e72002-09-23 09:36:25 +0000606 case JMP:
nethercote9313ac42004-07-06 21:54:20 +0000607 if (instrumented_Jcc) {
njne427a662002-10-02 11:08:25 +0000608 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000609 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000610 VG_(copy_UInstr)(cb, u_in);
nethercote9313ac42004-07-06 21:54:20 +0000611 instrumented_Jcc = False; // rest
njn25e49d8e72002-09-23 09:36:25 +0000612 break;
njn25e49d8e72002-09-23 09:36:25 +0000613 } else {
nethercote9313ac42004-07-06 21:54:20 +0000614 // The first JMP... instrument.
615 if (CondAlways != u_in->cond) {
616 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
617 instrumented_Jcc = True;
njn25e49d8e72002-09-23 09:36:25 +0000618 } else {
nethercote9313ac42004-07-06 21:54:20 +0000619 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000620 }
nethercote9313ac42004-07-06 21:54:20 +0000621 // Get x86 instr size from final JMP.
622 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
623 goto instrument_x86_instr;
njn25e49d8e72002-09-23 09:36:25 +0000624 }
625
nethercote9313ac42004-07-06 21:54:20 +0000626 // Code executed at the end of each x86 instruction.
627 instrument_x86_instr:
628 // Large (eg. 28B, 108B, 512B) data-sized instructions will be
629 // done inaccurately but they're very rare and this avoids
630 // errors from hitting more than two cache lines in the
631 // simulation.
632 if (data_size > MIN_LINE_SIZE) data_size = MIN_LINE_SIZE;
njn25e49d8e72002-09-23 09:36:25 +0000633
nethercote9313ac42004-07-06 21:54:20 +0000634 end_of_x86_instr(cb, &bb_info->instrs[ bb_info_i ], bb_seen_before,
635 x86_instr_addr, x86_instr_size, data_size,
636 t_read, t_read_addr, t_write, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000637
nethercote9313ac42004-07-06 21:54:20 +0000638 // Copy original UInstr (INCEIP or JMP)
njn4ba5a792002-09-30 10:23:54 +0000639 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000640
nethercote9313ac42004-07-06 21:54:20 +0000641 // Update loop state for next x86 instr
642 bb_info_i++;
njn25e49d8e72002-09-23 09:36:25 +0000643 x86_instr_addr += x86_instr_size;
nethercote9313ac42004-07-06 21:54:20 +0000644 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
645 data_size = 0;
njn4f9c9342002-04-29 16:03:24 +0000646 break;
647
648 default:
njn4ba5a792002-09-30 10:23:54 +0000649 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000650 break;
651 }
652 }
653
nethercote9313ac42004-07-06 21:54:20 +0000654 // BB address should be the same as the first instruction's address.
655 sk_assert(bb_info->BB_addr == bb_info->instrs[0].instr_addr );
656 sk_assert(bb_info_i == bb_info->n_instrs);
njn4f9c9342002-04-29 16:03:24 +0000657
njn4ba5a792002-09-30 10:23:54 +0000658 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000659 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000660
661#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000662}
663
664/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000665/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000666/*------------------------------------------------------------*/
667
njn25e49d8e72002-09-23 09:36:25 +0000668#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
669
670static cache_t clo_I1_cache = UNDEFINED_CACHE;
671static cache_t clo_D1_cache = UNDEFINED_CACHE;
672static cache_t clo_L2_cache = UNDEFINED_CACHE;
673
// All CPUID info taken from sandpile.org/a32/cpuid.htm
// Probably only works for Intel and AMD chips, and probably only for some of
// them.
njn7cf0bd32002-06-08 13:36:03 +0000677
sewardj07133bf2002-06-13 10:25:56 +0000678static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +0000679{
680 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +0000681 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +0000682 actual_size);
njn7cf0bd32002-06-08 13:36:03 +0000683 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000684 " Simulating a %d KB cache with %d B lines",
685 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +0000686}
687
688/* Intel method is truly wretched. We have to do an insane indexing into an
689 * array of pre-defined configurations for various parts of the memory
690 * hierarchy.
691 */
692static
sewardj07133bf2002-06-13 10:25:56 +0000693Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000694{
sewardj07133bf2002-06-13 10:25:56 +0000695 UChar info[16];
696 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +0000697 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +0000698
699 if (level < 2) {
700 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000701 "warning: CPUID level < 2 for Intel processor (%d)",
702 level);
njn7cf0bd32002-06-08 13:36:03 +0000703 return -1;
704 }
705
thughes4ee64962004-06-16 20:51:45 +0000706 VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
707 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +0000708 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
709 info[0] = 0x0; /* reset AL */
710
711 if (0 != trials) {
712 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000713 "warning: non-zero CPUID trials for Intel processor (%d)",
714 trials);
njn7cf0bd32002-06-08 13:36:03 +0000715 return -1;
716 }
717
718 for (i = 0; i < 16; i++) {
719
720 switch (info[i]) {
721
722 case 0x0: /* ignore zeros */
723 break;
724
njn25e49d8e72002-09-23 09:36:25 +0000725 /* TLB info, ignore */
726 case 0x01: case 0x02: case 0x03: case 0x04:
727 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +0000728 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +0000729 break;
730
731 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
732 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000733 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000734
735 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
736 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000737 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000738
njn25e49d8e72002-09-23 09:36:25 +0000739 /* IA-64 info -- panic! */
740 case 0x10: case 0x15: case 0x1a:
741 case 0x88: case 0x89: case 0x8a: case 0x8d:
742 case 0x90: case 0x96: case 0x9b:
nethercote9313ac42004-07-06 21:54:20 +0000743 VG_(skin_panic)("IA-64 cache detected?!");
njn25e49d8e72002-09-23 09:36:25 +0000744
njn7cf0bd32002-06-08 13:36:03 +0000745 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +0000746 VG_(message)(Vg_DebugMsg,
747 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +0000748 break;
749
njn25e49d8e72002-09-23 09:36:25 +0000750 /* These are sectored, whatever that means */
751 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
752 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
753
754 /* If a P6 core, this means "no L2 cache".
755 If a P4 core, this means "no L3 cache".
756 We don't know what core it is, so don't issue a warning. To detect
757 a missing L2 cache, we use 'L2_found'. */
758 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +0000759 break;
760
njn25e49d8e72002-09-23 09:36:25 +0000761 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
762 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
763 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
764 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
765 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000766
767 /* These are sectored, whatever that means */
768 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
769 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
770 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
771
772 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
773 * conversion to byte size is a total guess; treat the 12K and 16K
774 * cases the same since the cache byte size must be a power of two for
775 * everything to work!. Also guessing 32 bytes for the line size...
776 */
777 case 0x70: /* 12K micro-ops, 8-way */
778 *I1c = (cache_t) { 16, 8, 32 };
779 micro_ops_warn(12, 16, 32);
780 break;
781 case 0x71: /* 16K micro-ops, 8-way */
782 *I1c = (cache_t) { 16, 8, 32 };
783 micro_ops_warn(16, 16, 32);
784 break;
785 case 0x72: /* 32K micro-ops, 8-way */
786 *I1c = (cache_t) { 32, 8, 32 };
787 micro_ops_warn(32, 32, 32);
788 break;
789
njn25e49d8e72002-09-23 09:36:25 +0000790 /* These are sectored, whatever that means */
791 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
792 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
793 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
794 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
795 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000796
njn25e49d8e72002-09-23 09:36:25 +0000797 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
798 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
799 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
800 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
801 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +0000802 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
803 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000804
805 default:
806 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000807 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +0000808 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +0000809 break;
810 }
811 }
njn25e49d8e72002-09-23 09:36:25 +0000812
813 if (!L2_found)
814 VG_(message)(Vg_DebugMsg,
815 "warning: L2 cache not installed, ignore L2 results.");
816
njn7cf0bd32002-06-08 13:36:03 +0000817 return 0;
818}
819
820/* AMD method is straightforward, just extract appropriate bits from the
821 * result registers.
822 *
823 * Bits, for D1 and I1:
824 * 31..24 data L1 cache size in KBs
825 * 23..16 data L1 cache associativity (FFh=full)
826 * 15.. 8 data L1 cache lines per tag
827 * 7.. 0 data L1 cache line size in bytes
828 *
829 * Bits, for L2:
830 * 31..16 unified L2 cache size in KBs
831 * 15..12 unified L2 cache associativity (0=off, FFh=full)
832 * 11.. 8 unified L2 cache lines per tag
833 * 7.. 0 unified L2 cache line size in bytes
834 *
835 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
836 * upon this information. (Whatever that means -- njn)
837 *
njn25e49d8e72002-09-23 09:36:25 +0000838 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
839 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
840 * so we detect that.
841 *
njn7cf0bd32002-06-08 13:36:03 +0000842 * Returns 0 on success, non-zero on failure.
843 */
sewardj07133bf2002-06-13 10:25:56 +0000844static
845Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000846{
sewardj05bcdcb2003-05-18 10:05:38 +0000847 UInt ext_level;
thughes4ee64962004-06-16 20:51:45 +0000848 UInt dummy, model;
849 UInt I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +0000850
thughes4ee64962004-06-16 20:51:45 +0000851 VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000852
853 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
854 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000855 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
856 ext_level);
njn7cf0bd32002-06-08 13:36:03 +0000857 return -1;
858 }
859
thughes4ee64962004-06-16 20:51:45 +0000860 VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
861 VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000862
thughes4ee64962004-06-16 20:51:45 +0000863 VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
njn25e49d8e72002-09-23 09:36:25 +0000864
865 /* Check for Duron bug */
866 if (model == 0x630) {
867 VG_(message)(Vg_UserMsg,
868 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
869 L2i = (64 << 16) | (L2i & 0xffff);
870 }
871
njn7cf0bd32002-06-08 13:36:03 +0000872 D1c->size = (D1i >> 24) & 0xff;
873 D1c->assoc = (D1i >> 16) & 0xff;
874 D1c->line_size = (D1i >> 0) & 0xff;
875
876 I1c->size = (I1i >> 24) & 0xff;
877 I1c->assoc = (I1i >> 16) & 0xff;
878 I1c->line_size = (I1i >> 0) & 0xff;
879
880 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
881 L2c->assoc = (L2i >> 12) & 0xf;
882 L2c->line_size = (L2i >> 0) & 0xff;
883
884 return 0;
885}
886
/* Recovery point used while probing for the CPUID instruction. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler: unwinds straight back to the __builtin_setjmp() call that
   filled in cpuid_jmpbuf, making it yield a non-zero value.  The signal
   number is not needed. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
894
895static
sewardj07133bf2002-06-13 10:25:56 +0000896Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000897{
sewardj07133bf2002-06-13 10:25:56 +0000898 Int level, res, ret;
899 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +0000900 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +0000901
902 /* Install own SIGILL handler */
903 sigill_new.ksa_handler = cpuid_SIGILL_handler;
904 sigill_new.ksa_flags = 0;
905 sigill_new.ksa_restorer = NULL;
906 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +0000907 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000908
909 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +0000910 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000911
912 /* Trap for illegal instruction, in case it's a really old processor that
913 * doesn't support CPUID. */
914 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
thughes4ee64962004-06-16 20:51:45 +0000915 VG_(cpuid)(0, &level, (int*)&vendor_id[0],
916 (int*)&vendor_id[8], (int*)&vendor_id[4]);
njn7cf0bd32002-06-08 13:36:03 +0000917 vendor_id[12] = '\0';
918
919 /* Restore old SIGILL handler */
920 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000921 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000922
923 } else {
924 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
925
926 /* Restore old SIGILL handler */
927 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000928 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000929 return -1;
930 }
931
932 if (0 == level) {
933 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
934 return -1;
935 }
936
937 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
938 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
939 ret = Intel_cache_info(level, I1c, D1c, L2c);
940
941 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
942 ret = AMD_cache_info(I1c, D1c, L2c);
943
sewardj97b7b262003-10-07 00:18:16 +0000944 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
945 /* Total kludge. Pretend to be a VIA Nehemiah. */
946 D1c->size = 64;
947 D1c->assoc = 16;
948 D1c->line_size = 16;
949 I1c->size = 64;
950 I1c->assoc = 4;
951 I1c->line_size = 16;
952 L2c->size = 64;
953 L2c->assoc = 16;
954 L2c->line_size = 16;
955 ret = 0;
956
njn7cf0bd32002-06-08 13:36:03 +0000957 } else {
958 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
959 vendor_id);
960 return -1;
961 }
962
963 /* Successful! Convert sizes from KB to bytes */
964 I1c->size *= 1024;
965 D1c->size *= 1024;
966 L2c->size *= 1024;
967
968 return ret;
969}
970
971/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +0000972static
973void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +0000974{
975 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +0000976 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +0000977 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000978 "warning: %s size of %dB not a power of two; "
979 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +0000980 cache->size = dflt->size;
981 }
982
sewardj07133bf2002-06-13 10:25:56 +0000983 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +0000984 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000985 "warning: %s associativity of %d not a power of two; "
986 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +0000987 cache->assoc = dflt->assoc;
988 }
989
sewardj07133bf2002-06-13 10:25:56 +0000990 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +0000991 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000992 "warning: %s line size of %dB not a power of two; "
993 "defaulting to %dB",
994 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +0000995 cache->line_size = dflt->line_size;
996 }
997
998 /* Then check line size >= 16 -- any smaller and a single instruction could
999 * straddle three cache lines, which breaks a simulation assertion and is
1000 * stupid anyway. */
1001 if (cache->line_size < MIN_LINE_SIZE) {
1002 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001003 "warning: %s line size of %dB too small; "
1004 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001005 cache->line_size = MIN_LINE_SIZE;
1006 }
1007
1008 /* Then check cache size > line size (causes seg faults if not). */
1009 if (cache->size <= cache->line_size) {
1010 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001011 "warning: %s cache size of %dB <= line size of %dB; "
1012 "increasing to %dB", name, cache->size, cache->line_size,
1013 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001014 cache->size = cache->line_size * 2;
1015 }
1016
1017 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1018 if (cache->assoc > (cache->size / cache->line_size)) {
1019 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001020 "warning: %s associativity > (size / line size); "
1021 "increasing size to %dB",
1022 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001023 cache->size = cache->assoc * cache->line_size;
1024 }
1025}
1026
sewardj07133bf2002-06-13 10:25:56 +00001027static
1028void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001029{
nethercote9313ac42004-07-06 21:54:20 +00001030#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1031
1032 Int res, n_clos = 0;
1033
1034 // Defaults are for a model 3 or 4 Athlon
njn7cf0bd32002-06-08 13:36:03 +00001035 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1036 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1037 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1038
nethercote9313ac42004-07-06 21:54:20 +00001039 // Set caches to default.
1040 *I1c = I1_dflt;
1041 *D1c = D1_dflt;
1042 *L2c = L2_dflt;
njn7cf0bd32002-06-08 13:36:03 +00001043
nethercote9313ac42004-07-06 21:54:20 +00001044 // Then replace with any info we can get from CPUID.
1045 res = get_caches_from_CPUID(I1c, D1c, L2c);
sewardjb1a77a42002-07-13 13:31:20 +00001046
nethercote9313ac42004-07-06 21:54:20 +00001047 // Then replace with any defined on the command line.
1048 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; n_clos++; }
1049 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; n_clos++; }
1050 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; n_clos++; }
njn7cf0bd32002-06-08 13:36:03 +00001051
nethercote9313ac42004-07-06 21:54:20 +00001052 // Warn if CPUID failed and config not completely specified from cmd line.
1053 if (res != 0 && n_clos < 3) {
1054 VG_(message)(Vg_DebugMsg,
1055 "Warning: Couldn't detect cache config, using one "
1056 "or more defaults ");
njn7cf0bd32002-06-08 13:36:03 +00001057 }
njn7cf0bd32002-06-08 13:36:03 +00001058
nethercote9313ac42004-07-06 21:54:20 +00001059 // Then check values and fix if not acceptable.
njn7cf0bd32002-06-08 13:36:03 +00001060 check_cache(I1c, &I1_dflt, "I1");
1061 check_cache(D1c, &D1_dflt, "D1");
1062 check_cache(L2c, &L2_dflt, "L2");
1063
1064 if (VG_(clo_verbosity) > 1) {
1065 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1066 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1067 I1c->size, I1c->assoc, I1c->line_size);
1068 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1069 D1c->size, D1c->assoc, D1c->line_size);
1070 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1071 L2c->size, L2c->assoc, L2c->line_size);
1072 }
nethercote9313ac42004-07-06 21:54:20 +00001073#undef CMD_LINE_DEFINED
njn7cf0bd32002-06-08 13:36:03 +00001074}
1075
njn4f9c9342002-04-29 16:03:24 +00001076/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001077/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001078/*------------------------------------------------------------*/
1079
nethercote9313ac42004-07-06 21:54:20 +00001080// Total reads/writes/misses. Calculated during CC traversal at the end.
1081// All auto-zeroed.
1082static CC Ir_total;
1083static CC Dr_total;
1084static CC Dw_total;
1085
1086static Char* cachegrind_out_file;
1087
1088static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +00001089{
nethercote9313ac42004-07-06 21:54:20 +00001090 VG_(message)(Vg_UserMsg,
1091 "error: can't open cache simulation output file `%s'",
1092 cachegrind_out_file );
1093 VG_(message)(Vg_UserMsg,
1094 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +00001095}
1096
nethercote9313ac42004-07-06 21:54:20 +00001097static void fprint_lineCC(Int fd, lineCC* n)
njn4f9c9342002-04-29 16:03:24 +00001098{
nethercote9313ac42004-07-06 21:54:20 +00001099 Char buf[512];
1100 VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1101 n->line,
1102 n->Ir.a, n->Ir.m1, n->Ir.m2,
1103 n->Dr.a, n->Dr.m1, n->Dr.m2,
1104 n->Dw.a, n->Dw.m1, n->Dw.m2);
1105 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1106
1107 Ir_total.a += n->Ir.a; Ir_total.m1 += n->Ir.m1; Ir_total.m2 += n->Ir.m2;
1108 Dr_total.a += n->Dr.a; Dr_total.m1 += n->Dr.m1; Dr_total.m2 += n->Dr.m2;
1109 Dw_total.a += n->Dw.a; Dw_total.m1 += n->Dw.m1; Dw_total.m2 += n->Dw.m2;
1110}
1111
1112static void fprint_CC_table_and_calc_totals(void)
1113{
1114 Int fd;
1115 Char buf[512];
1116 fileCC *curr_fileCC;
1117 fnCC *curr_fnCC;
1118 lineCC *curr_lineCC;
1119 Int i, j, k;
njn4f9c9342002-04-29 16:03:24 +00001120
njn25e49d8e72002-09-23 09:36:25 +00001121 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001122
njndb918dd2003-07-22 20:45:11 +00001123 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001124 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001125 if (fd < 0) {
nethercote9313ac42004-07-06 21:54:20 +00001126 // If the file can't be opened for whatever reason (conflict
1127 // between multiple cachegrinded processes?), give up now.
sewardj0744b6c2002-12-11 00:45:42 +00001128 file_err();
1129 return;
1130 }
njn4f9c9342002-04-29 16:03:24 +00001131
nethercote9313ac42004-07-06 21:54:20 +00001132 // "desc:" lines (giving I1/D1/L2 cache configuration). The spaces after
1133 // the 2nd colon makes cg_annotate's output look nicer.
1134 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1135 "desc: D1 cache: %s\n"
1136 "desc: L2 cache: %s\n",
1137 I1.desc_line, D1.desc_line, L2.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001138 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001139
nethercote9313ac42004-07-06 21:54:20 +00001140 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001141 VG_(strcpy)(buf, "cmd:");
1142 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001143 for (i = 0; i < VG_(client_argc); i++) {
1144 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001145 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1146 }
nethercote9313ac42004-07-06 21:54:20 +00001147 // "events:" line
njn4f9c9342002-04-29 16:03:24 +00001148 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1149 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1150
nethercote9313ac42004-07-06 21:54:20 +00001151 // Six loops here: three for the hash table arrays, and three for the
1152 // chains hanging off the hash table arrays.
njn4f9c9342002-04-29 16:03:24 +00001153 for (i = 0; i < N_FILE_ENTRIES; i++) {
nethercote9313ac42004-07-06 21:54:20 +00001154 curr_fileCC = CC_table[i];
1155 while (curr_fileCC != NULL) {
1156 VG_(sprintf)(buf, "fl=%s\n", curr_fileCC->file);
njn4f9c9342002-04-29 16:03:24 +00001157 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1158
1159 for (j = 0; j < N_FN_ENTRIES; j++) {
nethercote9313ac42004-07-06 21:54:20 +00001160 curr_fnCC = curr_fileCC->fns[j];
1161 while (curr_fnCC != NULL) {
1162 VG_(sprintf)(buf, "fn=%s\n", curr_fnCC->fn);
njn4f9c9342002-04-29 16:03:24 +00001163 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1164
nethercote9313ac42004-07-06 21:54:20 +00001165 for (k = 0; k < N_LINE_ENTRIES; k++) {
1166 curr_lineCC = curr_fnCC->lines[k];
1167 while (curr_lineCC != NULL) {
1168 fprint_lineCC(fd, curr_lineCC);
1169 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +00001170 }
1171 }
nethercote9313ac42004-07-06 21:54:20 +00001172 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +00001173 }
1174 }
nethercote9313ac42004-07-06 21:54:20 +00001175 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +00001176 }
1177 }
1178
nethercote9313ac42004-07-06 21:54:20 +00001179 // Summary stats must come after rest of table, since we calculate them
1180 // during traversal. */
njn4f9c9342002-04-29 16:03:24 +00001181 VG_(sprintf)(buf, "summary: "
nethercote9313ac42004-07-06 21:54:20 +00001182 "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
njn4f9c9342002-04-29 16:03:24 +00001183 Ir_total.a, Ir_total.m1, Ir_total.m2,
1184 Dr_total.a, Dr_total.m1, Dr_total.m2,
1185 Dw_total.a, Dw_total.m1, Dw_total.m2);
1186 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1187 VG_(close)(fd);
1188}
1189
/* Number of characters needed to print 'n' in decimal with a comma between
 * each group of three digits (e.g. 1234567 -> "1,234,567" -> 9).
 *
 * Zero is printed as the single digit "0".  Without the special case below
 * w would stay 0 and the unsigned (w-1)/3 would wrap to a gigantic width.
 */
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (0 == w) w = 1;        // "0" still takes one digit
   return w + (w-1)/3;       // add space for commas
}
1199
/* Format the fixed-point value n/ex as a percentage, right-justified in a
 * field of at least field_width characters.
 *
 * n           : the percentage scaled by 'ex' (1205 with ex == 100 means
 *               12.05%); expected to be non-negative.
 * ex          : scale factor, a power of ten (callers in this file pass 10
 *               or 100); must be non-zero.
 * field_width : minimum width; shorter results are padded on the left with
 *               spaces, longer ones are left as they are.
 * buf         : output buffer; must be large enough for the result.
 *
 * The fractional part is zero-padded to the number of digits implied by
 * 'ex', so 1205/100 gives "12.05%" and not "12.5%".
 */
static
void percentify(Int n, Int ex, Int field_width, char buf[])
{
   int i, len, space, scale;

   /* Integer part and decimal point. */
   VG_(sprintf)(buf, "%d.", n / ex);
   len = VG_(strlen)(buf);

   /* Leading zeros of the fraction: one for every power of ten below 'ex'
      that the fraction does not reach. */
   for (scale = ex / 10; scale > 1 && (n % ex) < scale; scale /= 10)
      buf[len++] = '0';

   /* The fraction's significant digits and the percent sign. */
   VG_(sprintf)(buf + len, "%d%%", n % ex);
   len = VG_(strlen)(buf);

   space = field_width - len;
   if (space < 0) space = 0;     /* Allow for v. small field_width */
   i = len;

   /* Right justify in field: shift the text (NUL included) up by 'space',
      working backwards so nothing is overwritten, then fill the gap. */
   for (     ; i >= 0;    i--)  buf[i + space] = buf[i];
   for (i = 0; i < space; i++)  buf[i] = ' ';
}
1215
njn7d9f94d2003-04-22 21:41:40 +00001216void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001217{
nethercote9313ac42004-07-06 21:54:20 +00001218 static char buf1[128], buf2[128], buf3[128], fmt [128];
njn607adfc2003-09-30 14:15:44 +00001219
njn4f9c9342002-04-29 16:03:24 +00001220 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001221 ULong L2_total_m, L2_total_mr, L2_total_mw,
1222 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001223 Int l1, l2, l3;
1224 Int p;
1225
nethercote9313ac42004-07-06 21:54:20 +00001226 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001227
njn7cf0bd32002-06-08 13:36:03 +00001228 if (VG_(clo_verbosity) == 0)
1229 return;
1230
njn4f9c9342002-04-29 16:03:24 +00001231 /* I cache results. Use the I_refs value to determine the first column
1232 * width. */
njn607adfc2003-09-30 14:15:44 +00001233 l1 = ULong_width(Ir_total.a);
1234 l2 = ULong_width(Dr_total.a);
1235 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001236
njn607adfc2003-09-30 14:15:44 +00001237 /* Make format string, getting width right for numbers */
1238 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1239
1240 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1241 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1242 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001243
1244 p = 100;
1245
njn25e49d8e72002-09-23 09:36:25 +00001246 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001247 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1248 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1249
1250 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1251 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1252 VG_(message)(Vg_UserMsg, "");
1253
1254 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1255 * width of columns 2 & 3. */
1256 D_total.a = Dr_total.a + Dw_total.a;
1257 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1258 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1259
njn607adfc2003-09-30 14:15:44 +00001260 /* Make format string, getting width right for numbers */
1261 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001262
njn607adfc2003-09-30 14:15:44 +00001263 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1264 D_total.a, Dr_total.a, Dw_total.a);
1265 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1266 D_total.m1, Dr_total.m1, Dw_total.m1);
1267 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1268 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001269
1270 p = 10;
1271
njn25e49d8e72002-09-23 09:36:25 +00001272 if (0 == D_total.a) D_total.a = 1;
1273 if (0 == Dr_total.a) Dr_total.a = 1;
1274 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001275 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1276 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1277 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1278 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1279
1280 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1281 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1282 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1283 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1284 VG_(message)(Vg_UserMsg, "");
1285
1286 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001287
1288 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1289 L2_total_r = Dr_total.m1 + Ir_total.m1;
1290 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001291 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1292 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001293
njn4f9c9342002-04-29 16:03:24 +00001294 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1295 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1296 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001297 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1298 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001299
1300 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1301 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1302 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1303 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1304
1305
nethercote9313ac42004-07-06 21:54:20 +00001306 // Various stats
njn4f9c9342002-04-29 16:03:24 +00001307 if (VG_(clo_verbosity) > 1) {
nethercote9313ac42004-07-06 21:54:20 +00001308 int BB_lookups = full_debug_BBs + fn_debug_BBs +
njn4f9c9342002-04-29 16:03:24 +00001309 file_line_debug_BBs + no_debug_BBs;
1310
1311 VG_(message)(Vg_DebugMsg, "");
1312 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1313 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
nethercote9313ac42004-07-06 21:54:20 +00001314 VG_(message)(Vg_DebugMsg, "Distinct lines: %d", distinct_lines);
1315 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
njn4f9c9342002-04-29 16:03:24 +00001316 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1317 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1318 full_debug_BBs * 100 / BB_lookups,
1319 full_debug_BBs);
1320 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1321 file_line_debug_BBs * 100 / BB_lookups,
1322 file_line_debug_BBs);
1323 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
nethercote9313ac42004-07-06 21:54:20 +00001324 fn_debug_BBs * 100 / BB_lookups,
1325 fn_debug_BBs);
njn4f9c9342002-04-29 16:03:24 +00001326 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1327 no_debug_BBs * 100 / BB_lookups,
1328 no_debug_BBs);
1329 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
njn4f9c9342002-04-29 16:03:24 +00001330 }
njn25e49d8e72002-09-23 09:36:25 +00001331 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001332}
1333
nethercote9313ac42004-07-06 21:54:20 +00001334/*--------------------------------------------------------------------*/
1335/*--- Discarding BB info ---*/
1336/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001337
nethercote9313ac42004-07-06 21:54:20 +00001338// Called when a translation is invalidated due to code unloading.
njn25e49d8e72002-09-23 09:36:25 +00001339void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001340{
nethercote9313ac42004-07-06 21:54:20 +00001341 VgHashNode** prev_next_ptr;
1342 VgHashNode* bb_info;
njn4294fd42002-06-05 14:41:10 +00001343
nethercote9313ac42004-07-06 21:54:20 +00001344 if (0) VG_(printf)( "discard_basic_block_info: %p, %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001345
nethercote9313ac42004-07-06 21:54:20 +00001346 // Get BB info, remove from table, free BB info. Simple!
1347 bb_info = VG_(HT_get_node)(instr_info_table, a, &prev_next_ptr);
1348 sk_assert(NULL != bb_info);
1349 *prev_next_ptr = bb_info->next;
1350 VG_(free)(bb_info);
sewardj18d75132002-05-16 11:06:21 +00001351}
1352
1353/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001354/*--- Command line processing ---*/
1355/*--------------------------------------------------------------------*/
1356
nethercote9313ac42004-07-06 21:54:20 +00001357static void parse_cache_opt ( cache_t* cache, char* opt )
njn25e49d8e72002-09-23 09:36:25 +00001358{
nethercote9313ac42004-07-06 21:54:20 +00001359 int i = 0, i2, i3;
njn25e49d8e72002-09-23 09:36:25 +00001360
nethercote9313ac42004-07-06 21:54:20 +00001361 // Option argument looks like "65536,2,64".
1362 // Find commas, replace with NULs to make three independent
1363 // strings, then extract numbers, put NULs back. Yuck.
njn25e49d8e72002-09-23 09:36:25 +00001364 while (VG_(isdigit)(opt[i])) i++;
1365 if (',' == opt[i]) {
1366 opt[i++] = '\0';
1367 i2 = i;
1368 } else goto bad;
1369 while (VG_(isdigit)(opt[i])) i++;
1370 if (',' == opt[i]) {
1371 opt[i++] = '\0';
1372 i3 = i;
1373 } else goto bad;
1374 while (VG_(isdigit)(opt[i])) i++;
1375 if ('\0' != opt[i]) goto bad;
1376
nethercote9313ac42004-07-06 21:54:20 +00001377 cache->size = (Int)VG_(atoll)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001378 cache->assoc = (Int)VG_(atoll)(opt + i2);
1379 cache->line_size = (Int)VG_(atoll)(opt + i3);
1380
nethercote9313ac42004-07-06 21:54:20 +00001381 opt[i2-1] = ',';
1382 opt[i3-1] = ',';
njn25e49d8e72002-09-23 09:36:25 +00001383 return;
1384
1385 bad:
nethercote9313ac42004-07-06 21:54:20 +00001386 VG_(bad_option)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001387}
1388
1389Bool SK_(process_cmd_line_option)(Char* arg)
1390{
nethercote9313ac42004-07-06 21:54:20 +00001391 // 5 is length of "--I1="
njn39c86652003-05-21 10:13:39 +00001392 if (VG_CLO_STREQN(5, arg, "--I1="))
nethercote9313ac42004-07-06 21:54:20 +00001393 parse_cache_opt(&clo_I1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001394 else if (VG_CLO_STREQN(5, arg, "--D1="))
nethercote9313ac42004-07-06 21:54:20 +00001395 parse_cache_opt(&clo_D1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001396 else if (VG_CLO_STREQN(5, arg, "--L2="))
nethercote9313ac42004-07-06 21:54:20 +00001397 parse_cache_opt(&clo_L2_cache, &arg[5]);
njn25e49d8e72002-09-23 09:36:25 +00001398 else
1399 return False;
1400
1401 return True;
1402}
1403
njn3e884182003-04-15 13:03:23 +00001404void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001405{
njn3e884182003-04-15 13:03:23 +00001406 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001407" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1408" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001409" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1410 );
1411}
1412
1413void SK_(print_debug_usage)(void)
1414{
1415 VG_(printf)(
1416" (none)\n"
1417 );
njn25e49d8e72002-09-23 09:36:25 +00001418}
1419
1420/*--------------------------------------------------------------------*/
1421/*--- Setup ---*/
1422/*--------------------------------------------------------------------*/
1423
njn810086f2002-11-14 12:42:47 +00001424void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001425{
njn13f02932003-04-30 20:23:58 +00001426 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00001427
njn810086f2002-11-14 12:42:47 +00001428 VG_(details_name) ("Cachegrind");
1429 VG_(details_version) (NULL);
1430 VG_(details_description) ("an I1/D1/L2 cache profiler");
1431 VG_(details_copyright_author)(
nethercote08fa9a72004-07-16 17:44:00 +00001432 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote et al.");
nethercote421281e2003-11-20 16:20:55 +00001433 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00001434 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001435
njn810086f2002-11-14 12:42:47 +00001436 VG_(needs_basic_block_discards)();
1437 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001438
1439 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
nethercote9313ac42004-07-06 21:54:20 +00001440 VG_(register_compact_helper)((Addr) & log_1I_1Dr_cache_access);
1441 VG_(register_compact_helper)((Addr) & log_1I_1Dw_cache_access);
njn25e49d8e72002-09-23 09:36:25 +00001442 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00001443
njn99ccf082003-09-30 13:51:23 +00001444 /* Get working directory */
1445 sk_assert( VG_(getcwd_alloc)(&base_dir) );
1446
njn13f02932003-04-30 20:23:58 +00001447 /* Block is big enough for dir name + cachegrind.out.<pid> */
1448 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
1449 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
1450 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00001451 VG_(free)(base_dir);
nethercote9313ac42004-07-06 21:54:20 +00001452
1453 instr_info_table = VG_(HT_construct)();
njn25e49d8e72002-09-23 09:36:25 +00001454}
1455
1456void SK_(post_clo_init)(void)
1457{
1458 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00001459
njn25e49d8e72002-09-23 09:36:25 +00001460 get_caches(&I1c, &D1c, &L2c);
1461
1462 cachesim_I1_initcache(I1c);
1463 cachesim_D1_initcache(D1c);
1464 cachesim_L2_initcache(L2c);
1465
nethercote9313ac42004-07-06 21:54:20 +00001466 VGP_(register_profile_event)(VgpGetLineCC, "get-lineCC");
njn25e49d8e72002-09-23 09:36:25 +00001467 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1468 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
njn25e49d8e72002-09-23 09:36:25 +00001469}
1470
fitzhardinge98abfc72003-12-16 02:05:15 +00001471VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
1472
njn25e49d8e72002-09-23 09:36:25 +00001473/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00001474/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00001475/*--------------------------------------------------------------------*/