blob: 555ba0bf0241088d4dc567d3068a9e9f1af95493 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
/*--- Cachegrind: everything but the simulation itself.            ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
nethercotebb1c9912004-01-04 16:43:23 +000011 Copyright (C) 2002-2004 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000012 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njn25e49d8e72002-09-23 09:36:25 +000032#include "vg_skin.h"
33//#include "vg_profile.c"
34
/* Description of one cache, as consumed by the cache simulation. */
typedef struct {
   int size;        /* bytes */
   int assoc;       /* associativity */
   int line_size;   /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000041
nethercote27fc1da2004-01-04 16:56:57 +000042#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000043
njn25e49d8e72002-09-23 09:36:25 +000044/*------------------------------------------------------------*/
45/*--- Constants ---*/
46/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000047
/* Upper bound on the length of one x86 instruction (per the IA-32 software
   developer's manual, vol. 2). */
#define MAX_x86_INSTR_SIZE    16

/* Data sizes larger than this are clamped to it during instrumentation. */
#define MIN_LINE_SIZE         16

/* Capacities of the file-name and function-name buffers. */
#define FILE_LEN              256
#define FN_LEN                256
njn7cf0bd32002-06-08 13:36:03 +000052
53/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000054/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000055/*------------------------------------------------------------*/
56
njn25e49d8e72002-09-23 09:36:25 +000057typedef
58 enum {
nethercote9313ac42004-07-06 21:54:20 +000059 VgpGetLineCC = VgpFini+1,
njn25e49d8e72002-09-23 09:36:25 +000060 VgpCacheSimulate,
61 VgpCacheResults
62 }
nethercote7cc9c232004-01-21 15:08:04 +000063 VgpToolCC;
sewardj07133bf2002-06-13 10:25:56 +000064
njn4f9c9342002-04-29 16:03:24 +000065/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000066/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000067/*------------------------------------------------------------*/
68
69typedef struct _CC CC;
70struct _CC {
71 ULong a;
72 ULong m1;
73 ULong m2;
74};
75
nethercote9313ac42004-07-06 21:54:20 +000076//------------------------------------------------------------
77// Primary data structure #1: CC table
78// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
79// - hash(file, hash(fn, hash(line+CC)))
80// - Each hash table is separately chained.
81// - The array sizes below work fairly well for Konqueror.
82// - Lookups done by instr_addr, which is converted immediately to a source
83// location.
84// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +000085
/* Bucket counts for the three nested hash levels: file, function, line. */
#define N_FILE_ENTRIES     251
#define N_FN_ENTRIES        53
#define N_LINE_ENTRIES      37
njn4f9c9342002-04-29 16:03:24 +000089
nethercote9313ac42004-07-06 21:54:20 +000090typedef struct _lineCC lineCC;
91struct _lineCC {
92 Int line;
93 CC Ir;
94 CC Dr;
95 CC Dw;
96 lineCC* next;
njn4f9c9342002-04-29 16:03:24 +000097};
98
nethercote9313ac42004-07-06 21:54:20 +000099typedef struct _fnCC fnCC;
100struct _fnCC {
101 Char* fn;
102 fnCC* next;
103 lineCC* lines[N_LINE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000104};
105
nethercote9313ac42004-07-06 21:54:20 +0000106typedef struct _fileCC fileCC;
107struct _fileCC {
108 Char* file;
109 fileCC* next;
110 fnCC* fns[N_FN_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000111};
112
nethercote9313ac42004-07-06 21:54:20 +0000113// Top level of CC table. Auto-zeroed.
114static fileCC *CC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000115
//------------------------------------------------------------
// Primary data structure #2: Instr-info table
// - Holds the cached info about each instr that is used for simulation.
// - table(BB_start_addr, list(instr_info))
// - For each BB, each instr_info in the list holds info about the
//   instruction (instr_size, instr_addr, etc), plus a pointer to its line
//   CC.  This node is what's passed to the simulation function.
// - When BBs are discarded the relevant list(instr_info) is freed.
124
125typedef struct _instr_info instr_info;
126struct _instr_info {
127 Addr instr_addr;
128 UChar instr_size;
129 UChar data_size;
130 struct _lineCC* parent; // parent line-CC
131};
132
133typedef struct _BB_info BB_info;
134struct _BB_info {
135 BB_info* next; // next field
136 Addr BB_addr; // key
137 Int n_instrs;
138 instr_info instrs[0];
139};
140
141VgHashTable instr_info_table; // hash(Addr, BB_info)
142
143//------------------------------------------------------------
144// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000145static Int distinct_files = 0;
146static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000147static Int distinct_lines = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000148static Int distinct_instrs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000149
sewardj4f29ddf2002-05-03 22:29:04 +0000150static Int full_debug_BBs = 0;
151static Int file_line_debug_BBs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000152static Int fn_debug_BBs = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000153static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000154
sewardj4f29ddf2002-05-03 22:29:04 +0000155static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000156
nethercote9313ac42004-07-06 21:54:20 +0000157/*------------------------------------------------------------*/
158/*--- CC table operations ---*/
159/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000160
nethercote9313ac42004-07-06 21:54:20 +0000161static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
162 Char fn[FN_LEN], Int* line)
njn4f9c9342002-04-29 16:03:24 +0000163{
nethercote9313ac42004-07-06 21:54:20 +0000164 Bool found_file_line = VG_(get_filename_linenum)(instr_addr, file,
165 FILE_LEN, line);
166 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000167
nethercote9313ac42004-07-06 21:54:20 +0000168 if (!found_file_line) {
169 VG_(strcpy)(file, "???");
170 *line = 0;
171 }
172 if (!found_fn) {
173 VG_(strcpy)(fn, "???");
174 }
175 if (found_file_line) {
176 if (found_fn) full_debug_BBs++;
177 else file_line_debug_BBs++;
178 } else {
179 if (found_fn) fn_debug_BBs++;
180 else no_debug_BBs++;
njn4f9c9342002-04-29 16:03:24 +0000181 }
182}
183
njn4f9c9342002-04-29 16:03:24 +0000184static UInt hash(Char *s, UInt table_size)
185{
nethercote9313ac42004-07-06 21:54:20 +0000186 const int hash_constant = 256;
187 int hash_value = 0;
188 for ( ; *s; s++)
189 hash_value = (hash_constant * hash_value + *s) % table_size;
190 return hash_value;
njn4f9c9342002-04-29 16:03:24 +0000191}
192
nethercote9313ac42004-07-06 21:54:20 +0000193static __inline__
194fileCC* new_fileCC(Char filename[], fileCC* next)
nethercote09d853e2004-01-21 16:12:55 +0000195{
nethercote9313ac42004-07-06 21:54:20 +0000196 // Using calloc() zeroes the fns[] array
197 fileCC* cc = VG_(calloc)(1, sizeof(fileCC));
198 cc->file = VG_(strdup)(filename);
199 cc->next = next;
200 return cc;
nethercote09d853e2004-01-21 16:12:55 +0000201}
202
nethercote9313ac42004-07-06 21:54:20 +0000203static __inline__
204fnCC* new_fnCC(Char fn[], fnCC* next)
njn4f9c9342002-04-29 16:03:24 +0000205{
nethercote9313ac42004-07-06 21:54:20 +0000206 // Using calloc() zeroes the lines[] array
207 fnCC* cc = VG_(calloc)(1, sizeof(fnCC));
208 cc->fn = VG_(strdup)(fn);
209 cc->next = next;
210 return cc;
211}
njn4f9c9342002-04-29 16:03:24 +0000212
nethercote9313ac42004-07-06 21:54:20 +0000213static __inline__
214lineCC* new_lineCC(Int line, lineCC* next)
215{
216 // Using calloc() zeroes the Ir/Dr/Dw CCs and the instrs[] array
217 lineCC* cc = VG_(calloc)(1, sizeof(lineCC));
218 cc->line = line;
219 cc->next = next;
220 return cc;
221}
njn4f9c9342002-04-29 16:03:24 +0000222
nethercote9313ac42004-07-06 21:54:20 +0000223static __inline__
224instr_info* new_instr_info(Addr instr_addr, lineCC* parent, instr_info* next)
225{
226 // Using calloc() zeroes instr_size and data_size
227 instr_info* ii = VG_(calloc)(1, sizeof(instr_info));
228 ii->instr_addr = instr_addr;
229 ii->parent = parent;
230 return ii;
231}
232
233// Do a three step traversal: by file, then fn, then line.
234// In all cases prepends new nodes to their chain. Returns a pointer to the
235// line node, creates a new one if necessary.
236static lineCC* get_lineCC(Addr orig_addr)
237{
238 fileCC *curr_fileCC;
239 fnCC *curr_fnCC;
240 lineCC *curr_lineCC;
241 Char file[FILE_LEN], fn[FN_LEN];
242 Int line;
243 UInt file_hash, fn_hash, line_hash;
244
245 get_debug_info(orig_addr, file, fn, &line);
246
247 VGP_PUSHCC(VgpGetLineCC);
248
249 // level 1
250 file_hash = hash(file, N_FILE_ENTRIES);
251 curr_fileCC = CC_table[file_hash];
252 while (NULL != curr_fileCC && !VG_STREQ(file, curr_fileCC->file)) {
253 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +0000254 }
nethercote9313ac42004-07-06 21:54:20 +0000255 if (NULL == curr_fileCC) {
256 CC_table[file_hash] = curr_fileCC =
257 new_fileCC(file, CC_table[file_hash]);
njn4f9c9342002-04-29 16:03:24 +0000258 distinct_files++;
259 }
260
nethercote9313ac42004-07-06 21:54:20 +0000261 // level 2
262 fn_hash = hash(fn, N_FN_ENTRIES);
263 curr_fnCC = curr_fileCC->fns[fn_hash];
264 while (NULL != curr_fnCC && !VG_STREQ(fn, curr_fnCC->fn)) {
265 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +0000266 }
nethercote9313ac42004-07-06 21:54:20 +0000267 if (NULL == curr_fnCC) {
268 curr_fileCC->fns[fn_hash] = curr_fnCC =
269 new_fnCC(fn, curr_fileCC->fns[fn_hash]);
njn4f9c9342002-04-29 16:03:24 +0000270 distinct_fns++;
271 }
272
nethercote9313ac42004-07-06 21:54:20 +0000273 // level 3
274 line_hash = line % N_LINE_ENTRIES;
275 curr_lineCC = curr_fnCC->lines[line_hash];
276 while (NULL != curr_lineCC && line != curr_lineCC->line) {
277 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +0000278 }
nethercote9313ac42004-07-06 21:54:20 +0000279 if (NULL == curr_lineCC) {
280 curr_fnCC->lines[line_hash] = curr_lineCC =
281 new_lineCC(line, curr_fnCC->lines[line_hash]);
282 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +0000283 }
nethercote9313ac42004-07-06 21:54:20 +0000284
285 VGP_POPCC(VgpGetLineCC);
286 return curr_lineCC;
njn4f9c9342002-04-29 16:03:24 +0000287}
288
289/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000290/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000291/*------------------------------------------------------------*/
292
njn25e49d8e72002-09-23 09:36:25 +0000293static __attribute__ ((regparm (1)))
nethercote9313ac42004-07-06 21:54:20 +0000294void log_1I_0D_cache_access(instr_info* n)
njn25e49d8e72002-09-23 09:36:25 +0000295{
296 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000297 // n, n->instr_addr, n->instr_size)
njn25e49d8e72002-09-23 09:36:25 +0000298 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000299 cachesim_I1_doref(n->instr_addr, n->instr_size,
300 &n->parent->Ir.m1, &n->parent->Ir.m2);
301 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000302 VGP_POPCC(VgpCacheSimulate);
303}
304
nethercote9313ac42004-07-06 21:54:20 +0000305static __attribute__ ((regparm (2)))
306void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000307{
nethercote9313ac42004-07-06 21:54:20 +0000308 //VG_(printf)("1I_1Dr: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
309 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000310 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000311 cachesim_I1_doref(n->instr_addr, n->instr_size,
312 &n->parent->Ir.m1, &n->parent->Ir.m2);
313 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000314
nethercote9313ac42004-07-06 21:54:20 +0000315 cachesim_D1_doref(data_addr, n->data_size,
316 &n->parent->Dr.m1, &n->parent->Dr.m2);
317 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000318 VGP_POPCC(VgpCacheSimulate);
319}
320
nethercote9313ac42004-07-06 21:54:20 +0000321static __attribute__ ((regparm (2)))
322void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000323{
nethercote9313ac42004-07-06 21:54:20 +0000324 //VG_(printf)("1I_1Dw: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
325 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000326 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000327 cachesim_I1_doref(n->instr_addr, n->instr_size,
328 &n->parent->Ir.m1, &n->parent->Ir.m2);
329 n->parent->Ir.a++;
330
331 cachesim_D1_doref(data_addr, n->data_size,
332 &n->parent->Dw.m1, &n->parent->Dw.m2);
333 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000334 VGP_POPCC(VgpCacheSimulate);
335}
336
nethercote9313ac42004-07-06 21:54:20 +0000337static __attribute__ ((regparm (3)))
338void log_1I_2D_cache_access(instr_info* n, Addr data_addr1, Addr data_addr2)
njn25e49d8e72002-09-23 09:36:25 +0000339{
340 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000341 // n, n->instr_addr, n->instr_size, data_addr1, data_addr2, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000342 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000343 cachesim_I1_doref(n->instr_addr, n->instr_size,
344 &n->parent->Ir.m1, &n->parent->Ir.m2);
345 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000346
nethercote9313ac42004-07-06 21:54:20 +0000347 cachesim_D1_doref(data_addr1, n->data_size,
348 &n->parent->Dr.m1, &n->parent->Dr.m2);
349 n->parent->Dr.a++;
350 cachesim_D1_doref(data_addr2, n->data_size,
351 &n->parent->Dw.m1, &n->parent->Dw.m2);
352 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000353 VGP_POPCC(VgpCacheSimulate);
354}
355
nethercote9313ac42004-07-06 21:54:20 +0000356/*------------------------------------------------------------*/
357/*--- Instrumentation ---*/
358/*------------------------------------------------------------*/
359
360BB_info* get_BB_info(UCodeBlock* cb_in, Addr orig_addr, Bool* bb_seen_before)
361{
362 Int i, n_instrs;
363 UInstr* u_in;
364 BB_info* bb_info;
365 VgHashNode** dummy;
366
367 // Count number of x86 instrs in BB
368 n_instrs = 1; // start at 1 because last x86 instr has no INCEIP
369 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
370 u_in = VG_(get_instr)(cb_in, i);
371 if (INCEIP == u_in->opcode) n_instrs++;
372 }
373
374 // Get the BB_info
375 bb_info = (BB_info*)VG_(HT_get_node)(instr_info_table, orig_addr, &dummy);
376 *bb_seen_before = ( NULL == bb_info ? False : True );
377 if (*bb_seen_before) {
378 // BB must have been translated before, but flushed from the TT
379 sk_assert(bb_info->n_instrs == n_instrs );
380 BB_retranslations++;
381 } else {
382 // BB never translated before (at this address, at least; could have
383 // been unloaded and then reloaded elsewhere in memory)
384 bb_info =
385 VG_(calloc)(1, sizeof(BB_info) + n_instrs*sizeof(instr_info));
386 bb_info->BB_addr = orig_addr;
387 bb_info->n_instrs = n_instrs;
388 VG_(HT_add_node)( instr_info_table, (VgHashNode*)bb_info );
389 distinct_instrs++;
390 }
391 return bb_info;
392}
393
394void do_details( instr_info* n, Bool bb_seen_before,
395 Addr instr_addr, Int instr_size, Int data_size )
396{
397 lineCC* parent = get_lineCC(instr_addr);
398 if (bb_seen_before) {
399 sk_assert( n->instr_addr == instr_addr );
400 sk_assert( n->instr_size == instr_size );
401 sk_assert( n->data_size == data_size );
402 // Don't assert that (n->parent == parent)... it's conceivable that
403 // the debug info might change; the other asserts should be enough to
404 // detect anything strange.
405 } else {
406 n->instr_addr = instr_addr;
407 n->instr_size = instr_size;
408 n->data_size = data_size;
409 n->parent = parent;
410 }
411}
412
413Bool is_valid_data_size(Int data_size)
414{
415 return (4 == data_size || 2 == data_size || 1 == data_size ||
416 8 == data_size || 10 == data_size || MIN_LINE_SIZE == data_size);
417}
418
419// Instrumentation for the end of each x86 instruction.
420void end_of_x86_instr(UCodeBlock* cb, instr_info* i_node, Bool bb_seen_before,
421 UInt instr_addr, UInt instr_size, UInt data_size,
422 Int t_read, Int t_read_addr,
423 Int t_write, Int t_write_addr)
424{
425 Addr helper;
426 Int argc;
427 Int t_CC_addr,
428 t_data_addr1 = INVALID_TEMPREG,
429 t_data_addr2 = INVALID_TEMPREG;
430
431 sk_assert(instr_size >= 1 &&
432 instr_size <= MAX_x86_INSTR_SIZE);
433
434#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
435#define INV(qqt) (INVALID_TEMPREG == (qqt))
436
437 // Work out what kind of x86 instruction it is
438 if (!IS_(read) && !IS_(write)) {
439 sk_assert( 0 == data_size );
440 sk_assert(INV(t_read) && INV(t_write));
441 helper = (Addr) & log_1I_0D_cache_access;
442 argc = 1;
443
444 } else if (IS_(read) && !IS_(write)) {
445 sk_assert( is_valid_data_size(data_size) );
446 sk_assert(!INV(t_read) && INV(t_write));
447 helper = (Addr) & log_1I_1Dr_cache_access;
448 argc = 2;
449 t_data_addr1 = t_read_addr;
450
451 } else if (!IS_(read) && IS_(write)) {
452 sk_assert( is_valid_data_size(data_size) );
453 sk_assert(INV(t_read) && !INV(t_write));
454 helper = (Addr) & log_1I_1Dw_cache_access;
455 argc = 2;
456 t_data_addr1 = t_write_addr;
457
458 } else {
459 sk_assert(IS_(read) && IS_(write));
460 sk_assert( is_valid_data_size(data_size) );
461 sk_assert(!INV(t_read) && !INV(t_write));
462 if (t_read == t_write) {
463 helper = (Addr) & log_1I_1Dr_cache_access;
464 argc = 2;
465 t_data_addr1 = t_read_addr;
466 } else {
467 helper = (Addr) & log_1I_2D_cache_access;
468 argc = 3;
469 t_data_addr1 = t_read_addr;
470 t_data_addr2 = t_write_addr;
471 }
472 }
473#undef IS_
474 // Setup 1st arg: CC addr
475 do_details( i_node, bb_seen_before, instr_addr, instr_size, data_size );
476 t_CC_addr = newTemp(cb);
477 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
478 uLiteral(cb, (Addr)i_node);
479
480 // Call the helper
481 if (1 == argc)
482 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
483 else if (2 == argc)
484 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
485 TempReg, t_data_addr1);
486 else if (3 == argc)
487 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
488 TempReg, t_data_addr1,
489 TempReg, t_data_addr2);
490 else
491 VG_(skin_panic)("argc... not 1 or 2 or 3?");
492
493 uCCall(cb, helper, argc, argc, False);
494}
495
njn25e49d8e72002-09-23 09:36:25 +0000496UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
497{
njn4f9c9342002-04-29 16:03:24 +0000498 UCodeBlock* cb;
njn4f9c9342002-04-29 16:03:24 +0000499 UInstr* u_in;
nethercote9313ac42004-07-06 21:54:20 +0000500 Int i, bb_info_i;
501 BB_info* bb_info;
502 Bool bb_seen_before = False;
503 Int t_read_addr, t_write_addr, t_read, t_write;
njn25e49d8e72002-09-23 09:36:25 +0000504 Addr x86_instr_addr = orig_addr;
nethercote9313ac42004-07-06 21:54:20 +0000505 UInt x86_instr_size, data_size = 0;
506 Bool instrumented_Jcc = False;
njn4f9c9342002-04-29 16:03:24 +0000507
nethercote9313ac42004-07-06 21:54:20 +0000508 bb_info = get_BB_info(cb_in, orig_addr, &bb_seen_before);
509 bb_info_i = 0;
njn4f9c9342002-04-29 16:03:24 +0000510
njn810086f2002-11-14 12:42:47 +0000511 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000512
nethercote9313ac42004-07-06 21:54:20 +0000513 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000514
njn810086f2002-11-14 12:42:47 +0000515 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
516 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000517
nethercote9313ac42004-07-06 21:54:20 +0000518 // We want to instrument each x86 instruction with a call to the
519 // appropriate simulation function, which depends on whether the
520 // instruction does memory data reads/writes. x86 instructions can
521 // end in three ways, and this is how they are instrumented:
522 //
523 // 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
524 // 2. UCode, JMP --> UCode, Instrumentation, JMP
525 // 3. UCode, Jcc, JMP --> UCode, Instrumentation, Jcc, JMP
526 //
527 // The last UInstr in a BB is always a JMP. Jccs, when they appear,
528 // are always second last. This is checked with assertions.
529 // Instrumentation must go before any jumps. (JIFZ is the exception;
530 // if a JIFZ succeeds, no simulation is done for the instruction.)
531 //
532 // x86 instruction sizes are obtained from INCEIPs (for case 1) or
533 // from .extra4b field of the final JMP (for case 2 & 3).
534
535 if (instrumented_Jcc) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000536
537 switch (u_in->opcode) {
njn4f9c9342002-04-29 16:03:24 +0000538
nethercote9313ac42004-07-06 21:54:20 +0000539 // For memory-ref instrs, copy the data_addr into a temporary to be
540 // passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000541 case LOAD:
nethercote9313ac42004-07-06 21:54:20 +0000542 case SSE3ag_MemRd_RegWr:
njn25e49d8e72002-09-23 09:36:25 +0000543 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000544 t_read_addr = newTemp(cb);
545 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
546 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000547 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000548 break;
549
550 case FPU_R:
nethercote9313ac42004-07-06 21:54:20 +0000551 case MMX2_MemRd:
njn25e49d8e72002-09-23 09:36:25 +0000552 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000553 t_read_addr = newTemp(cb);
554 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
nethercote9313ac42004-07-06 21:54:20 +0000555 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000556 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000557 break;
thughes96b466a2004-03-15 16:43:58 +0000558 break;
559
560 case MMX2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000561 case SSE2a_MemRd:
562 case SSE2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000563 case SSE3a_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000564 case SSE3a1_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000565 t_read = u_in->val3;
566 t_read_addr = newTemp(cb);
567 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
568 data_size = u_in->size;
569 VG_(copy_UInstr)(cb, u_in);
570 break;
571
nethercote9313ac42004-07-06 21:54:20 +0000572 // Note that we must set t_write_addr even for mod instructions;
573 // That's how the code above determines whether it does a write.
574 // Without it, it would think a mod instruction is a read.
575 // As for the MOV, if it's a mod instruction it's redundant, but it's
576 // not expensive and mod instructions are rare anyway. */
njn4f9c9342002-04-29 16:03:24 +0000577 case STORE:
578 case FPU_W:
nethercote9313ac42004-07-06 21:54:20 +0000579 case MMX2_MemWr:
njn25e49d8e72002-09-23 09:36:25 +0000580 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000581 t_write_addr = newTemp(cb);
582 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000583 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000584 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000585 break;
586
njn21f805d2003-08-25 16:15:40 +0000587 case SSE2a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000588 case SSE3a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000589 t_write = u_in->val3;
590 t_write_addr = newTemp(cb);
591 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000592 data_size = u_in->size;
njn21f805d2003-08-25 16:15:40 +0000593 VG_(copy_UInstr)(cb, u_in);
594 break;
njn25e49d8e72002-09-23 09:36:25 +0000595
nethercote9313ac42004-07-06 21:54:20 +0000596 // INCEIP: insert instrumentation
njn25e49d8e72002-09-23 09:36:25 +0000597 case INCEIP:
598 x86_instr_size = u_in->val1;
599 goto instrument_x86_instr;
600
nethercote9313ac42004-07-06 21:54:20 +0000601 // JMP: insert instrumentation if the first JMP
njn25e49d8e72002-09-23 09:36:25 +0000602 case JMP:
nethercote9313ac42004-07-06 21:54:20 +0000603 if (instrumented_Jcc) {
njne427a662002-10-02 11:08:25 +0000604 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000605 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000606 VG_(copy_UInstr)(cb, u_in);
nethercote9313ac42004-07-06 21:54:20 +0000607 instrumented_Jcc = False; // rest
njn25e49d8e72002-09-23 09:36:25 +0000608 break;
njn25e49d8e72002-09-23 09:36:25 +0000609 } else {
nethercote9313ac42004-07-06 21:54:20 +0000610 // The first JMP... instrument.
611 if (CondAlways != u_in->cond) {
612 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
613 instrumented_Jcc = True;
njn25e49d8e72002-09-23 09:36:25 +0000614 } else {
nethercote9313ac42004-07-06 21:54:20 +0000615 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000616 }
nethercote9313ac42004-07-06 21:54:20 +0000617 // Get x86 instr size from final JMP.
618 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
619 goto instrument_x86_instr;
njn25e49d8e72002-09-23 09:36:25 +0000620 }
621
nethercote9313ac42004-07-06 21:54:20 +0000622 // Code executed at the end of each x86 instruction.
623 instrument_x86_instr:
624 // Large (eg. 28B, 108B, 512B) data-sized instructions will be
625 // done inaccurately but they're very rare and this avoids
626 // errors from hitting more than two cache lines in the
627 // simulation.
628 if (data_size > MIN_LINE_SIZE) data_size = MIN_LINE_SIZE;
njn25e49d8e72002-09-23 09:36:25 +0000629
nethercote9313ac42004-07-06 21:54:20 +0000630 end_of_x86_instr(cb, &bb_info->instrs[ bb_info_i ], bb_seen_before,
631 x86_instr_addr, x86_instr_size, data_size,
632 t_read, t_read_addr, t_write, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000633
nethercote9313ac42004-07-06 21:54:20 +0000634 // Copy original UInstr (INCEIP or JMP)
njn4ba5a792002-09-30 10:23:54 +0000635 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000636
nethercote9313ac42004-07-06 21:54:20 +0000637 // Update loop state for next x86 instr
638 bb_info_i++;
njn25e49d8e72002-09-23 09:36:25 +0000639 x86_instr_addr += x86_instr_size;
nethercote9313ac42004-07-06 21:54:20 +0000640 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
641 data_size = 0;
njn4f9c9342002-04-29 16:03:24 +0000642 break;
643
644 default:
njn4ba5a792002-09-30 10:23:54 +0000645 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000646 break;
647 }
648 }
649
nethercote9313ac42004-07-06 21:54:20 +0000650 // BB address should be the same as the first instruction's address.
651 sk_assert(bb_info->BB_addr == bb_info->instrs[0].instr_addr );
652 sk_assert(bb_info_i == bb_info->n_instrs);
njn4f9c9342002-04-29 16:03:24 +0000653
njn4ba5a792002-09-30 10:23:54 +0000654 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000655 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000656
657#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000658}
659
660/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000661/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000662/*------------------------------------------------------------*/
663
njn25e49d8e72002-09-23 09:36:25 +0000664#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
665
666static cache_t clo_I1_cache = UNDEFINED_CACHE;
667static cache_t clo_D1_cache = UNDEFINED_CACHE;
668static cache_t clo_L2_cache = UNDEFINED_CACHE;
669
// All CPUID info taken from sandpile.org/a32/cpuid.htm.
// Probably only works for Intel and AMD chips, and probably only for some of
// them.
njn7cf0bd32002-06-08 13:36:03 +0000673
sewardj07133bf2002-06-13 10:25:56 +0000674static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +0000675{
676 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +0000677 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +0000678 actual_size);
njn7cf0bd32002-06-08 13:36:03 +0000679 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000680 " Simulating a %d KB cache with %d B lines",
681 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +0000682}
683
684/* Intel method is truly wretched. We have to do an insane indexing into an
685 * array of pre-defined configurations for various parts of the memory
686 * hierarchy.
687 */
688static
sewardj07133bf2002-06-13 10:25:56 +0000689Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000690{
sewardj07133bf2002-06-13 10:25:56 +0000691 UChar info[16];
692 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +0000693 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +0000694
695 if (level < 2) {
696 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000697 "warning: CPUID level < 2 for Intel processor (%d)",
698 level);
njn7cf0bd32002-06-08 13:36:03 +0000699 return -1;
700 }
701
thughes4ee64962004-06-16 20:51:45 +0000702 VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
703 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +0000704 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
705 info[0] = 0x0; /* reset AL */
706
707 if (0 != trials) {
708 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000709 "warning: non-zero CPUID trials for Intel processor (%d)",
710 trials);
njn7cf0bd32002-06-08 13:36:03 +0000711 return -1;
712 }
713
714 for (i = 0; i < 16; i++) {
715
716 switch (info[i]) {
717
718 case 0x0: /* ignore zeros */
719 break;
720
njn25e49d8e72002-09-23 09:36:25 +0000721 /* TLB info, ignore */
722 case 0x01: case 0x02: case 0x03: case 0x04:
723 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +0000724 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +0000725 break;
726
727 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
728 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000729 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000730
731 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
732 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000733 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000734
njn25e49d8e72002-09-23 09:36:25 +0000735 /* IA-64 info -- panic! */
736 case 0x10: case 0x15: case 0x1a:
737 case 0x88: case 0x89: case 0x8a: case 0x8d:
738 case 0x90: case 0x96: case 0x9b:
nethercote9313ac42004-07-06 21:54:20 +0000739 VG_(skin_panic)("IA-64 cache detected?!");
njn25e49d8e72002-09-23 09:36:25 +0000740
njn7cf0bd32002-06-08 13:36:03 +0000741 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +0000742 VG_(message)(Vg_DebugMsg,
743 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +0000744 break;
745
njn25e49d8e72002-09-23 09:36:25 +0000746 /* These are sectored, whatever that means */
747 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
748 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
749
750 /* If a P6 core, this means "no L2 cache".
751 If a P4 core, this means "no L3 cache".
752 We don't know what core it is, so don't issue a warning. To detect
753 a missing L2 cache, we use 'L2_found'. */
754 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +0000755 break;
756
njn25e49d8e72002-09-23 09:36:25 +0000757 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
758 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
759 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
760 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
761 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000762
763 /* These are sectored, whatever that means */
764 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
765 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
766 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
767
768 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
769 * conversion to byte size is a total guess; treat the 12K and 16K
770 * cases the same since the cache byte size must be a power of two for
771 * everything to work!. Also guessing 32 bytes for the line size...
772 */
773 case 0x70: /* 12K micro-ops, 8-way */
774 *I1c = (cache_t) { 16, 8, 32 };
775 micro_ops_warn(12, 16, 32);
776 break;
777 case 0x71: /* 16K micro-ops, 8-way */
778 *I1c = (cache_t) { 16, 8, 32 };
779 micro_ops_warn(16, 16, 32);
780 break;
781 case 0x72: /* 32K micro-ops, 8-way */
782 *I1c = (cache_t) { 32, 8, 32 };
783 micro_ops_warn(32, 32, 32);
784 break;
785
njn25e49d8e72002-09-23 09:36:25 +0000786 /* These are sectored, whatever that means */
787 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
788 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
789 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
790 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
791 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000792
njn25e49d8e72002-09-23 09:36:25 +0000793 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
794 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
795 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
796 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
797 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +0000798 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
799 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000800
801 default:
802 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000803 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +0000804 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +0000805 break;
806 }
807 }
njn25e49d8e72002-09-23 09:36:25 +0000808
809 if (!L2_found)
810 VG_(message)(Vg_DebugMsg,
811 "warning: L2 cache not installed, ignore L2 results.");
812
njn7cf0bd32002-06-08 13:36:03 +0000813 return 0;
814}
815
816/* AMD method is straightforward, just extract appropriate bits from the
817 * result registers.
818 *
819 * Bits, for D1 and I1:
820 * 31..24 data L1 cache size in KBs
821 * 23..16 data L1 cache associativity (FFh=full)
822 * 15.. 8 data L1 cache lines per tag
823 * 7.. 0 data L1 cache line size in bytes
824 *
825 * Bits, for L2:
826 * 31..16 unified L2 cache size in KBs
827 * 15..12 unified L2 cache associativity (0=off, FFh=full)
828 * 11.. 8 unified L2 cache lines per tag
829 * 7.. 0 unified L2 cache line size in bytes
830 *
831 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
832 * upon this information. (Whatever that means -- njn)
833 *
njn25e49d8e72002-09-23 09:36:25 +0000834 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
835 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
836 * so we detect that.
837 *
njn7cf0bd32002-06-08 13:36:03 +0000838 * Returns 0 on success, non-zero on failure.
839 */
sewardj07133bf2002-06-13 10:25:56 +0000840static
841Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000842{
sewardj05bcdcb2003-05-18 10:05:38 +0000843 UInt ext_level;
thughes4ee64962004-06-16 20:51:45 +0000844 UInt dummy, model;
845 UInt I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +0000846
thughes4ee64962004-06-16 20:51:45 +0000847 VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000848
849 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
850 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000851 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
852 ext_level);
njn7cf0bd32002-06-08 13:36:03 +0000853 return -1;
854 }
855
thughes4ee64962004-06-16 20:51:45 +0000856 VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
857 VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000858
thughes4ee64962004-06-16 20:51:45 +0000859 VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
njn25e49d8e72002-09-23 09:36:25 +0000860
861 /* Check for Duron bug */
862 if (model == 0x630) {
863 VG_(message)(Vg_UserMsg,
864 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
865 L2i = (64 << 16) | (L2i & 0xffff);
866 }
867
njn7cf0bd32002-06-08 13:36:03 +0000868 D1c->size = (D1i >> 24) & 0xff;
869 D1c->assoc = (D1i >> 16) & 0xff;
870 D1c->line_size = (D1i >> 0) & 0xff;
871
872 I1c->size = (I1i >> 24) & 0xff;
873 I1c->assoc = (I1i >> 16) & 0xff;
874 I1c->line_size = (I1i >> 0) & 0xff;
875
876 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
877 L2c->assoc = (L2i >> 12) & 0xf;
878 L2c->line_size = (L2i >> 0) & 0xff;
879
880 return 0;
881}
882
/* Jump target used to recover when executing CPUID raises SIGILL. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler: abandon the faulting instruction by jumping back to the
 * point recorded in cpuid_jmpbuf.  The signal number is not needed. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
890
891static
sewardj07133bf2002-06-13 10:25:56 +0000892Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000893{
sewardj07133bf2002-06-13 10:25:56 +0000894 Int level, res, ret;
895 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +0000896 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +0000897
898 /* Install own SIGILL handler */
899 sigill_new.ksa_handler = cpuid_SIGILL_handler;
900 sigill_new.ksa_flags = 0;
901 sigill_new.ksa_restorer = NULL;
902 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +0000903 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000904
905 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +0000906 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000907
908 /* Trap for illegal instruction, in case it's a really old processor that
909 * doesn't support CPUID. */
910 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
thughes4ee64962004-06-16 20:51:45 +0000911 VG_(cpuid)(0, &level, (int*)&vendor_id[0],
912 (int*)&vendor_id[8], (int*)&vendor_id[4]);
njn7cf0bd32002-06-08 13:36:03 +0000913 vendor_id[12] = '\0';
914
915 /* Restore old SIGILL handler */
916 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000917 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000918
919 } else {
920 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
921
922 /* Restore old SIGILL handler */
923 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000924 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000925 return -1;
926 }
927
928 if (0 == level) {
929 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
930 return -1;
931 }
932
933 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
934 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
935 ret = Intel_cache_info(level, I1c, D1c, L2c);
936
937 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
938 ret = AMD_cache_info(I1c, D1c, L2c);
939
sewardj97b7b262003-10-07 00:18:16 +0000940 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
941 /* Total kludge. Pretend to be a VIA Nehemiah. */
942 D1c->size = 64;
943 D1c->assoc = 16;
944 D1c->line_size = 16;
945 I1c->size = 64;
946 I1c->assoc = 4;
947 I1c->line_size = 16;
948 L2c->size = 64;
949 L2c->assoc = 16;
950 L2c->line_size = 16;
951 ret = 0;
952
njn7cf0bd32002-06-08 13:36:03 +0000953 } else {
954 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
955 vendor_id);
956 return -1;
957 }
958
959 /* Successful! Convert sizes from KB to bytes */
960 I1c->size *= 1024;
961 D1c->size *= 1024;
962 L2c->size *= 1024;
963
964 return ret;
965}
966
967/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +0000968static
969void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +0000970{
971 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +0000972 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +0000973 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000974 "warning: %s size of %dB not a power of two; "
975 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +0000976 cache->size = dflt->size;
977 }
978
sewardj07133bf2002-06-13 10:25:56 +0000979 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +0000980 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000981 "warning: %s associativity of %d not a power of two; "
982 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +0000983 cache->assoc = dflt->assoc;
984 }
985
sewardj07133bf2002-06-13 10:25:56 +0000986 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +0000987 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000988 "warning: %s line size of %dB not a power of two; "
989 "defaulting to %dB",
990 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +0000991 cache->line_size = dflt->line_size;
992 }
993
994 /* Then check line size >= 16 -- any smaller and a single instruction could
995 * straddle three cache lines, which breaks a simulation assertion and is
996 * stupid anyway. */
997 if (cache->line_size < MIN_LINE_SIZE) {
998 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000999 "warning: %s line size of %dB too small; "
1000 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001001 cache->line_size = MIN_LINE_SIZE;
1002 }
1003
1004 /* Then check cache size > line size (causes seg faults if not). */
1005 if (cache->size <= cache->line_size) {
1006 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001007 "warning: %s cache size of %dB <= line size of %dB; "
1008 "increasing to %dB", name, cache->size, cache->line_size,
1009 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001010 cache->size = cache->line_size * 2;
1011 }
1012
1013 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1014 if (cache->assoc > (cache->size / cache->line_size)) {
1015 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001016 "warning: %s associativity > (size / line size); "
1017 "increasing size to %dB",
1018 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001019 cache->size = cache->assoc * cache->line_size;
1020 }
1021}
1022
sewardj07133bf2002-06-13 10:25:56 +00001023static
1024void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001025{
nethercote9313ac42004-07-06 21:54:20 +00001026#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1027
1028 Int res, n_clos = 0;
1029
1030 // Defaults are for a model 3 or 4 Athlon
njn7cf0bd32002-06-08 13:36:03 +00001031 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1032 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1033 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1034
nethercote9313ac42004-07-06 21:54:20 +00001035 // Set caches to default.
1036 *I1c = I1_dflt;
1037 *D1c = D1_dflt;
1038 *L2c = L2_dflt;
njn7cf0bd32002-06-08 13:36:03 +00001039
nethercote9313ac42004-07-06 21:54:20 +00001040 // Then replace with any info we can get from CPUID.
1041 res = get_caches_from_CPUID(I1c, D1c, L2c);
1042 res = -1;
sewardjb1a77a42002-07-13 13:31:20 +00001043
nethercote9313ac42004-07-06 21:54:20 +00001044 // Then replace with any defined on the command line.
1045 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; n_clos++; }
1046 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; n_clos++; }
1047 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; n_clos++; }
njn7cf0bd32002-06-08 13:36:03 +00001048
nethercote9313ac42004-07-06 21:54:20 +00001049 // Warn if CPUID failed and config not completely specified from cmd line.
1050 if (res != 0 && n_clos < 3) {
1051 VG_(message)(Vg_DebugMsg,
1052 "Warning: Couldn't detect cache config, using one "
1053 "or more defaults ");
njn7cf0bd32002-06-08 13:36:03 +00001054 }
njn7cf0bd32002-06-08 13:36:03 +00001055
nethercote9313ac42004-07-06 21:54:20 +00001056 // Then check values and fix if not acceptable.
njn7cf0bd32002-06-08 13:36:03 +00001057 check_cache(I1c, &I1_dflt, "I1");
1058 check_cache(D1c, &D1_dflt, "D1");
1059 check_cache(L2c, &L2_dflt, "L2");
1060
1061 if (VG_(clo_verbosity) > 1) {
1062 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1063 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1064 I1c->size, I1c->assoc, I1c->line_size);
1065 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1066 D1c->size, D1c->assoc, D1c->line_size);
1067 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1068 L2c->size, L2c->assoc, L2c->line_size);
1069 }
nethercote9313ac42004-07-06 21:54:20 +00001070#undef CMD_LINE_DEFINED
njn7cf0bd32002-06-08 13:36:03 +00001071}
1072
njn4f9c9342002-04-29 16:03:24 +00001073/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001074/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001075/*------------------------------------------------------------*/
1076
nethercote9313ac42004-07-06 21:54:20 +00001077// Total reads/writes/misses. Calculated during CC traversal at the end.
1078// All auto-zeroed.
1079static CC Ir_total;
1080static CC Dr_total;
1081static CC Dw_total;
1082
1083static Char* cachegrind_out_file;
1084
1085static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +00001086{
nethercote9313ac42004-07-06 21:54:20 +00001087 VG_(message)(Vg_UserMsg,
1088 "error: can't open cache simulation output file `%s'",
1089 cachegrind_out_file );
1090 VG_(message)(Vg_UserMsg,
1091 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +00001092}
1093
nethercote9313ac42004-07-06 21:54:20 +00001094static void fprint_lineCC(Int fd, lineCC* n)
njn4f9c9342002-04-29 16:03:24 +00001095{
nethercote9313ac42004-07-06 21:54:20 +00001096 Char buf[512];
1097 VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1098 n->line,
1099 n->Ir.a, n->Ir.m1, n->Ir.m2,
1100 n->Dr.a, n->Dr.m1, n->Dr.m2,
1101 n->Dw.a, n->Dw.m1, n->Dw.m2);
1102 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1103
1104 Ir_total.a += n->Ir.a; Ir_total.m1 += n->Ir.m1; Ir_total.m2 += n->Ir.m2;
1105 Dr_total.a += n->Dr.a; Dr_total.m1 += n->Dr.m1; Dr_total.m2 += n->Dr.m2;
1106 Dw_total.a += n->Dw.a; Dw_total.m1 += n->Dw.m1; Dw_total.m2 += n->Dw.m2;
1107}
1108
1109static void fprint_CC_table_and_calc_totals(void)
1110{
1111 Int fd;
1112 Char buf[512];
1113 fileCC *curr_fileCC;
1114 fnCC *curr_fnCC;
1115 lineCC *curr_lineCC;
1116 Int i, j, k;
njn4f9c9342002-04-29 16:03:24 +00001117
njn25e49d8e72002-09-23 09:36:25 +00001118 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001119
njndb918dd2003-07-22 20:45:11 +00001120 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001121 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001122 if (fd < 0) {
nethercote9313ac42004-07-06 21:54:20 +00001123 // If the file can't be opened for whatever reason (conflict
1124 // between multiple cachegrinded processes?), give up now.
sewardj0744b6c2002-12-11 00:45:42 +00001125 file_err();
1126 return;
1127 }
njn4f9c9342002-04-29 16:03:24 +00001128
nethercote9313ac42004-07-06 21:54:20 +00001129 // "desc:" lines (giving I1/D1/L2 cache configuration). The spaces after
1130 // the 2nd colon makes cg_annotate's output look nicer.
1131 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1132 "desc: D1 cache: %s\n"
1133 "desc: L2 cache: %s\n",
1134 I1.desc_line, D1.desc_line, L2.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001135 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001136
nethercote9313ac42004-07-06 21:54:20 +00001137 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001138 VG_(strcpy)(buf, "cmd:");
1139 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001140 for (i = 0; i < VG_(client_argc); i++) {
1141 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001142 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1143 }
nethercote9313ac42004-07-06 21:54:20 +00001144 // "events:" line
njn4f9c9342002-04-29 16:03:24 +00001145 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1146 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1147
nethercote9313ac42004-07-06 21:54:20 +00001148 // Six loops here: three for the hash table arrays, and three for the
1149 // chains hanging off the hash table arrays.
njn4f9c9342002-04-29 16:03:24 +00001150 for (i = 0; i < N_FILE_ENTRIES; i++) {
nethercote9313ac42004-07-06 21:54:20 +00001151 curr_fileCC = CC_table[i];
1152 while (curr_fileCC != NULL) {
1153 VG_(sprintf)(buf, "fl=%s\n", curr_fileCC->file);
njn4f9c9342002-04-29 16:03:24 +00001154 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1155
1156 for (j = 0; j < N_FN_ENTRIES; j++) {
nethercote9313ac42004-07-06 21:54:20 +00001157 curr_fnCC = curr_fileCC->fns[j];
1158 while (curr_fnCC != NULL) {
1159 VG_(sprintf)(buf, "fn=%s\n", curr_fnCC->fn);
njn4f9c9342002-04-29 16:03:24 +00001160 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1161
nethercote9313ac42004-07-06 21:54:20 +00001162 for (k = 0; k < N_LINE_ENTRIES; k++) {
1163 curr_lineCC = curr_fnCC->lines[k];
1164 while (curr_lineCC != NULL) {
1165 fprint_lineCC(fd, curr_lineCC);
1166 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +00001167 }
1168 }
nethercote9313ac42004-07-06 21:54:20 +00001169 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +00001170 }
1171 }
nethercote9313ac42004-07-06 21:54:20 +00001172 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +00001173 }
1174 }
1175
nethercote9313ac42004-07-06 21:54:20 +00001176 // Summary stats must come after rest of table, since we calculate them
1177 // during traversal. */
njn4f9c9342002-04-29 16:03:24 +00001178 VG_(sprintf)(buf, "summary: "
nethercote9313ac42004-07-06 21:54:20 +00001179 "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
njn4f9c9342002-04-29 16:03:24 +00001180 Ir_total.a, Ir_total.m1, Ir_total.m2,
1181 Dr_total.a, Dr_total.m1, Dr_total.m2,
1182 Dw_total.a, Dw_total.m1, Dw_total.m2);
1183 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1184 VG_(close)(fd);
1185}
1186
// Returns the number of characters needed to print 'n' in decimal with a
// comma between each group of three digits.
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   // Zero still prints as one digit.  Without this, (w-1) below would wrap
   // around (w is unsigned) and yield an enormous width.
   if (0 == w) w = 1;
   return w + (w-1)/3;   // add space for commas
}
1196
sewardj4f29ddf2002-05-03 22:29:04 +00001197static
daywalker8ad1a402003-09-18 01:15:32 +00001198void percentify(Int n, Int ex, Int field_width, char buf[])
njn4f9c9342002-04-29 16:03:24 +00001199{
1200 int i, len, space;
1201
daywalker8ad1a402003-09-18 01:15:32 +00001202 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
njn4f9c9342002-04-29 16:03:24 +00001203 len = VG_(strlen)(buf);
1204 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001205 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001206 i = len;
1207
1208 /* Right justify in field */
1209 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1210 for (i = 0; i < space; i++) buf[i] = ' ';
1211}
1212
njn7d9f94d2003-04-22 21:41:40 +00001213void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001214{
nethercote9313ac42004-07-06 21:54:20 +00001215 static char buf1[128], buf2[128], buf3[128], fmt [128];
njn607adfc2003-09-30 14:15:44 +00001216
njn4f9c9342002-04-29 16:03:24 +00001217 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001218 ULong L2_total_m, L2_total_mr, L2_total_mw,
1219 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001220 Int l1, l2, l3;
1221 Int p;
1222
nethercote9313ac42004-07-06 21:54:20 +00001223 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001224
njn7cf0bd32002-06-08 13:36:03 +00001225 if (VG_(clo_verbosity) == 0)
1226 return;
1227
njn4f9c9342002-04-29 16:03:24 +00001228 /* I cache results. Use the I_refs value to determine the first column
1229 * width. */
njn607adfc2003-09-30 14:15:44 +00001230 l1 = ULong_width(Ir_total.a);
1231 l2 = ULong_width(Dr_total.a);
1232 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001233
njn607adfc2003-09-30 14:15:44 +00001234 /* Make format string, getting width right for numbers */
1235 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1236
1237 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1238 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1239 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001240
1241 p = 100;
1242
njn25e49d8e72002-09-23 09:36:25 +00001243 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001244 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1245 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1246
1247 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1248 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1249 VG_(message)(Vg_UserMsg, "");
1250
1251 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1252 * width of columns 2 & 3. */
1253 D_total.a = Dr_total.a + Dw_total.a;
1254 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1255 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1256
njn607adfc2003-09-30 14:15:44 +00001257 /* Make format string, getting width right for numbers */
1258 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001259
njn607adfc2003-09-30 14:15:44 +00001260 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1261 D_total.a, Dr_total.a, Dw_total.a);
1262 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1263 D_total.m1, Dr_total.m1, Dw_total.m1);
1264 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1265 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001266
1267 p = 10;
1268
njn25e49d8e72002-09-23 09:36:25 +00001269 if (0 == D_total.a) D_total.a = 1;
1270 if (0 == Dr_total.a) Dr_total.a = 1;
1271 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001272 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1273 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1274 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1275 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1276
1277 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1278 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1279 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1280 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1281 VG_(message)(Vg_UserMsg, "");
1282
1283 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001284
1285 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1286 L2_total_r = Dr_total.m1 + Ir_total.m1;
1287 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001288 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1289 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001290
njn4f9c9342002-04-29 16:03:24 +00001291 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1292 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1293 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001294 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1295 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001296
1297 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1298 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1299 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1300 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1301
1302
nethercote9313ac42004-07-06 21:54:20 +00001303 // Various stats
njn4f9c9342002-04-29 16:03:24 +00001304 if (VG_(clo_verbosity) > 1) {
nethercote9313ac42004-07-06 21:54:20 +00001305 int BB_lookups = full_debug_BBs + fn_debug_BBs +
njn4f9c9342002-04-29 16:03:24 +00001306 file_line_debug_BBs + no_debug_BBs;
1307
1308 VG_(message)(Vg_DebugMsg, "");
1309 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1310 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
nethercote9313ac42004-07-06 21:54:20 +00001311 VG_(message)(Vg_DebugMsg, "Distinct lines: %d", distinct_lines);
1312 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
njn4f9c9342002-04-29 16:03:24 +00001313 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1314 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1315 full_debug_BBs * 100 / BB_lookups,
1316 full_debug_BBs);
1317 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1318 file_line_debug_BBs * 100 / BB_lookups,
1319 file_line_debug_BBs);
1320 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
nethercote9313ac42004-07-06 21:54:20 +00001321 fn_debug_BBs * 100 / BB_lookups,
1322 fn_debug_BBs);
njn4f9c9342002-04-29 16:03:24 +00001323 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1324 no_debug_BBs * 100 / BB_lookups,
1325 no_debug_BBs);
1326 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
njn4f9c9342002-04-29 16:03:24 +00001327 }
njn25e49d8e72002-09-23 09:36:25 +00001328 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001329}
1330
nethercote9313ac42004-07-06 21:54:20 +00001331/*--------------------------------------------------------------------*/
1332/*--- Discarding BB info ---*/
1333/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001334
nethercote9313ac42004-07-06 21:54:20 +00001335// Called when a translation is invalidated due to code unloading.
njn25e49d8e72002-09-23 09:36:25 +00001336void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001337{
nethercote9313ac42004-07-06 21:54:20 +00001338 VgHashNode** prev_next_ptr;
1339 VgHashNode* bb_info;
njn4294fd42002-06-05 14:41:10 +00001340
nethercote9313ac42004-07-06 21:54:20 +00001341 if (0) VG_(printf)( "discard_basic_block_info: %p, %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001342
nethercote9313ac42004-07-06 21:54:20 +00001343 // Get BB info, remove from table, free BB info. Simple!
1344 bb_info = VG_(HT_get_node)(instr_info_table, a, &prev_next_ptr);
1345 sk_assert(NULL != bb_info);
1346 *prev_next_ptr = bb_info->next;
1347 VG_(free)(bb_info);
sewardj18d75132002-05-16 11:06:21 +00001348}
1349
1350/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001351/*--- Command line processing ---*/
1352/*--------------------------------------------------------------------*/
1353
nethercote9313ac42004-07-06 21:54:20 +00001354static void parse_cache_opt ( cache_t* cache, char* opt )
njn25e49d8e72002-09-23 09:36:25 +00001355{
nethercote9313ac42004-07-06 21:54:20 +00001356 int i = 0, i2, i3;
njn25e49d8e72002-09-23 09:36:25 +00001357
nethercote9313ac42004-07-06 21:54:20 +00001358 // Option argument looks like "65536,2,64".
1359 // Find commas, replace with NULs to make three independent
1360 // strings, then extract numbers, put NULs back. Yuck.
njn25e49d8e72002-09-23 09:36:25 +00001361 while (VG_(isdigit)(opt[i])) i++;
1362 if (',' == opt[i]) {
1363 opt[i++] = '\0';
1364 i2 = i;
1365 } else goto bad;
1366 while (VG_(isdigit)(opt[i])) i++;
1367 if (',' == opt[i]) {
1368 opt[i++] = '\0';
1369 i3 = i;
1370 } else goto bad;
1371 while (VG_(isdigit)(opt[i])) i++;
1372 if ('\0' != opt[i]) goto bad;
1373
nethercote9313ac42004-07-06 21:54:20 +00001374 cache->size = (Int)VG_(atoll)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001375 cache->assoc = (Int)VG_(atoll)(opt + i2);
1376 cache->line_size = (Int)VG_(atoll)(opt + i3);
1377
nethercote9313ac42004-07-06 21:54:20 +00001378 opt[i2-1] = ',';
1379 opt[i3-1] = ',';
njn25e49d8e72002-09-23 09:36:25 +00001380 return;
1381
1382 bad:
nethercote9313ac42004-07-06 21:54:20 +00001383 VG_(bad_option)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001384}
1385
1386Bool SK_(process_cmd_line_option)(Char* arg)
1387{
nethercote9313ac42004-07-06 21:54:20 +00001388 // 5 is length of "--I1="
njn39c86652003-05-21 10:13:39 +00001389 if (VG_CLO_STREQN(5, arg, "--I1="))
nethercote9313ac42004-07-06 21:54:20 +00001390 parse_cache_opt(&clo_I1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001391 else if (VG_CLO_STREQN(5, arg, "--D1="))
nethercote9313ac42004-07-06 21:54:20 +00001392 parse_cache_opt(&clo_D1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001393 else if (VG_CLO_STREQN(5, arg, "--L2="))
nethercote9313ac42004-07-06 21:54:20 +00001394 parse_cache_opt(&clo_L2_cache, &arg[5]);
njn25e49d8e72002-09-23 09:36:25 +00001395 else
1396 return False;
1397
1398 return True;
1399}
1400
njn3e884182003-04-15 13:03:23 +00001401void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001402{
njn3e884182003-04-15 13:03:23 +00001403 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001404" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1405" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001406" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1407 );
1408}
1409
1410void SK_(print_debug_usage)(void)
1411{
1412 VG_(printf)(
1413" (none)\n"
1414 );
njn25e49d8e72002-09-23 09:36:25 +00001415}
1416
1417/*--------------------------------------------------------------------*/
1418/*--- Setup ---*/
1419/*--------------------------------------------------------------------*/
1420
njn810086f2002-11-14 12:42:47 +00001421void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001422{
njn13f02932003-04-30 20:23:58 +00001423 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00001424
njn810086f2002-11-14 12:42:47 +00001425 VG_(details_name) ("Cachegrind");
1426 VG_(details_version) (NULL);
1427 VG_(details_description) ("an I1/D1/L2 cache profiler");
1428 VG_(details_copyright_author)(
nethercotebb1c9912004-01-04 16:43:23 +00001429 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote.");
nethercote421281e2003-11-20 16:20:55 +00001430 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00001431 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001432
njn810086f2002-11-14 12:42:47 +00001433 VG_(needs_basic_block_discards)();
1434 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001435
1436 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
nethercote9313ac42004-07-06 21:54:20 +00001437 VG_(register_compact_helper)((Addr) & log_1I_1Dr_cache_access);
1438 VG_(register_compact_helper)((Addr) & log_1I_1Dw_cache_access);
njn25e49d8e72002-09-23 09:36:25 +00001439 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00001440
njn99ccf082003-09-30 13:51:23 +00001441 /* Get working directory */
1442 sk_assert( VG_(getcwd_alloc)(&base_dir) );
1443
njn13f02932003-04-30 20:23:58 +00001444 /* Block is big enough for dir name + cachegrind.out.<pid> */
1445 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
1446 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
1447 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00001448 VG_(free)(base_dir);
nethercote9313ac42004-07-06 21:54:20 +00001449
1450 instr_info_table = VG_(HT_construct)();
njn25e49d8e72002-09-23 09:36:25 +00001451}
1452
1453void SK_(post_clo_init)(void)
1454{
1455 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00001456
njn25e49d8e72002-09-23 09:36:25 +00001457 get_caches(&I1c, &D1c, &L2c);
1458
1459 cachesim_I1_initcache(I1c);
1460 cachesim_D1_initcache(D1c);
1461 cachesim_L2_initcache(L2c);
1462
nethercote9313ac42004-07-06 21:54:20 +00001463 VGP_(register_profile_event)(VgpGetLineCC, "get-lineCC");
njn25e49d8e72002-09-23 09:36:25 +00001464 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1465 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
njn25e49d8e72002-09-23 09:36:25 +00001466}
1467
/* Invoke the interface-version macro with SK_(pre_clo_init) and 0.
   NOTE(review): presumably this tells the core which function to call
   first and which interface version the tool was built against; the
   macro and the meaning of the 0 argument are defined outside this
   file -- confirm there. */
VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
1469
njn25e49d8e72002-09-23 09:36:25 +00001470/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00001471/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00001472/*--------------------------------------------------------------------*/