blob: 757cb1a7fb9e96b1a9c1cc0819f3ff4a0d09e0d4 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +00003/*--- Cachegrind: everything but the simulation itself. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
nethercotebb1c9912004-01-04 16:43:23 +000011 Copyright (C) 2002-2004 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000012 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njn25e49d8e72002-09-23 09:36:25 +000032#include "vg_skin.h"
33//#include "vg_profile.c"
34
35/* For cache simulation */
/* Description of one simulated cache; the three command-line cache
   settings (clo_I1_cache, clo_D1_cache, clo_L2_cache) use this type. */
typedef struct {
   int size;       /* bytes */
   int assoc;      /* associativity -- presumably the number of ways; confirm in cg_sim.c */
   int line_size;  /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000041
nethercote27fc1da2004-01-04 16:56:57 +000042#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000043
njn25e49d8e72002-09-23 09:36:25 +000044/*------------------------------------------------------------*/
45/*--- Constants ---*/
46/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000047
// Upper bound on x86 instruction length; end_of_x86_instr() asserts
// that every instruction size lies in [1, MAX_x86_INSTR_SIZE].
#define MAX_x86_INSTR_SIZE 16 // According to ia32 sw dev manual vol 2
// Data sizes larger than this are clamped to it during instrumentation.
#define MIN_LINE_SIZE 16
// Buffer sizes (in Chars) for file names and function names.
#define FILE_LEN 256
#define FN_LEN 256
njn7cf0bd32002-06-08 13:36:03 +000052
53/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000054/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000055/*------------------------------------------------------------*/
56
// Profiling event numbers used with VGP_PUSHCC/VGP_POPCC in this file.
// Numbering starts just after VgpFini, which is declared outside this file.
typedef
   enum {
      VgpGetLineCC = VgpFini+1,   // used in get_lineCC()
      VgpCacheSimulate,           // used in the log_*_cache_access() helpers
      VgpCacheResults
   }
   VgpToolCC;
sewardj07133bf2002-06-13 10:25:56 +000064
njn4f9c9342002-04-29 16:03:24 +000065/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000066/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000067/*------------------------------------------------------------*/
68
69typedef struct _CC CC;
70struct _CC {
71 ULong a;
72 ULong m1;
73 ULong m2;
74};
75
nethercote9313ac42004-07-06 21:54:20 +000076//------------------------------------------------------------
77// Primary data structure #1: CC table
78// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
79// - hash(file, hash(fn, hash(line+CC)))
80// - Each hash table is separately chained.
81// - The array sizes below work fairly well for Konqueror.
82// - Lookups done by instr_addr, which is converted immediately to a source
83// location.
84// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +000085
// Bucket counts for the three nested hash tables of the CC table:
// files at the top, functions within a file, lines within a function.
#define N_FILE_ENTRIES 251
#define N_FN_ENTRIES 53
#define N_LINE_ENTRIES 37
njn4f9c9342002-04-29 16:03:24 +000089
nethercote9313ac42004-07-06 21:54:20 +000090typedef struct _lineCC lineCC;
91struct _lineCC {
92 Int line;
93 CC Ir;
94 CC Dr;
95 CC Dw;
96 lineCC* next;
njn4f9c9342002-04-29 16:03:24 +000097};
98
nethercote9313ac42004-07-06 21:54:20 +000099typedef struct _fnCC fnCC;
100struct _fnCC {
101 Char* fn;
102 fnCC* next;
103 lineCC* lines[N_LINE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000104};
105
nethercote9313ac42004-07-06 21:54:20 +0000106typedef struct _fileCC fileCC;
107struct _fileCC {
108 Char* file;
109 fileCC* next;
110 fnCC* fns[N_FN_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000111};
112
// Top level of CC table. Auto-zeroed.
// Indexed by hash(file name, N_FILE_ENTRIES); each slot heads a chain of
// fileCC nodes (see get_lineCC()).
static fileCC *CC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000115
nethercote9313ac42004-07-06 21:54:20 +0000116//------------------------------------------------------------
117// Primary data structure #2: Instr-info table
118// - Holds the cached info about each instr that is used for simulation.
119// - table(BB_start_addr, list(instr_info))
120// - For each BB, each instr_info in the list holds info about the
121// instruction (instr_size, instr_addr, etc), plus a pointer to its line
122// CC. This node is what's passed to the simulation function.
123// - When BBs are discarded the relevant list(instr_info) is freed.
124
125typedef struct _instr_info instr_info;
126struct _instr_info {
127 Addr instr_addr;
128 UChar instr_size;
129 UChar data_size;
130 struct _lineCC* parent; // parent line-CC
131};
132
133typedef struct _BB_info BB_info;
134struct _BB_info {
135 BB_info* next; // next field
136 Addr BB_addr; // key
137 Int n_instrs;
138 instr_info instrs[0];
139};
140
// Maps a basic block's start address to its BB_info; looked up and
// populated in get_BB_info().
VgHashTable instr_info_table; // hash(Addr, BB_info)
142
143//------------------------------------------------------------
144// Stats
// Node-creation counters, bumped in get_lineCC() when a new file,
// function or line node is added to the CC table.
static Int distinct_files = 0;
static Int distinct_fns = 0;
static Int distinct_lines = 0;
// Updated in get_BB_info() when a basic block is seen for the first time.
static Int distinct_instrs = 0;

// Debug-info availability counters, bumped once per get_debug_info() call
// according to whether file/line and/or function name were found.
static Int full_debug_BBs = 0;
static Int file_line_debug_BBs = 0;
static Int fn_debug_BBs = 0;
static Int no_debug_BBs = 0;

// Number of times get_BB_info() found an existing entry for a BB.
static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000156
nethercote9313ac42004-07-06 21:54:20 +0000157/*------------------------------------------------------------*/
158/*--- CC table operations ---*/
159/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000160
nethercote9313ac42004-07-06 21:54:20 +0000161static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
162 Char fn[FN_LEN], Int* line)
njn4f9c9342002-04-29 16:03:24 +0000163{
nethercote9313ac42004-07-06 21:54:20 +0000164 Bool found_file_line = VG_(get_filename_linenum)(instr_addr, file,
165 FILE_LEN, line);
166 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000167
nethercote9313ac42004-07-06 21:54:20 +0000168 if (!found_file_line) {
169 VG_(strcpy)(file, "???");
170 *line = 0;
171 }
172 if (!found_fn) {
173 VG_(strcpy)(fn, "???");
174 }
175 if (found_file_line) {
176 if (found_fn) full_debug_BBs++;
177 else file_line_debug_BBs++;
178 } else {
179 if (found_fn) fn_debug_BBs++;
180 else no_debug_BBs++;
njn4f9c9342002-04-29 16:03:24 +0000181 }
182}
183
njn4f9c9342002-04-29 16:03:24 +0000184static UInt hash(Char *s, UInt table_size)
185{
nethercote9313ac42004-07-06 21:54:20 +0000186 const int hash_constant = 256;
187 int hash_value = 0;
188 for ( ; *s; s++)
189 hash_value = (hash_constant * hash_value + *s) % table_size;
190 return hash_value;
njn4f9c9342002-04-29 16:03:24 +0000191}
192
nethercote9313ac42004-07-06 21:54:20 +0000193static __inline__
194fileCC* new_fileCC(Char filename[], fileCC* next)
nethercote09d853e2004-01-21 16:12:55 +0000195{
nethercote9313ac42004-07-06 21:54:20 +0000196 // Using calloc() zeroes the fns[] array
197 fileCC* cc = VG_(calloc)(1, sizeof(fileCC));
198 cc->file = VG_(strdup)(filename);
199 cc->next = next;
200 return cc;
nethercote09d853e2004-01-21 16:12:55 +0000201}
202
nethercote9313ac42004-07-06 21:54:20 +0000203static __inline__
204fnCC* new_fnCC(Char fn[], fnCC* next)
njn4f9c9342002-04-29 16:03:24 +0000205{
nethercote9313ac42004-07-06 21:54:20 +0000206 // Using calloc() zeroes the lines[] array
207 fnCC* cc = VG_(calloc)(1, sizeof(fnCC));
208 cc->fn = VG_(strdup)(fn);
209 cc->next = next;
210 return cc;
211}
njn4f9c9342002-04-29 16:03:24 +0000212
nethercote9313ac42004-07-06 21:54:20 +0000213static __inline__
214lineCC* new_lineCC(Int line, lineCC* next)
215{
216 // Using calloc() zeroes the Ir/Dr/Dw CCs and the instrs[] array
217 lineCC* cc = VG_(calloc)(1, sizeof(lineCC));
218 cc->line = line;
219 cc->next = next;
220 return cc;
221}
njn4f9c9342002-04-29 16:03:24 +0000222
nethercote9313ac42004-07-06 21:54:20 +0000223static __inline__
224instr_info* new_instr_info(Addr instr_addr, lineCC* parent, instr_info* next)
225{
226 // Using calloc() zeroes instr_size and data_size
227 instr_info* ii = VG_(calloc)(1, sizeof(instr_info));
228 ii->instr_addr = instr_addr;
229 ii->parent = parent;
230 return ii;
231}
232
233// Do a three step traversal: by file, then fn, then line.
234// In all cases prepends new nodes to their chain. Returns a pointer to the
235// line node, creates a new one if necessary.
236static lineCC* get_lineCC(Addr orig_addr)
237{
238 fileCC *curr_fileCC;
239 fnCC *curr_fnCC;
240 lineCC *curr_lineCC;
241 Char file[FILE_LEN], fn[FN_LEN];
242 Int line;
243 UInt file_hash, fn_hash, line_hash;
244
245 get_debug_info(orig_addr, file, fn, &line);
246
247 VGP_PUSHCC(VgpGetLineCC);
248
249 // level 1
250 file_hash = hash(file, N_FILE_ENTRIES);
251 curr_fileCC = CC_table[file_hash];
252 while (NULL != curr_fileCC && !VG_STREQ(file, curr_fileCC->file)) {
253 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +0000254 }
nethercote9313ac42004-07-06 21:54:20 +0000255 if (NULL == curr_fileCC) {
256 CC_table[file_hash] = curr_fileCC =
257 new_fileCC(file, CC_table[file_hash]);
njn4f9c9342002-04-29 16:03:24 +0000258 distinct_files++;
259 }
260
nethercote9313ac42004-07-06 21:54:20 +0000261 // level 2
262 fn_hash = hash(fn, N_FN_ENTRIES);
263 curr_fnCC = curr_fileCC->fns[fn_hash];
264 while (NULL != curr_fnCC && !VG_STREQ(fn, curr_fnCC->fn)) {
265 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +0000266 }
nethercote9313ac42004-07-06 21:54:20 +0000267 if (NULL == curr_fnCC) {
268 curr_fileCC->fns[fn_hash] = curr_fnCC =
269 new_fnCC(fn, curr_fileCC->fns[fn_hash]);
njn4f9c9342002-04-29 16:03:24 +0000270 distinct_fns++;
271 }
272
nethercote9313ac42004-07-06 21:54:20 +0000273 // level 3
274 line_hash = line % N_LINE_ENTRIES;
275 curr_lineCC = curr_fnCC->lines[line_hash];
276 while (NULL != curr_lineCC && line != curr_lineCC->line) {
277 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +0000278 }
nethercote9313ac42004-07-06 21:54:20 +0000279 if (NULL == curr_lineCC) {
280 curr_fnCC->lines[line_hash] = curr_lineCC =
281 new_lineCC(line, curr_fnCC->lines[line_hash]);
282 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +0000283 }
nethercote9313ac42004-07-06 21:54:20 +0000284
285 VGP_POPCC(VgpGetLineCC);
286 return curr_lineCC;
njn4f9c9342002-04-29 16:03:24 +0000287}
288
289/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000290/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000291/*------------------------------------------------------------*/
292
njn25e49d8e72002-09-23 09:36:25 +0000293static __attribute__ ((regparm (1)))
nethercote9313ac42004-07-06 21:54:20 +0000294void log_1I_0D_cache_access(instr_info* n)
njn25e49d8e72002-09-23 09:36:25 +0000295{
296 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000297 // n, n->instr_addr, n->instr_size)
njn25e49d8e72002-09-23 09:36:25 +0000298 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000299 cachesim_I1_doref(n->instr_addr, n->instr_size,
300 &n->parent->Ir.m1, &n->parent->Ir.m2);
301 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000302 VGP_POPCC(VgpCacheSimulate);
303}
304
nethercote9313ac42004-07-06 21:54:20 +0000305static __attribute__ ((regparm (2)))
306void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000307{
nethercote9313ac42004-07-06 21:54:20 +0000308 //VG_(printf)("1I_1Dr: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
309 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000310 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000311 cachesim_I1_doref(n->instr_addr, n->instr_size,
312 &n->parent->Ir.m1, &n->parent->Ir.m2);
313 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000314
nethercote9313ac42004-07-06 21:54:20 +0000315 cachesim_D1_doref(data_addr, n->data_size,
316 &n->parent->Dr.m1, &n->parent->Dr.m2);
317 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000318 VGP_POPCC(VgpCacheSimulate);
319}
320
nethercote9313ac42004-07-06 21:54:20 +0000321static __attribute__ ((regparm (2)))
322void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000323{
nethercote9313ac42004-07-06 21:54:20 +0000324 //VG_(printf)("1I_1Dw: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
325 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000326 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000327 cachesim_I1_doref(n->instr_addr, n->instr_size,
328 &n->parent->Ir.m1, &n->parent->Ir.m2);
329 n->parent->Ir.a++;
330
331 cachesim_D1_doref(data_addr, n->data_size,
332 &n->parent->Dw.m1, &n->parent->Dw.m2);
333 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000334 VGP_POPCC(VgpCacheSimulate);
335}
336
nethercote9313ac42004-07-06 21:54:20 +0000337static __attribute__ ((regparm (3)))
338void log_1I_2D_cache_access(instr_info* n, Addr data_addr1, Addr data_addr2)
njn25e49d8e72002-09-23 09:36:25 +0000339{
340 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000341 // n, n->instr_addr, n->instr_size, data_addr1, data_addr2, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000342 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000343 cachesim_I1_doref(n->instr_addr, n->instr_size,
344 &n->parent->Ir.m1, &n->parent->Ir.m2);
345 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000346
nethercote9313ac42004-07-06 21:54:20 +0000347 cachesim_D1_doref(data_addr1, n->data_size,
348 &n->parent->Dr.m1, &n->parent->Dr.m2);
349 n->parent->Dr.a++;
350 cachesim_D1_doref(data_addr2, n->data_size,
351 &n->parent->Dw.m1, &n->parent->Dw.m2);
352 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000353 VGP_POPCC(VgpCacheSimulate);
354}
355
nethercote9313ac42004-07-06 21:54:20 +0000356/*------------------------------------------------------------*/
357/*--- Instrumentation ---*/
358/*------------------------------------------------------------*/
359
360BB_info* get_BB_info(UCodeBlock* cb_in, Addr orig_addr, Bool* bb_seen_before)
361{
362 Int i, n_instrs;
363 UInstr* u_in;
364 BB_info* bb_info;
365 VgHashNode** dummy;
366
367 // Count number of x86 instrs in BB
368 n_instrs = 1; // start at 1 because last x86 instr has no INCEIP
369 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
370 u_in = VG_(get_instr)(cb_in, i);
371 if (INCEIP == u_in->opcode) n_instrs++;
372 }
373
374 // Get the BB_info
375 bb_info = (BB_info*)VG_(HT_get_node)(instr_info_table, orig_addr, &dummy);
376 *bb_seen_before = ( NULL == bb_info ? False : True );
377 if (*bb_seen_before) {
378 // BB must have been translated before, but flushed from the TT
379 sk_assert(bb_info->n_instrs == n_instrs );
380 BB_retranslations++;
381 } else {
382 // BB never translated before (at this address, at least; could have
383 // been unloaded and then reloaded elsewhere in memory)
384 bb_info =
385 VG_(calloc)(1, sizeof(BB_info) + n_instrs*sizeof(instr_info));
386 bb_info->BB_addr = orig_addr;
387 bb_info->n_instrs = n_instrs;
388 VG_(HT_add_node)( instr_info_table, (VgHashNode*)bb_info );
389 distinct_instrs++;
390 }
391 return bb_info;
392}
393
394void do_details( instr_info* n, Bool bb_seen_before,
395 Addr instr_addr, Int instr_size, Int data_size )
396{
397 lineCC* parent = get_lineCC(instr_addr);
398 if (bb_seen_before) {
399 sk_assert( n->instr_addr == instr_addr );
400 sk_assert( n->instr_size == instr_size );
401 sk_assert( n->data_size == data_size );
402 // Don't assert that (n->parent == parent)... it's conceivable that
403 // the debug info might change; the other asserts should be enough to
404 // detect anything strange.
405 } else {
406 n->instr_addr = instr_addr;
407 n->instr_size = instr_size;
408 n->data_size = data_size;
409 n->parent = parent;
410 }
411}
412
413Bool is_valid_data_size(Int data_size)
414{
415 return (4 == data_size || 2 == data_size || 1 == data_size ||
416 8 == data_size || 10 == data_size || MIN_LINE_SIZE == data_size);
417}
418
419// Instrumentation for the end of each x86 instruction.
420void end_of_x86_instr(UCodeBlock* cb, instr_info* i_node, Bool bb_seen_before,
421 UInt instr_addr, UInt instr_size, UInt data_size,
422 Int t_read, Int t_read_addr,
423 Int t_write, Int t_write_addr)
424{
425 Addr helper;
426 Int argc;
427 Int t_CC_addr,
428 t_data_addr1 = INVALID_TEMPREG,
429 t_data_addr2 = INVALID_TEMPREG;
430
431 sk_assert(instr_size >= 1 &&
432 instr_size <= MAX_x86_INSTR_SIZE);
433
434#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
435#define INV(qqt) (INVALID_TEMPREG == (qqt))
436
437 // Work out what kind of x86 instruction it is
438 if (!IS_(read) && !IS_(write)) {
439 sk_assert( 0 == data_size );
440 sk_assert(INV(t_read) && INV(t_write));
441 helper = (Addr) & log_1I_0D_cache_access;
442 argc = 1;
443
444 } else if (IS_(read) && !IS_(write)) {
445 sk_assert( is_valid_data_size(data_size) );
446 sk_assert(!INV(t_read) && INV(t_write));
447 helper = (Addr) & log_1I_1Dr_cache_access;
448 argc = 2;
449 t_data_addr1 = t_read_addr;
450
451 } else if (!IS_(read) && IS_(write)) {
452 sk_assert( is_valid_data_size(data_size) );
453 sk_assert(INV(t_read) && !INV(t_write));
454 helper = (Addr) & log_1I_1Dw_cache_access;
455 argc = 2;
456 t_data_addr1 = t_write_addr;
457
458 } else {
459 sk_assert(IS_(read) && IS_(write));
460 sk_assert( is_valid_data_size(data_size) );
461 sk_assert(!INV(t_read) && !INV(t_write));
462 if (t_read == t_write) {
463 helper = (Addr) & log_1I_1Dr_cache_access;
464 argc = 2;
465 t_data_addr1 = t_read_addr;
466 } else {
467 helper = (Addr) & log_1I_2D_cache_access;
468 argc = 3;
469 t_data_addr1 = t_read_addr;
470 t_data_addr2 = t_write_addr;
471 }
472 }
473#undef IS_
nethercotef5b74662004-07-06 22:46:41 +0000474#undef INV
475
nethercote9313ac42004-07-06 21:54:20 +0000476 // Setup 1st arg: CC addr
477 do_details( i_node, bb_seen_before, instr_addr, instr_size, data_size );
478 t_CC_addr = newTemp(cb);
479 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
480 uLiteral(cb, (Addr)i_node);
481
482 // Call the helper
483 if (1 == argc)
484 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
485 else if (2 == argc)
486 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
487 TempReg, t_data_addr1);
488 else if (3 == argc)
489 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
490 TempReg, t_data_addr1,
491 TempReg, t_data_addr2);
492 else
493 VG_(skin_panic)("argc... not 1 or 2 or 3?");
494
495 uCCall(cb, helper, argc, argc, False);
496}
497
njn25e49d8e72002-09-23 09:36:25 +0000498UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
499{
njn4f9c9342002-04-29 16:03:24 +0000500 UCodeBlock* cb;
njn4f9c9342002-04-29 16:03:24 +0000501 UInstr* u_in;
nethercote9313ac42004-07-06 21:54:20 +0000502 Int i, bb_info_i;
503 BB_info* bb_info;
504 Bool bb_seen_before = False;
505 Int t_read_addr, t_write_addr, t_read, t_write;
njn25e49d8e72002-09-23 09:36:25 +0000506 Addr x86_instr_addr = orig_addr;
nethercote9313ac42004-07-06 21:54:20 +0000507 UInt x86_instr_size, data_size = 0;
508 Bool instrumented_Jcc = False;
njn4f9c9342002-04-29 16:03:24 +0000509
nethercote9313ac42004-07-06 21:54:20 +0000510 bb_info = get_BB_info(cb_in, orig_addr, &bb_seen_before);
511 bb_info_i = 0;
njn4f9c9342002-04-29 16:03:24 +0000512
njn810086f2002-11-14 12:42:47 +0000513 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000514
nethercote9313ac42004-07-06 21:54:20 +0000515 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000516
njn810086f2002-11-14 12:42:47 +0000517 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
518 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000519
nethercote9313ac42004-07-06 21:54:20 +0000520 // We want to instrument each x86 instruction with a call to the
521 // appropriate simulation function, which depends on whether the
522 // instruction does memory data reads/writes. x86 instructions can
523 // end in three ways, and this is how they are instrumented:
524 //
525 // 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
526 // 2. UCode, JMP --> UCode, Instrumentation, JMP
527 // 3. UCode, Jcc, JMP --> UCode, Instrumentation, Jcc, JMP
528 //
529 // The last UInstr in a BB is always a JMP. Jccs, when they appear,
530 // are always second last. This is checked with assertions.
531 // Instrumentation must go before any jumps. (JIFZ is the exception;
532 // if a JIFZ succeeds, no simulation is done for the instruction.)
533 //
534 // x86 instruction sizes are obtained from INCEIPs (for case 1) or
535 // from .extra4b field of the final JMP (for case 2 & 3).
536
537 if (instrumented_Jcc) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000538
539 switch (u_in->opcode) {
njn4f9c9342002-04-29 16:03:24 +0000540
nethercote9313ac42004-07-06 21:54:20 +0000541 // For memory-ref instrs, copy the data_addr into a temporary to be
542 // passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000543 case LOAD:
nethercote9313ac42004-07-06 21:54:20 +0000544 case SSE3ag_MemRd_RegWr:
njn25e49d8e72002-09-23 09:36:25 +0000545 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000546 t_read_addr = newTemp(cb);
547 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
548 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000549 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000550 break;
551
552 case FPU_R:
nethercote9313ac42004-07-06 21:54:20 +0000553 case MMX2_MemRd:
njn25e49d8e72002-09-23 09:36:25 +0000554 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000555 t_read_addr = newTemp(cb);
556 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
nethercote9313ac42004-07-06 21:54:20 +0000557 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000558 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000559 break;
thughes96b466a2004-03-15 16:43:58 +0000560 break;
561
562 case MMX2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000563 case SSE2a_MemRd:
564 case SSE2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000565 case SSE3a_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000566 case SSE3a1_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000567 t_read = u_in->val3;
568 t_read_addr = newTemp(cb);
569 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
570 data_size = u_in->size;
571 VG_(copy_UInstr)(cb, u_in);
572 break;
573
nethercote9313ac42004-07-06 21:54:20 +0000574 // Note that we must set t_write_addr even for mod instructions;
575 // That's how the code above determines whether it does a write.
576 // Without it, it would think a mod instruction is a read.
577 // As for the MOV, if it's a mod instruction it's redundant, but it's
578 // not expensive and mod instructions are rare anyway. */
njn4f9c9342002-04-29 16:03:24 +0000579 case STORE:
580 case FPU_W:
nethercote9313ac42004-07-06 21:54:20 +0000581 case MMX2_MemWr:
njn25e49d8e72002-09-23 09:36:25 +0000582 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000583 t_write_addr = newTemp(cb);
584 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000585 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000586 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000587 break;
588
njn21f805d2003-08-25 16:15:40 +0000589 case SSE2a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000590 case SSE3a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000591 t_write = u_in->val3;
592 t_write_addr = newTemp(cb);
593 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000594 data_size = u_in->size;
njn21f805d2003-08-25 16:15:40 +0000595 VG_(copy_UInstr)(cb, u_in);
596 break;
njn25e49d8e72002-09-23 09:36:25 +0000597
nethercote9313ac42004-07-06 21:54:20 +0000598 // INCEIP: insert instrumentation
njn25e49d8e72002-09-23 09:36:25 +0000599 case INCEIP:
600 x86_instr_size = u_in->val1;
601 goto instrument_x86_instr;
602
nethercote9313ac42004-07-06 21:54:20 +0000603 // JMP: insert instrumentation if the first JMP
njn25e49d8e72002-09-23 09:36:25 +0000604 case JMP:
nethercote9313ac42004-07-06 21:54:20 +0000605 if (instrumented_Jcc) {
njne427a662002-10-02 11:08:25 +0000606 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000607 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000608 VG_(copy_UInstr)(cb, u_in);
nethercote9313ac42004-07-06 21:54:20 +0000609 instrumented_Jcc = False; // rest
njn25e49d8e72002-09-23 09:36:25 +0000610 break;
njn25e49d8e72002-09-23 09:36:25 +0000611 } else {
nethercote9313ac42004-07-06 21:54:20 +0000612 // The first JMP... instrument.
613 if (CondAlways != u_in->cond) {
614 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
615 instrumented_Jcc = True;
njn25e49d8e72002-09-23 09:36:25 +0000616 } else {
nethercote9313ac42004-07-06 21:54:20 +0000617 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000618 }
nethercote9313ac42004-07-06 21:54:20 +0000619 // Get x86 instr size from final JMP.
620 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
621 goto instrument_x86_instr;
njn25e49d8e72002-09-23 09:36:25 +0000622 }
623
nethercote9313ac42004-07-06 21:54:20 +0000624 // Code executed at the end of each x86 instruction.
625 instrument_x86_instr:
626 // Large (eg. 28B, 108B, 512B) data-sized instructions will be
627 // done inaccurately but they're very rare and this avoids
628 // errors from hitting more than two cache lines in the
629 // simulation.
630 if (data_size > MIN_LINE_SIZE) data_size = MIN_LINE_SIZE;
njn25e49d8e72002-09-23 09:36:25 +0000631
nethercote9313ac42004-07-06 21:54:20 +0000632 end_of_x86_instr(cb, &bb_info->instrs[ bb_info_i ], bb_seen_before,
633 x86_instr_addr, x86_instr_size, data_size,
634 t_read, t_read_addr, t_write, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000635
nethercote9313ac42004-07-06 21:54:20 +0000636 // Copy original UInstr (INCEIP or JMP)
njn4ba5a792002-09-30 10:23:54 +0000637 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000638
nethercote9313ac42004-07-06 21:54:20 +0000639 // Update loop state for next x86 instr
640 bb_info_i++;
njn25e49d8e72002-09-23 09:36:25 +0000641 x86_instr_addr += x86_instr_size;
nethercote9313ac42004-07-06 21:54:20 +0000642 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
643 data_size = 0;
njn4f9c9342002-04-29 16:03:24 +0000644 break;
645
646 default:
njn4ba5a792002-09-30 10:23:54 +0000647 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000648 break;
649 }
650 }
651
nethercote9313ac42004-07-06 21:54:20 +0000652 // BB address should be the same as the first instruction's address.
653 sk_assert(bb_info->BB_addr == bb_info->instrs[0].instr_addr );
654 sk_assert(bb_info_i == bb_info->n_instrs);
njn4f9c9342002-04-29 16:03:24 +0000655
njn4ba5a792002-09-30 10:23:54 +0000656 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000657 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000658
659#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000660}
661
662/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000663/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000664/*------------------------------------------------------------*/
665
// Sentinel cache description with every field set to -1.
#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })

// Cache configurations, one each for I1, D1 and L2; all start out as the
// sentinel.  (The "clo_" prefix suggests they are set from command-line
// options -- confirm against the option-processing code.)
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_L2_cache = UNDEFINED_CACHE;
671
nethercote9313ac42004-07-06 21:54:20 +0000672// All CPUID info taken from sandpile.org/a32/cpuid.htm
673// Probably only works for Intel and AMD chips, and probably only for some of
674// them.
njn7cf0bd32002-06-08 13:36:03 +0000675
sewardj07133bf2002-06-13 10:25:56 +0000676static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +0000677{
678 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +0000679 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +0000680 actual_size);
njn7cf0bd32002-06-08 13:36:03 +0000681 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000682 " Simulating a %d KB cache with %d B lines",
683 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +0000684}
685
686/* Intel method is truly wretched. We have to do an insane indexing into an
687 * array of pre-defined configurations for various parts of the memory
688 * hierarchy.
689 */
690static
sewardj07133bf2002-06-13 10:25:56 +0000691Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000692{
sewardj07133bf2002-06-13 10:25:56 +0000693 UChar info[16];
694 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +0000695 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +0000696
697 if (level < 2) {
698 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000699 "warning: CPUID level < 2 for Intel processor (%d)",
700 level);
njn7cf0bd32002-06-08 13:36:03 +0000701 return -1;
702 }
703
thughes4ee64962004-06-16 20:51:45 +0000704 VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
705 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +0000706 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
707 info[0] = 0x0; /* reset AL */
708
709 if (0 != trials) {
710 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000711 "warning: non-zero CPUID trials for Intel processor (%d)",
712 trials);
njn7cf0bd32002-06-08 13:36:03 +0000713 return -1;
714 }
715
716 for (i = 0; i < 16; i++) {
717
718 switch (info[i]) {
719
720 case 0x0: /* ignore zeros */
721 break;
722
njn25e49d8e72002-09-23 09:36:25 +0000723 /* TLB info, ignore */
724 case 0x01: case 0x02: case 0x03: case 0x04:
725 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +0000726 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +0000727 break;
728
729 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
730 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000731 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000732
733 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
734 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000735 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000736
njn25e49d8e72002-09-23 09:36:25 +0000737 /* IA-64 info -- panic! */
738 case 0x10: case 0x15: case 0x1a:
739 case 0x88: case 0x89: case 0x8a: case 0x8d:
740 case 0x90: case 0x96: case 0x9b:
nethercote9313ac42004-07-06 21:54:20 +0000741 VG_(skin_panic)("IA-64 cache detected?!");
njn25e49d8e72002-09-23 09:36:25 +0000742
njn7cf0bd32002-06-08 13:36:03 +0000743 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +0000744 VG_(message)(Vg_DebugMsg,
745 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +0000746 break;
747
njn25e49d8e72002-09-23 09:36:25 +0000748 /* These are sectored, whatever that means */
749 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
750 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
751
752 /* If a P6 core, this means "no L2 cache".
753 If a P4 core, this means "no L3 cache".
754 We don't know what core it is, so don't issue a warning. To detect
755 a missing L2 cache, we use 'L2_found'. */
756 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +0000757 break;
758
njn25e49d8e72002-09-23 09:36:25 +0000759 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
760 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
761 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
762 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
763 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000764
765 /* These are sectored, whatever that means */
766 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
767 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
768 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
769
770 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
771 * conversion to byte size is a total guess; treat the 12K and 16K
772 * cases the same since the cache byte size must be a power of two for
773 * everything to work!. Also guessing 32 bytes for the line size...
774 */
775 case 0x70: /* 12K micro-ops, 8-way */
776 *I1c = (cache_t) { 16, 8, 32 };
777 micro_ops_warn(12, 16, 32);
778 break;
779 case 0x71: /* 16K micro-ops, 8-way */
780 *I1c = (cache_t) { 16, 8, 32 };
781 micro_ops_warn(16, 16, 32);
782 break;
783 case 0x72: /* 32K micro-ops, 8-way */
784 *I1c = (cache_t) { 32, 8, 32 };
785 micro_ops_warn(32, 32, 32);
786 break;
787
njn25e49d8e72002-09-23 09:36:25 +0000788 /* These are sectored, whatever that means */
789 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
790 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
791 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
792 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
793 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000794
njn25e49d8e72002-09-23 09:36:25 +0000795 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
796 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
797 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
798 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
799 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +0000800 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
801 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000802
803 default:
804 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000805 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +0000806 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +0000807 break;
808 }
809 }
njn25e49d8e72002-09-23 09:36:25 +0000810
811 if (!L2_found)
812 VG_(message)(Vg_DebugMsg,
813 "warning: L2 cache not installed, ignore L2 results.");
814
njn7cf0bd32002-06-08 13:36:03 +0000815 return 0;
816}
817
818/* AMD method is straightforward, just extract appropriate bits from the
819 * result registers.
820 *
821 * Bits, for D1 and I1:
822 * 31..24 data L1 cache size in KBs
823 * 23..16 data L1 cache associativity (FFh=full)
824 * 15.. 8 data L1 cache lines per tag
825 * 7.. 0 data L1 cache line size in bytes
826 *
827 * Bits, for L2:
828 * 31..16 unified L2 cache size in KBs
829 * 15..12 unified L2 cache associativity (0=off, FFh=full)
830 * 11.. 8 unified L2 cache lines per tag
831 * 7.. 0 unified L2 cache line size in bytes
832 *
833 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
834 * upon this information. (Whatever that means -- njn)
835 *
njn25e49d8e72002-09-23 09:36:25 +0000836 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
837 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
838 * so we detect that.
839 *
njn7cf0bd32002-06-08 13:36:03 +0000840 * Returns 0 on success, non-zero on failure.
841 */
sewardj07133bf2002-06-13 10:25:56 +0000842static
843Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000844{
sewardj05bcdcb2003-05-18 10:05:38 +0000845 UInt ext_level;
thughes4ee64962004-06-16 20:51:45 +0000846 UInt dummy, model;
847 UInt I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +0000848
thughes4ee64962004-06-16 20:51:45 +0000849 VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000850
851 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
852 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000853 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
854 ext_level);
njn7cf0bd32002-06-08 13:36:03 +0000855 return -1;
856 }
857
thughes4ee64962004-06-16 20:51:45 +0000858 VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
859 VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000860
thughes4ee64962004-06-16 20:51:45 +0000861 VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
njn25e49d8e72002-09-23 09:36:25 +0000862
863 /* Check for Duron bug */
864 if (model == 0x630) {
865 VG_(message)(Vg_UserMsg,
866 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
867 L2i = (64 << 16) | (L2i & 0xffff);
868 }
869
njn7cf0bd32002-06-08 13:36:03 +0000870 D1c->size = (D1i >> 24) & 0xff;
871 D1c->assoc = (D1i >> 16) & 0xff;
872 D1c->line_size = (D1i >> 0) & 0xff;
873
874 I1c->size = (I1i >> 24) & 0xff;
875 I1c->assoc = (I1i >> 16) & 0xff;
876 I1c->line_size = (I1i >> 0) & 0xff;
877
878 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
879 L2c->assoc = (L2i >> 12) & 0xf;
880 L2c->line_size = (L2i >> 0) & 0xff;
881
882 return 0;
883}
884
/* Jump target used to recover when executing CPUID raises SIGILL. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler: never returns normally; control resumes at the
 * __builtin_setjmp() call that filled in cpuid_jmpbuf, which then yields a
 * non-zero value. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;   /* which signal arrived is irrelevant here */
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
892
893static
sewardj07133bf2002-06-13 10:25:56 +0000894Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000895{
sewardj07133bf2002-06-13 10:25:56 +0000896 Int level, res, ret;
897 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +0000898 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +0000899
900 /* Install own SIGILL handler */
901 sigill_new.ksa_handler = cpuid_SIGILL_handler;
902 sigill_new.ksa_flags = 0;
903 sigill_new.ksa_restorer = NULL;
904 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +0000905 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000906
907 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +0000908 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000909
910 /* Trap for illegal instruction, in case it's a really old processor that
911 * doesn't support CPUID. */
912 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
thughes4ee64962004-06-16 20:51:45 +0000913 VG_(cpuid)(0, &level, (int*)&vendor_id[0],
914 (int*)&vendor_id[8], (int*)&vendor_id[4]);
njn7cf0bd32002-06-08 13:36:03 +0000915 vendor_id[12] = '\0';
916
917 /* Restore old SIGILL handler */
918 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000919 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000920
921 } else {
922 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
923
924 /* Restore old SIGILL handler */
925 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000926 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000927 return -1;
928 }
929
930 if (0 == level) {
931 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
932 return -1;
933 }
934
935 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
936 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
937 ret = Intel_cache_info(level, I1c, D1c, L2c);
938
939 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
940 ret = AMD_cache_info(I1c, D1c, L2c);
941
sewardj97b7b262003-10-07 00:18:16 +0000942 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
943 /* Total kludge. Pretend to be a VIA Nehemiah. */
944 D1c->size = 64;
945 D1c->assoc = 16;
946 D1c->line_size = 16;
947 I1c->size = 64;
948 I1c->assoc = 4;
949 I1c->line_size = 16;
950 L2c->size = 64;
951 L2c->assoc = 16;
952 L2c->line_size = 16;
953 ret = 0;
954
njn7cf0bd32002-06-08 13:36:03 +0000955 } else {
956 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
957 vendor_id);
958 return -1;
959 }
960
961 /* Successful! Convert sizes from KB to bytes */
962 I1c->size *= 1024;
963 D1c->size *= 1024;
964 L2c->size *= 1024;
965
966 return ret;
967}
968
969/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +0000970static
971void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +0000972{
973 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +0000974 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +0000975 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000976 "warning: %s size of %dB not a power of two; "
977 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +0000978 cache->size = dflt->size;
979 }
980
sewardj07133bf2002-06-13 10:25:56 +0000981 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +0000982 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000983 "warning: %s associativity of %d not a power of two; "
984 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +0000985 cache->assoc = dflt->assoc;
986 }
987
sewardj07133bf2002-06-13 10:25:56 +0000988 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +0000989 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000990 "warning: %s line size of %dB not a power of two; "
991 "defaulting to %dB",
992 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +0000993 cache->line_size = dflt->line_size;
994 }
995
996 /* Then check line size >= 16 -- any smaller and a single instruction could
997 * straddle three cache lines, which breaks a simulation assertion and is
998 * stupid anyway. */
999 if (cache->line_size < MIN_LINE_SIZE) {
1000 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001001 "warning: %s line size of %dB too small; "
1002 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001003 cache->line_size = MIN_LINE_SIZE;
1004 }
1005
1006 /* Then check cache size > line size (causes seg faults if not). */
1007 if (cache->size <= cache->line_size) {
1008 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001009 "warning: %s cache size of %dB <= line size of %dB; "
1010 "increasing to %dB", name, cache->size, cache->line_size,
1011 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001012 cache->size = cache->line_size * 2;
1013 }
1014
1015 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1016 if (cache->assoc > (cache->size / cache->line_size)) {
1017 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001018 "warning: %s associativity > (size / line size); "
1019 "increasing size to %dB",
1020 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001021 cache->size = cache->assoc * cache->line_size;
1022 }
1023}
1024
sewardj07133bf2002-06-13 10:25:56 +00001025static
1026void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001027{
nethercote9313ac42004-07-06 21:54:20 +00001028#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1029
1030 Int res, n_clos = 0;
1031
1032 // Defaults are for a model 3 or 4 Athlon
njn7cf0bd32002-06-08 13:36:03 +00001033 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1034 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1035 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1036
nethercote9313ac42004-07-06 21:54:20 +00001037 // Set caches to default.
1038 *I1c = I1_dflt;
1039 *D1c = D1_dflt;
1040 *L2c = L2_dflt;
njn7cf0bd32002-06-08 13:36:03 +00001041
nethercote9313ac42004-07-06 21:54:20 +00001042 // Then replace with any info we can get from CPUID.
1043 res = get_caches_from_CPUID(I1c, D1c, L2c);
sewardjb1a77a42002-07-13 13:31:20 +00001044
nethercote9313ac42004-07-06 21:54:20 +00001045 // Then replace with any defined on the command line.
1046 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; n_clos++; }
1047 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; n_clos++; }
1048 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; n_clos++; }
njn7cf0bd32002-06-08 13:36:03 +00001049
nethercote9313ac42004-07-06 21:54:20 +00001050 // Warn if CPUID failed and config not completely specified from cmd line.
1051 if (res != 0 && n_clos < 3) {
1052 VG_(message)(Vg_DebugMsg,
1053 "Warning: Couldn't detect cache config, using one "
1054 "or more defaults ");
njn7cf0bd32002-06-08 13:36:03 +00001055 }
njn7cf0bd32002-06-08 13:36:03 +00001056
nethercote9313ac42004-07-06 21:54:20 +00001057 // Then check values and fix if not acceptable.
njn7cf0bd32002-06-08 13:36:03 +00001058 check_cache(I1c, &I1_dflt, "I1");
1059 check_cache(D1c, &D1_dflt, "D1");
1060 check_cache(L2c, &L2_dflt, "L2");
1061
1062 if (VG_(clo_verbosity) > 1) {
1063 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1064 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1065 I1c->size, I1c->assoc, I1c->line_size);
1066 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1067 D1c->size, D1c->assoc, D1c->line_size);
1068 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1069 L2c->size, L2c->assoc, L2c->line_size);
1070 }
nethercote9313ac42004-07-06 21:54:20 +00001071#undef CMD_LINE_DEFINED
njn7cf0bd32002-06-08 13:36:03 +00001072}
1073
njn4f9c9342002-04-29 16:03:24 +00001074/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001075/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001076/*------------------------------------------------------------*/
1077
nethercote9313ac42004-07-06 21:54:20 +00001078// Total reads/writes/misses. Calculated during CC traversal at the end.
1079// All auto-zeroed.
1080static CC Ir_total;
1081static CC Dr_total;
1082static CC Dw_total;
1083
1084static Char* cachegrind_out_file;
1085
1086static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +00001087{
nethercote9313ac42004-07-06 21:54:20 +00001088 VG_(message)(Vg_UserMsg,
1089 "error: can't open cache simulation output file `%s'",
1090 cachegrind_out_file );
1091 VG_(message)(Vg_UserMsg,
1092 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +00001093}
1094
nethercote9313ac42004-07-06 21:54:20 +00001095static void fprint_lineCC(Int fd, lineCC* n)
njn4f9c9342002-04-29 16:03:24 +00001096{
nethercote9313ac42004-07-06 21:54:20 +00001097 Char buf[512];
1098 VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1099 n->line,
1100 n->Ir.a, n->Ir.m1, n->Ir.m2,
1101 n->Dr.a, n->Dr.m1, n->Dr.m2,
1102 n->Dw.a, n->Dw.m1, n->Dw.m2);
1103 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1104
1105 Ir_total.a += n->Ir.a; Ir_total.m1 += n->Ir.m1; Ir_total.m2 += n->Ir.m2;
1106 Dr_total.a += n->Dr.a; Dr_total.m1 += n->Dr.m1; Dr_total.m2 += n->Dr.m2;
1107 Dw_total.a += n->Dw.a; Dw_total.m1 += n->Dw.m1; Dw_total.m2 += n->Dw.m2;
1108}
1109
1110static void fprint_CC_table_and_calc_totals(void)
1111{
1112 Int fd;
1113 Char buf[512];
1114 fileCC *curr_fileCC;
1115 fnCC *curr_fnCC;
1116 lineCC *curr_lineCC;
1117 Int i, j, k;
njn4f9c9342002-04-29 16:03:24 +00001118
njn25e49d8e72002-09-23 09:36:25 +00001119 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001120
njndb918dd2003-07-22 20:45:11 +00001121 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001122 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001123 if (fd < 0) {
nethercote9313ac42004-07-06 21:54:20 +00001124 // If the file can't be opened for whatever reason (conflict
1125 // between multiple cachegrinded processes?), give up now.
sewardj0744b6c2002-12-11 00:45:42 +00001126 file_err();
1127 return;
1128 }
njn4f9c9342002-04-29 16:03:24 +00001129
nethercote9313ac42004-07-06 21:54:20 +00001130 // "desc:" lines (giving I1/D1/L2 cache configuration). The spaces after
1131 // the 2nd colon makes cg_annotate's output look nicer.
1132 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1133 "desc: D1 cache: %s\n"
1134 "desc: L2 cache: %s\n",
1135 I1.desc_line, D1.desc_line, L2.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001136 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001137
nethercote9313ac42004-07-06 21:54:20 +00001138 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001139 VG_(strcpy)(buf, "cmd:");
1140 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001141 for (i = 0; i < VG_(client_argc); i++) {
1142 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001143 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1144 }
nethercote9313ac42004-07-06 21:54:20 +00001145 // "events:" line
njn4f9c9342002-04-29 16:03:24 +00001146 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1147 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1148
nethercote9313ac42004-07-06 21:54:20 +00001149 // Six loops here: three for the hash table arrays, and three for the
1150 // chains hanging off the hash table arrays.
njn4f9c9342002-04-29 16:03:24 +00001151 for (i = 0; i < N_FILE_ENTRIES; i++) {
nethercote9313ac42004-07-06 21:54:20 +00001152 curr_fileCC = CC_table[i];
1153 while (curr_fileCC != NULL) {
1154 VG_(sprintf)(buf, "fl=%s\n", curr_fileCC->file);
njn4f9c9342002-04-29 16:03:24 +00001155 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1156
1157 for (j = 0; j < N_FN_ENTRIES; j++) {
nethercote9313ac42004-07-06 21:54:20 +00001158 curr_fnCC = curr_fileCC->fns[j];
1159 while (curr_fnCC != NULL) {
1160 VG_(sprintf)(buf, "fn=%s\n", curr_fnCC->fn);
njn4f9c9342002-04-29 16:03:24 +00001161 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1162
nethercote9313ac42004-07-06 21:54:20 +00001163 for (k = 0; k < N_LINE_ENTRIES; k++) {
1164 curr_lineCC = curr_fnCC->lines[k];
1165 while (curr_lineCC != NULL) {
1166 fprint_lineCC(fd, curr_lineCC);
1167 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +00001168 }
1169 }
nethercote9313ac42004-07-06 21:54:20 +00001170 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +00001171 }
1172 }
nethercote9313ac42004-07-06 21:54:20 +00001173 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +00001174 }
1175 }
1176
nethercote9313ac42004-07-06 21:54:20 +00001177 // Summary stats must come after rest of table, since we calculate them
1178 // during traversal. */
njn4f9c9342002-04-29 16:03:24 +00001179 VG_(sprintf)(buf, "summary: "
nethercote9313ac42004-07-06 21:54:20 +00001180 "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
njn4f9c9342002-04-29 16:03:24 +00001181 Ir_total.a, Ir_total.m1, Ir_total.m2,
1182 Dr_total.a, Dr_total.m1, Dr_total.m2,
1183 Dw_total.a, Dw_total.m1, Dw_total.m2);
1184 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1185 VG_(close)(fd);
1186}
1187
njn607adfc2003-09-30 14:15:44 +00001188static UInt ULong_width(ULong n)
njn4f9c9342002-04-29 16:03:24 +00001189{
njn607adfc2003-09-30 14:15:44 +00001190 UInt w = 0;
1191 while (n > 0) {
1192 n = n / 10;
1193 w++;
njn4f9c9342002-04-29 16:03:24 +00001194 }
njn607adfc2003-09-30 14:15:44 +00001195 return w + (w-1)/3; // add space for commas
njn4f9c9342002-04-29 16:03:24 +00001196}
1197
sewardj4f29ddf2002-05-03 22:29:04 +00001198static
daywalker8ad1a402003-09-18 01:15:32 +00001199void percentify(Int n, Int ex, Int field_width, char buf[])
njn4f9c9342002-04-29 16:03:24 +00001200{
1201 int i, len, space;
1202
daywalker8ad1a402003-09-18 01:15:32 +00001203 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
njn4f9c9342002-04-29 16:03:24 +00001204 len = VG_(strlen)(buf);
1205 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001206 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001207 i = len;
1208
1209 /* Right justify in field */
1210 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1211 for (i = 0; i < space; i++) buf[i] = ' ';
1212}
1213
njn7d9f94d2003-04-22 21:41:40 +00001214void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001215{
nethercote9313ac42004-07-06 21:54:20 +00001216 static char buf1[128], buf2[128], buf3[128], fmt [128];
njn607adfc2003-09-30 14:15:44 +00001217
njn4f9c9342002-04-29 16:03:24 +00001218 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001219 ULong L2_total_m, L2_total_mr, L2_total_mw,
1220 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001221 Int l1, l2, l3;
1222 Int p;
1223
nethercote9313ac42004-07-06 21:54:20 +00001224 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001225
njn7cf0bd32002-06-08 13:36:03 +00001226 if (VG_(clo_verbosity) == 0)
1227 return;
1228
njn4f9c9342002-04-29 16:03:24 +00001229 /* I cache results. Use the I_refs value to determine the first column
1230 * width. */
njn607adfc2003-09-30 14:15:44 +00001231 l1 = ULong_width(Ir_total.a);
1232 l2 = ULong_width(Dr_total.a);
1233 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001234
njn607adfc2003-09-30 14:15:44 +00001235 /* Make format string, getting width right for numbers */
1236 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1237
1238 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1239 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1240 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001241
1242 p = 100;
1243
njn25e49d8e72002-09-23 09:36:25 +00001244 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001245 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1246 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1247
1248 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1249 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1250 VG_(message)(Vg_UserMsg, "");
1251
1252 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1253 * width of columns 2 & 3. */
1254 D_total.a = Dr_total.a + Dw_total.a;
1255 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1256 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1257
njn607adfc2003-09-30 14:15:44 +00001258 /* Make format string, getting width right for numbers */
1259 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001260
njn607adfc2003-09-30 14:15:44 +00001261 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1262 D_total.a, Dr_total.a, Dw_total.a);
1263 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1264 D_total.m1, Dr_total.m1, Dw_total.m1);
1265 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1266 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001267
1268 p = 10;
1269
njn25e49d8e72002-09-23 09:36:25 +00001270 if (0 == D_total.a) D_total.a = 1;
1271 if (0 == Dr_total.a) Dr_total.a = 1;
1272 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001273 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1274 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1275 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1276 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1277
1278 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1279 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1280 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1281 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1282 VG_(message)(Vg_UserMsg, "");
1283
1284 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001285
1286 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1287 L2_total_r = Dr_total.m1 + Ir_total.m1;
1288 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001289 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1290 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001291
njn4f9c9342002-04-29 16:03:24 +00001292 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1293 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1294 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001295 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1296 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001297
1298 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1299 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1300 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1301 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1302
1303
nethercote9313ac42004-07-06 21:54:20 +00001304 // Various stats
njn4f9c9342002-04-29 16:03:24 +00001305 if (VG_(clo_verbosity) > 1) {
nethercote9313ac42004-07-06 21:54:20 +00001306 int BB_lookups = full_debug_BBs + fn_debug_BBs +
njn4f9c9342002-04-29 16:03:24 +00001307 file_line_debug_BBs + no_debug_BBs;
1308
1309 VG_(message)(Vg_DebugMsg, "");
1310 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1311 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
nethercote9313ac42004-07-06 21:54:20 +00001312 VG_(message)(Vg_DebugMsg, "Distinct lines: %d", distinct_lines);
1313 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
njn4f9c9342002-04-29 16:03:24 +00001314 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1315 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1316 full_debug_BBs * 100 / BB_lookups,
1317 full_debug_BBs);
1318 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1319 file_line_debug_BBs * 100 / BB_lookups,
1320 file_line_debug_BBs);
1321 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
nethercote9313ac42004-07-06 21:54:20 +00001322 fn_debug_BBs * 100 / BB_lookups,
1323 fn_debug_BBs);
njn4f9c9342002-04-29 16:03:24 +00001324 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1325 no_debug_BBs * 100 / BB_lookups,
1326 no_debug_BBs);
1327 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
njn4f9c9342002-04-29 16:03:24 +00001328 }
njn25e49d8e72002-09-23 09:36:25 +00001329 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001330}
1331
nethercote9313ac42004-07-06 21:54:20 +00001332/*--------------------------------------------------------------------*/
1333/*--- Discarding BB info ---*/
1334/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001335
nethercote9313ac42004-07-06 21:54:20 +00001336// Called when a translation is invalidated due to code unloading.
njn25e49d8e72002-09-23 09:36:25 +00001337void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001338{
nethercote9313ac42004-07-06 21:54:20 +00001339 VgHashNode** prev_next_ptr;
1340 VgHashNode* bb_info;
njn4294fd42002-06-05 14:41:10 +00001341
nethercote9313ac42004-07-06 21:54:20 +00001342 if (0) VG_(printf)( "discard_basic_block_info: %p, %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001343
nethercote9313ac42004-07-06 21:54:20 +00001344 // Get BB info, remove from table, free BB info. Simple!
1345 bb_info = VG_(HT_get_node)(instr_info_table, a, &prev_next_ptr);
1346 sk_assert(NULL != bb_info);
1347 *prev_next_ptr = bb_info->next;
1348 VG_(free)(bb_info);
sewardj18d75132002-05-16 11:06:21 +00001349}
1350
1351/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001352/*--- Command line processing ---*/
1353/*--------------------------------------------------------------------*/
1354
nethercote9313ac42004-07-06 21:54:20 +00001355static void parse_cache_opt ( cache_t* cache, char* opt )
njn25e49d8e72002-09-23 09:36:25 +00001356{
nethercote9313ac42004-07-06 21:54:20 +00001357 int i = 0, i2, i3;
njn25e49d8e72002-09-23 09:36:25 +00001358
nethercote9313ac42004-07-06 21:54:20 +00001359 // Option argument looks like "65536,2,64".
1360 // Find commas, replace with NULs to make three independent
1361 // strings, then extract numbers, put NULs back. Yuck.
njn25e49d8e72002-09-23 09:36:25 +00001362 while (VG_(isdigit)(opt[i])) i++;
1363 if (',' == opt[i]) {
1364 opt[i++] = '\0';
1365 i2 = i;
1366 } else goto bad;
1367 while (VG_(isdigit)(opt[i])) i++;
1368 if (',' == opt[i]) {
1369 opt[i++] = '\0';
1370 i3 = i;
1371 } else goto bad;
1372 while (VG_(isdigit)(opt[i])) i++;
1373 if ('\0' != opt[i]) goto bad;
1374
nethercote9313ac42004-07-06 21:54:20 +00001375 cache->size = (Int)VG_(atoll)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001376 cache->assoc = (Int)VG_(atoll)(opt + i2);
1377 cache->line_size = (Int)VG_(atoll)(opt + i3);
1378
nethercote9313ac42004-07-06 21:54:20 +00001379 opt[i2-1] = ',';
1380 opt[i3-1] = ',';
njn25e49d8e72002-09-23 09:36:25 +00001381 return;
1382
1383 bad:
nethercote9313ac42004-07-06 21:54:20 +00001384 VG_(bad_option)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001385}
1386
1387Bool SK_(process_cmd_line_option)(Char* arg)
1388{
nethercote9313ac42004-07-06 21:54:20 +00001389 // 5 is length of "--I1="
njn39c86652003-05-21 10:13:39 +00001390 if (VG_CLO_STREQN(5, arg, "--I1="))
nethercote9313ac42004-07-06 21:54:20 +00001391 parse_cache_opt(&clo_I1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001392 else if (VG_CLO_STREQN(5, arg, "--D1="))
nethercote9313ac42004-07-06 21:54:20 +00001393 parse_cache_opt(&clo_D1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001394 else if (VG_CLO_STREQN(5, arg, "--L2="))
nethercote9313ac42004-07-06 21:54:20 +00001395 parse_cache_opt(&clo_L2_cache, &arg[5]);
njn25e49d8e72002-09-23 09:36:25 +00001396 else
1397 return False;
1398
1399 return True;
1400}
1401
njn3e884182003-04-15 13:03:23 +00001402void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001403{
njn3e884182003-04-15 13:03:23 +00001404 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001405" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1406" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001407" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1408 );
1409}
1410
1411void SK_(print_debug_usage)(void)
1412{
1413 VG_(printf)(
1414" (none)\n"
1415 );
njn25e49d8e72002-09-23 09:36:25 +00001416}
1417
1418/*--------------------------------------------------------------------*/
1419/*--- Setup ---*/
1420/*--------------------------------------------------------------------*/
1421
njn810086f2002-11-14 12:42:47 +00001422void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001423{
njn13f02932003-04-30 20:23:58 +00001424 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00001425
njn810086f2002-11-14 12:42:47 +00001426 VG_(details_name) ("Cachegrind");
1427 VG_(details_version) (NULL);
1428 VG_(details_description) ("an I1/D1/L2 cache profiler");
1429 VG_(details_copyright_author)(
nethercote08fa9a72004-07-16 17:44:00 +00001430 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote et al.");
nethercote421281e2003-11-20 16:20:55 +00001431 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00001432 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001433
njn810086f2002-11-14 12:42:47 +00001434 VG_(needs_basic_block_discards)();
1435 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001436
1437 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
nethercote9313ac42004-07-06 21:54:20 +00001438 VG_(register_compact_helper)((Addr) & log_1I_1Dr_cache_access);
1439 VG_(register_compact_helper)((Addr) & log_1I_1Dw_cache_access);
njn25e49d8e72002-09-23 09:36:25 +00001440 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00001441
njn99ccf082003-09-30 13:51:23 +00001442 /* Get working directory */
1443 sk_assert( VG_(getcwd_alloc)(&base_dir) );
1444
njn13f02932003-04-30 20:23:58 +00001445 /* Block is big enough for dir name + cachegrind.out.<pid> */
1446 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
1447 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
1448 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00001449 VG_(free)(base_dir);
nethercote9313ac42004-07-06 21:54:20 +00001450
1451 instr_info_table = VG_(HT_construct)();
njn25e49d8e72002-09-23 09:36:25 +00001452}
1453
1454void SK_(post_clo_init)(void)
1455{
1456 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00001457
njn25e49d8e72002-09-23 09:36:25 +00001458 get_caches(&I1c, &D1c, &L2c);
1459
1460 cachesim_I1_initcache(I1c);
1461 cachesim_D1_initcache(D1c);
1462 cachesim_L2_initcache(L2c);
1463
nethercote9313ac42004-07-06 21:54:20 +00001464 VGP_(register_profile_event)(VgpGetLineCC, "get-lineCC");
njn25e49d8e72002-09-23 09:36:25 +00001465 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1466 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
njn25e49d8e72002-09-23 09:36:25 +00001467}
1468
/* Invokes VG_DETERMINE_INTERFACE_VERSION (defined outside this part of
   the file) with this tool's SK_(pre_clo_init) function and the value 0.
   NOTE(review): presumably this is how the core is told the tool's
   initial entry point and interface version -- confirm against the
   macro's definition. */
fitzhardinge98abfc72003-12-16 02:05:15 +00001469VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
1470
/*--------------------------------------------------------------------*/
/*--- end cg_main.c ---*/
/*--------------------------------------------------------------------*/