blob: dae1b96919cae538d7d2712edf0bab024670fd04 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
/*--- Cachegrind: everything but the simulation itself.            ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00005/*--------------------------------------------------------------------*/
6
7/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +00009 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000010
nethercotebb1c9912004-01-04 16:43:23 +000011 Copyright (C) 2002-2004 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000012 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000030*/
31
njn25e49d8e72002-09-23 09:36:25 +000032#include "vg_skin.h"
33//#include "vg_profile.c"
34
/* Description of one cache, as used by the cache simulation. */
typedef struct {
   int size;        /* bytes */
   int assoc;       /* associativity */
   int line_size;   /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000041
nethercote27fc1da2004-01-04 16:56:57 +000042#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000043
njn25e49d8e72002-09-23 09:36:25 +000044/*------------------------------------------------------------*/
45/*--- Constants ---*/
46/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000047
// Upper bound on the length of one x86 instruction, in bytes
// (according to ia32 sw dev manual vol 2).
#define MAX_x86_INSTR_SIZE    16

// Data accesses larger than this are clamped to it before simulation.
#define MIN_LINE_SIZE         16

// Buffer sizes for the file name and function name of a source location.
#define FILE_LEN              256
#define FN_LEN                256
njn7cf0bd32002-06-08 13:36:03 +000052
53/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000054/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000055/*------------------------------------------------------------*/
56
njn25e49d8e72002-09-23 09:36:25 +000057typedef
58 enum {
nethercote9313ac42004-07-06 21:54:20 +000059 VgpGetLineCC = VgpFini+1,
njn25e49d8e72002-09-23 09:36:25 +000060 VgpCacheSimulate,
61 VgpCacheResults
62 }
nethercote7cc9c232004-01-21 15:08:04 +000063 VgpToolCC;
sewardj07133bf2002-06-13 10:25:56 +000064
njn4f9c9342002-04-29 16:03:24 +000065/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +000066/*--- Types and Data Structures ---*/
njn4f9c9342002-04-29 16:03:24 +000067/*------------------------------------------------------------*/
68
69typedef struct _CC CC;
70struct _CC {
71 ULong a;
72 ULong m1;
73 ULong m2;
74};
75
nethercote9313ac42004-07-06 21:54:20 +000076//------------------------------------------------------------
77// Primary data structure #1: CC table
78// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
79// - hash(file, hash(fn, hash(line+CC)))
80// - Each hash table is separately chained.
81// - The array sizes below work fairly well for Konqueror.
82// - Lookups done by instr_addr, which is converted immediately to a source
83// location.
84// - Traversed for dumping stats at end in file/func/line hierarchy.
njn4f9c9342002-04-29 16:03:24 +000085
// Number of buckets at each level of the CC table (file, function, line).
#define N_FILE_ENTRIES        251
#define N_FN_ENTRIES          53
#define N_LINE_ENTRIES        37
njn4f9c9342002-04-29 16:03:24 +000089
nethercote9313ac42004-07-06 21:54:20 +000090typedef struct _lineCC lineCC;
91struct _lineCC {
92 Int line;
93 CC Ir;
94 CC Dr;
95 CC Dw;
96 lineCC* next;
njn4f9c9342002-04-29 16:03:24 +000097};
98
nethercote9313ac42004-07-06 21:54:20 +000099typedef struct _fnCC fnCC;
100struct _fnCC {
101 Char* fn;
102 fnCC* next;
103 lineCC* lines[N_LINE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000104};
105
nethercote9313ac42004-07-06 21:54:20 +0000106typedef struct _fileCC fileCC;
107struct _fileCC {
108 Char* file;
109 fileCC* next;
110 fnCC* fns[N_FN_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000111};
112
nethercote9313ac42004-07-06 21:54:20 +0000113// Top level of CC table. Auto-zeroed.
114static fileCC *CC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000115
nethercote9313ac42004-07-06 21:54:20 +0000116//------------------------------------------------------------
// Primary data structure #2: Instr-info table
118// - Holds the cached info about each instr that is used for simulation.
119// - table(BB_start_addr, list(instr_info))
120// - For each BB, each instr_info in the list holds info about the
nethercote7149b422004-07-20 13:29:02 +0000121// instruction (instr_size, instr_addr, etc), plus a pointer to its line
nethercote9313ac42004-07-06 21:54:20 +0000122// CC. This node is what's passed to the simulation function.
// - When BBs are discarded the relevant list(instr_info) is freed.
124
125typedef struct _instr_info instr_info;
126struct _instr_info {
nethercoteca1f2dc2004-07-21 08:49:02 +0000127 Addr instr_addr;
128 UChar instr_size;
129 UChar data_size;
130 lineCC* parent; // parent line-CC
nethercote9313ac42004-07-06 21:54:20 +0000131};
132
133typedef struct _BB_info BB_info;
134struct _BB_info {
135 BB_info* next; // next field
136 Addr BB_addr; // key
137 Int n_instrs;
138 instr_info instrs[0];
139};
140
141VgHashTable instr_info_table; // hash(Addr, BB_info)
142
143//------------------------------------------------------------
144// Stats
sewardj4f29ddf2002-05-03 22:29:04 +0000145static Int distinct_files = 0;
146static Int distinct_fns = 0;
nethercote9313ac42004-07-06 21:54:20 +0000147static Int distinct_lines = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000148static Int distinct_instrs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000149
sewardj4f29ddf2002-05-03 22:29:04 +0000150static Int full_debug_BBs = 0;
151static Int file_line_debug_BBs = 0;
nethercote9313ac42004-07-06 21:54:20 +0000152static Int fn_debug_BBs = 0;
sewardj4f29ddf2002-05-03 22:29:04 +0000153static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000154
sewardj4f29ddf2002-05-03 22:29:04 +0000155static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000156
nethercote9313ac42004-07-06 21:54:20 +0000157/*------------------------------------------------------------*/
158/*--- CC table operations ---*/
159/*------------------------------------------------------------*/
njn4294fd42002-06-05 14:41:10 +0000160
nethercote9313ac42004-07-06 21:54:20 +0000161static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
162 Char fn[FN_LEN], Int* line)
njn4f9c9342002-04-29 16:03:24 +0000163{
nethercote9313ac42004-07-06 21:54:20 +0000164 Bool found_file_line = VG_(get_filename_linenum)(instr_addr, file,
165 FILE_LEN, line);
166 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
njn4f9c9342002-04-29 16:03:24 +0000167
nethercote9313ac42004-07-06 21:54:20 +0000168 if (!found_file_line) {
169 VG_(strcpy)(file, "???");
170 *line = 0;
171 }
172 if (!found_fn) {
173 VG_(strcpy)(fn, "???");
174 }
175 if (found_file_line) {
176 if (found_fn) full_debug_BBs++;
177 else file_line_debug_BBs++;
178 } else {
179 if (found_fn) fn_debug_BBs++;
180 else no_debug_BBs++;
njn4f9c9342002-04-29 16:03:24 +0000181 }
182}
183
njn4f9c9342002-04-29 16:03:24 +0000184static UInt hash(Char *s, UInt table_size)
185{
nethercote9313ac42004-07-06 21:54:20 +0000186 const int hash_constant = 256;
187 int hash_value = 0;
188 for ( ; *s; s++)
189 hash_value = (hash_constant * hash_value + *s) % table_size;
190 return hash_value;
njn4f9c9342002-04-29 16:03:24 +0000191}
192
nethercote9313ac42004-07-06 21:54:20 +0000193static __inline__
194fileCC* new_fileCC(Char filename[], fileCC* next)
nethercote09d853e2004-01-21 16:12:55 +0000195{
nethercote9313ac42004-07-06 21:54:20 +0000196 // Using calloc() zeroes the fns[] array
197 fileCC* cc = VG_(calloc)(1, sizeof(fileCC));
198 cc->file = VG_(strdup)(filename);
199 cc->next = next;
200 return cc;
nethercote09d853e2004-01-21 16:12:55 +0000201}
202
nethercote9313ac42004-07-06 21:54:20 +0000203static __inline__
204fnCC* new_fnCC(Char fn[], fnCC* next)
njn4f9c9342002-04-29 16:03:24 +0000205{
nethercote9313ac42004-07-06 21:54:20 +0000206 // Using calloc() zeroes the lines[] array
207 fnCC* cc = VG_(calloc)(1, sizeof(fnCC));
208 cc->fn = VG_(strdup)(fn);
209 cc->next = next;
210 return cc;
211}
njn4f9c9342002-04-29 16:03:24 +0000212
nethercote9313ac42004-07-06 21:54:20 +0000213static __inline__
214lineCC* new_lineCC(Int line, lineCC* next)
215{
216 // Using calloc() zeroes the Ir/Dr/Dw CCs and the instrs[] array
217 lineCC* cc = VG_(calloc)(1, sizeof(lineCC));
218 cc->line = line;
219 cc->next = next;
220 return cc;
221}
njn4f9c9342002-04-29 16:03:24 +0000222
nethercote9313ac42004-07-06 21:54:20 +0000223static __inline__
224instr_info* new_instr_info(Addr instr_addr, lineCC* parent, instr_info* next)
225{
226 // Using calloc() zeroes instr_size and data_size
227 instr_info* ii = VG_(calloc)(1, sizeof(instr_info));
228 ii->instr_addr = instr_addr;
229 ii->parent = parent;
230 return ii;
231}
232
233// Do a three step traversal: by file, then fn, then line.
234// In all cases prepends new nodes to their chain. Returns a pointer to the
235// line node, creates a new one if necessary.
236static lineCC* get_lineCC(Addr orig_addr)
237{
238 fileCC *curr_fileCC;
239 fnCC *curr_fnCC;
240 lineCC *curr_lineCC;
241 Char file[FILE_LEN], fn[FN_LEN];
242 Int line;
243 UInt file_hash, fn_hash, line_hash;
244
245 get_debug_info(orig_addr, file, fn, &line);
246
247 VGP_PUSHCC(VgpGetLineCC);
248
249 // level 1
250 file_hash = hash(file, N_FILE_ENTRIES);
251 curr_fileCC = CC_table[file_hash];
252 while (NULL != curr_fileCC && !VG_STREQ(file, curr_fileCC->file)) {
253 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +0000254 }
nethercote9313ac42004-07-06 21:54:20 +0000255 if (NULL == curr_fileCC) {
256 CC_table[file_hash] = curr_fileCC =
257 new_fileCC(file, CC_table[file_hash]);
njn4f9c9342002-04-29 16:03:24 +0000258 distinct_files++;
259 }
260
nethercote9313ac42004-07-06 21:54:20 +0000261 // level 2
262 fn_hash = hash(fn, N_FN_ENTRIES);
263 curr_fnCC = curr_fileCC->fns[fn_hash];
264 while (NULL != curr_fnCC && !VG_STREQ(fn, curr_fnCC->fn)) {
265 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +0000266 }
nethercote9313ac42004-07-06 21:54:20 +0000267 if (NULL == curr_fnCC) {
268 curr_fileCC->fns[fn_hash] = curr_fnCC =
269 new_fnCC(fn, curr_fileCC->fns[fn_hash]);
njn4f9c9342002-04-29 16:03:24 +0000270 distinct_fns++;
271 }
272
nethercote9313ac42004-07-06 21:54:20 +0000273 // level 3
274 line_hash = line % N_LINE_ENTRIES;
275 curr_lineCC = curr_fnCC->lines[line_hash];
276 while (NULL != curr_lineCC && line != curr_lineCC->line) {
277 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +0000278 }
nethercote9313ac42004-07-06 21:54:20 +0000279 if (NULL == curr_lineCC) {
280 curr_fnCC->lines[line_hash] = curr_lineCC =
281 new_lineCC(line, curr_fnCC->lines[line_hash]);
282 distinct_lines++;
njn4f9c9342002-04-29 16:03:24 +0000283 }
nethercote9313ac42004-07-06 21:54:20 +0000284
285 VGP_POPCC(VgpGetLineCC);
286 return curr_lineCC;
njn4f9c9342002-04-29 16:03:24 +0000287}
288
289/*------------------------------------------------------------*/
nethercote9313ac42004-07-06 21:54:20 +0000290/*--- Cache simulation functions ---*/
njn4f9c9342002-04-29 16:03:24 +0000291/*------------------------------------------------------------*/
292
njn25e49d8e72002-09-23 09:36:25 +0000293static __attribute__ ((regparm (1)))
nethercote9313ac42004-07-06 21:54:20 +0000294void log_1I_0D_cache_access(instr_info* n)
njn25e49d8e72002-09-23 09:36:25 +0000295{
296 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000297 // n, n->instr_addr, n->instr_size)
njn25e49d8e72002-09-23 09:36:25 +0000298 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000299 cachesim_I1_doref(n->instr_addr, n->instr_size,
300 &n->parent->Ir.m1, &n->parent->Ir.m2);
301 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000302 VGP_POPCC(VgpCacheSimulate);
303}
304
nethercote9313ac42004-07-06 21:54:20 +0000305static __attribute__ ((regparm (2)))
306void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000307{
nethercote9313ac42004-07-06 21:54:20 +0000308 //VG_(printf)("1I_1Dr: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
309 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000310 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000311 cachesim_I1_doref(n->instr_addr, n->instr_size,
312 &n->parent->Ir.m1, &n->parent->Ir.m2);
313 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000314
nethercote9313ac42004-07-06 21:54:20 +0000315 cachesim_D1_doref(data_addr, n->data_size,
316 &n->parent->Dr.m1, &n->parent->Dr.m2);
317 n->parent->Dr.a++;
njn25e49d8e72002-09-23 09:36:25 +0000318 VGP_POPCC(VgpCacheSimulate);
319}
320
nethercote9313ac42004-07-06 21:54:20 +0000321static __attribute__ ((regparm (2)))
322void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr)
njn25e49d8e72002-09-23 09:36:25 +0000323{
nethercote9313ac42004-07-06 21:54:20 +0000324 //VG_(printf)("1I_1Dw: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
325 // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000326 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000327 cachesim_I1_doref(n->instr_addr, n->instr_size,
328 &n->parent->Ir.m1, &n->parent->Ir.m2);
329 n->parent->Ir.a++;
330
331 cachesim_D1_doref(data_addr, n->data_size,
332 &n->parent->Dw.m1, &n->parent->Dw.m2);
333 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000334 VGP_POPCC(VgpCacheSimulate);
335}
336
nethercote9313ac42004-07-06 21:54:20 +0000337static __attribute__ ((regparm (3)))
338void log_1I_2D_cache_access(instr_info* n, Addr data_addr1, Addr data_addr2)
njn25e49d8e72002-09-23 09:36:25 +0000339{
340 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
nethercote9313ac42004-07-06 21:54:20 +0000341 // n, n->instr_addr, n->instr_size, data_addr1, data_addr2, n->data_size)
njn25e49d8e72002-09-23 09:36:25 +0000342 VGP_PUSHCC(VgpCacheSimulate);
nethercote9313ac42004-07-06 21:54:20 +0000343 cachesim_I1_doref(n->instr_addr, n->instr_size,
344 &n->parent->Ir.m1, &n->parent->Ir.m2);
345 n->parent->Ir.a++;
njn25e49d8e72002-09-23 09:36:25 +0000346
nethercote9313ac42004-07-06 21:54:20 +0000347 cachesim_D1_doref(data_addr1, n->data_size,
348 &n->parent->Dr.m1, &n->parent->Dr.m2);
349 n->parent->Dr.a++;
350 cachesim_D1_doref(data_addr2, n->data_size,
351 &n->parent->Dw.m1, &n->parent->Dw.m2);
352 n->parent->Dw.a++;
njn25e49d8e72002-09-23 09:36:25 +0000353 VGP_POPCC(VgpCacheSimulate);
354}
355
nethercote9313ac42004-07-06 21:54:20 +0000356/*------------------------------------------------------------*/
357/*--- Instrumentation ---*/
358/*------------------------------------------------------------*/
359
nethercote564b2b02004-08-07 15:54:53 +0000360static
nethercote9313ac42004-07-06 21:54:20 +0000361BB_info* get_BB_info(UCodeBlock* cb_in, Addr orig_addr, Bool* bb_seen_before)
362{
363 Int i, n_instrs;
364 UInstr* u_in;
365 BB_info* bb_info;
366 VgHashNode** dummy;
367
368 // Count number of x86 instrs in BB
369 n_instrs = 1; // start at 1 because last x86 instr has no INCEIP
370 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
371 u_in = VG_(get_instr)(cb_in, i);
372 if (INCEIP == u_in->opcode) n_instrs++;
373 }
374
375 // Get the BB_info
376 bb_info = (BB_info*)VG_(HT_get_node)(instr_info_table, orig_addr, &dummy);
377 *bb_seen_before = ( NULL == bb_info ? False : True );
378 if (*bb_seen_before) {
379 // BB must have been translated before, but flushed from the TT
380 sk_assert(bb_info->n_instrs == n_instrs );
381 BB_retranslations++;
382 } else {
383 // BB never translated before (at this address, at least; could have
384 // been unloaded and then reloaded elsewhere in memory)
385 bb_info =
386 VG_(calloc)(1, sizeof(BB_info) + n_instrs*sizeof(instr_info));
387 bb_info->BB_addr = orig_addr;
388 bb_info->n_instrs = n_instrs;
389 VG_(HT_add_node)( instr_info_table, (VgHashNode*)bb_info );
390 distinct_instrs++;
391 }
392 return bb_info;
393}
394
nethercote564b2b02004-08-07 15:54:53 +0000395static
nethercote9313ac42004-07-06 21:54:20 +0000396void do_details( instr_info* n, Bool bb_seen_before,
397 Addr instr_addr, Int instr_size, Int data_size )
398{
399 lineCC* parent = get_lineCC(instr_addr);
400 if (bb_seen_before) {
401 sk_assert( n->instr_addr == instr_addr );
402 sk_assert( n->instr_size == instr_size );
403 sk_assert( n->data_size == data_size );
404 // Don't assert that (n->parent == parent)... it's conceivable that
405 // the debug info might change; the other asserts should be enough to
406 // detect anything strange.
407 } else {
408 n->instr_addr = instr_addr;
409 n->instr_size = instr_size;
410 n->data_size = data_size;
411 n->parent = parent;
412 }
413}
414
nethercote564b2b02004-08-07 15:54:53 +0000415static Bool is_valid_data_size(Int data_size)
nethercote9313ac42004-07-06 21:54:20 +0000416{
417 return (4 == data_size || 2 == data_size || 1 == data_size ||
418 8 == data_size || 10 == data_size || MIN_LINE_SIZE == data_size);
419}
420
421// Instrumentation for the end of each x86 instruction.
nethercote564b2b02004-08-07 15:54:53 +0000422static
nethercote9313ac42004-07-06 21:54:20 +0000423void end_of_x86_instr(UCodeBlock* cb, instr_info* i_node, Bool bb_seen_before,
424 UInt instr_addr, UInt instr_size, UInt data_size,
425 Int t_read, Int t_read_addr,
426 Int t_write, Int t_write_addr)
427{
428 Addr helper;
429 Int argc;
430 Int t_CC_addr,
431 t_data_addr1 = INVALID_TEMPREG,
432 t_data_addr2 = INVALID_TEMPREG;
433
434 sk_assert(instr_size >= 1 &&
435 instr_size <= MAX_x86_INSTR_SIZE);
436
437#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
438#define INV(qqt) (INVALID_TEMPREG == (qqt))
439
440 // Work out what kind of x86 instruction it is
441 if (!IS_(read) && !IS_(write)) {
442 sk_assert( 0 == data_size );
443 sk_assert(INV(t_read) && INV(t_write));
444 helper = (Addr) & log_1I_0D_cache_access;
445 argc = 1;
446
447 } else if (IS_(read) && !IS_(write)) {
448 sk_assert( is_valid_data_size(data_size) );
449 sk_assert(!INV(t_read) && INV(t_write));
450 helper = (Addr) & log_1I_1Dr_cache_access;
451 argc = 2;
452 t_data_addr1 = t_read_addr;
453
454 } else if (!IS_(read) && IS_(write)) {
455 sk_assert( is_valid_data_size(data_size) );
456 sk_assert(INV(t_read) && !INV(t_write));
457 helper = (Addr) & log_1I_1Dw_cache_access;
458 argc = 2;
459 t_data_addr1 = t_write_addr;
460
461 } else {
462 sk_assert(IS_(read) && IS_(write));
463 sk_assert( is_valid_data_size(data_size) );
464 sk_assert(!INV(t_read) && !INV(t_write));
465 if (t_read == t_write) {
466 helper = (Addr) & log_1I_1Dr_cache_access;
467 argc = 2;
468 t_data_addr1 = t_read_addr;
469 } else {
470 helper = (Addr) & log_1I_2D_cache_access;
471 argc = 3;
472 t_data_addr1 = t_read_addr;
473 t_data_addr2 = t_write_addr;
474 }
475 }
476#undef IS_
nethercotef5b74662004-07-06 22:46:41 +0000477#undef INV
478
nethercote9313ac42004-07-06 21:54:20 +0000479 // Setup 1st arg: CC addr
480 do_details( i_node, bb_seen_before, instr_addr, instr_size, data_size );
481 t_CC_addr = newTemp(cb);
482 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
483 uLiteral(cb, (Addr)i_node);
484
485 // Call the helper
486 if (1 == argc)
487 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
488 else if (2 == argc)
489 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
490 TempReg, t_data_addr1);
491 else if (3 == argc)
492 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
493 TempReg, t_data_addr1,
494 TempReg, t_data_addr2);
495 else
496 VG_(skin_panic)("argc... not 1 or 2 or 3?");
497
498 uCCall(cb, helper, argc, argc, False);
499}
500
njn25e49d8e72002-09-23 09:36:25 +0000501UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
502{
njn4f9c9342002-04-29 16:03:24 +0000503 UCodeBlock* cb;
njn4f9c9342002-04-29 16:03:24 +0000504 UInstr* u_in;
nethercote9313ac42004-07-06 21:54:20 +0000505 Int i, bb_info_i;
506 BB_info* bb_info;
507 Bool bb_seen_before = False;
508 Int t_read_addr, t_write_addr, t_read, t_write;
njn25e49d8e72002-09-23 09:36:25 +0000509 Addr x86_instr_addr = orig_addr;
nethercote9313ac42004-07-06 21:54:20 +0000510 UInt x86_instr_size, data_size = 0;
511 Bool instrumented_Jcc = False;
njn4f9c9342002-04-29 16:03:24 +0000512
nethercote9313ac42004-07-06 21:54:20 +0000513 bb_info = get_BB_info(cb_in, orig_addr, &bb_seen_before);
514 bb_info_i = 0;
njn4f9c9342002-04-29 16:03:24 +0000515
njn810086f2002-11-14 12:42:47 +0000516 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000517
nethercote9313ac42004-07-06 21:54:20 +0000518 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000519
njn810086f2002-11-14 12:42:47 +0000520 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
521 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000522
nethercote9313ac42004-07-06 21:54:20 +0000523 // We want to instrument each x86 instruction with a call to the
524 // appropriate simulation function, which depends on whether the
525 // instruction does memory data reads/writes. x86 instructions can
526 // end in three ways, and this is how they are instrumented:
527 //
528 // 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
529 // 2. UCode, JMP --> UCode, Instrumentation, JMP
530 // 3. UCode, Jcc, JMP --> UCode, Instrumentation, Jcc, JMP
531 //
532 // The last UInstr in a BB is always a JMP. Jccs, when they appear,
533 // are always second last. This is checked with assertions.
534 // Instrumentation must go before any jumps. (JIFZ is the exception;
535 // if a JIFZ succeeds, no simulation is done for the instruction.)
536 //
537 // x86 instruction sizes are obtained from INCEIPs (for case 1) or
538 // from .extra4b field of the final JMP (for case 2 & 3).
539
540 if (instrumented_Jcc) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000541
542 switch (u_in->opcode) {
njn4f9c9342002-04-29 16:03:24 +0000543
nethercote9313ac42004-07-06 21:54:20 +0000544 // For memory-ref instrs, copy the data_addr into a temporary to be
545 // passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000546 case LOAD:
nethercote9313ac42004-07-06 21:54:20 +0000547 case SSE3ag_MemRd_RegWr:
njn25e49d8e72002-09-23 09:36:25 +0000548 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000549 t_read_addr = newTemp(cb);
550 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
551 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000552 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000553 break;
554
555 case FPU_R:
nethercote9313ac42004-07-06 21:54:20 +0000556 case MMX2_MemRd:
njn25e49d8e72002-09-23 09:36:25 +0000557 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000558 t_read_addr = newTemp(cb);
559 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
nethercote9313ac42004-07-06 21:54:20 +0000560 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000561 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000562 break;
thughes96b466a2004-03-15 16:43:58 +0000563 break;
564
565 case MMX2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000566 case SSE2a_MemRd:
567 case SSE2a1_MemRd:
njn21f805d2003-08-25 16:15:40 +0000568 case SSE3a_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000569 case SSE3a1_MemRd:
jseward1b58fbc2003-11-04 22:54:28 +0000570 t_read = u_in->val3;
571 t_read_addr = newTemp(cb);
572 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
573 data_size = u_in->size;
574 VG_(copy_UInstr)(cb, u_in);
575 break;
576
nethercote9313ac42004-07-06 21:54:20 +0000577 // Note that we must set t_write_addr even for mod instructions;
578 // That's how the code above determines whether it does a write.
579 // Without it, it would think a mod instruction is a read.
580 // As for the MOV, if it's a mod instruction it's redundant, but it's
581 // not expensive and mod instructions are rare anyway. */
njn4f9c9342002-04-29 16:03:24 +0000582 case STORE:
583 case FPU_W:
nethercote9313ac42004-07-06 21:54:20 +0000584 case MMX2_MemWr:
njn25e49d8e72002-09-23 09:36:25 +0000585 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000586 t_write_addr = newTemp(cb);
587 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000588 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000589 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000590 break;
591
njn21f805d2003-08-25 16:15:40 +0000592 case SSE2a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000593 case SSE3a_MemWr:
njn21f805d2003-08-25 16:15:40 +0000594 t_write = u_in->val3;
595 t_write_addr = newTemp(cb);
596 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
nethercote9313ac42004-07-06 21:54:20 +0000597 data_size = u_in->size;
njn21f805d2003-08-25 16:15:40 +0000598 VG_(copy_UInstr)(cb, u_in);
599 break;
njn25e49d8e72002-09-23 09:36:25 +0000600
nethercote9313ac42004-07-06 21:54:20 +0000601 // INCEIP: insert instrumentation
njn25e49d8e72002-09-23 09:36:25 +0000602 case INCEIP:
603 x86_instr_size = u_in->val1;
604 goto instrument_x86_instr;
605
nethercote9313ac42004-07-06 21:54:20 +0000606 // JMP: insert instrumentation if the first JMP
njn25e49d8e72002-09-23 09:36:25 +0000607 case JMP:
nethercote9313ac42004-07-06 21:54:20 +0000608 if (instrumented_Jcc) {
njne427a662002-10-02 11:08:25 +0000609 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000610 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000611 VG_(copy_UInstr)(cb, u_in);
nethercote9313ac42004-07-06 21:54:20 +0000612 instrumented_Jcc = False; // rest
njn25e49d8e72002-09-23 09:36:25 +0000613 break;
njn25e49d8e72002-09-23 09:36:25 +0000614 } else {
nethercote9313ac42004-07-06 21:54:20 +0000615 // The first JMP... instrument.
616 if (CondAlways != u_in->cond) {
617 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
618 instrumented_Jcc = True;
njn25e49d8e72002-09-23 09:36:25 +0000619 } else {
nethercote9313ac42004-07-06 21:54:20 +0000620 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000621 }
nethercote9313ac42004-07-06 21:54:20 +0000622 // Get x86 instr size from final JMP.
623 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
624 goto instrument_x86_instr;
njn25e49d8e72002-09-23 09:36:25 +0000625 }
626
nethercote9313ac42004-07-06 21:54:20 +0000627 // Code executed at the end of each x86 instruction.
628 instrument_x86_instr:
629 // Large (eg. 28B, 108B, 512B) data-sized instructions will be
630 // done inaccurately but they're very rare and this avoids
631 // errors from hitting more than two cache lines in the
632 // simulation.
633 if (data_size > MIN_LINE_SIZE) data_size = MIN_LINE_SIZE;
njn25e49d8e72002-09-23 09:36:25 +0000634
nethercote9313ac42004-07-06 21:54:20 +0000635 end_of_x86_instr(cb, &bb_info->instrs[ bb_info_i ], bb_seen_before,
636 x86_instr_addr, x86_instr_size, data_size,
637 t_read, t_read_addr, t_write, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000638
nethercote9313ac42004-07-06 21:54:20 +0000639 // Copy original UInstr (INCEIP or JMP)
njn4ba5a792002-09-30 10:23:54 +0000640 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000641
nethercote9313ac42004-07-06 21:54:20 +0000642 // Update loop state for next x86 instr
643 bb_info_i++;
njn25e49d8e72002-09-23 09:36:25 +0000644 x86_instr_addr += x86_instr_size;
nethercote9313ac42004-07-06 21:54:20 +0000645 t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
646 data_size = 0;
njn4f9c9342002-04-29 16:03:24 +0000647 break;
648
649 default:
njn4ba5a792002-09-30 10:23:54 +0000650 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000651 break;
652 }
653 }
654
nethercote9313ac42004-07-06 21:54:20 +0000655 // BB address should be the same as the first instruction's address.
656 sk_assert(bb_info->BB_addr == bb_info->instrs[0].instr_addr );
657 sk_assert(bb_info_i == bb_info->n_instrs);
njn4f9c9342002-04-29 16:03:24 +0000658
njn4ba5a792002-09-30 10:23:54 +0000659 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000660 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000661
662#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000663}
664
665/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000666/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000667/*------------------------------------------------------------*/
668
njn25e49d8e72002-09-23 09:36:25 +0000669#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
670
671static cache_t clo_I1_cache = UNDEFINED_CACHE;
672static cache_t clo_D1_cache = UNDEFINED_CACHE;
673static cache_t clo_L2_cache = UNDEFINED_CACHE;
674
// All CPUID info taken from sandpile.org/a32/cpuid.htm
676// Probably only works for Intel and AMD chips, and probably only for some of
677// them.
njn7cf0bd32002-06-08 13:36:03 +0000678
sewardj07133bf2002-06-13 10:25:56 +0000679static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +0000680{
681 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +0000682 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +0000683 actual_size);
njn7cf0bd32002-06-08 13:36:03 +0000684 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000685 " Simulating a %d KB cache with %d B lines",
686 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +0000687}
688
689/* Intel method is truly wretched. We have to do an insane indexing into an
690 * array of pre-defined configurations for various parts of the memory
691 * hierarchy.
692 */
693static
sewardj07133bf2002-06-13 10:25:56 +0000694Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000695{
sewardj07133bf2002-06-13 10:25:56 +0000696 UChar info[16];
697 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +0000698 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +0000699
700 if (level < 2) {
701 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000702 "warning: CPUID level < 2 for Intel processor (%d)",
703 level);
njn7cf0bd32002-06-08 13:36:03 +0000704 return -1;
705 }
706
thughes4ee64962004-06-16 20:51:45 +0000707 VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
708 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +0000709 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
710 info[0] = 0x0; /* reset AL */
711
712 if (0 != trials) {
713 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000714 "warning: non-zero CPUID trials for Intel processor (%d)",
715 trials);
njn7cf0bd32002-06-08 13:36:03 +0000716 return -1;
717 }
718
719 for (i = 0; i < 16; i++) {
720
721 switch (info[i]) {
722
723 case 0x0: /* ignore zeros */
724 break;
725
njn25e49d8e72002-09-23 09:36:25 +0000726 /* TLB info, ignore */
727 case 0x01: case 0x02: case 0x03: case 0x04:
728 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +0000729 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +0000730 break;
731
732 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
733 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000734 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000735
736 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
737 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +0000738 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +0000739
njn25e49d8e72002-09-23 09:36:25 +0000740 /* IA-64 info -- panic! */
741 case 0x10: case 0x15: case 0x1a:
742 case 0x88: case 0x89: case 0x8a: case 0x8d:
743 case 0x90: case 0x96: case 0x9b:
nethercote9313ac42004-07-06 21:54:20 +0000744 VG_(skin_panic)("IA-64 cache detected?!");
njn25e49d8e72002-09-23 09:36:25 +0000745
njn7cf0bd32002-06-08 13:36:03 +0000746 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +0000747 VG_(message)(Vg_DebugMsg,
748 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +0000749 break;
750
njn25e49d8e72002-09-23 09:36:25 +0000751 /* These are sectored, whatever that means */
752 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
753 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
754
755 /* If a P6 core, this means "no L2 cache".
756 If a P4 core, this means "no L3 cache".
757 We don't know what core it is, so don't issue a warning. To detect
758 a missing L2 cache, we use 'L2_found'. */
759 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +0000760 break;
761
njn25e49d8e72002-09-23 09:36:25 +0000762 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
763 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
764 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
765 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
766 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000767
768 /* These are sectored, whatever that means */
769 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
770 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
771 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
772
773 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
774 * conversion to byte size is a total guess; treat the 12K and 16K
775 * cases the same since the cache byte size must be a power of two for
776 * everything to work!. Also guessing 32 bytes for the line size...
777 */
778 case 0x70: /* 12K micro-ops, 8-way */
779 *I1c = (cache_t) { 16, 8, 32 };
780 micro_ops_warn(12, 16, 32);
781 break;
782 case 0x71: /* 16K micro-ops, 8-way */
783 *I1c = (cache_t) { 16, 8, 32 };
784 micro_ops_warn(16, 16, 32);
785 break;
786 case 0x72: /* 32K micro-ops, 8-way */
787 *I1c = (cache_t) { 32, 8, 32 };
788 micro_ops_warn(32, 32, 32);
789 break;
790
njn25e49d8e72002-09-23 09:36:25 +0000791 /* These are sectored, whatever that means */
792 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
793 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
794 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
795 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
796 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000797
njn25e49d8e72002-09-23 09:36:25 +0000798 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
799 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
800 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
801 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
802 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +0000803 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
804 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +0000805
806 default:
807 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000808 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +0000809 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +0000810 break;
811 }
812 }
njn25e49d8e72002-09-23 09:36:25 +0000813
814 if (!L2_found)
815 VG_(message)(Vg_DebugMsg,
816 "warning: L2 cache not installed, ignore L2 results.");
817
njn7cf0bd32002-06-08 13:36:03 +0000818 return 0;
819}
820
821/* AMD method is straightforward, just extract appropriate bits from the
822 * result registers.
823 *
824 * Bits, for D1 and I1:
825 * 31..24 data L1 cache size in KBs
826 * 23..16 data L1 cache associativity (FFh=full)
827 * 15.. 8 data L1 cache lines per tag
828 * 7.. 0 data L1 cache line size in bytes
829 *
830 * Bits, for L2:
831 * 31..16 unified L2 cache size in KBs
832 * 15..12 unified L2 cache associativity (0=off, FFh=full)
833 * 11.. 8 unified L2 cache lines per tag
834 * 7.. 0 unified L2 cache line size in bytes
835 *
836 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
837 * upon this information. (Whatever that means -- njn)
838 *
njn25e49d8e72002-09-23 09:36:25 +0000839 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
840 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
841 * so we detect that.
842 *
njn7cf0bd32002-06-08 13:36:03 +0000843 * Returns 0 on success, non-zero on failure.
844 */
sewardj07133bf2002-06-13 10:25:56 +0000845static
846Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000847{
sewardj05bcdcb2003-05-18 10:05:38 +0000848 UInt ext_level;
thughes4ee64962004-06-16 20:51:45 +0000849 UInt dummy, model;
850 UInt I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +0000851
thughes4ee64962004-06-16 20:51:45 +0000852 VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000853
854 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
855 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000856 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
857 ext_level);
njn7cf0bd32002-06-08 13:36:03 +0000858 return -1;
859 }
860
thughes4ee64962004-06-16 20:51:45 +0000861 VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
862 VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
njn7cf0bd32002-06-08 13:36:03 +0000863
thughes4ee64962004-06-16 20:51:45 +0000864 VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
njn25e49d8e72002-09-23 09:36:25 +0000865
866 /* Check for Duron bug */
867 if (model == 0x630) {
868 VG_(message)(Vg_UserMsg,
869 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
870 L2i = (64 << 16) | (L2i & 0xffff);
871 }
872
njn7cf0bd32002-06-08 13:36:03 +0000873 D1c->size = (D1i >> 24) & 0xff;
874 D1c->assoc = (D1i >> 16) & 0xff;
875 D1c->line_size = (D1i >> 0) & 0xff;
876
877 I1c->size = (I1i >> 24) & 0xff;
878 I1c->assoc = (I1i >> 16) & 0xff;
879 I1c->line_size = (I1i >> 0) & 0xff;
880
881 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
882 L2c->assoc = (L2i >> 12) & 0xf;
883 L2c->line_size = (L2i >> 0) & 0xff;
884
885 return 0;
886}
887
/* Jump buffer armed with __builtin_setjmp() in get_caches_from_CPUID() just
 * before the CPUID instruction is attempted. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler installed around the CPUID probe.  It does not return:
 * control is transferred back to where cpuid_jmpbuf was armed.  'signum' is
 * not examined. */
static
void cpuid_SIGILL_handler(int signum)
{
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
895
896static
sewardj07133bf2002-06-13 10:25:56 +0000897Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000898{
sewardj07133bf2002-06-13 10:25:56 +0000899 Int level, res, ret;
900 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +0000901 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +0000902
903 /* Install own SIGILL handler */
904 sigill_new.ksa_handler = cpuid_SIGILL_handler;
905 sigill_new.ksa_flags = 0;
906 sigill_new.ksa_restorer = NULL;
907 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +0000908 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000909
910 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +0000911 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000912
913 /* Trap for illegal instruction, in case it's a really old processor that
914 * doesn't support CPUID. */
915 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
thughes4ee64962004-06-16 20:51:45 +0000916 VG_(cpuid)(0, &level, (int*)&vendor_id[0],
917 (int*)&vendor_id[8], (int*)&vendor_id[4]);
njn7cf0bd32002-06-08 13:36:03 +0000918 vendor_id[12] = '\0';
919
920 /* Restore old SIGILL handler */
921 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000922 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000923
924 } else {
925 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
926
927 /* Restore old SIGILL handler */
928 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +0000929 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +0000930 return -1;
931 }
932
933 if (0 == level) {
934 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
935 return -1;
936 }
937
938 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
939 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
940 ret = Intel_cache_info(level, I1c, D1c, L2c);
941
942 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
943 ret = AMD_cache_info(I1c, D1c, L2c);
944
sewardj97b7b262003-10-07 00:18:16 +0000945 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
946 /* Total kludge. Pretend to be a VIA Nehemiah. */
947 D1c->size = 64;
948 D1c->assoc = 16;
949 D1c->line_size = 16;
950 I1c->size = 64;
951 I1c->assoc = 4;
952 I1c->line_size = 16;
953 L2c->size = 64;
954 L2c->assoc = 16;
955 L2c->line_size = 16;
956 ret = 0;
957
njn7cf0bd32002-06-08 13:36:03 +0000958 } else {
959 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
960 vendor_id);
961 return -1;
962 }
963
964 /* Successful! Convert sizes from KB to bytes */
965 I1c->size *= 1024;
966 D1c->size *= 1024;
967 L2c->size *= 1024;
968
969 return ret;
970}
971
972/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +0000973static
974void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +0000975{
976 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +0000977 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +0000978 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000979 "warning: %s size of %dB not a power of two; "
980 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +0000981 cache->size = dflt->size;
982 }
983
sewardj07133bf2002-06-13 10:25:56 +0000984 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +0000985 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000986 "warning: %s associativity of %d not a power of two; "
987 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +0000988 cache->assoc = dflt->assoc;
989 }
990
sewardj07133bf2002-06-13 10:25:56 +0000991 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +0000992 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000993 "warning: %s line size of %dB not a power of two; "
994 "defaulting to %dB",
995 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +0000996 cache->line_size = dflt->line_size;
997 }
998
999 /* Then check line size >= 16 -- any smaller and a single instruction could
1000 * straddle three cache lines, which breaks a simulation assertion and is
1001 * stupid anyway. */
1002 if (cache->line_size < MIN_LINE_SIZE) {
1003 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001004 "warning: %s line size of %dB too small; "
1005 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001006 cache->line_size = MIN_LINE_SIZE;
1007 }
1008
1009 /* Then check cache size > line size (causes seg faults if not). */
1010 if (cache->size <= cache->line_size) {
1011 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001012 "warning: %s cache size of %dB <= line size of %dB; "
1013 "increasing to %dB", name, cache->size, cache->line_size,
1014 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001015 cache->size = cache->line_size * 2;
1016 }
1017
1018 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1019 if (cache->assoc > (cache->size / cache->line_size)) {
1020 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001021 "warning: %s associativity > (size / line size); "
1022 "increasing size to %dB",
1023 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001024 cache->size = cache->assoc * cache->line_size;
1025 }
1026}
1027
sewardj07133bf2002-06-13 10:25:56 +00001028static
1029void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001030{
nethercote9313ac42004-07-06 21:54:20 +00001031#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1032
1033 Int res, n_clos = 0;
1034
1035 // Defaults are for a model 3 or 4 Athlon
njn7cf0bd32002-06-08 13:36:03 +00001036 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1037 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1038 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1039
nethercote9313ac42004-07-06 21:54:20 +00001040 // Set caches to default.
1041 *I1c = I1_dflt;
1042 *D1c = D1_dflt;
1043 *L2c = L2_dflt;
njn7cf0bd32002-06-08 13:36:03 +00001044
nethercote9313ac42004-07-06 21:54:20 +00001045 // Then replace with any info we can get from CPUID.
1046 res = get_caches_from_CPUID(I1c, D1c, L2c);
sewardjb1a77a42002-07-13 13:31:20 +00001047
nethercote9313ac42004-07-06 21:54:20 +00001048 // Then replace with any defined on the command line.
1049 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; n_clos++; }
1050 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; n_clos++; }
1051 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; n_clos++; }
njn7cf0bd32002-06-08 13:36:03 +00001052
nethercote9313ac42004-07-06 21:54:20 +00001053 // Warn if CPUID failed and config not completely specified from cmd line.
1054 if (res != 0 && n_clos < 3) {
1055 VG_(message)(Vg_DebugMsg,
1056 "Warning: Couldn't detect cache config, using one "
1057 "or more defaults ");
njn7cf0bd32002-06-08 13:36:03 +00001058 }
njn7cf0bd32002-06-08 13:36:03 +00001059
nethercote9313ac42004-07-06 21:54:20 +00001060 // Then check values and fix if not acceptable.
njn7cf0bd32002-06-08 13:36:03 +00001061 check_cache(I1c, &I1_dflt, "I1");
1062 check_cache(D1c, &D1_dflt, "D1");
1063 check_cache(L2c, &L2_dflt, "L2");
1064
1065 if (VG_(clo_verbosity) > 1) {
1066 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1067 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1068 I1c->size, I1c->assoc, I1c->line_size);
1069 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1070 D1c->size, D1c->assoc, D1c->line_size);
1071 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1072 L2c->size, L2c->assoc, L2c->line_size);
1073 }
nethercote9313ac42004-07-06 21:54:20 +00001074#undef CMD_LINE_DEFINED
njn7cf0bd32002-06-08 13:36:03 +00001075}
1076
njn4f9c9342002-04-29 16:03:24 +00001077/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001078/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001079/*------------------------------------------------------------*/
1080
nethercote9313ac42004-07-06 21:54:20 +00001081// Total reads/writes/misses. Calculated during CC traversal at the end.
1082// All auto-zeroed.
1083static CC Ir_total;
1084static CC Dr_total;
1085static CC Dw_total;
1086
1087static Char* cachegrind_out_file;
1088
1089static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +00001090{
nethercote9313ac42004-07-06 21:54:20 +00001091 VG_(message)(Vg_UserMsg,
1092 "error: can't open cache simulation output file `%s'",
1093 cachegrind_out_file );
1094 VG_(message)(Vg_UserMsg,
1095 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +00001096}
1097
nethercote9313ac42004-07-06 21:54:20 +00001098static void fprint_lineCC(Int fd, lineCC* n)
njn4f9c9342002-04-29 16:03:24 +00001099{
nethercote9313ac42004-07-06 21:54:20 +00001100 Char buf[512];
1101 VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1102 n->line,
1103 n->Ir.a, n->Ir.m1, n->Ir.m2,
1104 n->Dr.a, n->Dr.m1, n->Dr.m2,
1105 n->Dw.a, n->Dw.m1, n->Dw.m2);
1106 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1107
1108 Ir_total.a += n->Ir.a; Ir_total.m1 += n->Ir.m1; Ir_total.m2 += n->Ir.m2;
1109 Dr_total.a += n->Dr.a; Dr_total.m1 += n->Dr.m1; Dr_total.m2 += n->Dr.m2;
1110 Dw_total.a += n->Dw.a; Dw_total.m1 += n->Dw.m1; Dw_total.m2 += n->Dw.m2;
1111}
1112
1113static void fprint_CC_table_and_calc_totals(void)
1114{
1115 Int fd;
1116 Char buf[512];
1117 fileCC *curr_fileCC;
1118 fnCC *curr_fnCC;
1119 lineCC *curr_lineCC;
1120 Int i, j, k;
njn4f9c9342002-04-29 16:03:24 +00001121
njn25e49d8e72002-09-23 09:36:25 +00001122 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001123
njndb918dd2003-07-22 20:45:11 +00001124 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001125 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001126 if (fd < 0) {
nethercote9313ac42004-07-06 21:54:20 +00001127 // If the file can't be opened for whatever reason (conflict
1128 // between multiple cachegrinded processes?), give up now.
sewardj0744b6c2002-12-11 00:45:42 +00001129 file_err();
1130 return;
1131 }
njn4f9c9342002-04-29 16:03:24 +00001132
nethercote9313ac42004-07-06 21:54:20 +00001133 // "desc:" lines (giving I1/D1/L2 cache configuration). The spaces after
1134 // the 2nd colon makes cg_annotate's output look nicer.
1135 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1136 "desc: D1 cache: %s\n"
1137 "desc: L2 cache: %s\n",
1138 I1.desc_line, D1.desc_line, L2.desc_line);
njn7cf0bd32002-06-08 13:36:03 +00001139 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001140
nethercote9313ac42004-07-06 21:54:20 +00001141 // "cmd:" line
njn4f9c9342002-04-29 16:03:24 +00001142 VG_(strcpy)(buf, "cmd:");
1143 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001144 for (i = 0; i < VG_(client_argc); i++) {
1145 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001146 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1147 }
nethercote9313ac42004-07-06 21:54:20 +00001148 // "events:" line
njn4f9c9342002-04-29 16:03:24 +00001149 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1150 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1151
nethercote9313ac42004-07-06 21:54:20 +00001152 // Six loops here: three for the hash table arrays, and three for the
1153 // chains hanging off the hash table arrays.
njn4f9c9342002-04-29 16:03:24 +00001154 for (i = 0; i < N_FILE_ENTRIES; i++) {
nethercote9313ac42004-07-06 21:54:20 +00001155 curr_fileCC = CC_table[i];
1156 while (curr_fileCC != NULL) {
1157 VG_(sprintf)(buf, "fl=%s\n", curr_fileCC->file);
njn4f9c9342002-04-29 16:03:24 +00001158 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1159
1160 for (j = 0; j < N_FN_ENTRIES; j++) {
nethercote9313ac42004-07-06 21:54:20 +00001161 curr_fnCC = curr_fileCC->fns[j];
1162 while (curr_fnCC != NULL) {
1163 VG_(sprintf)(buf, "fn=%s\n", curr_fnCC->fn);
njn4f9c9342002-04-29 16:03:24 +00001164 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1165
nethercote9313ac42004-07-06 21:54:20 +00001166 for (k = 0; k < N_LINE_ENTRIES; k++) {
1167 curr_lineCC = curr_fnCC->lines[k];
1168 while (curr_lineCC != NULL) {
1169 fprint_lineCC(fd, curr_lineCC);
1170 curr_lineCC = curr_lineCC->next;
njn4f9c9342002-04-29 16:03:24 +00001171 }
1172 }
nethercote9313ac42004-07-06 21:54:20 +00001173 curr_fnCC = curr_fnCC->next;
njn4f9c9342002-04-29 16:03:24 +00001174 }
1175 }
nethercote9313ac42004-07-06 21:54:20 +00001176 curr_fileCC = curr_fileCC->next;
njn4f9c9342002-04-29 16:03:24 +00001177 }
1178 }
1179
nethercote9313ac42004-07-06 21:54:20 +00001180 // Summary stats must come after rest of table, since we calculate them
1181 // during traversal. */
njn4f9c9342002-04-29 16:03:24 +00001182 VG_(sprintf)(buf, "summary: "
nethercote9313ac42004-07-06 21:54:20 +00001183 "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
njn4f9c9342002-04-29 16:03:24 +00001184 Ir_total.a, Ir_total.m1, Ir_total.m2,
1185 Dr_total.a, Dr_total.m1, Dr_total.m2,
1186 Dw_total.a, Dw_total.m1, Dw_total.m2);
1187 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1188 VG_(close)(fd);
1189}
1190
// Number of characters needed to print 'n' in decimal with a comma between
// each group of three digits, e.g. 1234567 -> "1,234,567" -> 9.
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   // Zero still prints as one digit.  Without this, 'w - 1' below would
   // wrap around (w is unsigned) and yield a gigantic width.
   if (0 == w) w = 1;
   return w + (w-1)/3;   // add space for commas
}
1200
/* Format a fixed-point percentage into 'buf', right-justified.
 *
 *   n            the percentage multiplied by 'ex' (e.g. 105 with ex == 100
 *                stands for 1.05%)
 *   ex           scale factor; expected to be a power of ten, it determines
 *                the number of fractional digits printed (10 -> 1, 100 -> 2)
 *   field_width  minimum width of the result; shorter results are padded on
 *                the left with spaces, longer ones are left as they are
 *   buf          output buffer; the caller must make it large enough for
 *                the padded text plus the terminating NUL
 *
 * The fractional part is zero-padded to its full width, so that 1.05% is
 * not printed as "1.5%".
 */
static
void percentify(Int n, Int ex, Int field_width, char buf[])
{
   int i, len, space;
   Int frac  = n % ex;
   Int digit = ex / 10;       /* weight of the leading fractional digit */

   if (0 == digit) digit = 1; /* always print at least one fractional digit */

   VG_(sprintf)(buf, "%d.", n / ex);
   len = VG_(strlen)(buf);
   for ( ; digit > 0; digit /= 10)
      buf[len++] = (char)('0' + (frac / digit) % 10);
   buf[len++] = '%';
   buf[len]   = '\0';

   space = field_width - len;
   if (space < 0) space = 0;     /* Allow for v. small field_width */
   i = len;

   /* Right justify in field (the loop starts at the NUL, so it moves too) */
   for (     ; i >= 0;    i--)  buf[i + space] = buf[i];
   for (i = 0; i < space; i++)  buf[i] = ' ';
}
1216
njn7d9f94d2003-04-22 21:41:40 +00001217void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001218{
nethercote9313ac42004-07-06 21:54:20 +00001219 static char buf1[128], buf2[128], buf3[128], fmt [128];
njn607adfc2003-09-30 14:15:44 +00001220
njn4f9c9342002-04-29 16:03:24 +00001221 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001222 ULong L2_total_m, L2_total_mr, L2_total_mw,
1223 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001224 Int l1, l2, l3;
1225 Int p;
1226
nethercote9313ac42004-07-06 21:54:20 +00001227 fprint_CC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001228
njn7cf0bd32002-06-08 13:36:03 +00001229 if (VG_(clo_verbosity) == 0)
1230 return;
1231
njn4f9c9342002-04-29 16:03:24 +00001232 /* I cache results. Use the I_refs value to determine the first column
1233 * width. */
njn607adfc2003-09-30 14:15:44 +00001234 l1 = ULong_width(Ir_total.a);
1235 l2 = ULong_width(Dr_total.a);
1236 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001237
njn607adfc2003-09-30 14:15:44 +00001238 /* Make format string, getting width right for numbers */
1239 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1240
1241 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1242 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1243 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001244
1245 p = 100;
1246
njn25e49d8e72002-09-23 09:36:25 +00001247 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001248 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1249 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1250
1251 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1252 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1253 VG_(message)(Vg_UserMsg, "");
1254
1255 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1256 * width of columns 2 & 3. */
1257 D_total.a = Dr_total.a + Dw_total.a;
1258 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1259 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1260
njn607adfc2003-09-30 14:15:44 +00001261 /* Make format string, getting width right for numbers */
1262 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001263
njn607adfc2003-09-30 14:15:44 +00001264 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1265 D_total.a, Dr_total.a, Dw_total.a);
1266 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1267 D_total.m1, Dr_total.m1, Dw_total.m1);
1268 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1269 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001270
1271 p = 10;
1272
njn25e49d8e72002-09-23 09:36:25 +00001273 if (0 == D_total.a) D_total.a = 1;
1274 if (0 == Dr_total.a) Dr_total.a = 1;
1275 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001276 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1277 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1278 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1279 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1280
1281 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1282 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1283 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1284 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1285 VG_(message)(Vg_UserMsg, "");
1286
1287 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001288
1289 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1290 L2_total_r = Dr_total.m1 + Ir_total.m1;
1291 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001292 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1293 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001294
njn4f9c9342002-04-29 16:03:24 +00001295 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1296 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1297 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001298 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1299 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001300
1301 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1302 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1303 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1304 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1305
1306
nethercote9313ac42004-07-06 21:54:20 +00001307 // Various stats
njn4f9c9342002-04-29 16:03:24 +00001308 if (VG_(clo_verbosity) > 1) {
nethercote9313ac42004-07-06 21:54:20 +00001309 int BB_lookups = full_debug_BBs + fn_debug_BBs +
njn4f9c9342002-04-29 16:03:24 +00001310 file_line_debug_BBs + no_debug_BBs;
1311
1312 VG_(message)(Vg_DebugMsg, "");
1313 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1314 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
nethercote9313ac42004-07-06 21:54:20 +00001315 VG_(message)(Vg_DebugMsg, "Distinct lines: %d", distinct_lines);
1316 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
njn4f9c9342002-04-29 16:03:24 +00001317 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1318 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1319 full_debug_BBs * 100 / BB_lookups,
1320 full_debug_BBs);
1321 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1322 file_line_debug_BBs * 100 / BB_lookups,
1323 file_line_debug_BBs);
1324 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
nethercote9313ac42004-07-06 21:54:20 +00001325 fn_debug_BBs * 100 / BB_lookups,
1326 fn_debug_BBs);
njn4f9c9342002-04-29 16:03:24 +00001327 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1328 no_debug_BBs * 100 / BB_lookups,
1329 no_debug_BBs);
1330 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
njn4f9c9342002-04-29 16:03:24 +00001331 }
njn25e49d8e72002-09-23 09:36:25 +00001332 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001333}
1334
nethercote9313ac42004-07-06 21:54:20 +00001335/*--------------------------------------------------------------------*/
1336/*--- Discarding BB info ---*/
1337/*--------------------------------------------------------------------*/
sewardj18d75132002-05-16 11:06:21 +00001338
nethercote9313ac42004-07-06 21:54:20 +00001339// Called when a translation is invalidated due to code unloading.
njn25e49d8e72002-09-23 09:36:25 +00001340void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001341{
nethercote9313ac42004-07-06 21:54:20 +00001342 VgHashNode** prev_next_ptr;
1343 VgHashNode* bb_info;
njn4294fd42002-06-05 14:41:10 +00001344
nethercote9313ac42004-07-06 21:54:20 +00001345 if (0) VG_(printf)( "discard_basic_block_info: %p, %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001346
nethercote9313ac42004-07-06 21:54:20 +00001347 // Get BB info, remove from table, free BB info. Simple!
1348 bb_info = VG_(HT_get_node)(instr_info_table, a, &prev_next_ptr);
1349 sk_assert(NULL != bb_info);
1350 *prev_next_ptr = bb_info->next;
1351 VG_(free)(bb_info);
sewardj18d75132002-05-16 11:06:21 +00001352}
1353
1354/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001355/*--- Command line processing ---*/
1356/*--------------------------------------------------------------------*/
1357
nethercote9313ac42004-07-06 21:54:20 +00001358static void parse_cache_opt ( cache_t* cache, char* opt )
njn25e49d8e72002-09-23 09:36:25 +00001359{
nethercote9313ac42004-07-06 21:54:20 +00001360 int i = 0, i2, i3;
njn25e49d8e72002-09-23 09:36:25 +00001361
nethercote9313ac42004-07-06 21:54:20 +00001362 // Option argument looks like "65536,2,64".
1363 // Find commas, replace with NULs to make three independent
1364 // strings, then extract numbers, put NULs back. Yuck.
njn25e49d8e72002-09-23 09:36:25 +00001365 while (VG_(isdigit)(opt[i])) i++;
1366 if (',' == opt[i]) {
1367 opt[i++] = '\0';
1368 i2 = i;
1369 } else goto bad;
1370 while (VG_(isdigit)(opt[i])) i++;
1371 if (',' == opt[i]) {
1372 opt[i++] = '\0';
1373 i3 = i;
1374 } else goto bad;
1375 while (VG_(isdigit)(opt[i])) i++;
1376 if ('\0' != opt[i]) goto bad;
1377
nethercote9313ac42004-07-06 21:54:20 +00001378 cache->size = (Int)VG_(atoll)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001379 cache->assoc = (Int)VG_(atoll)(opt + i2);
1380 cache->line_size = (Int)VG_(atoll)(opt + i3);
1381
nethercote9313ac42004-07-06 21:54:20 +00001382 opt[i2-1] = ',';
1383 opt[i3-1] = ',';
njn25e49d8e72002-09-23 09:36:25 +00001384 return;
1385
1386 bad:
nethercote9313ac42004-07-06 21:54:20 +00001387 VG_(bad_option)(opt);
njn25e49d8e72002-09-23 09:36:25 +00001388}
1389
1390Bool SK_(process_cmd_line_option)(Char* arg)
1391{
nethercote9313ac42004-07-06 21:54:20 +00001392 // 5 is length of "--I1="
njn39c86652003-05-21 10:13:39 +00001393 if (VG_CLO_STREQN(5, arg, "--I1="))
nethercote9313ac42004-07-06 21:54:20 +00001394 parse_cache_opt(&clo_I1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001395 else if (VG_CLO_STREQN(5, arg, "--D1="))
nethercote9313ac42004-07-06 21:54:20 +00001396 parse_cache_opt(&clo_D1_cache, &arg[5]);
njn39c86652003-05-21 10:13:39 +00001397 else if (VG_CLO_STREQN(5, arg, "--L2="))
nethercote9313ac42004-07-06 21:54:20 +00001398 parse_cache_opt(&clo_L2_cache, &arg[5]);
njn25e49d8e72002-09-23 09:36:25 +00001399 else
1400 return False;
1401
1402 return True;
1403}
1404
njn3e884182003-04-15 13:03:23 +00001405void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001406{
njn3e884182003-04-15 13:03:23 +00001407 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001408" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1409" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001410" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1411 );
1412}
1413
1414void SK_(print_debug_usage)(void)
1415{
1416 VG_(printf)(
1417" (none)\n"
1418 );
njn25e49d8e72002-09-23 09:36:25 +00001419}
1420
1421/*--------------------------------------------------------------------*/
1422/*--- Setup ---*/
1423/*--------------------------------------------------------------------*/
1424
njn810086f2002-11-14 12:42:47 +00001425void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001426{
njn13f02932003-04-30 20:23:58 +00001427 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00001428
njn810086f2002-11-14 12:42:47 +00001429 VG_(details_name) ("Cachegrind");
1430 VG_(details_version) (NULL);
1431 VG_(details_description) ("an I1/D1/L2 cache profiler");
1432 VG_(details_copyright_author)(
nethercote08fa9a72004-07-16 17:44:00 +00001433 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote et al.");
nethercote421281e2003-11-20 16:20:55 +00001434 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00001435 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001436
njn810086f2002-11-14 12:42:47 +00001437 VG_(needs_basic_block_discards)();
1438 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001439
1440 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
nethercote9313ac42004-07-06 21:54:20 +00001441 VG_(register_compact_helper)((Addr) & log_1I_1Dr_cache_access);
1442 VG_(register_compact_helper)((Addr) & log_1I_1Dw_cache_access);
njn25e49d8e72002-09-23 09:36:25 +00001443 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00001444
njn99ccf082003-09-30 13:51:23 +00001445 /* Get working directory */
1446 sk_assert( VG_(getcwd_alloc)(&base_dir) );
1447
njn13f02932003-04-30 20:23:58 +00001448 /* Block is big enough for dir name + cachegrind.out.<pid> */
1449 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
1450 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
1451 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00001452 VG_(free)(base_dir);
nethercote9313ac42004-07-06 21:54:20 +00001453
1454 instr_info_table = VG_(HT_construct)();
njn25e49d8e72002-09-23 09:36:25 +00001455}
1456
1457void SK_(post_clo_init)(void)
1458{
1459 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00001460
njn25e49d8e72002-09-23 09:36:25 +00001461 get_caches(&I1c, &D1c, &L2c);
1462
1463 cachesim_I1_initcache(I1c);
1464 cachesim_D1_initcache(D1c);
1465 cachesim_L2_initcache(L2c);
1466
nethercote9313ac42004-07-06 21:54:20 +00001467 VGP_(register_profile_event)(VgpGetLineCC, "get-lineCC");
njn25e49d8e72002-09-23 09:36:25 +00001468 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1469 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
njn25e49d8e72002-09-23 09:36:25 +00001470}
1471
/* Interface-version declaration for this tool; it names SK_(pre_clo_init)
   as its first argument.  NOTE(review): presumably this is how the core
   finds the tool's initialisation routine and checks interface
   compatibility, and the meaning of the second argument (0) is not visible
   here -- the macro is defined outside this file, confirm there. */
VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
1473
njn25e49d8e72002-09-23 09:36:25 +00001474/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00001475/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00001476/*--------------------------------------------------------------------*/