blob: 0cbeaab0e45236c23043ef6d0941ac85f74aeb78 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
nethercote137bc552003-11-14 17:47:54 +00009 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +000010 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
nethercotebb1c9912004-01-04 16:43:23 +000012 Copyright (C) 2002-2004 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000013 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
/* Description of one cache, as used by the cache simulation. */
typedef struct {
   int size;        /* capacity, in bytes */
   int assoc;       /* associativity (presumably the number of ways --
                       confirm against cg_sim.c) */
   int line_size;   /* length of one line, in bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000042
nethercote27fc1da2004-01-04 16:56:57 +000043#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000044
njn25e49d8e72002-09-23 09:36:25 +000045/*------------------------------------------------------------*/
46/*--- Constants ---*/
47/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000048
/* Largest x86 instruction size, according to the IA-32 Intel Architecture
 * Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE              16

/* Data sizes of some memory-referencing UInstrs (e.g. FPU_R, FPU_W) are
 * clamped to this value when they are instrumented. */
#define MIN_LINE_SIZE                   16

/* Sizes, in bytes, of the various buffers used for storing strings. */
#define FILENAME_LEN                    256
#define FN_NAME_LEN                     256
#define BUF_LEN                         512
#define COMMIFY_BUF_LEN                 128
#define RESULTS_BUF_LEN                 128
#define LINE_BUF_LEN                     64
61
62/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000063/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000064/*------------------------------------------------------------*/
65
njn25e49d8e72002-09-23 09:36:25 +000066typedef
67 enum {
68 VgpGetBBCC = VgpFini+1,
69 VgpCacheSimulate,
70 VgpCacheResults
71 }
72 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000073
njn4f9c9342002-04-29 16:03:24 +000074/*------------------------------------------------------------*/
75/*--- Output file related stuff ---*/
76/*------------------------------------------------------------*/
77
njn13f02932003-04-30 20:23:58 +000078static Char* cachegrind_out_file;
njn4f9c9342002-04-29 16:03:24 +000079
sewardj0744b6c2002-12-11 00:45:42 +000080static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000081{
82 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000083 "error: can't open cache simulation output file `%s'",
84 cachegrind_out_file );
85 VG_(message)(Vg_UserMsg,
86 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000087}
88
89/*------------------------------------------------------------*/
90/*--- Cost center types, operations ---*/
91/*------------------------------------------------------------*/
92
93typedef struct _CC CC;
94struct _CC {
95 ULong a;
96 ULong m1;
97 ULong m2;
98};
99
100static __inline__ void initCC(CC* cc) {
101 cc->a = 0;
102 cc->m1 = 0;
103 cc->m2 = 0;
104}
105
/* Kinds of instruction-level cost centre.  The value is stored in the
 * `tag` field of iCC / idCC / iddCC. */
typedef enum {
   InstrCC,        /* no data access,       eg. mov %eax,   %ebx   */
   ReadCC,         /* one read,             eg. mov (%ecx), %esi   */
   WriteCC,        /* one write,            eg. mov %eax,   (%edx) */
   ModCC,          /* read+write one addr,  eg. incl (%eax)        */
   ReadWriteCC,    /* read+write two different addrs,
                      eg. call*l (%esi), pushl 0x4(%ebx), movsw    */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000115
njn7e1b3b22003-07-04 11:44:39 +0000116/* Instruction-level cost-centres.
njn4f9c9342002-04-29 16:03:24 +0000117 *
118 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +0000119 *
njne0ee0712002-05-03 16:41:05 +0000120 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +0000121 */
njn25e49d8e72002-09-23 09:36:25 +0000122typedef
123 struct {
124 /* word 1 */
125 UChar tag;
126 UChar instr_size;
127 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000128
njn25e49d8e72002-09-23 09:36:25 +0000129 /* words 2+ */
130 Addr instr_addr;
131 CC I;
132 }
133 iCC;
njn4f9c9342002-04-29 16:03:24 +0000134
njn25e49d8e72002-09-23 09:36:25 +0000135typedef
136 struct _idCC {
137 /* word 1 */
138 UChar tag;
139 UChar instr_size;
140 UChar data_size;
141 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000142
njn25e49d8e72002-09-23 09:36:25 +0000143 /* words 2+ */
144 Addr instr_addr;
145 CC I;
146 CC D;
147 }
148 idCC;
149
150typedef
151 struct _iddCC {
152 /* word 1 */
153 UChar tag;
154 UChar instr_size;
155 UChar data_size;
156 /* 1 byte padding */
157
158 /* words 2+ */
159 Addr instr_addr;
160 CC I;
161 CC Da;
162 CC Db;
163 }
164 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000165
166static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
167{
njn25e49d8e72002-09-23 09:36:25 +0000168 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000169 cc->instr_size = instr_size;
170 cc->instr_addr = instr_addr;
171 initCC(&cc->I);
172}
173
174static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
175 UInt instr_size, UInt data_size)
176{
177 cc->tag = X_CC;
178 cc->instr_size = instr_size;
179 cc->data_size = data_size;
180 cc->instr_addr = instr_addr;
181 initCC(&cc->I);
182 initCC(&cc->D);
183}
184
njn25e49d8e72002-09-23 09:36:25 +0000185static void init_iddCC(iddCC* cc, Addr instr_addr,
186 UInt instr_size, UInt data_size)
187{
188 cc->tag = ReadWriteCC;
189 cc->instr_size = instr_size;
190 cc->data_size = data_size;
191 cc->instr_addr = instr_addr;
192 initCC(&cc->I);
193 initCC(&cc->Da);
194 initCC(&cc->Db);
195}
196
/* Add the counters of one cost centre to a running total.
 *
 *   cc_struct  CC structure type to view BBCC_ptr as (iCC, idCC or iddCC)
 *   cc         name of the CC member inside that structure (eg. I, D, Da)
 *   total      CC lvalue that accumulates the sums
 *
 * Relies on a variable called BBCC_ptr being in scope at the point of use.
 * NOTE: this expands to three separate statements, so it must not be used
 * as the unbraced body of an if/else/loop. */
#define ADD_CC_TO(cc_struct, cc, total)                 \
   (total).a  += ((cc_struct*)BBCC_ptr)->cc.a;          \
   (total).m1 += ((cc_struct*)BBCC_ptr)->cc.m1;         \
   (total).m2 += ((cc_struct*)BBCC_ptr)->cc.m2;
201
/* Debugging switch: when set to 1, the sprint_*CC() functions append the
 * address of each instruction as a "# <addr>" comment after its counts in
 * cachegrind.out. */
#define PRINT_INSTR_ADDRS 0
205
njne0ee0712002-05-03 16:41:05 +0000206static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000207{
njn95114da2002-06-05 09:39:31 +0000208#if PRINT_INSTR_ADDRS
209 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
210 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
211#else
njne0ee0712002-05-03 16:41:05 +0000212 VG_(sprintf)(buf, "%llu %llu %llu\n",
213 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000214#endif
njn4f9c9342002-04-29 16:03:24 +0000215}
216
njne0ee0712002-05-03 16:41:05 +0000217static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000218{
njn95114da2002-06-05 09:39:31 +0000219#if PRINT_INSTR_ADDRS
220 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
221 cc->I.a, cc->I.m1, cc->I.m2,
222 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
223#else
njne0ee0712002-05-03 16:41:05 +0000224 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
225 cc->I.a, cc->I.m1, cc->I.m2,
226 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000227#endif
njn4f9c9342002-04-29 16:03:24 +0000228}
229
njne0ee0712002-05-03 16:41:05 +0000230static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000231{
njn95114da2002-06-05 09:39:31 +0000232#if PRINT_INSTR_ADDRS
233 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
234 cc->I.a, cc->I.m1, cc->I.m2,
235 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
236#else
njne0ee0712002-05-03 16:41:05 +0000237 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
238 cc->I.a, cc->I.m1, cc->I.m2,
239 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000240#endif
njn4f9c9342002-04-29 16:03:24 +0000241}
242
njn25e49d8e72002-09-23 09:36:25 +0000243static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
244{
245#if PRINT_INSTR_ADDRS
246 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
247 cc->I.a, cc->I.m1, cc->I.m2,
248 cc->Da.a, cc->Da.m1, cc->Da.m2,
249 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
250#else
251 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
252 cc->I.a, cc->I.m1, cc->I.m2,
253 cc->Da.a, cc->Da.m1, cc->Da.m2,
254 cc->Db.a, cc->Db.m1, cc->Db.m2);
255#endif
256}
257
258
njn4f9c9342002-04-29 16:03:24 +0000259/*------------------------------------------------------------*/
260/*--- BBCC hash table stuff ---*/
261/*------------------------------------------------------------*/
262
/* The BBCC table is a three-level hash: hash(filename, hash(fn_name,
 * hash(BBCCs))), with separate chaining at every level.  The bucket counts
 * below work fairly well for Konqueror. */

#define N_FILE_ENTRIES        251   /* buckets at the filename level */
#define N_FN_ENTRIES           53   /* buckets per file_node         */
#define N_BBCC_ENTRIES         37   /* buckets per fn_node           */
270
271/* The cost centres for a basic block are stored in a contiguous array.
272 * They are distinguishable by their tag field. */
273typedef struct _BBCC BBCC;
274struct _BBCC {
275 Addr orig_addr;
276 UInt array_size; /* byte-size of variable length array */
277 BBCC* next;
278 Addr array[0]; /* variable length array */
279};
280
281typedef struct _fn_node fn_node;
282struct _fn_node {
283 Char* fn_name;
284 BBCC* BBCCs[N_BBCC_ENTRIES];
285 fn_node* next;
286};
287
288typedef struct _file_node file_node;
289struct _file_node {
290 Char* filename;
291 fn_node* fns[N_FN_ENTRIES];
292 file_node* next;
293};
294
295/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000296static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000297
sewardj4f29ddf2002-05-03 22:29:04 +0000298static Int distinct_files = 0;
299static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000300
sewardj4f29ddf2002-05-03 22:29:04 +0000301static Int distinct_instrs = 0;
302static Int full_debug_BBs = 0;
303static Int file_line_debug_BBs = 0;
304static Int fn_name_debug_BBs = 0;
305static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000306
sewardj4f29ddf2002-05-03 22:29:04 +0000307static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000308
njn4294fd42002-06-05 14:41:10 +0000309static CC Ir_discards;
310static CC Dr_discards;
311static CC Dw_discards;
312
njn4f9c9342002-04-29 16:03:24 +0000313static void init_BBCC_table()
314{
315 Int i;
316 for (i = 0; i < N_FILE_ENTRIES; i++)
317 BBCC_table[i] = NULL;
318}
319
njne0ee0712002-05-03 16:41:05 +0000320static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
321 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000322{
njn25e49d8e72002-09-23 09:36:25 +0000323 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000324
njn25e49d8e72002-09-23 09:36:25 +0000325 found1 = VG_(get_filename_linenum)(instr_addr, filename,
326 FILENAME_LEN, line_num);
327 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000328
329 if (!found1 && !found2) {
330 no_debug_BBs++;
331 VG_(strcpy)(filename, "???");
332 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000333 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000334
335 } else if ( found1 && found2) {
336 full_debug_BBs++;
337
338 } else if ( found1 && !found2) {
339 file_line_debug_BBs++;
340 VG_(strcpy)(fn_name, "???");
341
342 } else /*(!found1 && found2)*/ {
343 fn_name_debug_BBs++;
344 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000345 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000346 }
347}
348
349/* Forward declaration. */
350static Int compute_BBCC_array_size(UCodeBlock* cb);
351
352static __inline__
353file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
354{
355 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000356 file_node* new = VG_(malloc)(sizeof(file_node));
357 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000358 for (i = 0; i < N_FN_ENTRIES; i++) {
359 new->fns[i] = NULL;
360 }
361 new->next = next;
362 return new;
363}
364
365static __inline__
366fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
367{
368 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000369 fn_node* new = VG_(malloc)(sizeof(fn_node));
370 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000371 for (i = 0; i < N_BBCC_ENTRIES; i++) {
372 new->BBCCs[i] = NULL;
373 }
374 new->next = next;
375 return new;
376}
377
378static __inline__
379BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
380{
381 Int BBCC_array_size = compute_BBCC_array_size(cb);
382 BBCC* new;
383
njn25e49d8e72002-09-23 09:36:25 +0000384 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000385 new->orig_addr = bb_orig_addr;
386 new->array_size = BBCC_array_size;
387 new->next = next;
388
389 return new;
390}
391
392#define HASH_CONSTANT 256
393
394static UInt hash(Char *s, UInt table_size)
395{
396 int hash_value = 0;
397 for ( ; *s; s++)
398 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
399 return hash_value;
400}
401
402/* Do a three step traversal: by filename, then fn_name, then instr_addr.
403 * In all cases prepends new nodes to their chain. Returns a pointer to the
404 * cost centre. Also sets BB_seen_before by reference.
405 */
sewardj56867352003-10-12 10:27:06 +0000406static BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
407 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000408{
409 file_node *curr_file_node;
410 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000411 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000412 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
413 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000414 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000415
njne0ee0712002-05-03 16:41:05 +0000416 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000417
njn25e49d8e72002-09-23 09:36:25 +0000418 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000419 filename_hash = hash(filename, N_FILE_ENTRIES);
420 curr_file_node = BBCC_table[filename_hash];
421 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000422 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000423 curr_file_node = curr_file_node->next;
424 }
425 if (NULL == curr_file_node) {
426 BBCC_table[filename_hash] = curr_file_node =
427 new_file_node(filename, BBCC_table[filename_hash]);
428 distinct_files++;
429 }
430
431 fnname_hash = hash(fn_name, N_FN_ENTRIES);
432 curr_fn_node = curr_file_node->fns[fnname_hash];
433 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000434 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000435 curr_fn_node = curr_fn_node->next;
436 }
437 if (NULL == curr_fn_node) {
438 curr_file_node->fns[fnname_hash] = curr_fn_node =
439 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
440 distinct_fns++;
441 }
442
443 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000444 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000445 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
446 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000447 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000448 curr_BBCC = curr_BBCC->next;
449 }
450 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000451
njne427a662002-10-02 11:08:25 +0000452 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000453
njn4f9c9342002-04-29 16:03:24 +0000454 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
455 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
456 *BB_seen_before = False;
457
458 } else {
njne427a662002-10-02 11:08:25 +0000459 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
460 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000461 if (VG_(clo_verbosity) > 2) {
462 VG_(message)(Vg_DebugMsg,
463 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000464 }
465 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000466
467 if (True == remove) {
468 // Remove curr_BBCC from chain; it will be used and free'd by the
469 // caller.
470 *prev_BBCC_next_ptr = curr_BBCC->next;
471
472 } else {
473 BB_retranslations++;
474 }
njn4f9c9342002-04-29 16:03:24 +0000475 }
njn25e49d8e72002-09-23 09:36:25 +0000476 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000477 return curr_BBCC;
478}
479
480/*------------------------------------------------------------*/
481/*--- Cache simulation instrumentation phase ---*/
482/*------------------------------------------------------------*/
483
njn4f9c9342002-04-29 16:03:24 +0000484static Int compute_BBCC_array_size(UCodeBlock* cb)
485{
486 UInstr* u_in;
487 Int i, CC_size, BBCC_size = 0;
488 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000489 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000490
491 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000492 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000493
njn810086f2002-11-14 12:42:47 +0000494 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
495 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000496 switch(u_in->opcode) {
497
498 case INCEIP:
499 goto case_for_end_of_instr;
500
501 case JMP:
502 if (u_in->cond != CondAlways) break;
503
504 goto case_for_end_of_instr;
505
506 case_for_end_of_instr:
507
njn25e49d8e72002-09-23 09:36:25 +0000508 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
509 t_read != t_write)
510 CC_size = sizeof(iddCC);
511 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
512 CC_size = sizeof(idCC);
513 else
514 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000515
516 BBCC_size += CC_size;
517 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
518 break;
519
520 case LOAD:
521 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000522 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000523 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000524 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000525 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000526 is_LOAD = True;
527 break;
528
529 case STORE:
530 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000531 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000532 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000533 is_STORE = True;
534 break;
535
sewardj3949d102003-03-28 17:21:29 +0000536 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000537 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000538 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000539 case FPU_R:
njne427a662002-10-02 11:08:25 +0000540 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000541 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000542 is_FPU_R = True;
543 break;
544
njn21f805d2003-08-25 16:15:40 +0000545 case SSE2a_MemRd:
546 case SSE2a1_MemRd:
547 sk_assert(u_in->size == 4 || u_in->size == 16);
548 t_read = u_in->val3;
549 is_FPU_R = True;
550 break;
551
552 case SSE3a_MemRd:
553 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
554 t_read = u_in->val3;
555 is_FPU_R = True;
556 break;
557
jseward1b58fbc2003-11-04 22:54:28 +0000558 case SSE3a1_MemRd:
559 sk_assert(u_in->size == 16);
560 t_read = u_in->val3;
561 is_FPU_R = True;
562 break;
563
njn21f805d2003-08-25 16:15:40 +0000564 case SSE3ag_MemRd_RegWr:
565 sk_assert(u_in->size == 4 || u_in->size == 8);
566 t_read = u_in->val1;
567 is_FPU_R = True;
568 break;
569
sewardj3949d102003-03-28 17:21:29 +0000570 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000571 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000572 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000573 case FPU_W:
njne427a662002-10-02 11:08:25 +0000574 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000575 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000576 is_FPU_W = True;
577 break;
578
njn21f805d2003-08-25 16:15:40 +0000579 case SSE2a_MemWr:
580 sk_assert(u_in->size == 4 || u_in->size == 16);
581 t_write = u_in->val3;
582 is_FPU_W = True;
583 break;
584
585 case SSE3a_MemWr:
586 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
587 t_write = u_in->val3;
588 is_FPU_W = True;
589 break;
590
njn4f9c9342002-04-29 16:03:24 +0000591 default:
592 break;
593 }
594 }
595
596 return BBCC_size;
597}
598
njn25e49d8e72002-09-23 09:36:25 +0000599static __attribute__ ((regparm (1)))
600void log_1I_0D_cache_access(iCC* cc)
601{
602 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
603 // cc, cc->instr_addr, cc->instr_size)
604 VGP_PUSHCC(VgpCacheSimulate);
605 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
606 cc->I.a++;
607 VGP_POPCC(VgpCacheSimulate);
608}
609
610/* Difference between this function and log_1I_0D_cache_access() is that
611 this one can be passed any kind of CC, not just an iCC. So we have to
612 be careful to make sure we don't make any assumptions about CC layout.
613 (As it stands, they would be safe, but this will avoid potential heartache
614 if anyone else changes CC layout.)
615 Note that we only do the switch for the JIFZ version because if we always
616 called this switching version, things would run about 5% slower. */
617static __attribute__ ((regparm (1)))
618void log_1I_0D_cache_access_JIFZ(iCC* cc)
619{
620 UChar instr_size;
621 Addr instr_addr;
622 CC* I;
623
624 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
625 // cc, cc->instr_addr, cc->instr_size)
626 VGP_PUSHCC(VgpCacheSimulate);
627
628 switch(cc->tag) {
629 case InstrCC:
630 instr_size = cc->instr_size;
631 instr_addr = cc->instr_addr;
632 I = &(cc->I);
633 break;
634 case ReadCC:
635 case WriteCC:
636 case ModCC:
637 instr_size = ((idCC*)cc)->instr_size;
638 instr_addr = ((idCC*)cc)->instr_addr;
639 I = &( ((idCC*)cc)->I );
640 break;
641 case ReadWriteCC:
642 instr_size = ((iddCC*)cc)->instr_size;
643 instr_addr = ((iddCC*)cc)->instr_addr;
644 I = &( ((iddCC*)cc)->I );
645 break;
646 default:
njne427a662002-10-02 11:08:25 +0000647 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000648 break;
649 }
650 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
651 I->a++;
652 VGP_POPCC(VgpCacheSimulate);
653}
654
655__attribute__ ((regparm (2))) static
656void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
657{
658 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
659 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
660 VGP_PUSHCC(VgpCacheSimulate);
661 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
662 cc->D.a++;
663 VGP_POPCC(VgpCacheSimulate);
664}
665
666__attribute__ ((regparm (2))) static
667void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
668{
669 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
670 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
671 VGP_PUSHCC(VgpCacheSimulate);
672 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
673 cc->I.a++;
674
675 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
676 cc->D.a++;
677 VGP_POPCC(VgpCacheSimulate);
678}
679
680__attribute__ ((regparm (3))) static
681void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
682{
683 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
684 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
685 VGP_PUSHCC(VgpCacheSimulate);
686 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
687 cc->Da.a++;
688 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
689 cc->Db.a++;
690 VGP_POPCC(VgpCacheSimulate);
691}
692
693__attribute__ ((regparm (3))) static
694void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
695{
696 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
697 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
698 VGP_PUSHCC(VgpCacheSimulate);
699 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
700 cc->I.a++;
701
702 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
703 cc->Da.a++;
704 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
705 cc->Db.a++;
706 VGP_POPCC(VgpCacheSimulate);
707}
708
709UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
710{
711/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000712#define INVALID_DATA_SIZE 999999
713
njn4f9c9342002-04-29 16:03:24 +0000714 UCodeBlock* cb;
715 Int i;
716 UInstr* u_in;
717 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000718 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
719 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000720 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000721 Addr x86_instr_addr = orig_addr;
722 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
723 Addr helper;
724 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000725 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000726 Bool BB_seen_before = False;
727 Bool instrumented_Jcond = False;
728 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000729 Addr BBCC_ptr0, BBCC_ptr;
730
731 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
732 * if it's the first time it's been seen), and point to start of the
733 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000734 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000735 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
736
njn810086f2002-11-14 12:42:47 +0000737 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000738
njn25e49d8e72002-09-23 09:36:25 +0000739 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
740 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000741
njn810086f2002-11-14 12:42:47 +0000742 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
743 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000744
njn4f9c9342002-04-29 16:03:24 +0000745 /* What this is all about: we want to instrument each x86 instruction
746 * translation. The end of these are marked in three ways. The three
747 * ways, and the way we instrument them, are as follows:
748 *
749 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
750 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
751 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
752 *
njn25e49d8e72002-09-23 09:36:25 +0000753 * The last UInstr in a basic block is always a Juncond. Jconds,
754 * when they appear, are always second last. We check this with
755 * various assertions.
756 *
757 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000758 * executed. We don't have to put the instrumentation before the INCEIP
759 * (it could go after) but we do so for consistency.
760 *
njn25e49d8e72002-09-23 09:36:25 +0000761 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
762 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000763 *
njn25e49d8e72002-09-23 09:36:25 +0000764 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000765 *
766 * The instrumentation is just a call to the appropriate helper function,
767 * passing it the address of the instruction's CC.
768 */
njne427a662002-10-02 11:08:25 +0000769 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000770
771 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000772 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000773 break;
774
njn4f9c9342002-04-29 16:03:24 +0000775 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000776 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000777 */
778 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000779 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000780 t_read_addr = newTemp(cb);
781 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
782 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000783 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000784 break;
785
sewardj3949d102003-03-28 17:21:29 +0000786 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000787 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000788 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000789 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000790 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000791 t_read_addr = newTemp(cb);
792 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000793 data_size = ( u_in->size <= MIN_LINE_SIZE
794 ? u_in->size
795 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000796 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000797 break;
798
njn21f805d2003-08-25 16:15:40 +0000799 case SSE2a_MemRd:
800 case SSE2a1_MemRd:
801 sk_assert(u_in->size == 4 || u_in->size == 16);
802 t_read = u_in->val3;
803 t_read_addr = newTemp(cb);
804 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
805 data_size = u_in->size;
806 VG_(copy_UInstr)(cb, u_in);
807 break;
808
809 case SSE3a_MemRd:
810 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
811 t_read = u_in->val3;
812 t_read_addr = newTemp(cb);
813 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
814 data_size = u_in->size;
815 VG_(copy_UInstr)(cb, u_in);
816 break;
817
jseward1b58fbc2003-11-04 22:54:28 +0000818 case SSE3a1_MemRd:
819 sk_assert(u_in->size == 16);
820 t_read = u_in->val3;
821 t_read_addr = newTemp(cb);
822 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
823 data_size = u_in->size;
824 VG_(copy_UInstr)(cb, u_in);
825 break;
826
njn21f805d2003-08-25 16:15:40 +0000827 case SSE3ag_MemRd_RegWr:
828 sk_assert(u_in->size == 4 || u_in->size == 8);
829 t_read = u_in->val1;
830 t_read_addr = newTemp(cb);
831 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
832 data_size = u_in->size;
833 VG_(copy_UInstr)(cb, u_in);
834 break;
835
njn4f9c9342002-04-29 16:03:24 +0000836 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000837 * That's how the code above determines whether it does a write.
838 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000839 * As for the MOV, if it's a mod instruction it's redundant, but it's
840 * not expensive and mod instructions are rare anyway. */
sewardj3949d102003-03-28 17:21:29 +0000841 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000842 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000843 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000844 case STORE:
845 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000846 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000847 t_write_addr = newTemp(cb);
848 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000849 /* 28 and 108 B data-sized instructions will be done
850 * inaccurately but they're very rare and this avoids errors
851 * from hitting more than two cache lines in the simulation. */
852 data_size = ( u_in->size <= MIN_LINE_SIZE
853 ? u_in->size
854 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000855 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000856 break;
857
njn21f805d2003-08-25 16:15:40 +0000858 case SSE2a_MemWr:
859 sk_assert(u_in->size == 4 || u_in->size == 16);
860 /* fall through */
861 case SSE3a_MemWr:
862 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
863 t_write = u_in->val3;
864 t_write_addr = newTemp(cb);
865 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
866 data_size = u_in->size;
867 VG_(copy_UInstr)(cb, u_in);
868 break;
njn25e49d8e72002-09-23 09:36:25 +0000869
870 /* For rep-prefixed instructions, log a single I-cache access
871 * before the UCode loop that implements the repeated part, which
872 * is where the multiple D-cache accesses are logged. */
873 case JIFZ:
874 has_rep_prefix = True;
875
876 /* Setup 1st and only arg: CC addr */
877 t_CC_addr = newTemp(cb);
878 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
879 uLiteral(cb, BBCC_ptr);
880
881 /* Call helper */
882 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
883 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000884 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000885 break;
886
887
888 /* INCEIP: insert instrumentation */
889 case INCEIP:
890 x86_instr_size = u_in->val1;
891 goto instrument_x86_instr;
892
893 /* JMP: insert instrumentation if the first JMP */
894 case JMP:
895 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000896 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000897 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000898 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000899 instrumented_Jcond = False; /* reset */
900 break;
901 }
902 /* The first JMP... instrument. */
903 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000904 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000905 instrumented_Jcond = True;
906 } else {
njn810086f2002-11-14 12:42:47 +0000907 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000908 }
909
910 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +0000911 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
912
njn25e49d8e72002-09-23 09:36:25 +0000913 goto instrument_x86_instr;
914
915
916 /* Code executed at the end of each x86 instruction. */
917 instrument_x86_instr:
918
919 /* Initialise the CC in the BBCC array appropriately if it
920 * hasn't been initialised before. Then call appropriate sim
921 * function, passing it the CC address. */
922 stack_used = 0;
923
njne427a662002-10-02 11:08:25 +0000924 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000925 x86_instr_size <= MAX_x86_INSTR_SIZE);
926
927#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
928
929 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000930 sk_assert(INVALID_DATA_SIZE == data_size);
931 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000932 INVALID_TEMPREG == t_read &&
933 INVALID_TEMPREG == t_write_addr &&
934 INVALID_TEMPREG == t_write);
935 CC_size = sizeof(iCC);
936 if (!BB_seen_before)
937 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
938 helper = ( has_rep_prefix
939 ? (Addr)0 /* no extra log needed */
940 : (Addr) & log_1I_0D_cache_access
941 );
942 argc = 1;
943
944 } else {
njne427a662002-10-02 11:08:25 +0000945 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000946 8 == data_size || 10 == data_size ||
947 MIN_LINE_SIZE == data_size);
948
949 if (IS_(read) && !IS_(write)) {
950 CC_size = sizeof(idCC);
               /* If it uses 'rep', we've already logged the I-cache
                * access at the JIFZ UInstr (see JIFZ case above) so
                * don't do it here */
954 helper = ( has_rep_prefix
955 ? (Addr) & log_0I_1D_cache_access
956 : (Addr) & log_1I_1D_cache_access
957 );
958 argc = 2;
959 if (!BB_seen_before)
960 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
961 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000962 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000963 INVALID_TEMPREG != t_read &&
964 INVALID_TEMPREG == t_write_addr &&
965 INVALID_TEMPREG == t_write);
966 t_data_addr1 = t_read_addr;
967
968 } else if (!IS_(read) && IS_(write)) {
969 CC_size = sizeof(idCC);
970 helper = ( has_rep_prefix
971 ? (Addr) & log_0I_1D_cache_access
972 : (Addr) & log_1I_1D_cache_access
973 );
974 argc = 2;
975 if (!BB_seen_before)
976 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
977 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000978 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000979 INVALID_TEMPREG == t_read &&
980 INVALID_TEMPREG != t_write_addr &&
981 INVALID_TEMPREG != t_write);
982 t_data_addr1 = t_write_addr;
983
984 } else {
njne427a662002-10-02 11:08:25 +0000985 sk_assert(IS_(read) && IS_(write));
986 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000987 INVALID_TEMPREG != t_read &&
988 INVALID_TEMPREG != t_write_addr &&
989 INVALID_TEMPREG != t_write);
990 if (t_read == t_write) {
991 CC_size = sizeof(idCC);
992 helper = ( has_rep_prefix
993 ? (Addr) & log_0I_1D_cache_access
994 : (Addr) & log_1I_1D_cache_access
995 );
996 argc = 2;
997 if (!BB_seen_before)
998 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
999 x86_instr_size, data_size);
1000 t_data_addr1 = t_read_addr;
1001 } else {
1002 CC_size = sizeof(iddCC);
1003 helper = ( has_rep_prefix
1004 ? (Addr) & log_0I_2D_cache_access
1005 : (Addr) & log_1I_2D_cache_access
1006 );
1007 argc = 3;
1008 if (!BB_seen_before)
1009 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
1010 x86_instr_size, data_size);
1011 t_data_addr1 = t_read_addr;
1012 t_data_addr2 = t_write_addr;
1013 }
1014 }
1015#undef IS_
1016 }
1017
1018 /* Call the helper, if necessary */
1019 if ((Addr)0 != helper) {
1020
1021 /* Setup 1st arg: CC addr */
1022 t_CC_addr = newTemp(cb);
1023 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
1024 uLiteral(cb, BBCC_ptr);
1025
1026 /* Call the helper */
1027 if (1 == argc)
1028 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
1029 else if (2 == argc)
1030 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
1031 TempReg, t_data_addr1);
1032 else if (3 == argc)
1033 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
1034 TempReg, t_data_addr1,
1035 TempReg, t_data_addr2);
1036 else
njne427a662002-10-02 11:08:25 +00001037 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +00001038
1039 uCCall(cb, helper, argc, argc, False);
1040 }
1041
1042 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +00001043 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +00001044
1045 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
1046 BBCC_ptr += CC_size;
1047 x86_instr_addr += x86_instr_size;
1048 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
1049 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
1050 data_size = INVALID_DATA_SIZE;
1051 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +00001052 break;
1053
1054 default:
njn4ba5a792002-09-30 10:23:54 +00001055 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +00001056 break;
1057 }
1058 }
1059
1060 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +00001061 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001062
njn4ba5a792002-09-30 10:23:54 +00001063 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +00001064 return cb;
njn25e49d8e72002-09-23 09:36:25 +00001065
1066#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +00001067}
1068
1069/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001070/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +00001071/*------------------------------------------------------------*/
1072
1073/* Total reads/writes/misses. Calculated during CC traversal at the end. */
1074static CC Ir_total;
1075static CC Dr_total;
1076static CC Dw_total;
1077
njn25e49d8e72002-09-23 09:36:25 +00001078#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
1079
1080static cache_t clo_I1_cache = UNDEFINED_CACHE;
1081static cache_t clo_D1_cache = UNDEFINED_CACHE;
1082static cache_t clo_L2_cache = UNDEFINED_CACHE;
1083
njn7cf0bd32002-06-08 13:36:03 +00001084/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
1085/* Probably only works for Intel and AMD chips, and probably only for some of
1086 * them.
1087 */
1088
sewardj05bcdcb2003-05-18 10:05:38 +00001089static __inline__ void cpuid(Int n, UInt *a, UInt *b, UInt *c, UInt *d)
njn7cf0bd32002-06-08 13:36:03 +00001090{
1091 __asm__ __volatile__ (
1092 "cpuid"
1093 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1094 : "0" (n) /* input */
1095 );
1096}
1097
sewardj07133bf2002-06-13 10:25:56 +00001098static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001099{
1100 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001101 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001102 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001103 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001104 " Simulating a %d KB cache with %d B lines",
1105 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001106}
1107
1108/* Intel method is truly wretched. We have to do an insane indexing into an
1109 * array of pre-defined configurations for various parts of the memory
1110 * hierarchy.
1111 */
1112static
sewardj07133bf2002-06-13 10:25:56 +00001113Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001114{
sewardj07133bf2002-06-13 10:25:56 +00001115 UChar info[16];
1116 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001117 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001118
1119 if (level < 2) {
1120 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001121 "warning: CPUID level < 2 for Intel processor (%d)",
1122 level);
njn7cf0bd32002-06-08 13:36:03 +00001123 return -1;
1124 }
1125
sewardj07133bf2002-06-13 10:25:56 +00001126 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1127 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001128 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1129 info[0] = 0x0; /* reset AL */
1130
1131 if (0 != trials) {
1132 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001133 "warning: non-zero CPUID trials for Intel processor (%d)",
1134 trials);
njn7cf0bd32002-06-08 13:36:03 +00001135 return -1;
1136 }
1137
1138 for (i = 0; i < 16; i++) {
1139
1140 switch (info[i]) {
1141
1142 case 0x0: /* ignore zeros */
1143 break;
1144
njn25e49d8e72002-09-23 09:36:25 +00001145 /* TLB info, ignore */
1146 case 0x01: case 0x02: case 0x03: case 0x04:
1147 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +00001148 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +00001149 break;
1150
1151 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1152 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001153 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001154
1155 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1156 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001157 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001158
njn25e49d8e72002-09-23 09:36:25 +00001159 /* IA-64 info -- panic! */
1160 case 0x10: case 0x15: case 0x1a:
1161 case 0x88: case 0x89: case 0x8a: case 0x8d:
1162 case 0x90: case 0x96: case 0x9b:
1163 VG_(message)(Vg_DebugMsg,
1164 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001165 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001166
njn7cf0bd32002-06-08 13:36:03 +00001167 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001168 VG_(message)(Vg_DebugMsg,
1169 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001170 break;
1171
njn25e49d8e72002-09-23 09:36:25 +00001172 /* These are sectored, whatever that means */
1173 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1174 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1175
1176 /* If a P6 core, this means "no L2 cache".
1177 If a P4 core, this means "no L3 cache".
1178 We don't know what core it is, so don't issue a warning. To detect
1179 a missing L2 cache, we use 'L2_found'. */
1180 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001181 break;
1182
njn25e49d8e72002-09-23 09:36:25 +00001183 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1184 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1185 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1186 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1187 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001188
1189 /* These are sectored, whatever that means */
1190 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1191 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1192 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1193
1194 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1195 * conversion to byte size is a total guess; treat the 12K and 16K
1196 * cases the same since the cache byte size must be a power of two for
1197 * everything to work!. Also guessing 32 bytes for the line size...
1198 */
1199 case 0x70: /* 12K micro-ops, 8-way */
1200 *I1c = (cache_t) { 16, 8, 32 };
1201 micro_ops_warn(12, 16, 32);
1202 break;
1203 case 0x71: /* 16K micro-ops, 8-way */
1204 *I1c = (cache_t) { 16, 8, 32 };
1205 micro_ops_warn(16, 16, 32);
1206 break;
1207 case 0x72: /* 32K micro-ops, 8-way */
1208 *I1c = (cache_t) { 32, 8, 32 };
1209 micro_ops_warn(32, 32, 32);
1210 break;
1211
njn25e49d8e72002-09-23 09:36:25 +00001212 /* These are sectored, whatever that means */
1213 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1214 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1215 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1216 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1217 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001218
njn25e49d8e72002-09-23 09:36:25 +00001219 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1220 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1221 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1222 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1223 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +00001224 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
1225 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001226
1227 default:
1228 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001229 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001230 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001231 break;
1232 }
1233 }
njn25e49d8e72002-09-23 09:36:25 +00001234
1235 if (!L2_found)
1236 VG_(message)(Vg_DebugMsg,
1237 "warning: L2 cache not installed, ignore L2 results.");
1238
njn7cf0bd32002-06-08 13:36:03 +00001239 return 0;
1240}
1241
1242/* AMD method is straightforward, just extract appropriate bits from the
1243 * result registers.
1244 *
1245 * Bits, for D1 and I1:
1246 * 31..24 data L1 cache size in KBs
1247 * 23..16 data L1 cache associativity (FFh=full)
1248 * 15.. 8 data L1 cache lines per tag
1249 * 7.. 0 data L1 cache line size in bytes
1250 *
1251 * Bits, for L2:
1252 * 31..16 unified L2 cache size in KBs
1253 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1254 * 11.. 8 unified L2 cache lines per tag
1255 * 7.. 0 unified L2 cache line size in bytes
1256 *
1257 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1258 * upon this information. (Whatever that means -- njn)
1259 *
njn25e49d8e72002-09-23 09:36:25 +00001260 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1261 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1262 * so we detect that.
1263 *
njn7cf0bd32002-06-08 13:36:03 +00001264 * Returns 0 on success, non-zero on failure.
1265 */
sewardj07133bf2002-06-13 10:25:56 +00001266static
1267Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001268{
sewardj05bcdcb2003-05-18 10:05:38 +00001269 UInt ext_level;
1270 Int dummy, model;
sewardj07133bf2002-06-13 10:25:56 +00001271 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001272
1273 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1274
1275 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1276 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001277 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1278 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001279 return -1;
1280 }
1281
1282 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1283 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1284
njn25e49d8e72002-09-23 09:36:25 +00001285 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1286 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1287
1288 /* Check for Duron bug */
1289 if (model == 0x630) {
1290 VG_(message)(Vg_UserMsg,
1291 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1292 L2i = (64 << 16) | (L2i & 0xffff);
1293 }
1294
njn7cf0bd32002-06-08 13:36:03 +00001295 D1c->size = (D1i >> 24) & 0xff;
1296 D1c->assoc = (D1i >> 16) & 0xff;
1297 D1c->line_size = (D1i >> 0) & 0xff;
1298
1299 I1c->size = (I1i >> 24) & 0xff;
1300 I1c->assoc = (I1i >> 16) & 0xff;
1301 I1c->line_size = (I1i >> 0) & 0xff;
1302
1303 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1304 L2c->assoc = (L2i >> 12) & 0xf;
1305 L2c->line_size = (L2i >> 0) & 0xff;
1306
1307 return 0;
1308}
1309
/* Jump target filled in by __builtin_setjmp() in get_caches_from_CPUID()
 * just before the first CPUID instruction is attempted. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler installed around that first CPUID: jumps straight back to
 * the saved __builtin_setjmp() point.  'signum' is not examined. */
static void cpuid_SIGILL_handler(int signum)
{
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1317
1318static
sewardj07133bf2002-06-13 10:25:56 +00001319Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001320{
sewardj07133bf2002-06-13 10:25:56 +00001321 Int level, res, ret;
1322 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001323 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001324
1325 /* Install own SIGILL handler */
1326 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1327 sigill_new.ksa_flags = 0;
1328 sigill_new.ksa_restorer = NULL;
1329 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001330 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001331
1332 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001333 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001334
1335 /* Trap for illegal instruction, in case it's a really old processor that
1336 * doesn't support CPUID. */
1337 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1338 cpuid(0, &level, (int*)&vendor_id[0],
1339 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1340 vendor_id[12] = '\0';
1341
1342 /* Restore old SIGILL handler */
1343 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001344 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001345
1346 } else {
1347 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1348
1349 /* Restore old SIGILL handler */
1350 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001351 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001352 return -1;
1353 }
1354
1355 if (0 == level) {
1356 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1357 return -1;
1358 }
1359
1360 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1361 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1362 ret = Intel_cache_info(level, I1c, D1c, L2c);
1363
1364 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1365 ret = AMD_cache_info(I1c, D1c, L2c);
1366
sewardj97b7b262003-10-07 00:18:16 +00001367 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
1368 /* Total kludge. Pretend to be a VIA Nehemiah. */
1369 D1c->size = 64;
1370 D1c->assoc = 16;
1371 D1c->line_size = 16;
1372 I1c->size = 64;
1373 I1c->assoc = 4;
1374 I1c->line_size = 16;
1375 L2c->size = 64;
1376 L2c->assoc = 16;
1377 L2c->line_size = 16;
1378 ret = 0;
1379
njn7cf0bd32002-06-08 13:36:03 +00001380 } else {
1381 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1382 vendor_id);
1383 return -1;
1384 }
1385
1386 /* Successful! Convert sizes from KB to bytes */
1387 I1c->size *= 1024;
1388 D1c->size *= 1024;
1389 L2c->size *= 1024;
1390
1391 return ret;
1392}
1393
1394/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001395static
1396void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001397{
1398 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001399 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001400 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001401 "warning: %s size of %dB not a power of two; "
1402 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001403 cache->size = dflt->size;
1404 }
1405
sewardj07133bf2002-06-13 10:25:56 +00001406 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001407 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001408 "warning: %s associativity of %d not a power of two; "
1409 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001410 cache->assoc = dflt->assoc;
1411 }
1412
sewardj07133bf2002-06-13 10:25:56 +00001413 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001414 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001415 "warning: %s line size of %dB not a power of two; "
1416 "defaulting to %dB",
1417 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001418 cache->line_size = dflt->line_size;
1419 }
1420
1421 /* Then check line size >= 16 -- any smaller and a single instruction could
1422 * straddle three cache lines, which breaks a simulation assertion and is
1423 * stupid anyway. */
1424 if (cache->line_size < MIN_LINE_SIZE) {
1425 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001426 "warning: %s line size of %dB too small; "
1427 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001428 cache->line_size = MIN_LINE_SIZE;
1429 }
1430
1431 /* Then check cache size > line size (causes seg faults if not). */
1432 if (cache->size <= cache->line_size) {
1433 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001434 "warning: %s cache size of %dB <= line size of %dB; "
1435 "increasing to %dB", name, cache->size, cache->line_size,
1436 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001437 cache->size = cache->line_size * 2;
1438 }
1439
1440 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1441 if (cache->assoc > (cache->size / cache->line_size)) {
1442 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001443 "warning: %s associativity > (size / line size); "
1444 "increasing size to %dB",
1445 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001446 cache->size = cache->assoc * cache->line_size;
1447 }
1448}
1449
1450/* On entry, args are undefined. Fill them with any info from the
1451 * command-line, then fill in any remaining with CPUID instruction if possible,
1452 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001453static
1454void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001455{
1456 /* Defaults are for a model 3 or 4 Athlon */
1457 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1458 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1459 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1460
njn25e49d8e72002-09-23 09:36:25 +00001461#define CMD_LINE_DEFINED(L) \
1462 (-1 != clo_##L##_cache.size || \
1463 -1 != clo_##L##_cache.assoc || \
1464 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001465
njn25e49d8e72002-09-23 09:36:25 +00001466 *I1c = clo_I1_cache;
1467 *D1c = clo_D1_cache;
1468 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001469
njn7cf0bd32002-06-08 13:36:03 +00001470 /* If any undefined on command-line, try CPUID */
1471 if (! CMD_LINE_DEFINED(I1) ||
1472 ! CMD_LINE_DEFINED(D1) ||
1473 ! CMD_LINE_DEFINED(L2)) {
1474
1475 /* Overwrite CPUID result for any cache defined on command-line */
1476 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1477
njn25e49d8e72002-09-23 09:36:25 +00001478 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1479 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1480 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001481
1482 /* CPUID failed, use defaults for each undefined by command-line */
1483 } else {
1484 VG_(message)(Vg_DebugMsg,
1485 "Couldn't detect cache configuration, using one "
1486 "or more defaults ");
1487
njn25e49d8e72002-09-23 09:36:25 +00001488 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1489 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1490 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001491 }
1492 }
1493#undef CMD_LINE_DEFINED
1494
1495 check_cache(I1c, &I1_dflt, "I1");
1496 check_cache(D1c, &D1_dflt, "D1");
1497 check_cache(L2c, &L2_dflt, "L2");
1498
1499 if (VG_(clo_verbosity) > 1) {
1500 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1501 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1502 I1c->size, I1c->assoc, I1c->line_size);
1503 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1504 D1c->size, D1c->assoc, D1c->line_size);
1505 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1506 L2c->size, L2c->assoc, L2c->line_size);
1507 }
1508}
1509
/*------------------------------------------------------------*/
/*--- SK_(fini)() and related functions                    ---*/
/*------------------------------------------------------------*/
1513
njn4f9c9342002-04-29 16:03:24 +00001514static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1515 Char *first_instr_fn)
1516{
1517 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001518 Char buf[BUF_LEN], curr_file[BUF_LEN],
1519 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001520 UInt line_num;
1521
1522 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1523
njne0ee0712002-05-03 16:41:05 +00001524 /* Mark start of basic block in output, just to ease debugging */
1525 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001526
1527 VG_(strcpy)(curr_file, first_instr_fl);
1528
1529 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1530
1531 /* We pretend the CC is an iCC for getting the tag. This is ok
1532 * because both CC types have tag as their first byte. Once we know
1533 * the type, we can cast and act appropriately. */
1534
1535 Char fl_buf[FILENAME_LEN];
1536 Char fn_buf[FN_NAME_LEN];
1537
njne0ee0712002-05-03 16:41:05 +00001538 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001539 switch ( ((iCC*)BBCC_ptr)->tag ) {
1540
njn25e49d8e72002-09-23 09:36:25 +00001541 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001542 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1543 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001544 ADD_CC_TO(iCC, I, Ir_total);
1545 BBCC_ptr += sizeof(iCC);
1546 break;
1547
njn25e49d8e72002-09-23 09:36:25 +00001548 case ReadCC:
1549 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001550 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1551 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001552 ADD_CC_TO(idCC, I, Ir_total);
1553 ADD_CC_TO(idCC, D, Dr_total);
1554 BBCC_ptr += sizeof(idCC);
1555 break;
1556
njn25e49d8e72002-09-23 09:36:25 +00001557 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001558 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1559 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001560 ADD_CC_TO(idCC, I, Ir_total);
1561 ADD_CC_TO(idCC, D, Dw_total);
1562 BBCC_ptr += sizeof(idCC);
1563 break;
1564
njn25e49d8e72002-09-23 09:36:25 +00001565 case ReadWriteCC:
1566 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1567 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1568 ADD_CC_TO(iddCC, I, Ir_total);
1569 ADD_CC_TO(iddCC, Da, Dr_total);
1570 ADD_CC_TO(iddCC, Db, Dw_total);
1571 BBCC_ptr += sizeof(iddCC);
1572 break;
1573
njn4f9c9342002-04-29 16:03:24 +00001574 default:
njne427a662002-10-02 11:08:25 +00001575 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001576 break;
1577 }
1578 distinct_instrs++;
1579
njne0ee0712002-05-03 16:41:05 +00001580 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1581
1582 /* Allow for filename switching in the middle of a BB; if this happens,
1583 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001584 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001585 VG_(strcpy)(curr_file, fl_buf);
1586 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1587 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1588 }
1589
njn4f9c9342002-04-29 16:03:24 +00001590 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001591 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001592 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001593 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001594 VG_(printf)(" filenames: BB:%s, instr:%s;"
1595 " fn_names: BB:%s, instr:%s;"
1596 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001597 first_instr_fl, fl_buf,
1598 first_instr_fn, fn_buf,
1599 line_num);
1600 }
1601
njne0ee0712002-05-03 16:41:05 +00001602 VG_(sprintf)(lbuf, "%u ", line_num);
1603 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1604 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001605 }
1606 /* If we switched filenames in the middle of the BB without switching back,
1607 * switch back now because the subsequent BB may be relying on falling under
1608 * the original file name. */
1609 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1610 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1611 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1612 }
njne0ee0712002-05-03 16:41:05 +00001613
1614 /* Mark end of basic block */
1615 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001616
njne427a662002-10-02 11:08:25 +00001617 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001618}
1619
njn25e49d8e72002-09-23 09:36:25 +00001620static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001621{
1622 Int fd;
1623 Char buf[BUF_LEN];
1624 file_node *curr_file_node;
1625 fn_node *curr_fn_node;
1626 BBCC *curr_BBCC;
1627 Int i,j,k;
1628
njn25e49d8e72002-09-23 09:36:25 +00001629 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001630
njndb918dd2003-07-22 20:45:11 +00001631 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001632 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001633 if (fd < 0) {
sewardj0744b6c2002-12-11 00:45:42 +00001634 /* If the file can't be opened for whatever reason (conflict
1635 between multiple cachegrinded processes?), give up now. */
1636 file_err();
1637 return;
1638 }
njn4f9c9342002-04-29 16:03:24 +00001639
1640 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001641 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1642 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1643 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1644 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1645 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1646 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001647
1648 /* "cmd:" line */
1649 VG_(strcpy)(buf, "cmd:");
1650 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001651 for (i = 0; i < VG_(client_argc); i++) {
1652 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001653 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1654 }
1655 /* "events:" line */
1656 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1657 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1658
1659 /* Six loops here: three for the hash table arrays, and three for the
1660 * chains hanging off the hash table arrays. */
1661 for (i = 0; i < N_FILE_ENTRIES; i++) {
1662 curr_file_node = BBCC_table[i];
1663 while (curr_file_node != NULL) {
1664 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1665 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1666
1667 for (j = 0; j < N_FN_ENTRIES; j++) {
1668 curr_fn_node = curr_file_node->fns[j];
1669 while (curr_fn_node != NULL) {
1670 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1671 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1672
1673 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1674 curr_BBCC = curr_fn_node->BBCCs[k];
1675 while (curr_BBCC != NULL) {
1676 fprint_BBCC(fd, curr_BBCC,
1677
1678 curr_file_node->filename,
1679 curr_fn_node->fn_name);
1680
1681 curr_BBCC = curr_BBCC->next;
1682 }
1683 }
1684 curr_fn_node = curr_fn_node->next;
1685 }
1686 }
1687 curr_file_node = curr_file_node->next;
1688 }
1689 }
1690
njn4294fd42002-06-05 14:41:10 +00001691 /* Print stats from any discarded basic blocks */
1692 if (0 != Ir_discards.a) {
1693
1694 VG_(sprintf)(buf, "fl=(discarded)\n");
1695 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1696 VG_(sprintf)(buf, "fn=(discarded)\n");
1697 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1698
1699 /* Use 0 as line number */
1700 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1701 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1702 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1703 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1704 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1705
1706 Ir_total.a += Ir_discards.a;
1707 Ir_total.m1 += Ir_discards.m1;
1708 Ir_total.m2 += Ir_discards.m2;
1709 Dr_total.a += Dr_discards.a;
1710 Dr_total.m1 += Dr_discards.m1;
1711 Dr_total.m2 += Dr_discards.m2;
1712 Dw_total.a += Dw_discards.a;
1713 Dw_total.m1 += Dw_discards.m1;
1714 Dw_total.m2 += Dw_discards.m2;
1715 }
1716
njn4f9c9342002-04-29 16:03:24 +00001717 /* Summary stats must come after rest of table, since we calculate them
1718 * during traversal. */
1719 VG_(sprintf)(buf, "summary: "
1720 "%llu %llu %llu "
1721 "%llu %llu %llu "
1722 "%llu %llu %llu\n",
1723 Ir_total.a, Ir_total.m1, Ir_total.m2,
1724 Dr_total.a, Dr_total.m1, Dr_total.m2,
1725 Dw_total.a, Dw_total.m1, Dw_total.m2);
1726 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1727 VG_(close)(fd);
1728}
1729
/* Return the number of characters needed to print n in decimal with a comma
 * between each group of three digits, e.g. 1234567 -> "1,234,567" -> 9.
 *
 * Zero is treated as one digit wide.  Without that, w would stay 0 and the
 * unsigned expression (w-1)/3 would wrap around to a huge value. */
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (0 == w) w = 1;    // "0" still occupies one column
   return w + (w-1)/3;   // add space for commas
}
1739
/* Format the scaled value n/ex as a percentage string in buf, right-justified
 * in a field of field_width characters (no padding is added when the text is
 * already at least that wide).
 *
 *   n            value pre-multiplied by ex (e.g. 105 with ex == 100 is 1.05%)
 *   ex           scale factor; callers in this file pass 10 or 100.  Must be
 *                non-zero; the output is only a meaningful decimal when ex is
 *                a power of ten.
 *   field_width  minimum width of the result
 *   buf          output buffer; the caller must make it large enough for
 *                max(field_width, text length) + 1 bytes
 *
 * The fractional part is zero-padded to the number of digits implied by ex,
 * so 105/100 prints as "1.05%" and not "1.5%".  n is assumed non-negative. */
static
void percentify(Int n, Int ex, Int field_width, char buf[])
{
   int i, len, space;
   int digits, t;
   int frac = n % ex;

   /* Integer part and decimal point. */
   VG_(sprintf)(buf, "%d.", n / ex);
   len = VG_(strlen)(buf);

   /* Number of fractional digits: as many as the largest remainder (ex-1)
    * needs, and never fewer than one. */
   digits = 1;
   for (t = ex - 1; t >= 10; t /= 10) digits++;

   /* Emit the fraction right-to-left so leading zeros are kept. */
   for (i = digits - 1; i >= 0; i--) {
      buf[len + i] = (char)('0' + frac % 10);
      frac /= 10;
   }
   len += digits;
   buf[len++] = '%';
   buf[len]   = '\0';

   space = field_width - len;
   if (space < 0) space = 0;     /* Allow for v. small field_width */

   /* Right justify in field: shift the text (including its NUL) up by
    * 'space', then fill the gap with blanks. */
   for (i = len; i >= 0;    i--) buf[i + space] = buf[i];
   for (i = 0;   i < space; i++) buf[i] = ' ';
}
1755
njn7d9f94d2003-04-22 21:41:40 +00001756void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001757{
njn607adfc2003-09-30 14:15:44 +00001758 static char buf1[RESULTS_BUF_LEN],
1759 buf2[RESULTS_BUF_LEN],
1760 buf3[RESULTS_BUF_LEN],
1761 fmt [RESULTS_BUF_LEN];
1762
njn4f9c9342002-04-29 16:03:24 +00001763 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001764 ULong L2_total_m, L2_total_mr, L2_total_mw,
1765 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001766 Int l1, l2, l3;
1767 Int p;
1768
njn25e49d8e72002-09-23 09:36:25 +00001769 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001770
njn7cf0bd32002-06-08 13:36:03 +00001771 if (VG_(clo_verbosity) == 0)
1772 return;
1773
njn4f9c9342002-04-29 16:03:24 +00001774 /* I cache results. Use the I_refs value to determine the first column
1775 * width. */
njn607adfc2003-09-30 14:15:44 +00001776 l1 = ULong_width(Ir_total.a);
1777 l2 = ULong_width(Dr_total.a);
1778 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001779
njn607adfc2003-09-30 14:15:44 +00001780 /* Make format string, getting width right for numbers */
1781 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1782
1783 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1784 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1785 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001786
1787 p = 100;
1788
njn25e49d8e72002-09-23 09:36:25 +00001789 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001790 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1791 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1792
1793 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1794 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1795 VG_(message)(Vg_UserMsg, "");
1796
1797 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1798 * width of columns 2 & 3. */
1799 D_total.a = Dr_total.a + Dw_total.a;
1800 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1801 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1802
njn607adfc2003-09-30 14:15:44 +00001803 /* Make format string, getting width right for numbers */
1804 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001805
njn607adfc2003-09-30 14:15:44 +00001806 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1807 D_total.a, Dr_total.a, Dw_total.a);
1808 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1809 D_total.m1, Dr_total.m1, Dw_total.m1);
1810 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1811 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001812
1813 p = 10;
1814
njn25e49d8e72002-09-23 09:36:25 +00001815 if (0 == D_total.a) D_total.a = 1;
1816 if (0 == Dr_total.a) Dr_total.a = 1;
1817 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001818 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1819 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1820 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1821 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1822
1823 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1824 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1825 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1826 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1827 VG_(message)(Vg_UserMsg, "");
1828
1829 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001830
1831 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1832 L2_total_r = Dr_total.m1 + Ir_total.m1;
1833 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001834 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1835 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001836
njn4f9c9342002-04-29 16:03:24 +00001837 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1838 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1839 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001840 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1841 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001842
1843 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1844 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1845 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1846 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1847
1848
1849 /* Hash table stats */
1850 if (VG_(clo_verbosity) > 1) {
1851 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1852 file_line_debug_BBs + no_debug_BBs;
1853
1854 VG_(message)(Vg_DebugMsg, "");
1855 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1856 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1857 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1858 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1859 full_debug_BBs * 100 / BB_lookups,
1860 full_debug_BBs);
1861 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1862 file_line_debug_BBs * 100 / BB_lookups,
1863 file_line_debug_BBs);
1864 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1865 fn_name_debug_BBs * 100 / BB_lookups,
1866 fn_name_debug_BBs);
1867 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1868 no_debug_BBs * 100 / BB_lookups,
1869 no_debug_BBs);
1870 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1871 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1872 }
njn25e49d8e72002-09-23 09:36:25 +00001873 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001874}
1875
sewardj18d75132002-05-16 11:06:21 +00001876
njn4294fd42002-06-05 14:41:10 +00001877/* Called when a translation is invalidated due to self-modifying code or
1878 * unloaded of a shared object.
1879 *
1880 * Finds the BBCC in the table, removes it, adds the counts to the discard
1881 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001882void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001883{
njn4294fd42002-06-05 14:41:10 +00001884 BBCC *BBCC_node;
1885 Addr BBCC_ptr0, BBCC_ptr;
1886 Bool BB_seen_before;
1887
sewardj83205b32002-06-14 11:08:07 +00001888 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001889 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001890
1891 /* 2nd arg won't be used since BB should have been seen before (assertions
1892 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001893 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001894 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1895
njne427a662002-10-02 11:08:25 +00001896 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001897
1898 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1899
1900 /* We pretend the CC is an iCC for getting the tag. This is ok
1901 * because both CC types have tag as their first byte. Once we know
1902 * the type, we can cast and act appropriately. */
1903
1904 switch ( ((iCC*)BBCC_ptr)->tag ) {
1905
njn25e49d8e72002-09-23 09:36:25 +00001906 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001907 ADD_CC_TO(iCC, I, Ir_discards);
1908 BBCC_ptr += sizeof(iCC);
1909 break;
1910
njn25e49d8e72002-09-23 09:36:25 +00001911 case ReadCC:
1912 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001913 ADD_CC_TO(idCC, I, Ir_discards);
1914 ADD_CC_TO(idCC, D, Dr_discards);
1915 BBCC_ptr += sizeof(idCC);
1916 break;
1917
njn25e49d8e72002-09-23 09:36:25 +00001918 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001919 ADD_CC_TO(idCC, I, Ir_discards);
1920 ADD_CC_TO(idCC, D, Dw_discards);
1921 BBCC_ptr += sizeof(idCC);
1922 break;
1923
njn25e49d8e72002-09-23 09:36:25 +00001924 case ReadWriteCC:
1925 ADD_CC_TO(iddCC, I, Ir_discards);
1926 ADD_CC_TO(iddCC, Da, Dr_discards);
1927 ADD_CC_TO(iddCC, Db, Dw_discards);
1928 BBCC_ptr += sizeof(iddCC);
1929 break;
1930
njn4294fd42002-06-05 14:41:10 +00001931 default:
njne427a662002-10-02 11:08:25 +00001932 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001933 break;
1934 }
1935 }
njn25e49d8e72002-09-23 09:36:25 +00001936 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001937}
1938
1939/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001940/*--- Command line processing ---*/
1941/*--------------------------------------------------------------------*/
1942
1943static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1944{
1945 int i1, i2, i3;
1946 int i;
1947 char *opt = VG_(strdup)(orig_opt);
1948
1949 i = i1 = opt_len;
1950
1951 /* Option looks like "--I1=65536,2,64".
1952 * Find commas, replace with NULs to make three independent
1953 * strings, then extract numbers. Yuck. */
1954 while (VG_(isdigit)(opt[i])) i++;
1955 if (',' == opt[i]) {
1956 opt[i++] = '\0';
1957 i2 = i;
1958 } else goto bad;
1959 while (VG_(isdigit)(opt[i])) i++;
1960 if (',' == opt[i]) {
1961 opt[i++] = '\0';
1962 i3 = i;
1963 } else goto bad;
1964 while (VG_(isdigit)(opt[i])) i++;
1965 if ('\0' != opt[i]) goto bad;
1966
1967 cache->size = (Int)VG_(atoll)(opt + i1);
1968 cache->assoc = (Int)VG_(atoll)(opt + i2);
1969 cache->line_size = (Int)VG_(atoll)(opt + i3);
1970
1971 VG_(free)(opt);
1972
1973 return;
1974
1975 bad:
1976 VG_(bad_option)(orig_opt);
1977}
1978
1979Bool SK_(process_cmd_line_option)(Char* arg)
1980{
1981 /* 5 is length of "--I1=" */
njn39c86652003-05-21 10:13:39 +00001982 if (VG_CLO_STREQN(5, arg, "--I1="))
njn25e49d8e72002-09-23 09:36:25 +00001983 parse_cache_opt(&clo_I1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001984 else if (VG_CLO_STREQN(5, arg, "--D1="))
njn25e49d8e72002-09-23 09:36:25 +00001985 parse_cache_opt(&clo_D1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001986 else if (VG_CLO_STREQN(5, arg, "--L2="))
njn25e49d8e72002-09-23 09:36:25 +00001987 parse_cache_opt(&clo_L2_cache, arg, 5);
1988 else
1989 return False;
1990
1991 return True;
1992}
1993
njn3e884182003-04-15 13:03:23 +00001994void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001995{
njn3e884182003-04-15 13:03:23 +00001996 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001997" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1998" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001999" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
2000 );
2001}
2002
2003void SK_(print_debug_usage)(void)
2004{
2005 VG_(printf)(
2006" (none)\n"
2007 );
njn25e49d8e72002-09-23 09:36:25 +00002008}
2009
2010/*--------------------------------------------------------------------*/
2011/*--- Setup ---*/
2012/*--------------------------------------------------------------------*/
2013
njn810086f2002-11-14 12:42:47 +00002014void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00002015{
njn13f02932003-04-30 20:23:58 +00002016 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00002017
njn810086f2002-11-14 12:42:47 +00002018 VG_(details_name) ("Cachegrind");
2019 VG_(details_version) (NULL);
2020 VG_(details_description) ("an I1/D1/L2 cache profiler");
2021 VG_(details_copyright_author)(
nethercotebb1c9912004-01-04 16:43:23 +00002022 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote.");
nethercote421281e2003-11-20 16:20:55 +00002023 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00002024 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00002025
njn810086f2002-11-14 12:42:47 +00002026 VG_(needs_basic_block_discards)();
2027 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00002028
2029 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
2030 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
2031 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
2032 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
2033 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
2034 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00002035
njn99ccf082003-09-30 13:51:23 +00002036 /* Get working directory */
2037 sk_assert( VG_(getcwd_alloc)(&base_dir) );
2038
njn13f02932003-04-30 20:23:58 +00002039 /* Block is big enough for dir name + cachegrind.out.<pid> */
2040 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
2041 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
2042 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00002043 VG_(free)(base_dir);
njn25e49d8e72002-09-23 09:36:25 +00002044}
2045
2046void SK_(post_clo_init)(void)
2047{
2048 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00002049
2050 initCC(&Ir_total);
2051 initCC(&Dr_total);
2052 initCC(&Dw_total);
2053
2054 initCC(&Ir_discards);
2055 initCC(&Dr_discards);
2056 initCC(&Dw_discards);
2057
2058 get_caches(&I1c, &D1c, &L2c);
2059
2060 cachesim_I1_initcache(I1c);
2061 cachesim_D1_initcache(D1c);
2062 cachesim_L2_initcache(L2c);
2063
2064 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
2065 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
2066 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
2067
2068 init_BBCC_table();
2069}
2070
fitzhardinge98abfc72003-12-16 02:05:15 +00002071VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
2072
#if 0
/* Disabled sanity-check hooks; nothing in this region is compiled. */

/* Cheap check: always reports success. */
Bool SK_(cheap_sanity_check)(void)
{
   return True;
}

extern TTEntry* vg_tt;

/* Expensive check: for each of the 200191 vg_tt slots whose orig_addr is
 * neither 1 nor 3, print a dot and look the address up with get_BBCC()
 * (with remove=True).  Always reports success.
 * NOTE(review): 1 and 3 look like "empty"/"deleted" slot markers and 200191
 * like the table size -- confirm against the translation-table code. */
Bool SK_(expensive_sanity_check)(void)
{
   Bool seen;
   Int  slot = 0;

   while (slot < 200191) {
      if (vg_tt[slot].orig_addr != (Addr)1 &&
          vg_tt[slot].orig_addr != (Addr)3) {
         VG_(printf)(".");
         get_BBCC(vg_tt[slot].orig_addr, NULL, /*remove=*/True, &seen);
      }
      slot++;
   }
   return True;
}
#endif
2092
2093/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002094/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002095/*--------------------------------------------------------------------*/