blob: 2f83e76f6704f4c66a239a3fbe7916fc8f9a0dc5 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
nethercote137bc552003-11-14 17:47:54 +00009 This file is part of Cachegrind, a Valgrind tool for cache
njnc9539842002-10-02 13:26:35 +000010 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
nethercotebb1c9912004-01-04 16:43:23 +000012 Copyright (C) 2002-2004 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000013 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
/* Description of one cache, used for cache simulation. */
typedef struct {
   int size;         /* total size, in bytes */
   int assoc;        /* associativity */
   int line_size;    /* size of one line, in bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000042
nethercote27fc1da2004-01-04 16:56:57 +000043#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000044
njn25e49d8e72002-09-23 09:36:25 +000045/*------------------------------------------------------------*/
46/*--- Constants ---*/
47/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000048
/* Longest possible x86 instruction, in bytes, according to the IA-32 Intel
 * Architecture Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE    16

/* FPU/MMX read sizes larger than this are clamped to it when the
 * instrumentation records a data size. */
#define MIN_LINE_SIZE         16

/* Sizes (in bytes) of the various buffers used for storing strings. */
#define FILENAME_LEN          256
#define FN_NAME_LEN           256
#define BUF_LEN               512
#define COMMIFY_BUF_LEN       128
#define RESULTS_BUF_LEN       128
#define LINE_BUF_LEN           64
61
62/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000063/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000064/*------------------------------------------------------------*/
65
njn25e49d8e72002-09-23 09:36:25 +000066typedef
67 enum {
68 VgpGetBBCC = VgpFini+1,
69 VgpCacheSimulate,
70 VgpCacheResults
71 }
nethercote7cc9c232004-01-21 15:08:04 +000072 VgpToolCC;
sewardj07133bf2002-06-13 10:25:56 +000073
njn4f9c9342002-04-29 16:03:24 +000074/*------------------------------------------------------------*/
75/*--- Output file related stuff ---*/
76/*------------------------------------------------------------*/
77
njn13f02932003-04-30 20:23:58 +000078static Char* cachegrind_out_file;
njn4f9c9342002-04-29 16:03:24 +000079
sewardj0744b6c2002-12-11 00:45:42 +000080static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000081{
82 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000083 "error: can't open cache simulation output file `%s'",
84 cachegrind_out_file );
85 VG_(message)(Vg_UserMsg,
86 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000087}
88
89/*------------------------------------------------------------*/
90/*--- Cost center types, operations ---*/
91/*------------------------------------------------------------*/
92
93typedef struct _CC CC;
94struct _CC {
95 ULong a;
96 ULong m1;
97 ULong m2;
98};
99
100static __inline__ void initCC(CC* cc) {
101 cc->a = 0;
102 cc->m1 = 0;
103 cc->m2 = 0;
104}
105
/* Kinds of instruction-level cost centre; stored in the `tag' byte of
 * iCC, idCC and iddCC. */
typedef enum {
   InstrCC     = 0,   /* eg. mov %eax,  %ebx                       */
   ReadCC      = 1,   /* eg. mov (%ecx), %esi                      */
   WriteCC     = 2,   /* eg. mov %eax,  (%edx)                     */
   ModCC       = 3,   /* eg. incl (%eax) (read+write one addr)     */
   ReadWriteCC = 4    /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                         (read+write two different addrs)          */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000115
njn7e1b3b22003-07-04 11:44:39 +0000116/* Instruction-level cost-centres.
njn4f9c9342002-04-29 16:03:24 +0000117 *
118 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +0000119 *
njne0ee0712002-05-03 16:41:05 +0000120 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +0000121 */
njn25e49d8e72002-09-23 09:36:25 +0000122typedef
123 struct {
124 /* word 1 */
125 UChar tag;
126 UChar instr_size;
127 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000128
njn25e49d8e72002-09-23 09:36:25 +0000129 /* words 2+ */
130 Addr instr_addr;
131 CC I;
132 }
133 iCC;
njn4f9c9342002-04-29 16:03:24 +0000134
njn25e49d8e72002-09-23 09:36:25 +0000135typedef
136 struct _idCC {
137 /* word 1 */
138 UChar tag;
139 UChar instr_size;
140 UChar data_size;
141 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000142
njn25e49d8e72002-09-23 09:36:25 +0000143 /* words 2+ */
144 Addr instr_addr;
145 CC I;
146 CC D;
147 }
148 idCC;
149
150typedef
151 struct _iddCC {
152 /* word 1 */
153 UChar tag;
154 UChar instr_size;
155 UChar data_size;
156 /* 1 byte padding */
157
158 /* words 2+ */
159 Addr instr_addr;
160 CC I;
161 CC Da;
162 CC Db;
163 }
164 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000165
166static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
167{
njn25e49d8e72002-09-23 09:36:25 +0000168 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000169 cc->instr_size = instr_size;
170 cc->instr_addr = instr_addr;
171 initCC(&cc->I);
172}
173
174static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
175 UInt instr_size, UInt data_size)
176{
177 cc->tag = X_CC;
178 cc->instr_size = instr_size;
179 cc->data_size = data_size;
180 cc->instr_addr = instr_addr;
181 initCC(&cc->I);
182 initCC(&cc->D);
183}
184
njn25e49d8e72002-09-23 09:36:25 +0000185static void init_iddCC(iddCC* cc, Addr instr_addr,
186 UInt instr_size, UInt data_size)
187{
188 cc->tag = ReadWriteCC;
189 cc->instr_size = instr_size;
190 cc->data_size = data_size;
191 cc->instr_addr = instr_addr;
192 initCC(&cc->I);
193 initCC(&cc->Da);
194 initCC(&cc->Db);
195}
196
/* Add the a/m1/m2 counters of member `cc' of the CC_type record that
 * BBCC_ptr points at onto `total'.
 *
 * BBCC_ptr is not a parameter: it must be in scope wherever the macro is
 * used.
 *
 * The expansion is ONE statement (a comma expression followed by the
 * terminating semicolon), so all three additions stay together when the
 * macro is the body of an unbraced `if' or loop.  The trailing semicolon is
 * kept so that call sites written with or without their own `;' both
 * compile. */
#define ADD_CC_TO(CC_type, cc, total)                        \
   (total).a  += ((CC_type*)BBCC_ptr)->cc.a,                 \
   (total).m1 += ((CC_type*)BBCC_ptr)->cc.m1,                \
   (total).m2 += ((CC_type*)BBCC_ptr)->cc.m2;
201
/* Debugging switch for the sprint_*CC() functions.  If 1, the address of
 * each instruction is printed as a `#' comment after its counts in
 * cachegrind.out; if 0, only the counts are printed. */
#define PRINT_INSTR_ADDRS     0
205
njne0ee0712002-05-03 16:41:05 +0000206static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000207{
njn95114da2002-06-05 09:39:31 +0000208#if PRINT_INSTR_ADDRS
209 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
210 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
211#else
njne0ee0712002-05-03 16:41:05 +0000212 VG_(sprintf)(buf, "%llu %llu %llu\n",
213 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000214#endif
njn4f9c9342002-04-29 16:03:24 +0000215}
216
njne0ee0712002-05-03 16:41:05 +0000217static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000218{
njn95114da2002-06-05 09:39:31 +0000219#if PRINT_INSTR_ADDRS
220 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
221 cc->I.a, cc->I.m1, cc->I.m2,
222 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
223#else
njne0ee0712002-05-03 16:41:05 +0000224 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
225 cc->I.a, cc->I.m1, cc->I.m2,
226 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000227#endif
njn4f9c9342002-04-29 16:03:24 +0000228}
229
njne0ee0712002-05-03 16:41:05 +0000230static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000231{
njn95114da2002-06-05 09:39:31 +0000232#if PRINT_INSTR_ADDRS
233 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
234 cc->I.a, cc->I.m1, cc->I.m2,
235 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
236#else
njne0ee0712002-05-03 16:41:05 +0000237 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
238 cc->I.a, cc->I.m1, cc->I.m2,
239 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000240#endif
njn4f9c9342002-04-29 16:03:24 +0000241}
242
njn25e49d8e72002-09-23 09:36:25 +0000243static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
244{
245#if PRINT_INSTR_ADDRS
246 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
247 cc->I.a, cc->I.m1, cc->I.m2,
248 cc->Da.a, cc->Da.m1, cc->Da.m2,
249 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
250#else
251 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
252 cc->I.a, cc->I.m1, cc->I.m2,
253 cc->Da.a, cc->Da.m1, cc->Da.m2,
254 cc->Db.a, cc->Db.m1, cc->Db.m2);
255#endif
256}
257
258
njn4f9c9342002-04-29 16:03:24 +0000259/*------------------------------------------------------------*/
260/*--- BBCC hash table stuff ---*/
261/*------------------------------------------------------------*/
262
263/* The table of BBCCs is of the form hash(filename, hash(fn_name,
264 * hash(BBCCs))). Each hash table is separately chained. The sizes below work
265 * fairly well for Konqueror. */
266
/* Number of buckets at each of the three hash table levels. */
#define N_FILE_ENTRIES        251   /* BBCC_table[]:     file nodes     */
#define N_FN_ENTRIES           53   /* file_node.fns[]:  function nodes */
#define N_BBCC_ENTRIES         37   /* fn_node.BBCCs[]:  BBCCs          */
270
271/* The cost centres for a basic block are stored in a contiguous array.
272 * They are distinguishable by their tag field. */
273typedef struct _BBCC BBCC;
274struct _BBCC {
275 Addr orig_addr;
276 UInt array_size; /* byte-size of variable length array */
277 BBCC* next;
278 Addr array[0]; /* variable length array */
279};
280
281typedef struct _fn_node fn_node;
282struct _fn_node {
283 Char* fn_name;
284 BBCC* BBCCs[N_BBCC_ENTRIES];
285 fn_node* next;
286};
287
288typedef struct _file_node file_node;
289struct _file_node {
290 Char* filename;
291 fn_node* fns[N_FN_ENTRIES];
292 file_node* next;
293};
294
295/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000296static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000297
sewardj4f29ddf2002-05-03 22:29:04 +0000298static Int distinct_files = 0;
299static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000300
sewardj4f29ddf2002-05-03 22:29:04 +0000301static Int distinct_instrs = 0;
302static Int full_debug_BBs = 0;
303static Int file_line_debug_BBs = 0;
304static Int fn_name_debug_BBs = 0;
305static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000306
sewardj4f29ddf2002-05-03 22:29:04 +0000307static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000308
njn4294fd42002-06-05 14:41:10 +0000309static CC Ir_discards;
310static CC Dr_discards;
311static CC Dw_discards;
312
njn4f9c9342002-04-29 16:03:24 +0000313static void init_BBCC_table()
314{
315 Int i;
316 for (i = 0; i < N_FILE_ENTRIES; i++)
317 BBCC_table[i] = NULL;
318}
319
njne0ee0712002-05-03 16:41:05 +0000320static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
321 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000322{
njn25e49d8e72002-09-23 09:36:25 +0000323 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000324
njn25e49d8e72002-09-23 09:36:25 +0000325 found1 = VG_(get_filename_linenum)(instr_addr, filename,
326 FILENAME_LEN, line_num);
327 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000328
329 if (!found1 && !found2) {
330 no_debug_BBs++;
331 VG_(strcpy)(filename, "???");
332 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000333 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000334
335 } else if ( found1 && found2) {
336 full_debug_BBs++;
337
338 } else if ( found1 && !found2) {
339 file_line_debug_BBs++;
340 VG_(strcpy)(fn_name, "???");
341
342 } else /*(!found1 && found2)*/ {
343 fn_name_debug_BBs++;
344 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000345 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000346 }
347}
348
349/* Forward declaration. */
350static Int compute_BBCC_array_size(UCodeBlock* cb);
351
352static __inline__
353file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
354{
355 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000356 file_node* new = VG_(malloc)(sizeof(file_node));
357 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000358 for (i = 0; i < N_FN_ENTRIES; i++) {
359 new->fns[i] = NULL;
360 }
361 new->next = next;
362 return new;
363}
364
365static __inline__
366fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
367{
368 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000369 fn_node* new = VG_(malloc)(sizeof(fn_node));
370 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000371 for (i = 0; i < N_BBCC_ENTRIES; i++) {
372 new->BBCCs[i] = NULL;
373 }
374 new->next = next;
375 return new;
376}
377
378static __inline__
379BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
380{
381 Int BBCC_array_size = compute_BBCC_array_size(cb);
382 BBCC* new;
383
njn25e49d8e72002-09-23 09:36:25 +0000384 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000385 new->orig_addr = bb_orig_addr;
386 new->array_size = BBCC_array_size;
387 new->next = next;
388
389 return new;
390}
391
392#define HASH_CONSTANT 256
393
394static UInt hash(Char *s, UInt table_size)
395{
396 int hash_value = 0;
397 for ( ; *s; s++)
398 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
399 return hash_value;
400}
401
nethercote09d853e2004-01-21 16:12:55 +0000402/* This is a backup for get_BBCC() when removing BBs from the table.
403 * Necessary because the debug info can change when code is removed. For
404 * example, when inserting, the info might be "myprint.c:myprint()", but
405 * upon removal, the info might be "myprint.c:???", which causes the
406 * hash-lookup to fail (but it doesn't always happen). So we do a horrible,
407 * slow search through all the file nodes and function nodes (but we can do
408 * 3rd stage with the fast hash-lookup). */
409static BBCC* get_BBCC_slow_removal(Addr bb_orig_addr)
410{
411 Int i, j;
412 UInt BBCC_hash;
413 file_node *curr_file_node;
414 fn_node *curr_fn_node;
415 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
416
417 for (i = 0; i < N_FILE_ENTRIES; i++) {
418
419 for (curr_file_node = BBCC_table[i];
420 NULL != curr_file_node;
421 curr_file_node = curr_file_node->next)
422 {
423 for (j = 0; j < N_FN_ENTRIES; j++) {
424
425 for (curr_fn_node = curr_file_node->fns[j];
426 NULL != curr_fn_node;
427 curr_fn_node = curr_fn_node->next)
428 {
429 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
430 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
431 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
432
433 while (NULL != curr_BBCC) {
434 if (bb_orig_addr == curr_BBCC->orig_addr) {
435 // Found it!
436 sk_assert(curr_BBCC->array_size > 0
437 && curr_BBCC->array_size < 1000000);
438 if (VG_(clo_verbosity) > 2) {
439 VG_(message)(Vg_DebugMsg, "did slow BB removal");
440 }
441
442 // Remove curr_BBCC from chain; it will be used and
443 // free'd by the caller.
444 *prev_BBCC_next_ptr = curr_BBCC->next;
445 return curr_BBCC;
446 }
447
448 prev_BBCC_next_ptr = &(curr_BBCC->next);
449 curr_BBCC = curr_BBCC->next;
450 }
451 }
452 }
453 }
454 }
455 VG_(printf)("failing BB address: %p\n", bb_orig_addr);
456 VG_(skin_panic)("slow BB removal failed");
457}
458
njn4f9c9342002-04-29 16:03:24 +0000459/* Do a three step traversal: by filename, then fn_name, then instr_addr.
460 * In all cases prepends new nodes to their chain. Returns a pointer to the
461 * cost centre. Also sets BB_seen_before by reference.
462 */
sewardj56867352003-10-12 10:27:06 +0000463static BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
464 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000465{
466 file_node *curr_file_node;
467 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000468 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000469 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
470 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000471 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000472
njne0ee0712002-05-03 16:41:05 +0000473 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000474
njn25e49d8e72002-09-23 09:36:25 +0000475 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000476 filename_hash = hash(filename, N_FILE_ENTRIES);
477 curr_file_node = BBCC_table[filename_hash];
478 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000479 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000480 curr_file_node = curr_file_node->next;
481 }
482 if (NULL == curr_file_node) {
483 BBCC_table[filename_hash] = curr_file_node =
484 new_file_node(filename, BBCC_table[filename_hash]);
485 distinct_files++;
486 }
487
488 fnname_hash = hash(fn_name, N_FN_ENTRIES);
489 curr_fn_node = curr_file_node->fns[fnname_hash];
490 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000491 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000492 curr_fn_node = curr_fn_node->next;
493 }
494 if (NULL == curr_fn_node) {
495 curr_file_node->fns[fnname_hash] = curr_fn_node =
496 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
497 distinct_fns++;
498 }
499
500 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000501 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000502 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
503 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000504 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000505 curr_BBCC = curr_BBCC->next;
506 }
507 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000508
nethercote09d853e2004-01-21 16:12:55 +0000509 if (remove == False) {
510 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
511 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
512 *BB_seen_before = False;
513 } else {
514 // Ok, BB not found when removing: the debug info must have
515 // changed. Do a slow removal.
516 curr_BBCC = get_BBCC_slow_removal(bb_orig_addr);
517 *BB_seen_before = True;
518 }
njn4f9c9342002-04-29 16:03:24 +0000519
520 } else {
njne427a662002-10-02 11:08:25 +0000521 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
522 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000523 if (VG_(clo_verbosity) > 2) {
524 VG_(message)(Vg_DebugMsg,
nethercote09d853e2004-01-21 16:12:55 +0000525 "BB retranslation/invalidation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000526 }
527 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000528
529 if (True == remove) {
530 // Remove curr_BBCC from chain; it will be used and free'd by the
531 // caller.
532 *prev_BBCC_next_ptr = curr_BBCC->next;
533
534 } else {
535 BB_retranslations++;
536 }
njn4f9c9342002-04-29 16:03:24 +0000537 }
njn25e49d8e72002-09-23 09:36:25 +0000538 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000539 return curr_BBCC;
540}
541
542/*------------------------------------------------------------*/
543/*--- Cache simulation instrumentation phase ---*/
544/*------------------------------------------------------------*/
545
njn4f9c9342002-04-29 16:03:24 +0000546static Int compute_BBCC_array_size(UCodeBlock* cb)
547{
548 UInstr* u_in;
549 Int i, CC_size, BBCC_size = 0;
550 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000551 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000552
553 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000554 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000555
njn810086f2002-11-14 12:42:47 +0000556 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
557 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000558 switch(u_in->opcode) {
559
560 case INCEIP:
561 goto case_for_end_of_instr;
562
563 case JMP:
564 if (u_in->cond != CondAlways) break;
565
566 goto case_for_end_of_instr;
567
568 case_for_end_of_instr:
569
njn25e49d8e72002-09-23 09:36:25 +0000570 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
571 t_read != t_write)
572 CC_size = sizeof(iddCC);
573 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
574 CC_size = sizeof(idCC);
575 else
576 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000577
578 BBCC_size += CC_size;
579 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
580 break;
581
582 case LOAD:
583 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000584 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000585 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000586 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000587 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000588 is_LOAD = True;
589 break;
590
591 case STORE:
592 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000593 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000594 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000595 is_STORE = True;
596 break;
597
sewardj3949d102003-03-28 17:21:29 +0000598 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000599 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000600 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000601 case FPU_R:
njne427a662002-10-02 11:08:25 +0000602 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000603 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000604 is_FPU_R = True;
605 break;
606
njn21f805d2003-08-25 16:15:40 +0000607 case SSE2a_MemRd:
608 case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000609 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000610 t_read = u_in->val3;
611 is_FPU_R = True;
612 break;
613
614 case SSE3a_MemRd:
615 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
616 t_read = u_in->val3;
617 is_FPU_R = True;
618 break;
619
jseward1b58fbc2003-11-04 22:54:28 +0000620 case SSE3a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000621 sk_assert(u_in->size == 8 || u_in->size == 16);
jseward1b58fbc2003-11-04 22:54:28 +0000622 t_read = u_in->val3;
623 is_FPU_R = True;
624 break;
625
njn21f805d2003-08-25 16:15:40 +0000626 case SSE3ag_MemRd_RegWr:
627 sk_assert(u_in->size == 4 || u_in->size == 8);
628 t_read = u_in->val1;
629 is_FPU_R = True;
630 break;
631
sewardj3949d102003-03-28 17:21:29 +0000632 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000633 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000634 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000635 case FPU_W:
njne427a662002-10-02 11:08:25 +0000636 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000637 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000638 is_FPU_W = True;
639 break;
640
njn21f805d2003-08-25 16:15:40 +0000641 case SSE2a_MemWr:
nethercoteb1affa82004-01-19 19:14:18 +0000642 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000643 t_write = u_in->val3;
644 is_FPU_W = True;
645 break;
646
647 case SSE3a_MemWr:
648 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
649 t_write = u_in->val3;
650 is_FPU_W = True;
651 break;
652
njn4f9c9342002-04-29 16:03:24 +0000653 default:
654 break;
655 }
656 }
657
658 return BBCC_size;
659}
660
njn25e49d8e72002-09-23 09:36:25 +0000661static __attribute__ ((regparm (1)))
662void log_1I_0D_cache_access(iCC* cc)
663{
664 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
665 // cc, cc->instr_addr, cc->instr_size)
666 VGP_PUSHCC(VgpCacheSimulate);
667 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
668 cc->I.a++;
669 VGP_POPCC(VgpCacheSimulate);
670}
671
672/* Difference between this function and log_1I_0D_cache_access() is that
673 this one can be passed any kind of CC, not just an iCC. So we have to
674 be careful to make sure we don't make any assumptions about CC layout.
675 (As it stands, they would be safe, but this will avoid potential heartache
676 if anyone else changes CC layout.)
677 Note that we only do the switch for the JIFZ version because if we always
678 called this switching version, things would run about 5% slower. */
679static __attribute__ ((regparm (1)))
680void log_1I_0D_cache_access_JIFZ(iCC* cc)
681{
682 UChar instr_size;
683 Addr instr_addr;
684 CC* I;
685
686 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
687 // cc, cc->instr_addr, cc->instr_size)
688 VGP_PUSHCC(VgpCacheSimulate);
689
690 switch(cc->tag) {
691 case InstrCC:
692 instr_size = cc->instr_size;
693 instr_addr = cc->instr_addr;
694 I = &(cc->I);
695 break;
696 case ReadCC:
697 case WriteCC:
698 case ModCC:
699 instr_size = ((idCC*)cc)->instr_size;
700 instr_addr = ((idCC*)cc)->instr_addr;
701 I = &( ((idCC*)cc)->I );
702 break;
703 case ReadWriteCC:
704 instr_size = ((iddCC*)cc)->instr_size;
705 instr_addr = ((iddCC*)cc)->instr_addr;
706 I = &( ((iddCC*)cc)->I );
707 break;
708 default:
njne427a662002-10-02 11:08:25 +0000709 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000710 break;
711 }
712 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
713 I->a++;
714 VGP_POPCC(VgpCacheSimulate);
715}
716
717__attribute__ ((regparm (2))) static
718void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
719{
720 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
721 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
722 VGP_PUSHCC(VgpCacheSimulate);
723 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
724 cc->D.a++;
725 VGP_POPCC(VgpCacheSimulate);
726}
727
728__attribute__ ((regparm (2))) static
729void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
730{
731 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
732 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
733 VGP_PUSHCC(VgpCacheSimulate);
734 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
735 cc->I.a++;
736
737 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
738 cc->D.a++;
739 VGP_POPCC(VgpCacheSimulate);
740}
741
742__attribute__ ((regparm (3))) static
743void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
744{
745 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
746 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
747 VGP_PUSHCC(VgpCacheSimulate);
748 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
749 cc->Da.a++;
750 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
751 cc->Db.a++;
752 VGP_POPCC(VgpCacheSimulate);
753}
754
755__attribute__ ((regparm (3))) static
756void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
757{
758 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
759 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
760 VGP_PUSHCC(VgpCacheSimulate);
761 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
762 cc->I.a++;
763
764 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
765 cc->Da.a++;
766 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
767 cc->Db.a++;
768 VGP_POPCC(VgpCacheSimulate);
769}
770
771UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
772{
773/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000774#define INVALID_DATA_SIZE 999999
775
njn4f9c9342002-04-29 16:03:24 +0000776 UCodeBlock* cb;
777 Int i;
778 UInstr* u_in;
779 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000780 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
781 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000782 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000783 Addr x86_instr_addr = orig_addr;
784 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
785 Addr helper;
786 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000787 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000788 Bool BB_seen_before = False;
789 Bool instrumented_Jcond = False;
790 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000791 Addr BBCC_ptr0, BBCC_ptr;
792
793 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
794 * if it's the first time it's been seen), and point to start of the
795 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000796 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000797 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
798
njn810086f2002-11-14 12:42:47 +0000799 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000800
njn25e49d8e72002-09-23 09:36:25 +0000801 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
802 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000803
njn810086f2002-11-14 12:42:47 +0000804 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
805 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000806
njn4f9c9342002-04-29 16:03:24 +0000807 /* What this is all about: we want to instrument each x86 instruction
808 * translation. The end of these are marked in three ways. The three
809 * ways, and the way we instrument them, are as follows:
810 *
811 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
812 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
813 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
814 *
njn25e49d8e72002-09-23 09:36:25 +0000815 * The last UInstr in a basic block is always a Juncond. Jconds,
816 * when they appear, are always second last. We check this with
817 * various assertions.
818 *
819 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000820 * executed. We don't have to put the instrumentation before the INCEIP
821 * (it could go after) but we do so for consistency.
822 *
njn25e49d8e72002-09-23 09:36:25 +0000823 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
824 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000825 *
njn25e49d8e72002-09-23 09:36:25 +0000826 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000827 *
828 * The instrumentation is just a call to the appropriate helper function,
829 * passing it the address of the instruction's CC.
830 */
njne427a662002-10-02 11:08:25 +0000831 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000832
833 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000834 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000835 break;
836
njn4f9c9342002-04-29 16:03:24 +0000837 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000838 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000839 */
840 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000841 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000842 t_read_addr = newTemp(cb);
843 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
844 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000845 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000846 break;
847
sewardj3949d102003-03-28 17:21:29 +0000848 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000849 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000850 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000851 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000852 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000853 t_read_addr = newTemp(cb);
854 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000855 data_size = ( u_in->size <= MIN_LINE_SIZE
856 ? u_in->size
857 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000858 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000859 break;
860
njn21f805d2003-08-25 16:15:40 +0000861 case SSE2a_MemRd:
862 case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000863 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000864 t_read = u_in->val3;
865 t_read_addr = newTemp(cb);
866 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
jsewardfca60182004-01-04 23:30:55 +0000867 /* 512 B data-sized instructions will be done inaccurately
868 * but they're very rare and this avoids errors from
869 * hitting more than two cache lines in the simulation. */
870 data_size = ( u_in->size <= MIN_LINE_SIZE
871 ? u_in->size
872 : MIN_LINE_SIZE);
njn21f805d2003-08-25 16:15:40 +0000873 VG_(copy_UInstr)(cb, u_in);
874 break;
875
876 case SSE3a_MemRd:
877 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
878 t_read = u_in->val3;
879 t_read_addr = newTemp(cb);
880 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
881 data_size = u_in->size;
882 VG_(copy_UInstr)(cb, u_in);
883 break;
884
jseward1b58fbc2003-11-04 22:54:28 +0000885 case SSE3a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000886 sk_assert(u_in->size == 8 || u_in->size == 16);
jseward1b58fbc2003-11-04 22:54:28 +0000887 t_read = u_in->val3;
888 t_read_addr = newTemp(cb);
889 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
890 data_size = u_in->size;
891 VG_(copy_UInstr)(cb, u_in);
892 break;
893
njn21f805d2003-08-25 16:15:40 +0000894 case SSE3ag_MemRd_RegWr:
895 sk_assert(u_in->size == 4 || u_in->size == 8);
896 t_read = u_in->val1;
897 t_read_addr = newTemp(cb);
898 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
899 data_size = u_in->size;
900 VG_(copy_UInstr)(cb, u_in);
901 break;
902
njn4f9c9342002-04-29 16:03:24 +0000903 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000904 * That's how the code above determines whether it does a write.
905 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000906 * As for the MOV, if it's a mod instruction it's redundant, but it's
907 * not expensive and mod instructions are rare anyway. */
sewardj3949d102003-03-28 17:21:29 +0000908 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000909 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000910 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000911 case STORE:
912 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000913 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000914 t_write_addr = newTemp(cb);
915 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000916 /* 28 and 108 B data-sized instructions will be done
917 * inaccurately but they're very rare and this avoids errors
918 * from hitting more than two cache lines in the simulation. */
919 data_size = ( u_in->size <= MIN_LINE_SIZE
920 ? u_in->size
921 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000922 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000923 break;
924
njn21f805d2003-08-25 16:15:40 +0000925 case SSE2a_MemWr:
nethercoteb1affa82004-01-19 19:14:18 +0000926 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000927 /* fall through */
928 case SSE3a_MemWr:
jsewardfca60182004-01-04 23:30:55 +0000929 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000930 t_write = u_in->val3;
931 t_write_addr = newTemp(cb);
932 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
jsewardfca60182004-01-04 23:30:55 +0000933 /* 512 B data-sized instructions will be done inaccurately
934 * but they're very rare and this avoids errors from
935 * hitting more than two cache lines in the simulation. */
936 data_size = ( u_in->size <= MIN_LINE_SIZE
937 ? u_in->size
938 : MIN_LINE_SIZE);
njn21f805d2003-08-25 16:15:40 +0000939 VG_(copy_UInstr)(cb, u_in);
940 break;
njn25e49d8e72002-09-23 09:36:25 +0000941
942 /* For rep-prefixed instructions, log a single I-cache access
943 * before the UCode loop that implements the repeated part, which
944 * is where the multiple D-cache accesses are logged. */
945 case JIFZ:
946 has_rep_prefix = True;
947
948 /* Setup 1st and only arg: CC addr */
949 t_CC_addr = newTemp(cb);
950 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
951 uLiteral(cb, BBCC_ptr);
952
953 /* Call helper */
954 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
955 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000956 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000957 break;
958
959
960 /* INCEIP: insert instrumentation */
961 case INCEIP:
962 x86_instr_size = u_in->val1;
963 goto instrument_x86_instr;
964
965 /* JMP: insert instrumentation if the first JMP */
966 case JMP:
967 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000968 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000969 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000970 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000971 instrumented_Jcond = False; /* reset */
972 break;
973 }
974 /* The first JMP... instrument. */
975 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000976 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000977 instrumented_Jcond = True;
978 } else {
njn810086f2002-11-14 12:42:47 +0000979 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000980 }
981
982 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +0000983 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
984
njn25e49d8e72002-09-23 09:36:25 +0000985 goto instrument_x86_instr;
986
987
988 /* Code executed at the end of each x86 instruction. */
989 instrument_x86_instr:
990
991 /* Initialise the CC in the BBCC array appropriately if it
992 * hasn't been initialised before. Then call appropriate sim
993 * function, passing it the CC address. */
994 stack_used = 0;
995
njne427a662002-10-02 11:08:25 +0000996 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000997 x86_instr_size <= MAX_x86_INSTR_SIZE);
998
999#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
1000
1001 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +00001002 sk_assert(INVALID_DATA_SIZE == data_size);
1003 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001004 INVALID_TEMPREG == t_read &&
1005 INVALID_TEMPREG == t_write_addr &&
1006 INVALID_TEMPREG == t_write);
1007 CC_size = sizeof(iCC);
1008 if (!BB_seen_before)
1009 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
1010 helper = ( has_rep_prefix
1011 ? (Addr)0 /* no extra log needed */
1012 : (Addr) & log_1I_0D_cache_access
1013 );
1014 argc = 1;
1015
1016 } else {
njne427a662002-10-02 11:08:25 +00001017 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +00001018 8 == data_size || 10 == data_size ||
1019 MIN_LINE_SIZE == data_size);
1020
1021 if (IS_(read) && !IS_(write)) {
1022 CC_size = sizeof(idCC);
1023 /* If it uses 'rep', we've already logged the I-cache
1024 * access at the JIFZ UInstr (see JIFZ case below) so
1025 * don't do it here */
1026 helper = ( has_rep_prefix
1027 ? (Addr) & log_0I_1D_cache_access
1028 : (Addr) & log_1I_1D_cache_access
1029 );
1030 argc = 2;
1031 if (!BB_seen_before)
1032 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
1033 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +00001034 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001035 INVALID_TEMPREG != t_read &&
1036 INVALID_TEMPREG == t_write_addr &&
1037 INVALID_TEMPREG == t_write);
1038 t_data_addr1 = t_read_addr;
1039
1040 } else if (!IS_(read) && IS_(write)) {
1041 CC_size = sizeof(idCC);
1042 helper = ( has_rep_prefix
1043 ? (Addr) & log_0I_1D_cache_access
1044 : (Addr) & log_1I_1D_cache_access
1045 );
1046 argc = 2;
1047 if (!BB_seen_before)
1048 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
1049 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +00001050 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001051 INVALID_TEMPREG == t_read &&
1052 INVALID_TEMPREG != t_write_addr &&
1053 INVALID_TEMPREG != t_write);
1054 t_data_addr1 = t_write_addr;
1055
1056 } else {
njne427a662002-10-02 11:08:25 +00001057 sk_assert(IS_(read) && IS_(write));
1058 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001059 INVALID_TEMPREG != t_read &&
1060 INVALID_TEMPREG != t_write_addr &&
1061 INVALID_TEMPREG != t_write);
1062 if (t_read == t_write) {
1063 CC_size = sizeof(idCC);
1064 helper = ( has_rep_prefix
1065 ? (Addr) & log_0I_1D_cache_access
1066 : (Addr) & log_1I_1D_cache_access
1067 );
1068 argc = 2;
1069 if (!BB_seen_before)
1070 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
1071 x86_instr_size, data_size);
1072 t_data_addr1 = t_read_addr;
1073 } else {
1074 CC_size = sizeof(iddCC);
1075 helper = ( has_rep_prefix
1076 ? (Addr) & log_0I_2D_cache_access
1077 : (Addr) & log_1I_2D_cache_access
1078 );
1079 argc = 3;
1080 if (!BB_seen_before)
1081 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
1082 x86_instr_size, data_size);
1083 t_data_addr1 = t_read_addr;
1084 t_data_addr2 = t_write_addr;
1085 }
1086 }
1087#undef IS_
1088 }
1089
1090 /* Call the helper, if necessary */
1091 if ((Addr)0 != helper) {
1092
1093 /* Setup 1st arg: CC addr */
1094 t_CC_addr = newTemp(cb);
1095 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
1096 uLiteral(cb, BBCC_ptr);
1097
1098 /* Call the helper */
1099 if (1 == argc)
1100 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
1101 else if (2 == argc)
1102 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
1103 TempReg, t_data_addr1);
1104 else if (3 == argc)
1105 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
1106 TempReg, t_data_addr1,
1107 TempReg, t_data_addr2);
1108 else
njne427a662002-10-02 11:08:25 +00001109 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +00001110
1111 uCCall(cb, helper, argc, argc, False);
1112 }
1113
1114 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +00001115 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +00001116
1117 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
1118 BBCC_ptr += CC_size;
1119 x86_instr_addr += x86_instr_size;
1120 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
1121 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
1122 data_size = INVALID_DATA_SIZE;
1123 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +00001124 break;
1125
1126 default:
njn4ba5a792002-09-30 10:23:54 +00001127 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +00001128 break;
1129 }
1130 }
1131
1132 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +00001133 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001134
njn4ba5a792002-09-30 10:23:54 +00001135 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +00001136 return cb;
njn25e49d8e72002-09-23 09:36:25 +00001137
1138#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +00001139}
1140
1141/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001142/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +00001143/*------------------------------------------------------------*/
1144
1145/* Total reads/writes/misses. Calculated during CC traversal at the end. */
1146static CC Ir_total;
1147static CC Dr_total;
1148static CC Dw_total;
1149
njn25e49d8e72002-09-23 09:36:25 +00001150#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
1151
1152static cache_t clo_I1_cache = UNDEFINED_CACHE;
1153static cache_t clo_D1_cache = UNDEFINED_CACHE;
1154static cache_t clo_L2_cache = UNDEFINED_CACHE;
1155
njn7cf0bd32002-06-08 13:36:03 +00001156/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
1157/* Probably only works for Intel and AMD chips, and probably only for some of
1158 * them.
1159 */
1160
sewardj05bcdcb2003-05-18 10:05:38 +00001161static __inline__ void cpuid(Int n, UInt *a, UInt *b, UInt *c, UInt *d)
njn7cf0bd32002-06-08 13:36:03 +00001162{
1163 __asm__ __volatile__ (
1164 "cpuid"
1165 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1166 : "0" (n) /* input */
1167 );
1168}
1169
sewardj07133bf2002-06-13 10:25:56 +00001170static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001171{
1172 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001173 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001174 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001175 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001176 " Simulating a %d KB cache with %d B lines",
1177 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001178}
1179
1180/* Intel method is truly wretched. We have to do an insane indexing into an
1181 * array of pre-defined configurations for various parts of the memory
1182 * hierarchy.
1183 */
1184static
sewardj07133bf2002-06-13 10:25:56 +00001185Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001186{
sewardj07133bf2002-06-13 10:25:56 +00001187 UChar info[16];
1188 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001189 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001190
1191 if (level < 2) {
1192 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001193 "warning: CPUID level < 2 for Intel processor (%d)",
1194 level);
njn7cf0bd32002-06-08 13:36:03 +00001195 return -1;
1196 }
1197
sewardj07133bf2002-06-13 10:25:56 +00001198 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1199 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001200 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1201 info[0] = 0x0; /* reset AL */
1202
1203 if (0 != trials) {
1204 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001205 "warning: non-zero CPUID trials for Intel processor (%d)",
1206 trials);
njn7cf0bd32002-06-08 13:36:03 +00001207 return -1;
1208 }
1209
1210 for (i = 0; i < 16; i++) {
1211
1212 switch (info[i]) {
1213
1214 case 0x0: /* ignore zeros */
1215 break;
1216
njn25e49d8e72002-09-23 09:36:25 +00001217 /* TLB info, ignore */
1218 case 0x01: case 0x02: case 0x03: case 0x04:
1219 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +00001220 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +00001221 break;
1222
1223 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1224 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001225 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001226
1227 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1228 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001229 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001230
njn25e49d8e72002-09-23 09:36:25 +00001231 /* IA-64 info -- panic! */
1232 case 0x10: case 0x15: case 0x1a:
1233 case 0x88: case 0x89: case 0x8a: case 0x8d:
1234 case 0x90: case 0x96: case 0x9b:
1235 VG_(message)(Vg_DebugMsg,
1236 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001237 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001238
njn7cf0bd32002-06-08 13:36:03 +00001239 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001240 VG_(message)(Vg_DebugMsg,
1241 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001242 break;
1243
njn25e49d8e72002-09-23 09:36:25 +00001244 /* These are sectored, whatever that means */
1245 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1246 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1247
1248 /* If a P6 core, this means "no L2 cache".
1249 If a P4 core, this means "no L3 cache".
1250 We don't know what core it is, so don't issue a warning. To detect
1251 a missing L2 cache, we use 'L2_found'. */
1252 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001253 break;
1254
njn25e49d8e72002-09-23 09:36:25 +00001255 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1256 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1257 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1258 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1259 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001260
1261 /* These are sectored, whatever that means */
1262 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1263 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1264 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1265
1266 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1267 * conversion to byte size is a total guess; treat the 12K and 16K
1268 * cases the same since the cache byte size must be a power of two for
1269 * everything to work!. Also guessing 32 bytes for the line size...
1270 */
1271 case 0x70: /* 12K micro-ops, 8-way */
1272 *I1c = (cache_t) { 16, 8, 32 };
1273 micro_ops_warn(12, 16, 32);
1274 break;
1275 case 0x71: /* 16K micro-ops, 8-way */
1276 *I1c = (cache_t) { 16, 8, 32 };
1277 micro_ops_warn(16, 16, 32);
1278 break;
1279 case 0x72: /* 32K micro-ops, 8-way */
1280 *I1c = (cache_t) { 32, 8, 32 };
1281 micro_ops_warn(32, 32, 32);
1282 break;
1283
njn25e49d8e72002-09-23 09:36:25 +00001284 /* These are sectored, whatever that means */
1285 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1286 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1287 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1288 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1289 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001290
njn25e49d8e72002-09-23 09:36:25 +00001291 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1292 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1293 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1294 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1295 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +00001296 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
1297 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001298
1299 default:
1300 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001301 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001302 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001303 break;
1304 }
1305 }
njn25e49d8e72002-09-23 09:36:25 +00001306
1307 if (!L2_found)
1308 VG_(message)(Vg_DebugMsg,
1309 "warning: L2 cache not installed, ignore L2 results.");
1310
njn7cf0bd32002-06-08 13:36:03 +00001311 return 0;
1312}
1313
1314/* AMD method is straightforward, just extract appropriate bits from the
1315 * result registers.
1316 *
1317 * Bits, for D1 and I1:
1318 * 31..24 data L1 cache size in KBs
1319 * 23..16 data L1 cache associativity (FFh=full)
1320 * 15.. 8 data L1 cache lines per tag
1321 * 7.. 0 data L1 cache line size in bytes
1322 *
1323 * Bits, for L2:
1324 * 31..16 unified L2 cache size in KBs
1325 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1326 * 11.. 8 unified L2 cache lines per tag
1327 * 7.. 0 unified L2 cache line size in bytes
1328 *
1329 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1330 * upon this information. (Whatever that means -- njn)
1331 *
njn25e49d8e72002-09-23 09:36:25 +00001332 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1333 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1334 * so we detect that.
1335 *
njn7cf0bd32002-06-08 13:36:03 +00001336 * Returns 0 on success, non-zero on failure.
1337 */
sewardj07133bf2002-06-13 10:25:56 +00001338static
1339Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001340{
sewardj05bcdcb2003-05-18 10:05:38 +00001341 UInt ext_level;
1342 Int dummy, model;
sewardj07133bf2002-06-13 10:25:56 +00001343 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001344
1345 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1346
1347 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1348 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001349 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1350 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001351 return -1;
1352 }
1353
1354 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1355 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1356
njn25e49d8e72002-09-23 09:36:25 +00001357 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1358 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1359
1360 /* Check for Duron bug */
1361 if (model == 0x630) {
1362 VG_(message)(Vg_UserMsg,
1363 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1364 L2i = (64 << 16) | (L2i & 0xffff);
1365 }
1366
njn7cf0bd32002-06-08 13:36:03 +00001367 D1c->size = (D1i >> 24) & 0xff;
1368 D1c->assoc = (D1i >> 16) & 0xff;
1369 D1c->line_size = (D1i >> 0) & 0xff;
1370
1371 I1c->size = (I1i >> 24) & 0xff;
1372 I1c->assoc = (I1i >> 16) & 0xff;
1373 I1c->line_size = (I1i >> 0) & 0xff;
1374
1375 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1376 L2c->assoc = (L2i >> 12) & 0xf;
1377 L2c->line_size = (L2i >> 0) & 0xff;
1378
1379 return 0;
1380}
1381
/* Jump buffer shared between get_caches_from_CPUID(), which arms it with
 * __builtin_setjmp(), and the SIGILL handler below. */
static jmp_buf cpuid_jmpbuf;

/* Signal handler installed for SIGILL while CPUID is being probed: it jumps
 * straight back to the point where cpuid_jmpbuf was set up.  'signum' is
 * not used. */
static void cpuid_SIGILL_handler(int signum)
{
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1389
1390static
sewardj07133bf2002-06-13 10:25:56 +00001391Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001392{
sewardj07133bf2002-06-13 10:25:56 +00001393 Int level, res, ret;
1394 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001395 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001396
1397 /* Install own SIGILL handler */
1398 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1399 sigill_new.ksa_flags = 0;
1400 sigill_new.ksa_restorer = NULL;
1401 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001402 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001403
1404 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001405 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001406
1407 /* Trap for illegal instruction, in case it's a really old processor that
1408 * doesn't support CPUID. */
1409 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1410 cpuid(0, &level, (int*)&vendor_id[0],
1411 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1412 vendor_id[12] = '\0';
1413
1414 /* Restore old SIGILL handler */
1415 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001416 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001417
1418 } else {
1419 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1420
1421 /* Restore old SIGILL handler */
1422 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001423 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001424 return -1;
1425 }
1426
1427 if (0 == level) {
1428 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1429 return -1;
1430 }
1431
1432 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1433 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1434 ret = Intel_cache_info(level, I1c, D1c, L2c);
1435
1436 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1437 ret = AMD_cache_info(I1c, D1c, L2c);
1438
sewardj97b7b262003-10-07 00:18:16 +00001439 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
1440 /* Total kludge. Pretend to be a VIA Nehemiah. */
1441 D1c->size = 64;
1442 D1c->assoc = 16;
1443 D1c->line_size = 16;
1444 I1c->size = 64;
1445 I1c->assoc = 4;
1446 I1c->line_size = 16;
1447 L2c->size = 64;
1448 L2c->assoc = 16;
1449 L2c->line_size = 16;
1450 ret = 0;
1451
njn7cf0bd32002-06-08 13:36:03 +00001452 } else {
1453 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1454 vendor_id);
1455 return -1;
1456 }
1457
1458 /* Successful! Convert sizes from KB to bytes */
1459 I1c->size *= 1024;
1460 D1c->size *= 1024;
1461 L2c->size *= 1024;
1462
1463 return ret;
1464}
1465
1466/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001467static
1468void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001469{
1470 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001471 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001472 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001473 "warning: %s size of %dB not a power of two; "
1474 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001475 cache->size = dflt->size;
1476 }
1477
sewardj07133bf2002-06-13 10:25:56 +00001478 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001479 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001480 "warning: %s associativity of %d not a power of two; "
1481 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001482 cache->assoc = dflt->assoc;
1483 }
1484
sewardj07133bf2002-06-13 10:25:56 +00001485 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001486 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001487 "warning: %s line size of %dB not a power of two; "
1488 "defaulting to %dB",
1489 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001490 cache->line_size = dflt->line_size;
1491 }
1492
1493 /* Then check line size >= 16 -- any smaller and a single instruction could
1494 * straddle three cache lines, which breaks a simulation assertion and is
1495 * stupid anyway. */
1496 if (cache->line_size < MIN_LINE_SIZE) {
1497 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001498 "warning: %s line size of %dB too small; "
1499 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001500 cache->line_size = MIN_LINE_SIZE;
1501 }
1502
1503 /* Then check cache size > line size (causes seg faults if not). */
1504 if (cache->size <= cache->line_size) {
1505 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001506 "warning: %s cache size of %dB <= line size of %dB; "
1507 "increasing to %dB", name, cache->size, cache->line_size,
1508 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001509 cache->size = cache->line_size * 2;
1510 }
1511
1512 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1513 if (cache->assoc > (cache->size / cache->line_size)) {
1514 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001515 "warning: %s associativity > (size / line size); "
1516 "increasing size to %dB",
1517 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001518 cache->size = cache->assoc * cache->line_size;
1519 }
1520}
1521
1522/* On entry, args are undefined. Fill them with any info from the
1523 * command-line, then fill in any remaining with CPUID instruction if possible,
1524 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001525static
1526void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001527{
1528 /* Defaults are for a model 3 or 4 Athlon */
1529 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1530 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1531 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1532
njn25e49d8e72002-09-23 09:36:25 +00001533#define CMD_LINE_DEFINED(L) \
1534 (-1 != clo_##L##_cache.size || \
1535 -1 != clo_##L##_cache.assoc || \
1536 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001537
njn25e49d8e72002-09-23 09:36:25 +00001538 *I1c = clo_I1_cache;
1539 *D1c = clo_D1_cache;
1540 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001541
njn7cf0bd32002-06-08 13:36:03 +00001542 /* If any undefined on command-line, try CPUID */
1543 if (! CMD_LINE_DEFINED(I1) ||
1544 ! CMD_LINE_DEFINED(D1) ||
1545 ! CMD_LINE_DEFINED(L2)) {
1546
1547 /* Overwrite CPUID result for any cache defined on command-line */
1548 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1549
njn25e49d8e72002-09-23 09:36:25 +00001550 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1551 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1552 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001553
1554 /* CPUID failed, use defaults for each undefined by command-line */
1555 } else {
1556 VG_(message)(Vg_DebugMsg,
1557 "Couldn't detect cache configuration, using one "
1558 "or more defaults ");
1559
njn25e49d8e72002-09-23 09:36:25 +00001560 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1561 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1562 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001563 }
1564 }
1565#undef CMD_LINE_DEFINED
1566
1567 check_cache(I1c, &I1_dflt, "I1");
1568 check_cache(D1c, &D1_dflt, "D1");
1569 check_cache(L2c, &L2_dflt, "L2");
1570
1571 if (VG_(clo_verbosity) > 1) {
1572 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1573 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1574 I1c->size, I1c->assoc, I1c->line_size);
1575 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1576 D1c->size, D1c->assoc, D1c->line_size);
1577 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1578 L2c->size, L2c->assoc, L2c->line_size);
1579 }
1580}
1581
njn4f9c9342002-04-29 16:03:24 +00001582/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001583/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001584/*------------------------------------------------------------*/
1585
njn4f9c9342002-04-29 16:03:24 +00001586static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1587 Char *first_instr_fn)
1588{
1589 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001590 Char buf[BUF_LEN], curr_file[BUF_LEN],
1591 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001592 UInt line_num;
1593
1594 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1595
njne0ee0712002-05-03 16:41:05 +00001596 /* Mark start of basic block in output, just to ease debugging */
1597 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001598
1599 VG_(strcpy)(curr_file, first_instr_fl);
1600
1601 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1602
1603 /* We pretend the CC is an iCC for getting the tag. This is ok
1604 * because both CC types have tag as their first byte. Once we know
1605 * the type, we can cast and act appropriately. */
1606
1607 Char fl_buf[FILENAME_LEN];
1608 Char fn_buf[FN_NAME_LEN];
1609
njne0ee0712002-05-03 16:41:05 +00001610 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001611 switch ( ((iCC*)BBCC_ptr)->tag ) {
1612
njn25e49d8e72002-09-23 09:36:25 +00001613 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001614 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1615 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001616 ADD_CC_TO(iCC, I, Ir_total);
1617 BBCC_ptr += sizeof(iCC);
1618 break;
1619
njn25e49d8e72002-09-23 09:36:25 +00001620 case ReadCC:
1621 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001622 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1623 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001624 ADD_CC_TO(idCC, I, Ir_total);
1625 ADD_CC_TO(idCC, D, Dr_total);
1626 BBCC_ptr += sizeof(idCC);
1627 break;
1628
njn25e49d8e72002-09-23 09:36:25 +00001629 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001630 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1631 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001632 ADD_CC_TO(idCC, I, Ir_total);
1633 ADD_CC_TO(idCC, D, Dw_total);
1634 BBCC_ptr += sizeof(idCC);
1635 break;
1636
njn25e49d8e72002-09-23 09:36:25 +00001637 case ReadWriteCC:
1638 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1639 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1640 ADD_CC_TO(iddCC, I, Ir_total);
1641 ADD_CC_TO(iddCC, Da, Dr_total);
1642 ADD_CC_TO(iddCC, Db, Dw_total);
1643 BBCC_ptr += sizeof(iddCC);
1644 break;
1645
njn4f9c9342002-04-29 16:03:24 +00001646 default:
njne427a662002-10-02 11:08:25 +00001647 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001648 break;
1649 }
1650 distinct_instrs++;
1651
njne0ee0712002-05-03 16:41:05 +00001652 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1653
1654 /* Allow for filename switching in the middle of a BB; if this happens,
1655 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001656 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001657 VG_(strcpy)(curr_file, fl_buf);
1658 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1659 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1660 }
1661
njn4f9c9342002-04-29 16:03:24 +00001662 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001663 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001664 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001665 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001666 VG_(printf)(" filenames: BB:%s, instr:%s;"
1667 " fn_names: BB:%s, instr:%s;"
1668 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001669 first_instr_fl, fl_buf,
1670 first_instr_fn, fn_buf,
1671 line_num);
1672 }
1673
njne0ee0712002-05-03 16:41:05 +00001674 VG_(sprintf)(lbuf, "%u ", line_num);
1675 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1676 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001677 }
1678 /* If we switched filenames in the middle of the BB without switching back,
1679 * switch back now because the subsequent BB may be relying on falling under
1680 * the original file name. */
1681 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1682 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1683 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1684 }
njne0ee0712002-05-03 16:41:05 +00001685
1686 /* Mark end of basic block */
1687 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001688
njne427a662002-10-02 11:08:25 +00001689 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001690}
1691
njn25e49d8e72002-09-23 09:36:25 +00001692static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001693{
1694 Int fd;
1695 Char buf[BUF_LEN];
1696 file_node *curr_file_node;
1697 fn_node *curr_fn_node;
1698 BBCC *curr_BBCC;
1699 Int i,j,k;
1700
njn25e49d8e72002-09-23 09:36:25 +00001701 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001702
njndb918dd2003-07-22 20:45:11 +00001703 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001704 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001705 if (fd < 0) {
sewardj0744b6c2002-12-11 00:45:42 +00001706 /* If the file can't be opened for whatever reason (conflict
1707 between multiple cachegrinded processes?), give up now. */
1708 file_err();
1709 return;
1710 }
njn4f9c9342002-04-29 16:03:24 +00001711
1712 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001713 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1714 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1715 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1716 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1717 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1718 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001719
1720 /* "cmd:" line */
1721 VG_(strcpy)(buf, "cmd:");
1722 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001723 for (i = 0; i < VG_(client_argc); i++) {
1724 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001725 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1726 }
1727 /* "events:" line */
1728 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1729 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1730
1731 /* Six loops here: three for the hash table arrays, and three for the
1732 * chains hanging off the hash table arrays. */
1733 for (i = 0; i < N_FILE_ENTRIES; i++) {
1734 curr_file_node = BBCC_table[i];
1735 while (curr_file_node != NULL) {
1736 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1737 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1738
1739 for (j = 0; j < N_FN_ENTRIES; j++) {
1740 curr_fn_node = curr_file_node->fns[j];
1741 while (curr_fn_node != NULL) {
1742 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1743 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1744
1745 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1746 curr_BBCC = curr_fn_node->BBCCs[k];
1747 while (curr_BBCC != NULL) {
1748 fprint_BBCC(fd, curr_BBCC,
1749
1750 curr_file_node->filename,
1751 curr_fn_node->fn_name);
1752
1753 curr_BBCC = curr_BBCC->next;
1754 }
1755 }
1756 curr_fn_node = curr_fn_node->next;
1757 }
1758 }
1759 curr_file_node = curr_file_node->next;
1760 }
1761 }
1762
njn4294fd42002-06-05 14:41:10 +00001763 /* Print stats from any discarded basic blocks */
1764 if (0 != Ir_discards.a) {
1765
1766 VG_(sprintf)(buf, "fl=(discarded)\n");
1767 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1768 VG_(sprintf)(buf, "fn=(discarded)\n");
1769 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1770
1771 /* Use 0 as line number */
1772 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1773 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1774 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1775 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1776 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1777
1778 Ir_total.a += Ir_discards.a;
1779 Ir_total.m1 += Ir_discards.m1;
1780 Ir_total.m2 += Ir_discards.m2;
1781 Dr_total.a += Dr_discards.a;
1782 Dr_total.m1 += Dr_discards.m1;
1783 Dr_total.m2 += Dr_discards.m2;
1784 Dw_total.a += Dw_discards.a;
1785 Dw_total.m1 += Dw_discards.m1;
1786 Dw_total.m2 += Dw_discards.m2;
1787 }
1788
njn4f9c9342002-04-29 16:03:24 +00001789 /* Summary stats must come after rest of table, since we calculate them
1790 * during traversal. */
1791 VG_(sprintf)(buf, "summary: "
1792 "%llu %llu %llu "
1793 "%llu %llu %llu "
1794 "%llu %llu %llu\n",
1795 Ir_total.a, Ir_total.m1, Ir_total.m2,
1796 Dr_total.a, Dr_total.m1, Dr_total.m2,
1797 Dw_total.a, Dw_total.m1, Dw_total.m2);
1798 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1799 VG_(close)(fd);
1800}
1801
njn607adfc2003-09-30 14:15:44 +00001802static UInt ULong_width(ULong n)
njn4f9c9342002-04-29 16:03:24 +00001803{
njn607adfc2003-09-30 14:15:44 +00001804 UInt w = 0;
1805 while (n > 0) {
1806 n = n / 10;
1807 w++;
njn4f9c9342002-04-29 16:03:24 +00001808 }
njn607adfc2003-09-30 14:15:44 +00001809 return w + (w-1)/3; // add space for commas
njn4f9c9342002-04-29 16:03:24 +00001810}
1811
sewardj4f29ddf2002-05-03 22:29:04 +00001812static
daywalker8ad1a402003-09-18 01:15:32 +00001813void percentify(Int n, Int ex, Int field_width, char buf[])
njn4f9c9342002-04-29 16:03:24 +00001814{
1815 int i, len, space;
1816
daywalker8ad1a402003-09-18 01:15:32 +00001817 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
njn4f9c9342002-04-29 16:03:24 +00001818 len = VG_(strlen)(buf);
1819 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001820 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001821 i = len;
1822
1823 /* Right justify in field */
1824 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1825 for (i = 0; i < space; i++) buf[i] = ' ';
1826}
1827
njn7d9f94d2003-04-22 21:41:40 +00001828void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001829{
njn607adfc2003-09-30 14:15:44 +00001830 static char buf1[RESULTS_BUF_LEN],
1831 buf2[RESULTS_BUF_LEN],
1832 buf3[RESULTS_BUF_LEN],
1833 fmt [RESULTS_BUF_LEN];
1834
njn4f9c9342002-04-29 16:03:24 +00001835 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001836 ULong L2_total_m, L2_total_mr, L2_total_mw,
1837 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001838 Int l1, l2, l3;
1839 Int p;
1840
njn25e49d8e72002-09-23 09:36:25 +00001841 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001842
njn7cf0bd32002-06-08 13:36:03 +00001843 if (VG_(clo_verbosity) == 0)
1844 return;
1845
njn4f9c9342002-04-29 16:03:24 +00001846 /* I cache results. Use the I_refs value to determine the first column
1847 * width. */
njn607adfc2003-09-30 14:15:44 +00001848 l1 = ULong_width(Ir_total.a);
1849 l2 = ULong_width(Dr_total.a);
1850 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001851
njn607adfc2003-09-30 14:15:44 +00001852 /* Make format string, getting width right for numbers */
1853 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1854
1855 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1856 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1857 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001858
1859 p = 100;
1860
njn25e49d8e72002-09-23 09:36:25 +00001861 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001862 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1863 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1864
1865 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1866 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1867 VG_(message)(Vg_UserMsg, "");
1868
1869 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1870 * width of columns 2 & 3. */
1871 D_total.a = Dr_total.a + Dw_total.a;
1872 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1873 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1874
njn607adfc2003-09-30 14:15:44 +00001875 /* Make format string, getting width right for numbers */
1876 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001877
njn607adfc2003-09-30 14:15:44 +00001878 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1879 D_total.a, Dr_total.a, Dw_total.a);
1880 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1881 D_total.m1, Dr_total.m1, Dw_total.m1);
1882 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1883 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001884
1885 p = 10;
1886
njn25e49d8e72002-09-23 09:36:25 +00001887 if (0 == D_total.a) D_total.a = 1;
1888 if (0 == Dr_total.a) Dr_total.a = 1;
1889 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001890 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1891 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1892 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1893 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1894
1895 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1896 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1897 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1898 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1899 VG_(message)(Vg_UserMsg, "");
1900
1901 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001902
1903 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1904 L2_total_r = Dr_total.m1 + Ir_total.m1;
1905 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001906 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1907 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001908
njn4f9c9342002-04-29 16:03:24 +00001909 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1910 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1911 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001912 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1913 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001914
1915 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1916 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1917 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1918 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1919
1920
1921 /* Hash table stats */
1922 if (VG_(clo_verbosity) > 1) {
1923 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1924 file_line_debug_BBs + no_debug_BBs;
1925
1926 VG_(message)(Vg_DebugMsg, "");
1927 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1928 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1929 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1930 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1931 full_debug_BBs * 100 / BB_lookups,
1932 full_debug_BBs);
1933 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1934 file_line_debug_BBs * 100 / BB_lookups,
1935 file_line_debug_BBs);
1936 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1937 fn_name_debug_BBs * 100 / BB_lookups,
1938 fn_name_debug_BBs);
1939 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1940 no_debug_BBs * 100 / BB_lookups,
1941 no_debug_BBs);
1942 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1943 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1944 }
njn25e49d8e72002-09-23 09:36:25 +00001945 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001946}
1947
sewardj18d75132002-05-16 11:06:21 +00001948
njn4294fd42002-06-05 14:41:10 +00001949/* Called when a translation is invalidated due to self-modifying code or
1950 * unloaded of a shared object.
1951 *
1952 * Finds the BBCC in the table, removes it, adds the counts to the discard
1953 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001954void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001955{
njn4294fd42002-06-05 14:41:10 +00001956 BBCC *BBCC_node;
1957 Addr BBCC_ptr0, BBCC_ptr;
1958 Bool BB_seen_before;
1959
sewardj83205b32002-06-14 11:08:07 +00001960 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001961 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001962
1963 /* 2nd arg won't be used since BB should have been seen before (assertions
1964 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001965 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001966 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1967
njne427a662002-10-02 11:08:25 +00001968 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001969
1970 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1971
1972 /* We pretend the CC is an iCC for getting the tag. This is ok
1973 * because both CC types have tag as their first byte. Once we know
1974 * the type, we can cast and act appropriately. */
1975
1976 switch ( ((iCC*)BBCC_ptr)->tag ) {
1977
njn25e49d8e72002-09-23 09:36:25 +00001978 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001979 ADD_CC_TO(iCC, I, Ir_discards);
1980 BBCC_ptr += sizeof(iCC);
1981 break;
1982
njn25e49d8e72002-09-23 09:36:25 +00001983 case ReadCC:
1984 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001985 ADD_CC_TO(idCC, I, Ir_discards);
1986 ADD_CC_TO(idCC, D, Dr_discards);
1987 BBCC_ptr += sizeof(idCC);
1988 break;
1989
njn25e49d8e72002-09-23 09:36:25 +00001990 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001991 ADD_CC_TO(idCC, I, Ir_discards);
1992 ADD_CC_TO(idCC, D, Dw_discards);
1993 BBCC_ptr += sizeof(idCC);
1994 break;
1995
njn25e49d8e72002-09-23 09:36:25 +00001996 case ReadWriteCC:
1997 ADD_CC_TO(iddCC, I, Ir_discards);
1998 ADD_CC_TO(iddCC, Da, Dr_discards);
1999 ADD_CC_TO(iddCC, Db, Dw_discards);
2000 BBCC_ptr += sizeof(iddCC);
2001 break;
2002
njn4294fd42002-06-05 14:41:10 +00002003 default:
njne427a662002-10-02 11:08:25 +00002004 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00002005 break;
2006 }
2007 }
njn25e49d8e72002-09-23 09:36:25 +00002008 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00002009}
2010
2011/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00002012/*--- Command line processing ---*/
2013/*--------------------------------------------------------------------*/
2014
2015static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
2016{
2017 int i1, i2, i3;
2018 int i;
2019 char *opt = VG_(strdup)(orig_opt);
2020
2021 i = i1 = opt_len;
2022
2023 /* Option looks like "--I1=65536,2,64".
2024 * Find commas, replace with NULs to make three independent
2025 * strings, then extract numbers. Yuck. */
2026 while (VG_(isdigit)(opt[i])) i++;
2027 if (',' == opt[i]) {
2028 opt[i++] = '\0';
2029 i2 = i;
2030 } else goto bad;
2031 while (VG_(isdigit)(opt[i])) i++;
2032 if (',' == opt[i]) {
2033 opt[i++] = '\0';
2034 i3 = i;
2035 } else goto bad;
2036 while (VG_(isdigit)(opt[i])) i++;
2037 if ('\0' != opt[i]) goto bad;
2038
2039 cache->size = (Int)VG_(atoll)(opt + i1);
2040 cache->assoc = (Int)VG_(atoll)(opt + i2);
2041 cache->line_size = (Int)VG_(atoll)(opt + i3);
2042
2043 VG_(free)(opt);
2044
2045 return;
2046
2047 bad:
2048 VG_(bad_option)(orig_opt);
2049}
2050
2051Bool SK_(process_cmd_line_option)(Char* arg)
2052{
2053 /* 5 is length of "--I1=" */
njn39c86652003-05-21 10:13:39 +00002054 if (VG_CLO_STREQN(5, arg, "--I1="))
njn25e49d8e72002-09-23 09:36:25 +00002055 parse_cache_opt(&clo_I1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00002056 else if (VG_CLO_STREQN(5, arg, "--D1="))
njn25e49d8e72002-09-23 09:36:25 +00002057 parse_cache_opt(&clo_D1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00002058 else if (VG_CLO_STREQN(5, arg, "--L2="))
njn25e49d8e72002-09-23 09:36:25 +00002059 parse_cache_opt(&clo_L2_cache, arg, 5);
2060 else
2061 return False;
2062
2063 return True;
2064}
2065
njn3e884182003-04-15 13:03:23 +00002066void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00002067{
njn3e884182003-04-15 13:03:23 +00002068 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002069" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
2070" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00002071" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
2072 );
2073}
2074
2075void SK_(print_debug_usage)(void)
2076{
2077 VG_(printf)(
2078" (none)\n"
2079 );
njn25e49d8e72002-09-23 09:36:25 +00002080}
2081
2082/*--------------------------------------------------------------------*/
2083/*--- Setup ---*/
2084/*--------------------------------------------------------------------*/
2085
njn810086f2002-11-14 12:42:47 +00002086void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00002087{
njn13f02932003-04-30 20:23:58 +00002088 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00002089
njn810086f2002-11-14 12:42:47 +00002090 VG_(details_name) ("Cachegrind");
2091 VG_(details_version) (NULL);
2092 VG_(details_description) ("an I1/D1/L2 cache profiler");
2093 VG_(details_copyright_author)(
nethercotebb1c9912004-01-04 16:43:23 +00002094 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote.");
nethercote421281e2003-11-20 16:20:55 +00002095 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00002096 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00002097
njn810086f2002-11-14 12:42:47 +00002098 VG_(needs_basic_block_discards)();
2099 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00002100
2101 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
2102 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
2103 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
2104 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
2105 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
2106 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00002107
njn99ccf082003-09-30 13:51:23 +00002108 /* Get working directory */
2109 sk_assert( VG_(getcwd_alloc)(&base_dir) );
2110
njn13f02932003-04-30 20:23:58 +00002111 /* Block is big enough for dir name + cachegrind.out.<pid> */
2112 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
2113 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
2114 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00002115 VG_(free)(base_dir);
njn25e49d8e72002-09-23 09:36:25 +00002116}
2117
2118void SK_(post_clo_init)(void)
2119{
2120 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00002121
2122 initCC(&Ir_total);
2123 initCC(&Dr_total);
2124 initCC(&Dw_total);
2125
2126 initCC(&Ir_discards);
2127 initCC(&Dr_discards);
2128 initCC(&Dw_discards);
2129
2130 get_caches(&I1c, &D1c, &L2c);
2131
2132 cachesim_I1_initcache(I1c);
2133 cachesim_D1_initcache(D1c);
2134 cachesim_L2_initcache(L2c);
2135
2136 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
2137 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
2138 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
2139
2140 init_BBCC_table();
2141}
2142
fitzhardinge98abfc72003-12-16 02:05:15 +00002143VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
2144
#if 0
/* Disabled sanity checks, kept for reference only;  none of this is
 * compiled. */

/* The cheap check has nothing to verify. */
Bool SK_(cheap_sanity_check)(void) { return True; }

extern TTEntry* vg_tt;

/* Look up the BBCC of every entry in vg_tt whose orig_addr is neither 1
 * nor 3, printing a dot for each one. */
Bool SK_(expensive_sanity_check)(void)
{
   Int  entry;
   Bool seen_before;

   for (entry = 0; entry < 200191; entry++) {
      if (vg_tt[entry].orig_addr == (Addr)1 ||
          vg_tt[entry].orig_addr == (Addr)3)
         continue;

      VG_(printf)(".");
      get_BBCC(vg_tt[entry].orig_addr, NULL, /*remove=*/True, &seen_before);
   }
   return True;
}
#endif
2164
2165/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002166/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002167/*--------------------------------------------------------------------*/