
/*--------------------------------------------------------------------*/
/*--- Cachegrind: cache detection; instrumentation, recording and  ---*/
/*--- results printing.                                            ---*/
/*---                                                    cg_main.c ---*/
/*--------------------------------------------------------------------*/
7
/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2002-2004 Nicholas Nethercote
      njn25@cam.ac.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
/* For cache simulation: the parameters describing one cache. */
typedef struct {
   int size;        /* bytes */
   int assoc;
   int line_size;   /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000042
nethercote27fc1da2004-01-04 16:56:57 +000043#include "cg_sim.c"
njn4f9c9342002-04-29 16:03:24 +000044
/*------------------------------------------------------------*/
/*--- Constants                                            ---*/
/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000048
/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
#define MAX_x86_INSTR_SIZE              16

#define MIN_LINE_SIZE                   16

/* Size of various buffers used for storing strings */
#define FILENAME_LEN                    256
#define FN_NAME_LEN                     256
#define BUF_LEN                         512
#define COMMIFY_BUF_LEN                 128
#define RESULTS_BUF_LEN                 128
#define LINE_BUF_LEN                     64
61
/*------------------------------------------------------------*/
/*--- Profiling events                                     ---*/
/*------------------------------------------------------------*/
65
njn25e49d8e72002-09-23 09:36:25 +000066typedef
67 enum {
68 VgpGetBBCC = VgpFini+1,
69 VgpCacheSimulate,
70 VgpCacheResults
71 }
nethercote7cc9c232004-01-21 15:08:04 +000072 VgpToolCC;
sewardj07133bf2002-06-13 10:25:56 +000073
/*------------------------------------------------------------*/
/*--- Output file related stuff                            ---*/
/*------------------------------------------------------------*/
77
njn13f02932003-04-30 20:23:58 +000078static Char* cachegrind_out_file;
njn4f9c9342002-04-29 16:03:24 +000079
sewardj0744b6c2002-12-11 00:45:42 +000080static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000081{
82 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000083 "error: can't open cache simulation output file `%s'",
84 cachegrind_out_file );
85 VG_(message)(Vg_UserMsg,
86 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000087}
88
/*------------------------------------------------------------*/
/*--- Cost center types, operations                        ---*/
/*------------------------------------------------------------*/
92
93typedef struct _CC CC;
94struct _CC {
95 ULong a;
96 ULong m1;
97 ULong m2;
98};
99
100static __inline__ void initCC(CC* cc) {
101 cc->a = 0;
102 cc->m1 = 0;
103 cc->m2 = 0;
104}
105
/* Tag values stored in the first byte of every instruction-level cost
 * centre; they identify which struct layout the cost centre uses. */
typedef
   enum {
      InstrCC,         /* eg. mov %eax,   %ebx                      */
      ReadCC,          /* eg. mov (%ecx), %esi                      */
      WriteCC,         /* eg. mov %eax,   (%edx)                    */
      ModCC,           /* eg. incl (%eax) (read+write one addr)     */
      ReadWriteCC      /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                          (read+write two different addrs)          */
   } CC_type;
njn4f9c9342002-04-29 16:03:24 +0000115
/* Instruction-level cost-centres.
 *
 * WARNING: the 'tag' field *must* be the first byte of every CC type
 * (iCC, idCC and iddCC).
 *
 * This is because we use it to work out what kind of CC we're dealing with.
 */
njn25e49d8e72002-09-23 09:36:25 +0000122typedef
123 struct {
124 /* word 1 */
125 UChar tag;
126 UChar instr_size;
127 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000128
njn25e49d8e72002-09-23 09:36:25 +0000129 /* words 2+ */
130 Addr instr_addr;
131 CC I;
132 }
133 iCC;
njn4f9c9342002-04-29 16:03:24 +0000134
njn25e49d8e72002-09-23 09:36:25 +0000135typedef
136 struct _idCC {
137 /* word 1 */
138 UChar tag;
139 UChar instr_size;
140 UChar data_size;
141 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000142
njn25e49d8e72002-09-23 09:36:25 +0000143 /* words 2+ */
144 Addr instr_addr;
145 CC I;
146 CC D;
147 }
148 idCC;
149
150typedef
151 struct _iddCC {
152 /* word 1 */
153 UChar tag;
154 UChar instr_size;
155 UChar data_size;
156 /* 1 byte padding */
157
158 /* words 2+ */
159 Addr instr_addr;
160 CC I;
161 CC Da;
162 CC Db;
163 }
164 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000165
166static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
167{
njn25e49d8e72002-09-23 09:36:25 +0000168 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000169 cc->instr_size = instr_size;
170 cc->instr_addr = instr_addr;
171 initCC(&cc->I);
172}
173
174static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
175 UInt instr_size, UInt data_size)
176{
177 cc->tag = X_CC;
178 cc->instr_size = instr_size;
179 cc->data_size = data_size;
180 cc->instr_addr = instr_addr;
181 initCC(&cc->I);
182 initCC(&cc->D);
183}
184
njn25e49d8e72002-09-23 09:36:25 +0000185static void init_iddCC(iddCC* cc, Addr instr_addr,
186 UInt instr_size, UInt data_size)
187{
188 cc->tag = ReadWriteCC;
189 cc->instr_size = instr_size;
190 cc->data_size = data_size;
191 cc->instr_addr = instr_addr;
192 initCC(&cc->I);
193 initCC(&cc->Da);
194 initCC(&cc->Db);
195}
196
/* Add the counters of member `cc` of the cost centre at BBCC_ptr (viewed as
 * a `CC_type`) into `total`.
 *
 * NOTE: relies on a variable named BBCC_ptr being in scope at the point of
 * use, and expands to three separate statements -- do not use it as the
 * unbraced body of an if/else/loop. */
#define ADD_CC_TO(CC_type, cc, total)                                  \
   (total).a  += ((CC_type*)BBCC_ptr)->cc.a;                           \
   (total).m1 += ((CC_type*)BBCC_ptr)->cc.m1;                          \
   (total).m2 += ((CC_type*)BBCC_ptr)->cc.m2;
201
/* If 1, address of each instruction is printed as a comment after its counts
 * in cachegrind.out */
#define PRINT_INSTR_ADDRS 0
205
njne0ee0712002-05-03 16:41:05 +0000206static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000207{
njn95114da2002-06-05 09:39:31 +0000208#if PRINT_INSTR_ADDRS
209 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
210 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
211#else
njne0ee0712002-05-03 16:41:05 +0000212 VG_(sprintf)(buf, "%llu %llu %llu\n",
213 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000214#endif
njn4f9c9342002-04-29 16:03:24 +0000215}
216
njne0ee0712002-05-03 16:41:05 +0000217static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000218{
njn95114da2002-06-05 09:39:31 +0000219#if PRINT_INSTR_ADDRS
220 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
221 cc->I.a, cc->I.m1, cc->I.m2,
222 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
223#else
njne0ee0712002-05-03 16:41:05 +0000224 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
225 cc->I.a, cc->I.m1, cc->I.m2,
226 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000227#endif
njn4f9c9342002-04-29 16:03:24 +0000228}
229
njne0ee0712002-05-03 16:41:05 +0000230static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000231{
njn95114da2002-06-05 09:39:31 +0000232#if PRINT_INSTR_ADDRS
233 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
234 cc->I.a, cc->I.m1, cc->I.m2,
235 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
236#else
njne0ee0712002-05-03 16:41:05 +0000237 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
238 cc->I.a, cc->I.m1, cc->I.m2,
239 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000240#endif
njn4f9c9342002-04-29 16:03:24 +0000241}
242
njn25e49d8e72002-09-23 09:36:25 +0000243static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
244{
245#if PRINT_INSTR_ADDRS
246 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
247 cc->I.a, cc->I.m1, cc->I.m2,
248 cc->Da.a, cc->Da.m1, cc->Da.m2,
249 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
250#else
251 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
252 cc->I.a, cc->I.m1, cc->I.m2,
253 cc->Da.a, cc->Da.m1, cc->Da.m2,
254 cc->Db.a, cc->Db.m1, cc->Db.m2);
255#endif
256}
257
258
/*------------------------------------------------------------*/
/*--- BBCC hash table stuff                                ---*/
/*------------------------------------------------------------*/
262
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The sizes below work
 * fairly well for Konqueror. */

#define N_FILE_ENTRIES        251
#define N_FN_ENTRIES           53
#define N_BBCC_ENTRIES         37
270
271/* The cost centres for a basic block are stored in a contiguous array.
272 * They are distinguishable by their tag field. */
273typedef struct _BBCC BBCC;
274struct _BBCC {
275 Addr orig_addr;
276 UInt array_size; /* byte-size of variable length array */
277 BBCC* next;
278 Addr array[0]; /* variable length array */
279};
280
281typedef struct _fn_node fn_node;
282struct _fn_node {
283 Char* fn_name;
284 BBCC* BBCCs[N_BBCC_ENTRIES];
285 fn_node* next;
286};
287
288typedef struct _file_node file_node;
289struct _file_node {
290 Char* filename;
291 fn_node* fns[N_FN_ENTRIES];
292 file_node* next;
293};
294
295/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000296static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000297
sewardj4f29ddf2002-05-03 22:29:04 +0000298static Int distinct_files = 0;
299static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000300
sewardj4f29ddf2002-05-03 22:29:04 +0000301static Int distinct_instrs = 0;
302static Int full_debug_BBs = 0;
303static Int file_line_debug_BBs = 0;
304static Int fn_name_debug_BBs = 0;
305static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000306
sewardj4f29ddf2002-05-03 22:29:04 +0000307static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000308
njn4294fd42002-06-05 14:41:10 +0000309static CC Ir_discards;
310static CC Dr_discards;
311static CC Dw_discards;
312
njn4f9c9342002-04-29 16:03:24 +0000313static void init_BBCC_table()
314{
315 Int i;
316 for (i = 0; i < N_FILE_ENTRIES; i++)
317 BBCC_table[i] = NULL;
318}
319
njne0ee0712002-05-03 16:41:05 +0000320static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
321 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000322{
njn25e49d8e72002-09-23 09:36:25 +0000323 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000324
njn25e49d8e72002-09-23 09:36:25 +0000325 found1 = VG_(get_filename_linenum)(instr_addr, filename,
326 FILENAME_LEN, line_num);
327 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000328
329 if (!found1 && !found2) {
330 no_debug_BBs++;
331 VG_(strcpy)(filename, "???");
332 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000333 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000334
335 } else if ( found1 && found2) {
336 full_debug_BBs++;
337
338 } else if ( found1 && !found2) {
339 file_line_debug_BBs++;
340 VG_(strcpy)(fn_name, "???");
341
342 } else /*(!found1 && found2)*/ {
343 fn_name_debug_BBs++;
344 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000345 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000346 }
347}
348
349/* Forward declaration. */
350static Int compute_BBCC_array_size(UCodeBlock* cb);
351
352static __inline__
353file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
354{
355 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000356 file_node* new = VG_(malloc)(sizeof(file_node));
357 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000358 for (i = 0; i < N_FN_ENTRIES; i++) {
359 new->fns[i] = NULL;
360 }
361 new->next = next;
362 return new;
363}
364
365static __inline__
366fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
367{
368 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000369 fn_node* new = VG_(malloc)(sizeof(fn_node));
370 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000371 for (i = 0; i < N_BBCC_ENTRIES; i++) {
372 new->BBCCs[i] = NULL;
373 }
374 new->next = next;
375 return new;
376}
377
378static __inline__
379BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
380{
381 Int BBCC_array_size = compute_BBCC_array_size(cb);
382 BBCC* new;
383
njn25e49d8e72002-09-23 09:36:25 +0000384 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000385 new->orig_addr = bb_orig_addr;
386 new->array_size = BBCC_array_size;
387 new->next = next;
388
389 return new;
390}
391
392#define HASH_CONSTANT 256
393
394static UInt hash(Char *s, UInt table_size)
395{
396 int hash_value = 0;
397 for ( ; *s; s++)
398 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
399 return hash_value;
400}
401
nethercote09d853e2004-01-21 16:12:55 +0000402/* This is a backup for get_BBCC() when removing BBs from the table.
403 * Necessary because the debug info can change when code is removed. For
404 * example, when inserting, the info might be "myprint.c:myprint()", but
405 * upon removal, the info might be "myprint.c:???", which causes the
406 * hash-lookup to fail (but it doesn't always happen). So we do a horrible,
407 * slow search through all the file nodes and function nodes (but we can do
408 * 3rd stage with the fast hash-lookup). */
409static BBCC* get_BBCC_slow_removal(Addr bb_orig_addr)
410{
411 Int i, j;
412 UInt BBCC_hash;
413 file_node *curr_file_node;
414 fn_node *curr_fn_node;
415 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
416
417 for (i = 0; i < N_FILE_ENTRIES; i++) {
418
419 for (curr_file_node = BBCC_table[i];
420 NULL != curr_file_node;
421 curr_file_node = curr_file_node->next)
422 {
423 for (j = 0; j < N_FN_ENTRIES; j++) {
424
425 for (curr_fn_node = curr_file_node->fns[j];
426 NULL != curr_fn_node;
427 curr_fn_node = curr_fn_node->next)
428 {
429 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
430 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
431 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
432
433 while (NULL != curr_BBCC) {
434 if (bb_orig_addr == curr_BBCC->orig_addr) {
435 // Found it!
436 sk_assert(curr_BBCC->array_size > 0
437 && curr_BBCC->array_size < 1000000);
438 if (VG_(clo_verbosity) > 2) {
439 VG_(message)(Vg_DebugMsg, "did slow BB removal");
440 }
441
442 // Remove curr_BBCC from chain; it will be used and
443 // free'd by the caller.
444 *prev_BBCC_next_ptr = curr_BBCC->next;
445 return curr_BBCC;
446 }
447
448 prev_BBCC_next_ptr = &(curr_BBCC->next);
449 curr_BBCC = curr_BBCC->next;
450 }
451 }
452 }
453 }
454 }
455 VG_(printf)("failing BB address: %p\n", bb_orig_addr);
456 VG_(skin_panic)("slow BB removal failed");
457}
458
njn4f9c9342002-04-29 16:03:24 +0000459/* Do a three step traversal: by filename, then fn_name, then instr_addr.
460 * In all cases prepends new nodes to their chain. Returns a pointer to the
461 * cost centre. Also sets BB_seen_before by reference.
462 */
sewardj56867352003-10-12 10:27:06 +0000463static BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
464 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000465{
466 file_node *curr_file_node;
467 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000468 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000469 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
470 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000471 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000472
njne0ee0712002-05-03 16:41:05 +0000473 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000474
njn25e49d8e72002-09-23 09:36:25 +0000475 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000476 filename_hash = hash(filename, N_FILE_ENTRIES);
477 curr_file_node = BBCC_table[filename_hash];
478 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000479 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000480 curr_file_node = curr_file_node->next;
481 }
482 if (NULL == curr_file_node) {
483 BBCC_table[filename_hash] = curr_file_node =
484 new_file_node(filename, BBCC_table[filename_hash]);
485 distinct_files++;
486 }
487
488 fnname_hash = hash(fn_name, N_FN_ENTRIES);
489 curr_fn_node = curr_file_node->fns[fnname_hash];
490 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000491 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000492 curr_fn_node = curr_fn_node->next;
493 }
494 if (NULL == curr_fn_node) {
495 curr_file_node->fns[fnname_hash] = curr_fn_node =
496 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
497 distinct_fns++;
498 }
499
500 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000501 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000502 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
503 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000504 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000505 curr_BBCC = curr_BBCC->next;
506 }
507 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000508
nethercote09d853e2004-01-21 16:12:55 +0000509 if (remove == False) {
510 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
511 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
512 *BB_seen_before = False;
513 } else {
514 // Ok, BB not found when removing: the debug info must have
515 // changed. Do a slow removal.
516 curr_BBCC = get_BBCC_slow_removal(bb_orig_addr);
517 *BB_seen_before = True;
518 }
njn4f9c9342002-04-29 16:03:24 +0000519
520 } else {
njne427a662002-10-02 11:08:25 +0000521 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
522 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000523 if (VG_(clo_verbosity) > 2) {
524 VG_(message)(Vg_DebugMsg,
nethercote09d853e2004-01-21 16:12:55 +0000525 "BB retranslation/invalidation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000526 }
527 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000528
529 if (True == remove) {
530 // Remove curr_BBCC from chain; it will be used and free'd by the
531 // caller.
532 *prev_BBCC_next_ptr = curr_BBCC->next;
533
534 } else {
535 BB_retranslations++;
536 }
njn4f9c9342002-04-29 16:03:24 +0000537 }
njn25e49d8e72002-09-23 09:36:25 +0000538 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000539 return curr_BBCC;
540}
541
/*------------------------------------------------------------*/
/*--- Cache simulation instrumentation phase               ---*/
/*------------------------------------------------------------*/
545
njn4f9c9342002-04-29 16:03:24 +0000546static Int compute_BBCC_array_size(UCodeBlock* cb)
547{
548 UInstr* u_in;
549 Int i, CC_size, BBCC_size = 0;
550 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000551 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000552
553 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000554 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000555
njn810086f2002-11-14 12:42:47 +0000556 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
557 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000558 switch(u_in->opcode) {
559
560 case INCEIP:
561 goto case_for_end_of_instr;
562
563 case JMP:
564 if (u_in->cond != CondAlways) break;
565
566 goto case_for_end_of_instr;
567
568 case_for_end_of_instr:
569
njn25e49d8e72002-09-23 09:36:25 +0000570 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
571 t_read != t_write)
572 CC_size = sizeof(iddCC);
573 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
574 CC_size = sizeof(idCC);
575 else
576 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000577
578 BBCC_size += CC_size;
579 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
580 break;
581
582 case LOAD:
583 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000584 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000585 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000586 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000587 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000588 is_LOAD = True;
589 break;
590
591 case STORE:
592 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000593 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000594 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000595 is_STORE = True;
596 break;
597
sewardj3949d102003-03-28 17:21:29 +0000598 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000599 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000600 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000601 case FPU_R:
njne427a662002-10-02 11:08:25 +0000602 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000603 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000604 is_FPU_R = True;
605 break;
606
thughes96b466a2004-03-15 16:43:58 +0000607 case MMX2a1_MemRd:
608 sk_assert(u_in->size == 8);
609 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
610 t_read = u_in->val3;
611 is_FPU_R = True;
612 break;
613
njn21f805d2003-08-25 16:15:40 +0000614 case SSE2a_MemRd:
615 case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000616 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000617 t_read = u_in->val3;
618 is_FPU_R = True;
619 break;
620
621 case SSE3a_MemRd:
622 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
623 t_read = u_in->val3;
624 is_FPU_R = True;
625 break;
626
jseward1b58fbc2003-11-04 22:54:28 +0000627 case SSE3a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000628 sk_assert(u_in->size == 8 || u_in->size == 16);
jseward1b58fbc2003-11-04 22:54:28 +0000629 t_read = u_in->val3;
630 is_FPU_R = True;
631 break;
632
njn21f805d2003-08-25 16:15:40 +0000633 case SSE3ag_MemRd_RegWr:
634 sk_assert(u_in->size == 4 || u_in->size == 8);
635 t_read = u_in->val1;
636 is_FPU_R = True;
637 break;
638
sewardj3949d102003-03-28 17:21:29 +0000639 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000640 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000641 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000642 case FPU_W:
njne427a662002-10-02 11:08:25 +0000643 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000644 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000645 is_FPU_W = True;
646 break;
647
njn21f805d2003-08-25 16:15:40 +0000648 case SSE2a_MemWr:
nethercoteb1affa82004-01-19 19:14:18 +0000649 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000650 t_write = u_in->val3;
651 is_FPU_W = True;
652 break;
653
654 case SSE3a_MemWr:
655 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
656 t_write = u_in->val3;
657 is_FPU_W = True;
658 break;
659
njn4f9c9342002-04-29 16:03:24 +0000660 default:
661 break;
662 }
663 }
664
665 return BBCC_size;
666}
667
njn25e49d8e72002-09-23 09:36:25 +0000668static __attribute__ ((regparm (1)))
669void log_1I_0D_cache_access(iCC* cc)
670{
671 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
672 // cc, cc->instr_addr, cc->instr_size)
673 VGP_PUSHCC(VgpCacheSimulate);
674 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
675 cc->I.a++;
676 VGP_POPCC(VgpCacheSimulate);
677}
678
679/* Difference between this function and log_1I_0D_cache_access() is that
680 this one can be passed any kind of CC, not just an iCC. So we have to
681 be careful to make sure we don't make any assumptions about CC layout.
682 (As it stands, they would be safe, but this will avoid potential heartache
683 if anyone else changes CC layout.)
684 Note that we only do the switch for the JIFZ version because if we always
685 called this switching version, things would run about 5% slower. */
686static __attribute__ ((regparm (1)))
687void log_1I_0D_cache_access_JIFZ(iCC* cc)
688{
689 UChar instr_size;
690 Addr instr_addr;
691 CC* I;
692
693 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
694 // cc, cc->instr_addr, cc->instr_size)
695 VGP_PUSHCC(VgpCacheSimulate);
696
697 switch(cc->tag) {
698 case InstrCC:
699 instr_size = cc->instr_size;
700 instr_addr = cc->instr_addr;
701 I = &(cc->I);
702 break;
703 case ReadCC:
704 case WriteCC:
705 case ModCC:
706 instr_size = ((idCC*)cc)->instr_size;
707 instr_addr = ((idCC*)cc)->instr_addr;
708 I = &( ((idCC*)cc)->I );
709 break;
710 case ReadWriteCC:
711 instr_size = ((iddCC*)cc)->instr_size;
712 instr_addr = ((iddCC*)cc)->instr_addr;
713 I = &( ((iddCC*)cc)->I );
714 break;
715 default:
njne427a662002-10-02 11:08:25 +0000716 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000717 break;
718 }
719 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
720 I->a++;
721 VGP_POPCC(VgpCacheSimulate);
722}
723
724__attribute__ ((regparm (2))) static
725void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
726{
727 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
728 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
729 VGP_PUSHCC(VgpCacheSimulate);
730 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
731 cc->D.a++;
732 VGP_POPCC(VgpCacheSimulate);
733}
734
735__attribute__ ((regparm (2))) static
736void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
737{
738 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
739 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
740 VGP_PUSHCC(VgpCacheSimulate);
741 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
742 cc->I.a++;
743
744 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
745 cc->D.a++;
746 VGP_POPCC(VgpCacheSimulate);
747}
748
749__attribute__ ((regparm (3))) static
750void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
751{
752 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
753 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
754 VGP_PUSHCC(VgpCacheSimulate);
755 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
756 cc->Da.a++;
757 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
758 cc->Db.a++;
759 VGP_POPCC(VgpCacheSimulate);
760}
761
762__attribute__ ((regparm (3))) static
763void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
764{
765 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
766 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
767 VGP_PUSHCC(VgpCacheSimulate);
768 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
769 cc->I.a++;
770
771 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
772 cc->Da.a++;
773 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
774 cc->Db.a++;
775 VGP_POPCC(VgpCacheSimulate);
776}
777
778UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
779{
780/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000781#define INVALID_DATA_SIZE 999999
782
njn4f9c9342002-04-29 16:03:24 +0000783 UCodeBlock* cb;
784 Int i;
785 UInstr* u_in;
786 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000787 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
788 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000789 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000790 Addr x86_instr_addr = orig_addr;
791 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
792 Addr helper;
793 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000794 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000795 Bool BB_seen_before = False;
796 Bool instrumented_Jcond = False;
797 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000798 Addr BBCC_ptr0, BBCC_ptr;
799
800 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
801 * if it's the first time it's been seen), and point to start of the
802 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000803 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000804 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
805
njn810086f2002-11-14 12:42:47 +0000806 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000807
njn25e49d8e72002-09-23 09:36:25 +0000808 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
809 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000810
njn810086f2002-11-14 12:42:47 +0000811 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
812 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000813
njn4f9c9342002-04-29 16:03:24 +0000814 /* What this is all about: we want to instrument each x86 instruction
815 * translation. The end of these are marked in three ways. The three
816 * ways, and the way we instrument them, are as follows:
817 *
818 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
819 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
820 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
821 *
njn25e49d8e72002-09-23 09:36:25 +0000822 * The last UInstr in a basic block is always a Juncond. Jconds,
823 * when they appear, are always second last. We check this with
824 * various assertions.
825 *
826 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000827 * executed. We don't have to put the instrumentation before the INCEIP
828 * (it could go after) but we do so for consistency.
829 *
njn25e49d8e72002-09-23 09:36:25 +0000830 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
831 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000832 *
njn25e49d8e72002-09-23 09:36:25 +0000833 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000834 *
835 * The instrumentation is just a call to the appropriate helper function,
836 * passing it the address of the instruction's CC.
837 */
njne427a662002-10-02 11:08:25 +0000838 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000839
840 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000841 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000842 break;
843
njn4f9c9342002-04-29 16:03:24 +0000844 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000845 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000846 */
847 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000848 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000849 t_read_addr = newTemp(cb);
850 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
851 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000852 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000853 break;
854
sewardj3949d102003-03-28 17:21:29 +0000855 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000856 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000857 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000858 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000859 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000860 t_read_addr = newTemp(cb);
861 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000862 data_size = ( u_in->size <= MIN_LINE_SIZE
863 ? u_in->size
864 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000865 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000866 break;
thughes96b466a2004-03-15 16:43:58 +0000867 break;
868
869 case MMX2a1_MemRd:
870 sk_assert(u_in->size == 8);
871 t_read = u_in->val3;
872 t_read_addr = newTemp(cb);
873 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
874 data_size = ( u_in->size <= MIN_LINE_SIZE
875 ? u_in->size
876 : MIN_LINE_SIZE);
877 VG_(copy_UInstr)(cb, u_in);
878 break;
njn4f9c9342002-04-29 16:03:24 +0000879
njn21f805d2003-08-25 16:15:40 +0000880 case SSE2a_MemRd:
881 case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000882 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000883 t_read = u_in->val3;
884 t_read_addr = newTemp(cb);
885 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
jsewardfca60182004-01-04 23:30:55 +0000886 /* 512 B data-sized instructions will be done inaccurately
887 * but they're very rare and this avoids errors from
888 * hitting more than two cache lines in the simulation. */
889 data_size = ( u_in->size <= MIN_LINE_SIZE
890 ? u_in->size
891 : MIN_LINE_SIZE);
njn21f805d2003-08-25 16:15:40 +0000892 VG_(copy_UInstr)(cb, u_in);
893 break;
894
895 case SSE3a_MemRd:
896 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
897 t_read = u_in->val3;
898 t_read_addr = newTemp(cb);
899 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
900 data_size = u_in->size;
901 VG_(copy_UInstr)(cb, u_in);
902 break;
903
jseward1b58fbc2003-11-04 22:54:28 +0000904 case SSE3a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +0000905 sk_assert(u_in->size == 8 || u_in->size == 16);
jseward1b58fbc2003-11-04 22:54:28 +0000906 t_read = u_in->val3;
907 t_read_addr = newTemp(cb);
908 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
909 data_size = u_in->size;
910 VG_(copy_UInstr)(cb, u_in);
911 break;
912
njn21f805d2003-08-25 16:15:40 +0000913 case SSE3ag_MemRd_RegWr:
914 sk_assert(u_in->size == 4 || u_in->size == 8);
915 t_read = u_in->val1;
916 t_read_addr = newTemp(cb);
917 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
918 data_size = u_in->size;
919 VG_(copy_UInstr)(cb, u_in);
920 break;
921
         /* Note that we must set t_write_addr even for mod instructions;
          * that's how the code at instrument_x86_instr (below) determines
          * whether the instruction does a write.  Without it, it would
          * think a mod instruction is a read.
          * As for the MOV, if it's a mod instruction it's redundant, but it's
          * not expensive and mod instructions are rare anyway. */
sewardj3949d102003-03-28 17:21:29 +0000927 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000928 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000929 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000930 case STORE:
931 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000932 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000933 t_write_addr = newTemp(cb);
934 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000935 /* 28 and 108 B data-sized instructions will be done
936 * inaccurately but they're very rare and this avoids errors
937 * from hitting more than two cache lines in the simulation. */
938 data_size = ( u_in->size <= MIN_LINE_SIZE
939 ? u_in->size
940 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000941 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000942 break;
943
njn21f805d2003-08-25 16:15:40 +0000944 case SSE2a_MemWr:
nethercoteb1affa82004-01-19 19:14:18 +0000945 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000946 /* fall through */
947 case SSE3a_MemWr:
jsewardfca60182004-01-04 23:30:55 +0000948 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
njn21f805d2003-08-25 16:15:40 +0000949 t_write = u_in->val3;
950 t_write_addr = newTemp(cb);
951 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
jsewardfca60182004-01-04 23:30:55 +0000952 /* 512 B data-sized instructions will be done inaccurately
953 * but they're very rare and this avoids errors from
954 * hitting more than two cache lines in the simulation. */
955 data_size = ( u_in->size <= MIN_LINE_SIZE
956 ? u_in->size
957 : MIN_LINE_SIZE);
njn21f805d2003-08-25 16:15:40 +0000958 VG_(copy_UInstr)(cb, u_in);
959 break;
njn25e49d8e72002-09-23 09:36:25 +0000960
961 /* For rep-prefixed instructions, log a single I-cache access
962 * before the UCode loop that implements the repeated part, which
963 * is where the multiple D-cache accesses are logged. */
964 case JIFZ:
965 has_rep_prefix = True;
966
967 /* Setup 1st and only arg: CC addr */
968 t_CC_addr = newTemp(cb);
969 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
970 uLiteral(cb, BBCC_ptr);
971
972 /* Call helper */
973 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
974 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000975 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000976 break;
977
978
979 /* INCEIP: insert instrumentation */
980 case INCEIP:
981 x86_instr_size = u_in->val1;
982 goto instrument_x86_instr;
983
984 /* JMP: insert instrumentation if the first JMP */
985 case JMP:
986 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000987 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000988 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000989 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000990 instrumented_Jcond = False; /* reset */
991 break;
992 }
993 /* The first JMP... instrument. */
994 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000995 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000996 instrumented_Jcond = True;
997 } else {
njn810086f2002-11-14 12:42:47 +0000998 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000999 }
1000
1001 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +00001002 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
1003
njn25e49d8e72002-09-23 09:36:25 +00001004 goto instrument_x86_instr;
1005
1006
1007 /* Code executed at the end of each x86 instruction. */
1008 instrument_x86_instr:
1009
1010 /* Initialise the CC in the BBCC array appropriately if it
1011 * hasn't been initialised before. Then call appropriate sim
1012 * function, passing it the CC address. */
1013 stack_used = 0;
1014
njne427a662002-10-02 11:08:25 +00001015 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +00001016 x86_instr_size <= MAX_x86_INSTR_SIZE);
1017
1018#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
1019
1020 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +00001021 sk_assert(INVALID_DATA_SIZE == data_size);
1022 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001023 INVALID_TEMPREG == t_read &&
1024 INVALID_TEMPREG == t_write_addr &&
1025 INVALID_TEMPREG == t_write);
1026 CC_size = sizeof(iCC);
1027 if (!BB_seen_before)
1028 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
1029 helper = ( has_rep_prefix
1030 ? (Addr)0 /* no extra log needed */
1031 : (Addr) & log_1I_0D_cache_access
1032 );
1033 argc = 1;
1034
1035 } else {
njne427a662002-10-02 11:08:25 +00001036 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +00001037 8 == data_size || 10 == data_size ||
1038 MIN_LINE_SIZE == data_size);
1039
1040 if (IS_(read) && !IS_(write)) {
1041 CC_size = sizeof(idCC);
                  /* If it uses 'rep', we've already logged the I-cache
                   * access at the JIFZ UInstr (see JIFZ case above) so
                   * don't do it here */
1045 helper = ( has_rep_prefix
1046 ? (Addr) & log_0I_1D_cache_access
1047 : (Addr) & log_1I_1D_cache_access
1048 );
1049 argc = 2;
1050 if (!BB_seen_before)
1051 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
1052 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +00001053 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001054 INVALID_TEMPREG != t_read &&
1055 INVALID_TEMPREG == t_write_addr &&
1056 INVALID_TEMPREG == t_write);
1057 t_data_addr1 = t_read_addr;
1058
1059 } else if (!IS_(read) && IS_(write)) {
1060 CC_size = sizeof(idCC);
1061 helper = ( has_rep_prefix
1062 ? (Addr) & log_0I_1D_cache_access
1063 : (Addr) & log_1I_1D_cache_access
1064 );
1065 argc = 2;
1066 if (!BB_seen_before)
1067 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
1068 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +00001069 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001070 INVALID_TEMPREG == t_read &&
1071 INVALID_TEMPREG != t_write_addr &&
1072 INVALID_TEMPREG != t_write);
1073 t_data_addr1 = t_write_addr;
1074
1075 } else {
njne427a662002-10-02 11:08:25 +00001076 sk_assert(IS_(read) && IS_(write));
1077 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +00001078 INVALID_TEMPREG != t_read &&
1079 INVALID_TEMPREG != t_write_addr &&
1080 INVALID_TEMPREG != t_write);
1081 if (t_read == t_write) {
1082 CC_size = sizeof(idCC);
1083 helper = ( has_rep_prefix
1084 ? (Addr) & log_0I_1D_cache_access
1085 : (Addr) & log_1I_1D_cache_access
1086 );
1087 argc = 2;
1088 if (!BB_seen_before)
1089 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
1090 x86_instr_size, data_size);
1091 t_data_addr1 = t_read_addr;
1092 } else {
1093 CC_size = sizeof(iddCC);
1094 helper = ( has_rep_prefix
1095 ? (Addr) & log_0I_2D_cache_access
1096 : (Addr) & log_1I_2D_cache_access
1097 );
1098 argc = 3;
1099 if (!BB_seen_before)
1100 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
1101 x86_instr_size, data_size);
1102 t_data_addr1 = t_read_addr;
1103 t_data_addr2 = t_write_addr;
1104 }
1105 }
1106#undef IS_
1107 }
1108
1109 /* Call the helper, if necessary */
1110 if ((Addr)0 != helper) {
1111
1112 /* Setup 1st arg: CC addr */
1113 t_CC_addr = newTemp(cb);
1114 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
1115 uLiteral(cb, BBCC_ptr);
1116
1117 /* Call the helper */
1118 if (1 == argc)
1119 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
1120 else if (2 == argc)
1121 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
1122 TempReg, t_data_addr1);
1123 else if (3 == argc)
1124 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
1125 TempReg, t_data_addr1,
1126 TempReg, t_data_addr2);
1127 else
njne427a662002-10-02 11:08:25 +00001128 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +00001129
1130 uCCall(cb, helper, argc, argc, False);
1131 }
1132
1133 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +00001134 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +00001135
1136 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
1137 BBCC_ptr += CC_size;
1138 x86_instr_addr += x86_instr_size;
1139 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
1140 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
1141 data_size = INVALID_DATA_SIZE;
1142 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +00001143 break;
1144
1145 default:
njn4ba5a792002-09-30 10:23:54 +00001146 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +00001147 break;
1148 }
1149 }
1150
1151 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +00001152 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001153
njn4ba5a792002-09-30 10:23:54 +00001154 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +00001155 return cb;
njn25e49d8e72002-09-23 09:36:25 +00001156
1157#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +00001158}
1159
1160/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001161/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +00001162/*------------------------------------------------------------*/
1163
1164/* Total reads/writes/misses. Calculated during CC traversal at the end. */
1165static CC Ir_total;
1166static CC Dr_total;
1167static CC Dw_total;
1168
njn25e49d8e72002-09-23 09:36:25 +00001169#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
1170
1171static cache_t clo_I1_cache = UNDEFINED_CACHE;
1172static cache_t clo_D1_cache = UNDEFINED_CACHE;
1173static cache_t clo_L2_cache = UNDEFINED_CACHE;
1174
njn7cf0bd32002-06-08 13:36:03 +00001175/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
1176/* Probably only works for Intel and AMD chips, and probably only for some of
1177 * them.
1178 */
1179
sewardj05bcdcb2003-05-18 10:05:38 +00001180static __inline__ void cpuid(Int n, UInt *a, UInt *b, UInt *c, UInt *d)
njn7cf0bd32002-06-08 13:36:03 +00001181{
1182 __asm__ __volatile__ (
1183 "cpuid"
1184 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1185 : "0" (n) /* input */
1186 );
1187}
1188
sewardj07133bf2002-06-13 10:25:56 +00001189static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001190{
1191 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001192 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001193 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001194 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001195 " Simulating a %d KB cache with %d B lines",
1196 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001197}
1198
1199/* Intel method is truly wretched. We have to do an insane indexing into an
1200 * array of pre-defined configurations for various parts of the memory
1201 * hierarchy.
1202 */
1203static
sewardj07133bf2002-06-13 10:25:56 +00001204Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001205{
sewardj07133bf2002-06-13 10:25:56 +00001206 UChar info[16];
1207 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001208 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001209
1210 if (level < 2) {
1211 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001212 "warning: CPUID level < 2 for Intel processor (%d)",
1213 level);
njn7cf0bd32002-06-08 13:36:03 +00001214 return -1;
1215 }
1216
sewardj07133bf2002-06-13 10:25:56 +00001217 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1218 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001219 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1220 info[0] = 0x0; /* reset AL */
1221
1222 if (0 != trials) {
1223 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001224 "warning: non-zero CPUID trials for Intel processor (%d)",
1225 trials);
njn7cf0bd32002-06-08 13:36:03 +00001226 return -1;
1227 }
1228
1229 for (i = 0; i < 16; i++) {
1230
1231 switch (info[i]) {
1232
1233 case 0x0: /* ignore zeros */
1234 break;
1235
njn25e49d8e72002-09-23 09:36:25 +00001236 /* TLB info, ignore */
1237 case 0x01: case 0x02: case 0x03: case 0x04:
1238 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +00001239 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +00001240 break;
1241
1242 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1243 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001244 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001245
1246 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1247 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001248 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001249
njn25e49d8e72002-09-23 09:36:25 +00001250 /* IA-64 info -- panic! */
1251 case 0x10: case 0x15: case 0x1a:
1252 case 0x88: case 0x89: case 0x8a: case 0x8d:
1253 case 0x90: case 0x96: case 0x9b:
1254 VG_(message)(Vg_DebugMsg,
1255 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001256 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001257
njn7cf0bd32002-06-08 13:36:03 +00001258 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001259 VG_(message)(Vg_DebugMsg,
1260 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001261 break;
1262
njn25e49d8e72002-09-23 09:36:25 +00001263 /* These are sectored, whatever that means */
1264 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1265 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1266
1267 /* If a P6 core, this means "no L2 cache".
1268 If a P4 core, this means "no L3 cache".
1269 We don't know what core it is, so don't issue a warning. To detect
1270 a missing L2 cache, we use 'L2_found'. */
1271 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001272 break;
1273
njn25e49d8e72002-09-23 09:36:25 +00001274 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1275 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1276 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1277 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1278 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001279
1280 /* These are sectored, whatever that means */
1281 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1282 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1283 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1284
1285 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1286 * conversion to byte size is a total guess; treat the 12K and 16K
1287 * cases the same since the cache byte size must be a power of two for
1288 * everything to work!. Also guessing 32 bytes for the line size...
1289 */
1290 case 0x70: /* 12K micro-ops, 8-way */
1291 *I1c = (cache_t) { 16, 8, 32 };
1292 micro_ops_warn(12, 16, 32);
1293 break;
1294 case 0x71: /* 16K micro-ops, 8-way */
1295 *I1c = (cache_t) { 16, 8, 32 };
1296 micro_ops_warn(16, 16, 32);
1297 break;
1298 case 0x72: /* 32K micro-ops, 8-way */
1299 *I1c = (cache_t) { 32, 8, 32 };
1300 micro_ops_warn(32, 32, 32);
1301 break;
1302
njn25e49d8e72002-09-23 09:36:25 +00001303 /* These are sectored, whatever that means */
1304 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1305 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1306 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1307 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1308 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001309
njn25e49d8e72002-09-23 09:36:25 +00001310 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1311 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1312 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1313 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1314 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +00001315 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
1316 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001317
1318 default:
1319 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001320 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001321 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001322 break;
1323 }
1324 }
njn25e49d8e72002-09-23 09:36:25 +00001325
1326 if (!L2_found)
1327 VG_(message)(Vg_DebugMsg,
1328 "warning: L2 cache not installed, ignore L2 results.");
1329
njn7cf0bd32002-06-08 13:36:03 +00001330 return 0;
1331}
1332
1333/* AMD method is straightforward, just extract appropriate bits from the
1334 * result registers.
1335 *
1336 * Bits, for D1 and I1:
1337 * 31..24 data L1 cache size in KBs
1338 * 23..16 data L1 cache associativity (FFh=full)
1339 * 15.. 8 data L1 cache lines per tag
1340 * 7.. 0 data L1 cache line size in bytes
1341 *
1342 * Bits, for L2:
1343 * 31..16 unified L2 cache size in KBs
1344 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1345 * 11.. 8 unified L2 cache lines per tag
1346 * 7.. 0 unified L2 cache line size in bytes
1347 *
1348 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1349 * upon this information. (Whatever that means -- njn)
1350 *
njn25e49d8e72002-09-23 09:36:25 +00001351 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1352 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1353 * so we detect that.
1354 *
njn7cf0bd32002-06-08 13:36:03 +00001355 * Returns 0 on success, non-zero on failure.
1356 */
sewardj07133bf2002-06-13 10:25:56 +00001357static
1358Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001359{
sewardj05bcdcb2003-05-18 10:05:38 +00001360 UInt ext_level;
1361 Int dummy, model;
sewardj07133bf2002-06-13 10:25:56 +00001362 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001363
1364 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1365
1366 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1367 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001368 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1369 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001370 return -1;
1371 }
1372
1373 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1374 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1375
njn25e49d8e72002-09-23 09:36:25 +00001376 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1377 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1378
1379 /* Check for Duron bug */
1380 if (model == 0x630) {
1381 VG_(message)(Vg_UserMsg,
1382 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1383 L2i = (64 << 16) | (L2i & 0xffff);
1384 }
1385
njn7cf0bd32002-06-08 13:36:03 +00001386 D1c->size = (D1i >> 24) & 0xff;
1387 D1c->assoc = (D1i >> 16) & 0xff;
1388 D1c->line_size = (D1i >> 0) & 0xff;
1389
1390 I1c->size = (I1i >> 24) & 0xff;
1391 I1c->assoc = (I1i >> 16) & 0xff;
1392 I1c->line_size = (I1i >> 0) & 0xff;
1393
1394 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1395 L2c->assoc = (L2i >> 12) & 0xf;
1396 L2c->line_size = (L2i >> 0) & 0xff;
1397
1398 return 0;
1399}
1400
/* Recovery point that cpuid_SIGILL_handler() jumps back to. */
static jmp_buf cpuid_jmpbuf;

/* Signal handler that abandons whatever was executing and resumes at the
 * __builtin_setjmp() call that filled in cpuid_jmpbuf, which will then
 * appear to return non-zero. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;     /* the signal number itself is not needed */

   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1408
1409static
sewardj07133bf2002-06-13 10:25:56 +00001410Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001411{
sewardj07133bf2002-06-13 10:25:56 +00001412 Int level, res, ret;
1413 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001414 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001415
1416 /* Install own SIGILL handler */
1417 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1418 sigill_new.ksa_flags = 0;
1419 sigill_new.ksa_restorer = NULL;
1420 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001421 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001422
1423 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001424 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001425
1426 /* Trap for illegal instruction, in case it's a really old processor that
1427 * doesn't support CPUID. */
1428 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1429 cpuid(0, &level, (int*)&vendor_id[0],
1430 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1431 vendor_id[12] = '\0';
1432
1433 /* Restore old SIGILL handler */
1434 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001435 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001436
1437 } else {
1438 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1439
1440 /* Restore old SIGILL handler */
1441 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001442 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001443 return -1;
1444 }
1445
1446 if (0 == level) {
1447 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1448 return -1;
1449 }
1450
1451 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1452 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1453 ret = Intel_cache_info(level, I1c, D1c, L2c);
1454
1455 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1456 ret = AMD_cache_info(I1c, D1c, L2c);
1457
sewardj97b7b262003-10-07 00:18:16 +00001458 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
1459 /* Total kludge. Pretend to be a VIA Nehemiah. */
1460 D1c->size = 64;
1461 D1c->assoc = 16;
1462 D1c->line_size = 16;
1463 I1c->size = 64;
1464 I1c->assoc = 4;
1465 I1c->line_size = 16;
1466 L2c->size = 64;
1467 L2c->assoc = 16;
1468 L2c->line_size = 16;
1469 ret = 0;
1470
njn7cf0bd32002-06-08 13:36:03 +00001471 } else {
1472 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1473 vendor_id);
1474 return -1;
1475 }
1476
1477 /* Successful! Convert sizes from KB to bytes */
1478 I1c->size *= 1024;
1479 D1c->size *= 1024;
1480 L2c->size *= 1024;
1481
1482 return ret;
1483}
1484
1485/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001486static
1487void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001488{
1489 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001490 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001491 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001492 "warning: %s size of %dB not a power of two; "
1493 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001494 cache->size = dflt->size;
1495 }
1496
sewardj07133bf2002-06-13 10:25:56 +00001497 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001498 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001499 "warning: %s associativity of %d not a power of two; "
1500 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001501 cache->assoc = dflt->assoc;
1502 }
1503
sewardj07133bf2002-06-13 10:25:56 +00001504 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001505 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001506 "warning: %s line size of %dB not a power of two; "
1507 "defaulting to %dB",
1508 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001509 cache->line_size = dflt->line_size;
1510 }
1511
1512 /* Then check line size >= 16 -- any smaller and a single instruction could
1513 * straddle three cache lines, which breaks a simulation assertion and is
1514 * stupid anyway. */
1515 if (cache->line_size < MIN_LINE_SIZE) {
1516 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001517 "warning: %s line size of %dB too small; "
1518 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001519 cache->line_size = MIN_LINE_SIZE;
1520 }
1521
1522 /* Then check cache size > line size (causes seg faults if not). */
1523 if (cache->size <= cache->line_size) {
1524 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001525 "warning: %s cache size of %dB <= line size of %dB; "
1526 "increasing to %dB", name, cache->size, cache->line_size,
1527 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001528 cache->size = cache->line_size * 2;
1529 }
1530
1531 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1532 if (cache->assoc > (cache->size / cache->line_size)) {
1533 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001534 "warning: %s associativity > (size / line size); "
1535 "increasing size to %dB",
1536 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001537 cache->size = cache->assoc * cache->line_size;
1538 }
1539}
1540
1541/* On entry, args are undefined. Fill them with any info from the
1542 * command-line, then fill in any remaining with CPUID instruction if possible,
1543 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001544static
1545void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001546{
1547 /* Defaults are for a model 3 or 4 Athlon */
1548 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1549 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1550 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1551
njn25e49d8e72002-09-23 09:36:25 +00001552#define CMD_LINE_DEFINED(L) \
1553 (-1 != clo_##L##_cache.size || \
1554 -1 != clo_##L##_cache.assoc || \
1555 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001556
njn25e49d8e72002-09-23 09:36:25 +00001557 *I1c = clo_I1_cache;
1558 *D1c = clo_D1_cache;
1559 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001560
njn7cf0bd32002-06-08 13:36:03 +00001561 /* If any undefined on command-line, try CPUID */
1562 if (! CMD_LINE_DEFINED(I1) ||
1563 ! CMD_LINE_DEFINED(D1) ||
1564 ! CMD_LINE_DEFINED(L2)) {
1565
1566 /* Overwrite CPUID result for any cache defined on command-line */
1567 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1568
njn25e49d8e72002-09-23 09:36:25 +00001569 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1570 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1571 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001572
1573 /* CPUID failed, use defaults for each undefined by command-line */
1574 } else {
1575 VG_(message)(Vg_DebugMsg,
1576 "Couldn't detect cache configuration, using one "
1577 "or more defaults ");
1578
njn25e49d8e72002-09-23 09:36:25 +00001579 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1580 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1581 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001582 }
1583 }
1584#undef CMD_LINE_DEFINED
1585
1586 check_cache(I1c, &I1_dflt, "I1");
1587 check_cache(D1c, &D1_dflt, "D1");
1588 check_cache(L2c, &L2_dflt, "L2");
1589
1590 if (VG_(clo_verbosity) > 1) {
1591 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1592 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1593 I1c->size, I1c->assoc, I1c->line_size);
1594 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1595 D1c->size, D1c->assoc, D1c->line_size);
1596 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1597 L2c->size, L2c->assoc, L2c->line_size);
1598 }
1599}
1600
/*------------------------------------------------------------*/
/*--- SK_(fini)() and related functions                    ---*/
/*------------------------------------------------------------*/
1604
njn4f9c9342002-04-29 16:03:24 +00001605static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1606 Char *first_instr_fn)
1607{
1608 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001609 Char buf[BUF_LEN], curr_file[BUF_LEN],
1610 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001611 UInt line_num;
1612
1613 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1614
njne0ee0712002-05-03 16:41:05 +00001615 /* Mark start of basic block in output, just to ease debugging */
1616 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001617
1618 VG_(strcpy)(curr_file, first_instr_fl);
1619
1620 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1621
1622 /* We pretend the CC is an iCC for getting the tag. This is ok
1623 * because both CC types have tag as their first byte. Once we know
1624 * the type, we can cast and act appropriately. */
1625
1626 Char fl_buf[FILENAME_LEN];
1627 Char fn_buf[FN_NAME_LEN];
1628
njne0ee0712002-05-03 16:41:05 +00001629 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001630 switch ( ((iCC*)BBCC_ptr)->tag ) {
1631
njn25e49d8e72002-09-23 09:36:25 +00001632 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001633 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1634 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001635 ADD_CC_TO(iCC, I, Ir_total);
1636 BBCC_ptr += sizeof(iCC);
1637 break;
1638
njn25e49d8e72002-09-23 09:36:25 +00001639 case ReadCC:
1640 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001641 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1642 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001643 ADD_CC_TO(idCC, I, Ir_total);
1644 ADD_CC_TO(idCC, D, Dr_total);
1645 BBCC_ptr += sizeof(idCC);
1646 break;
1647
njn25e49d8e72002-09-23 09:36:25 +00001648 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001649 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1650 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001651 ADD_CC_TO(idCC, I, Ir_total);
1652 ADD_CC_TO(idCC, D, Dw_total);
1653 BBCC_ptr += sizeof(idCC);
1654 break;
1655
njn25e49d8e72002-09-23 09:36:25 +00001656 case ReadWriteCC:
1657 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1658 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1659 ADD_CC_TO(iddCC, I, Ir_total);
1660 ADD_CC_TO(iddCC, Da, Dr_total);
1661 ADD_CC_TO(iddCC, Db, Dw_total);
1662 BBCC_ptr += sizeof(iddCC);
1663 break;
1664
njn4f9c9342002-04-29 16:03:24 +00001665 default:
njne427a662002-10-02 11:08:25 +00001666 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001667 break;
1668 }
1669 distinct_instrs++;
1670
njne0ee0712002-05-03 16:41:05 +00001671 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1672
1673 /* Allow for filename switching in the middle of a BB; if this happens,
1674 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001675 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001676 VG_(strcpy)(curr_file, fl_buf);
1677 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1678 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1679 }
1680
njn4f9c9342002-04-29 16:03:24 +00001681 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001682 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001683 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001684 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001685 VG_(printf)(" filenames: BB:%s, instr:%s;"
1686 " fn_names: BB:%s, instr:%s;"
1687 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001688 first_instr_fl, fl_buf,
1689 first_instr_fn, fn_buf,
1690 line_num);
1691 }
1692
njne0ee0712002-05-03 16:41:05 +00001693 VG_(sprintf)(lbuf, "%u ", line_num);
1694 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1695 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001696 }
1697 /* If we switched filenames in the middle of the BB without switching back,
1698 * switch back now because the subsequent BB may be relying on falling under
1699 * the original file name. */
1700 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1701 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1702 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1703 }
njne0ee0712002-05-03 16:41:05 +00001704
1705 /* Mark end of basic block */
1706 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001707
njne427a662002-10-02 11:08:25 +00001708 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001709}
1710
njn25e49d8e72002-09-23 09:36:25 +00001711static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001712{
1713 Int fd;
1714 Char buf[BUF_LEN];
1715 file_node *curr_file_node;
1716 fn_node *curr_fn_node;
1717 BBCC *curr_BBCC;
1718 Int i,j,k;
1719
njn25e49d8e72002-09-23 09:36:25 +00001720 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001721
njndb918dd2003-07-22 20:45:11 +00001722 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001723 VKI_S_IRUSR|VKI_S_IWUSR);
nethercote50da0f32003-10-30 10:33:30 +00001724 if (fd < 0) {
sewardj0744b6c2002-12-11 00:45:42 +00001725 /* If the file can't be opened for whatever reason (conflict
1726 between multiple cachegrinded processes?), give up now. */
1727 file_err();
1728 return;
1729 }
njn4f9c9342002-04-29 16:03:24 +00001730
1731 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001732 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1733 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1734 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1735 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1736 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1737 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001738
1739 /* "cmd:" line */
1740 VG_(strcpy)(buf, "cmd:");
1741 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001742 for (i = 0; i < VG_(client_argc); i++) {
1743 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001744 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1745 }
1746 /* "events:" line */
1747 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1748 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1749
1750 /* Six loops here: three for the hash table arrays, and three for the
1751 * chains hanging off the hash table arrays. */
1752 for (i = 0; i < N_FILE_ENTRIES; i++) {
1753 curr_file_node = BBCC_table[i];
1754 while (curr_file_node != NULL) {
1755 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1756 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1757
1758 for (j = 0; j < N_FN_ENTRIES; j++) {
1759 curr_fn_node = curr_file_node->fns[j];
1760 while (curr_fn_node != NULL) {
1761 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1762 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1763
1764 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1765 curr_BBCC = curr_fn_node->BBCCs[k];
1766 while (curr_BBCC != NULL) {
1767 fprint_BBCC(fd, curr_BBCC,
1768
1769 curr_file_node->filename,
1770 curr_fn_node->fn_name);
1771
1772 curr_BBCC = curr_BBCC->next;
1773 }
1774 }
1775 curr_fn_node = curr_fn_node->next;
1776 }
1777 }
1778 curr_file_node = curr_file_node->next;
1779 }
1780 }
1781
njn4294fd42002-06-05 14:41:10 +00001782 /* Print stats from any discarded basic blocks */
1783 if (0 != Ir_discards.a) {
1784
1785 VG_(sprintf)(buf, "fl=(discarded)\n");
1786 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1787 VG_(sprintf)(buf, "fn=(discarded)\n");
1788 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1789
1790 /* Use 0 as line number */
1791 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1792 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1793 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1794 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1795 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1796
1797 Ir_total.a += Ir_discards.a;
1798 Ir_total.m1 += Ir_discards.m1;
1799 Ir_total.m2 += Ir_discards.m2;
1800 Dr_total.a += Dr_discards.a;
1801 Dr_total.m1 += Dr_discards.m1;
1802 Dr_total.m2 += Dr_discards.m2;
1803 Dw_total.a += Dw_discards.a;
1804 Dw_total.m1 += Dw_discards.m1;
1805 Dw_total.m2 += Dw_discards.m2;
1806 }
1807
njn4f9c9342002-04-29 16:03:24 +00001808 /* Summary stats must come after rest of table, since we calculate them
1809 * during traversal. */
1810 VG_(sprintf)(buf, "summary: "
1811 "%llu %llu %llu "
1812 "%llu %llu %llu "
1813 "%llu %llu %llu\n",
1814 Ir_total.a, Ir_total.m1, Ir_total.m2,
1815 Dr_total.a, Dr_total.m1, Dr_total.m2,
1816 Dw_total.a, Dw_total.m1, Dw_total.m2);
1817 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1818 VG_(close)(fd);
1819}
1820
/* Return the number of characters needed to print `n` in decimal with a
 * comma between each group of three digits (e.g. 1,234,567 needs 9). */
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   /* Zero still prints as one digit.  Without this, w-1 would wrap
    * around (w is unsigned) and the result would be enormous. */
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}
1830
/* Format the fixed-point value n/ex as a percentage, right-justified in a
 * field of `field_width` characters.
 *
 *   n            the percentage scaled by `ex`; expected to be >= 0
 *   ex           the scale: 10 for one decimal place, 100 for two, ...
 *                (must be non-zero; a power of ten is expected)
 *   field_width  minimum width of the result; shorter text is padded on
 *                the left with spaces, longer text is left alone
 *   buf          receives the result; the caller must provide room for
 *                max(field_width, text length) + 1 bytes
 *
 * The fractional digits are zero-padded to the width implied by `ex`, so
 * n=105, ex=100 yields "1.05%" (not "1.5%"). */
static
void percentify(Int n, Int ex, Int field_width, char buf[])
{
   int i, len, space;
   int frac = n % ex;
   int div  = ex / 10;

   VG_(sprintf)(buf, "%d.", n / ex);
   len = VG_(strlen)(buf);

   /* Emit the fraction one digit at a time, most significant first, so
    * that leading zeros are kept.  Always emit at least one digit. */
   if (div < 1) div = 1;
   for ( ; div > 0; div /= 10)
      buf[len++] = (char)('0' + (frac / div) % 10);
   buf[len++] = '%';
   buf[len]   = '\0';

   space = field_width - len;
   if (space < 0) space = 0;     /* Allow for v. small field_width */

   /* Right justify in field: shift the text (including its NUL) up by
    * `space`, working backwards so nothing is overwritten early. */
   for (i = len; i >= 0; i--)    buf[i + space] = buf[i];
   for (i = 0; i < space; i++)   buf[i] = ' ';
}
1846
njn7d9f94d2003-04-22 21:41:40 +00001847void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001848{
njn607adfc2003-09-30 14:15:44 +00001849 static char buf1[RESULTS_BUF_LEN],
1850 buf2[RESULTS_BUF_LEN],
1851 buf3[RESULTS_BUF_LEN],
1852 fmt [RESULTS_BUF_LEN];
1853
njn4f9c9342002-04-29 16:03:24 +00001854 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001855 ULong L2_total_m, L2_total_mr, L2_total_mw,
1856 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001857 Int l1, l2, l3;
1858 Int p;
1859
njn25e49d8e72002-09-23 09:36:25 +00001860 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001861
njn7cf0bd32002-06-08 13:36:03 +00001862 if (VG_(clo_verbosity) == 0)
1863 return;
1864
njn4f9c9342002-04-29 16:03:24 +00001865 /* I cache results. Use the I_refs value to determine the first column
1866 * width. */
njn607adfc2003-09-30 14:15:44 +00001867 l1 = ULong_width(Ir_total.a);
1868 l2 = ULong_width(Dr_total.a);
1869 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001870
njn607adfc2003-09-30 14:15:44 +00001871 /* Make format string, getting width right for numbers */
1872 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1873
1874 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1875 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1876 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001877
1878 p = 100;
1879
njn25e49d8e72002-09-23 09:36:25 +00001880 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001881 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1882 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1883
1884 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1885 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1886 VG_(message)(Vg_UserMsg, "");
1887
1888 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1889 * width of columns 2 & 3. */
1890 D_total.a = Dr_total.a + Dw_total.a;
1891 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1892 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1893
njn607adfc2003-09-30 14:15:44 +00001894 /* Make format string, getting width right for numbers */
1895 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001896
njn607adfc2003-09-30 14:15:44 +00001897 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1898 D_total.a, Dr_total.a, Dw_total.a);
1899 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1900 D_total.m1, Dr_total.m1, Dw_total.m1);
1901 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1902 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001903
1904 p = 10;
1905
njn25e49d8e72002-09-23 09:36:25 +00001906 if (0 == D_total.a) D_total.a = 1;
1907 if (0 == Dr_total.a) Dr_total.a = 1;
1908 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001909 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1910 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1911 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1912 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1913
1914 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1915 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1916 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1917 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1918 VG_(message)(Vg_UserMsg, "");
1919
1920 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001921
1922 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1923 L2_total_r = Dr_total.m1 + Ir_total.m1;
1924 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001925 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1926 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001927
njn4f9c9342002-04-29 16:03:24 +00001928 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1929 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1930 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001931 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1932 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001933
1934 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1935 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1936 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1937 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1938
1939
1940 /* Hash table stats */
1941 if (VG_(clo_verbosity) > 1) {
1942 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1943 file_line_debug_BBs + no_debug_BBs;
1944
1945 VG_(message)(Vg_DebugMsg, "");
1946 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1947 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1948 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1949 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1950 full_debug_BBs * 100 / BB_lookups,
1951 full_debug_BBs);
1952 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1953 file_line_debug_BBs * 100 / BB_lookups,
1954 file_line_debug_BBs);
1955 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1956 fn_name_debug_BBs * 100 / BB_lookups,
1957 fn_name_debug_BBs);
1958 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1959 no_debug_BBs * 100 / BB_lookups,
1960 no_debug_BBs);
1961 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1962 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1963 }
njn25e49d8e72002-09-23 09:36:25 +00001964 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001965}
1966
sewardj18d75132002-05-16 11:06:21 +00001967
njn4294fd42002-06-05 14:41:10 +00001968/* Called when a translation is invalidated due to self-modifying code or
1969 * unloaded of a shared object.
1970 *
1971 * Finds the BBCC in the table, removes it, adds the counts to the discard
1972 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001973void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001974{
njn4294fd42002-06-05 14:41:10 +00001975 BBCC *BBCC_node;
1976 Addr BBCC_ptr0, BBCC_ptr;
1977 Bool BB_seen_before;
1978
sewardj83205b32002-06-14 11:08:07 +00001979 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001980 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001981
1982 /* 2nd arg won't be used since BB should have been seen before (assertions
1983 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001984 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001985 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1986
njne427a662002-10-02 11:08:25 +00001987 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001988
1989 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1990
1991 /* We pretend the CC is an iCC for getting the tag. This is ok
1992 * because both CC types have tag as their first byte. Once we know
1993 * the type, we can cast and act appropriately. */
1994
1995 switch ( ((iCC*)BBCC_ptr)->tag ) {
1996
njn25e49d8e72002-09-23 09:36:25 +00001997 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001998 ADD_CC_TO(iCC, I, Ir_discards);
1999 BBCC_ptr += sizeof(iCC);
2000 break;
2001
njn25e49d8e72002-09-23 09:36:25 +00002002 case ReadCC:
2003 case ModCC:
njn4294fd42002-06-05 14:41:10 +00002004 ADD_CC_TO(idCC, I, Ir_discards);
2005 ADD_CC_TO(idCC, D, Dr_discards);
2006 BBCC_ptr += sizeof(idCC);
2007 break;
2008
njn25e49d8e72002-09-23 09:36:25 +00002009 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00002010 ADD_CC_TO(idCC, I, Ir_discards);
2011 ADD_CC_TO(idCC, D, Dw_discards);
2012 BBCC_ptr += sizeof(idCC);
2013 break;
2014
njn25e49d8e72002-09-23 09:36:25 +00002015 case ReadWriteCC:
2016 ADD_CC_TO(iddCC, I, Ir_discards);
2017 ADD_CC_TO(iddCC, Da, Dr_discards);
2018 ADD_CC_TO(iddCC, Db, Dw_discards);
2019 BBCC_ptr += sizeof(iddCC);
2020 break;
2021
njn4294fd42002-06-05 14:41:10 +00002022 default:
njne427a662002-10-02 11:08:25 +00002023 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00002024 break;
2025 }
2026 }
njn25e49d8e72002-09-23 09:36:25 +00002027 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00002028}
2029
/*--------------------------------------------------------------------*/
/*--- Command line processing                                      ---*/
/*--------------------------------------------------------------------*/
2033
2034static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
2035{
2036 int i1, i2, i3;
2037 int i;
2038 char *opt = VG_(strdup)(orig_opt);
2039
2040 i = i1 = opt_len;
2041
2042 /* Option looks like "--I1=65536,2,64".
2043 * Find commas, replace with NULs to make three independent
2044 * strings, then extract numbers. Yuck. */
2045 while (VG_(isdigit)(opt[i])) i++;
2046 if (',' == opt[i]) {
2047 opt[i++] = '\0';
2048 i2 = i;
2049 } else goto bad;
2050 while (VG_(isdigit)(opt[i])) i++;
2051 if (',' == opt[i]) {
2052 opt[i++] = '\0';
2053 i3 = i;
2054 } else goto bad;
2055 while (VG_(isdigit)(opt[i])) i++;
2056 if ('\0' != opt[i]) goto bad;
2057
2058 cache->size = (Int)VG_(atoll)(opt + i1);
2059 cache->assoc = (Int)VG_(atoll)(opt + i2);
2060 cache->line_size = (Int)VG_(atoll)(opt + i3);
2061
2062 VG_(free)(opt);
2063
2064 return;
2065
2066 bad:
2067 VG_(bad_option)(orig_opt);
2068}
2069
2070Bool SK_(process_cmd_line_option)(Char* arg)
2071{
2072 /* 5 is length of "--I1=" */
njn39c86652003-05-21 10:13:39 +00002073 if (VG_CLO_STREQN(5, arg, "--I1="))
njn25e49d8e72002-09-23 09:36:25 +00002074 parse_cache_opt(&clo_I1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00002075 else if (VG_CLO_STREQN(5, arg, "--D1="))
njn25e49d8e72002-09-23 09:36:25 +00002076 parse_cache_opt(&clo_D1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00002077 else if (VG_CLO_STREQN(5, arg, "--L2="))
njn25e49d8e72002-09-23 09:36:25 +00002078 parse_cache_opt(&clo_L2_cache, arg, 5);
2079 else
2080 return False;
2081
2082 return True;
2083}
2084
njn3e884182003-04-15 13:03:23 +00002085void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00002086{
njn3e884182003-04-15 13:03:23 +00002087 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002088" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
2089" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00002090" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
2091 );
2092}
2093
2094void SK_(print_debug_usage)(void)
2095{
2096 VG_(printf)(
2097" (none)\n"
2098 );
njn25e49d8e72002-09-23 09:36:25 +00002099}
2100
/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/
2104
njn810086f2002-11-14 12:42:47 +00002105void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00002106{
njn13f02932003-04-30 20:23:58 +00002107 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00002108
njn810086f2002-11-14 12:42:47 +00002109 VG_(details_name) ("Cachegrind");
2110 VG_(details_version) (NULL);
2111 VG_(details_description) ("an I1/D1/L2 cache profiler");
2112 VG_(details_copyright_author)(
nethercotebb1c9912004-01-04 16:43:23 +00002113 "Copyright (C) 2002-2004, and GNU GPL'd, by Nicholas Nethercote.");
nethercote421281e2003-11-20 16:20:55 +00002114 VG_(details_bug_reports_to) (VG_BUGS_TO);
sewardj78210aa2002-12-01 02:55:46 +00002115 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00002116
njn810086f2002-11-14 12:42:47 +00002117 VG_(needs_basic_block_discards)();
2118 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00002119
2120 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
2121 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
2122 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
2123 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
2124 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
2125 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00002126
njn99ccf082003-09-30 13:51:23 +00002127 /* Get working directory */
2128 sk_assert( VG_(getcwd_alloc)(&base_dir) );
2129
njn13f02932003-04-30 20:23:58 +00002130 /* Block is big enough for dir name + cachegrind.out.<pid> */
2131 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
2132 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
2133 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00002134 VG_(free)(base_dir);
njn25e49d8e72002-09-23 09:36:25 +00002135}
2136
2137void SK_(post_clo_init)(void)
2138{
2139 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00002140
2141 initCC(&Ir_total);
2142 initCC(&Dr_total);
2143 initCC(&Dw_total);
2144
2145 initCC(&Ir_discards);
2146 initCC(&Dr_discards);
2147 initCC(&Dw_discards);
2148
2149 get_caches(&I1c, &D1c, &L2c);
2150
2151 cachesim_I1_initcache(I1c);
2152 cachesim_D1_initcache(D1c);
2153 cachesim_L2_initcache(L2c);
2154
2155 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
2156 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
2157 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
2158
2159 init_BBCC_table();
2160}
2161
fitzhardinge98abfc72003-12-16 02:05:15 +00002162VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
2163
#if 0
/* Disabled sanity-check hooks, kept for reference only; nothing between
 * here and the matching #endif is compiled. */
Bool SK_(cheap_sanity_check)(void)
{
   return True;
}

extern TTEntry* vg_tt;

/* Walks vg_tt and calls get_BBCC() (with remove=True) for every entry
 * whose orig_addr is neither 1 nor 3, printing a dot for each.
 * NOTE(review): 200191 is presumably the size of vg_tt, and 1 and 3
 * presumably mark unused slots -- confirm before re-enabling. */
Bool SK_(expensive_sanity_check)(void)
{
   Bool seen;
   Int  slot;

   for (slot = 0; slot < 200191; slot++) {
      if (vg_tt[slot].orig_addr == (Addr)1 ||
          vg_tt[slot].orig_addr == (Addr)3)
         continue;

      VG_(printf)(".");
      get_BBCC(vg_tt[slot].orig_addr, NULL, /*remove=*/True, &seen);
   }
   return True;
}
#endif
2183
/*--------------------------------------------------------------------*/
/*--- end                                                cg_main.c ---*/
/*--------------------------------------------------------------------*/