blob: 4f1bf10716b96a50ff2b49ed1a9d351c87f15b94 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
3/*--- The cache simulation framework: instrumentation, recording ---*/
4/*--- and results printing. ---*/
5/*--- vg_cachesim.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of Valgrind, an x86 protected-mode emulator
10 designed for debugging and profiling binaries on x86-Unixes.
11
sewardj3c23d432002-06-01 23:43:49 +000012 Copyright (C) 2002 Nicholas Nethercote
13 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file LICENSE.
31*/
32
njn4f9c9342002-04-29 16:03:24 +000033#include "vg_include.h"
34
35#include "vg_cachesim_L2.c"
36#include "vg_cachesim_I1.c"
37#include "vg_cachesim_D1.c"
38
39
/* Upper bound used for the length of one x86 instruction; the figure is
 * attributed to the IA-32 Intel Architecture Software Developer's Manual,
 * Vol 2. */
#define MAX_x86_INSTR_SIZE    16

/* Sizes of the various buffers used for storing strings. */
#define FILENAME_LEN          256
#define FN_NAME_LEN           256
#define BUF_LEN               512
#define COMMIFY_BUF_LEN       128
#define RESULTS_BUF_LEN       128
#define LINE_BUF_LEN          64
50
sewardj07133bf2002-06-13 10:25:56 +000051
njn7cf0bd32002-06-08 13:36:03 +000052/*------------------------------------------------------------*/
53/*--- Generic utility stuff ---*/
54/*------------------------------------------------------------*/
55
sewardj07133bf2002-06-13 10:25:56 +000056Int VG_(log2) ( Int x )
njn7cf0bd32002-06-08 13:36:03 +000057{
sewardj07133bf2002-06-13 10:25:56 +000058 Int i;
njn7cf0bd32002-06-08 13:36:03 +000059 /* Any more than 32 and we overflow anyway... */
60 for (i = 0; i < 32; i++) {
61 if (1 << i == x) return i;
62 }
63 return -1;
64}
njn4f9c9342002-04-29 16:03:24 +000065
sewardj07133bf2002-06-13 10:25:56 +000066
njn4f9c9342002-04-29 16:03:24 +000067/*------------------------------------------------------------*/
68/*--- Output file related stuff ---*/
69/*------------------------------------------------------------*/
70
71#define OUT_FILE "cachegrind.out"
72
73static void file_err()
74{
75 VG_(message)(Vg_UserMsg,
njn7cf0bd32002-06-08 13:36:03 +000076 "error: can't open cache simulation output file `%s'",
njn4f9c9342002-04-29 16:03:24 +000077 OUT_FILE );
78 VG_(exit)(1);
79}
80
81/*------------------------------------------------------------*/
82/*--- Cost center types, operations ---*/
83/*------------------------------------------------------------*/
84
85typedef struct _CC CC;
86struct _CC {
87 ULong a;
88 ULong m1;
89 ULong m2;
90};
91
92static __inline__ void initCC(CC* cc) {
93 cc->a = 0;
94 cc->m1 = 0;
95 cc->m2 = 0;
96}
97
/* Values stored in the `tag' byte of an instruction-level cost centre.
 * INSTR_CC is what init_iCC() writes; the other three are passed to
 * init_idCC() for instructions that read, write, or both read and write
 * memory. */
typedef enum {
   INSTR_CC,
   READ_CC,
   WRITE_CC,
   MOD_CC
} CC_type;
99
100/* Instruction-level cost-centres. The typedefs for these structs are in
101 * vg_include.c
102 *
103 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +0000104 *
njne0ee0712002-05-03 16:41:05 +0000105 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +0000106 */
107struct _iCC {
108 /* word 1 */
109 UChar tag;
110 UChar instr_size;
njne0ee0712002-05-03 16:41:05 +0000111 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000112
113 /* words 2+ */
114 Addr instr_addr;
115 CC I;
116};
117
118struct _idCC {
119 /* word 1 */
120 UChar tag;
121 UChar instr_size;
122 UChar data_size;
njne0ee0712002-05-03 16:41:05 +0000123 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000124
125 /* words 2+ */
126 Addr instr_addr;
127 CC I;
128 CC D;
129};
130
131static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
132{
133 cc->tag = INSTR_CC;
134 cc->instr_size = instr_size;
135 cc->instr_addr = instr_addr;
136 initCC(&cc->I);
137}
138
139static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
140 UInt instr_size, UInt data_size)
141{
142 cc->tag = X_CC;
143 cc->instr_size = instr_size;
144 cc->data_size = data_size;
145 cc->instr_addr = instr_addr;
146 initCC(&cc->I);
147 initCC(&cc->D);
148}
149
/* Adds the three counters of member `cc' of the cost centre found at
 * BBCC_ptr (reinterpreted as a CC_type*) onto the CC variable `total'.
 * A variable called BBCC_ptr must be in scope wherever this is used.
 * The expansion is three complete statements, each ending in its own
 * semicolon, so do not use it as the unbraced body of an if/else/loop. */
#define ADD_CC_TO(CC_type, cc, total)              \
   total.a  += ((CC_type*)BBCC_ptr)->cc.a;         \
   total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;        \
   total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
154
njn95114da2002-06-05 09:39:31 +0000155/* If 1, address of each instruction is printed as a comment after its counts
156 * in cachegrind.out */
157#define PRINT_INSTR_ADDRS 0
158
njne0ee0712002-05-03 16:41:05 +0000159static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000160{
njn95114da2002-06-05 09:39:31 +0000161#if PRINT_INSTR_ADDRS
162 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
163 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
164#else
njne0ee0712002-05-03 16:41:05 +0000165 VG_(sprintf)(buf, "%llu %llu %llu\n",
166 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000167#endif
njn4f9c9342002-04-29 16:03:24 +0000168}
169
njne0ee0712002-05-03 16:41:05 +0000170static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000171{
njn95114da2002-06-05 09:39:31 +0000172#if PRINT_INSTR_ADDRS
173 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
174 cc->I.a, cc->I.m1, cc->I.m2,
175 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
176#else
njne0ee0712002-05-03 16:41:05 +0000177 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
178 cc->I.a, cc->I.m1, cc->I.m2,
179 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000180#endif
njn4f9c9342002-04-29 16:03:24 +0000181}
182
njne0ee0712002-05-03 16:41:05 +0000183static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000184{
njn95114da2002-06-05 09:39:31 +0000185#if PRINT_INSTR_ADDRS
186 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
187 cc->I.a, cc->I.m1, cc->I.m2,
188 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
189#else
njne0ee0712002-05-03 16:41:05 +0000190 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
191 cc->I.a, cc->I.m1, cc->I.m2,
192 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000193#endif
njn4f9c9342002-04-29 16:03:24 +0000194}
195
196/*------------------------------------------------------------*/
197/*--- BBCC hash table stuff ---*/
198/*------------------------------------------------------------*/
199
/* BBCCs live in three nested hash tables, each separately chained:
 * filename -> function name -> basic-block address.  The bucket counts
 * below are the sizes of those three levels; the original author found
 * they work fairly well for Konqueror. */

#define N_FILE_ENTRIES        251
#define N_FN_ENTRIES          53
#define N_BBCC_ENTRIES        37
207
208/* The cost centres for a basic block are stored in a contiguous array.
209 * They are distinguishable by their tag field. */
210typedef struct _BBCC BBCC;
211struct _BBCC {
212 Addr orig_addr;
213 UInt array_size; /* byte-size of variable length array */
214 BBCC* next;
215 Addr array[0]; /* variable length array */
216};
217
218typedef struct _fn_node fn_node;
219struct _fn_node {
220 Char* fn_name;
221 BBCC* BBCCs[N_BBCC_ENTRIES];
222 fn_node* next;
223};
224
225typedef struct _file_node file_node;
226struct _file_node {
227 Char* filename;
228 fn_node* fns[N_FN_ENTRIES];
229 file_node* next;
230};
231
232/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000233static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000234
sewardj4f29ddf2002-05-03 22:29:04 +0000235static Int distinct_files = 0;
236static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000237
sewardj4f29ddf2002-05-03 22:29:04 +0000238static Int distinct_instrs = 0;
239static Int full_debug_BBs = 0;
240static Int file_line_debug_BBs = 0;
241static Int fn_name_debug_BBs = 0;
242static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000243
sewardj4f29ddf2002-05-03 22:29:04 +0000244static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000245
njn4294fd42002-06-05 14:41:10 +0000246static CC Ir_discards;
247static CC Dr_discards;
248static CC Dw_discards;
249
njn4f9c9342002-04-29 16:03:24 +0000250static void init_BBCC_table()
251{
252 Int i;
253 for (i = 0; i < N_FILE_ENTRIES; i++)
254 BBCC_table[i] = NULL;
255}
256
njne0ee0712002-05-03 16:41:05 +0000257static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
258 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000259{
njn4f9c9342002-04-29 16:03:24 +0000260 Bool found1, found2, no_demangle = False;
261
262 found1 = VG_(what_line_is_this)(instr_addr, filename,
njne0ee0712002-05-03 16:41:05 +0000263 FILENAME_LEN, line_num);
njn4f9c9342002-04-29 16:03:24 +0000264 found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
265
266 if (!found1 && !found2) {
267 no_debug_BBs++;
268 VG_(strcpy)(filename, "???");
269 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000270 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000271
272 } else if ( found1 && found2) {
273 full_debug_BBs++;
274
275 } else if ( found1 && !found2) {
276 file_line_debug_BBs++;
277 VG_(strcpy)(fn_name, "???");
278
279 } else /*(!found1 && found2)*/ {
280 fn_name_debug_BBs++;
281 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000282 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000283 }
284}
285
286/* Forward declaration. */
287static Int compute_BBCC_array_size(UCodeBlock* cb);
288
289static __inline__
290file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
291{
292 Int i;
293 file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
294 new->filename = VG_(strdup)(VG_AR_PRIVATE, filename);
295 for (i = 0; i < N_FN_ENTRIES; i++) {
296 new->fns[i] = NULL;
297 }
298 new->next = next;
299 return new;
300}
301
302static __inline__
303fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
304{
305 Int i;
306 fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
307 new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
308 for (i = 0; i < N_BBCC_ENTRIES; i++) {
309 new->BBCCs[i] = NULL;
310 }
311 new->next = next;
312 return new;
313}
314
315static __inline__
316BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
317{
318 Int BBCC_array_size = compute_BBCC_array_size(cb);
319 BBCC* new;
320
321 new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
322 new->orig_addr = bb_orig_addr;
323 new->array_size = BBCC_array_size;
324 new->next = next;
325
326 return new;
327}
328
329#define HASH_CONSTANT 256
330
331static UInt hash(Char *s, UInt table_size)
332{
333 int hash_value = 0;
334 for ( ; *s; s++)
335 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
336 return hash_value;
337}
338
339/* Do a three step traversal: by filename, then fn_name, then instr_addr.
340 * In all cases prepends new nodes to their chain. Returns a pointer to the
341 * cost centre. Also sets BB_seen_before by reference.
342 */
343static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000344 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000345{
346 file_node *curr_file_node;
347 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000348 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000349 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
350 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000351 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000352
njne0ee0712002-05-03 16:41:05 +0000353 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000354
355 VGP_PUSHCC(VgpCacheGetBBCC);
356 filename_hash = hash(filename, N_FILE_ENTRIES);
357 curr_file_node = BBCC_table[filename_hash];
358 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000359 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000360 curr_file_node = curr_file_node->next;
361 }
362 if (NULL == curr_file_node) {
363 BBCC_table[filename_hash] = curr_file_node =
364 new_file_node(filename, BBCC_table[filename_hash]);
365 distinct_files++;
366 }
367
368 fnname_hash = hash(fn_name, N_FN_ENTRIES);
369 curr_fn_node = curr_file_node->fns[fnname_hash];
370 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000371 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000372 curr_fn_node = curr_fn_node->next;
373 }
374 if (NULL == curr_fn_node) {
375 curr_file_node->fns[fnname_hash] = curr_fn_node =
376 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
377 distinct_fns++;
378 }
379
380 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000381 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000382 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
383 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000384 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000385 curr_BBCC = curr_BBCC->next;
386 }
387 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000388
389 vg_assert(False == remove);
390
njn4f9c9342002-04-29 16:03:24 +0000391 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
392 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
393 *BB_seen_before = False;
394
395 } else {
396 vg_assert(bb_orig_addr == curr_BBCC->orig_addr);
397 vg_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000398 if (VG_(clo_verbosity) > 2) {
399 VG_(message)(Vg_DebugMsg,
400 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000401 }
402 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000403
404 if (True == remove) {
405 // Remove curr_BBCC from chain; it will be used and free'd by the
406 // caller.
407 *prev_BBCC_next_ptr = curr_BBCC->next;
408
409 } else {
410 BB_retranslations++;
411 }
njn4f9c9342002-04-29 16:03:24 +0000412 }
413 VGP_POPCC;
414 return curr_BBCC;
415}
416
417/*------------------------------------------------------------*/
418/*--- Cache simulation instrumentation phase ---*/
419/*------------------------------------------------------------*/
420
421#define uInstr1 VG_(newUInstr1)
422#define uInstr2 VG_(newUInstr2)
423#define uInstr3 VG_(newUInstr3)
424#define dis VG_(disassemble)
425#define uLiteral VG_(setLiteralField)
426#define newTemp VG_(getNewTemp)
427
428static Int compute_BBCC_array_size(UCodeBlock* cb)
429{
430 UInstr* u_in;
431 Int i, CC_size, BBCC_size = 0;
432 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
433
434 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
435
436 for (i = 0; i < cb->used; i++) {
sewardjfc3e5d32002-04-30 10:18:48 +0000437 /* VG_(ppUInstr)(0, &cb->instrs[i]); */
njn4f9c9342002-04-29 16:03:24 +0000438
439 u_in = &cb->instrs[i];
440 switch(u_in->opcode) {
441
442 case INCEIP:
443 goto case_for_end_of_instr;
444
445 case JMP:
446 if (u_in->cond != CondAlways) break;
447
448 goto case_for_end_of_instr;
449
450 case_for_end_of_instr:
451
452 CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W
453 ? sizeof(idCC) : sizeof(iCC));
454
455 BBCC_size += CC_size;
456 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
457 break;
458
459 case LOAD:
460 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000461 /* Also, a STORE can come after a LOAD for bts/btr/btc */
sewardjfc3e5d32002-04-30 10:18:48 +0000462 vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */
463 !is_FPU_R && !is_FPU_W);
njn4f9c9342002-04-29 16:03:24 +0000464 is_LOAD = True;
465 break;
466
467 case STORE:
468 /* Multiple STOREs are possible for 'pushal' */
469 vg_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
470 is_STORE = True;
471 break;
472
473 case FPU_R:
474 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
475 is_FPU_R = True;
476 break;
477
478 case FPU_W:
479 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
480 is_FPU_W = True;
481 break;
482
483 default:
484 break;
485 }
486 }
487
488 return BBCC_size;
489}
490
491/* Use this rather than eg. -1 because it's stored as a UInt. */
492#define INVALID_DATA_SIZE 999999
493
/* Instruments one UCode basic block for cache simulation.
 *
 *   cb_in      the UCode block to instrument; it is freed before returning.
 *   orig_addr  address of the original basic block; used as the key for the
 *              BBCC lookup and as the address of the block's first x86
 *              instruction.
 *
 * Returns a newly allocated code block that contains the instructions of
 * cb_in (NOP, CALLM_S and CALLM_E are dropped) plus, at the end of every x86
 * instruction, a call to a logging helper:
 *   - cachesim_log_non_mem_instr(CC addr) when the instruction made no
 *     LOAD/STORE/FPU_R/FPU_W, or
 *   - cachesim_log_mem_instr(CC addr, data addr) otherwise.
 * %eax, %ecx and %edx are pushed before and popped after each call.
 *
 * The cost centres live in the BBCC obtained from get_BBCC(); they are only
 * initialised when the block has not been seen before, so counts survive a
 * retranslation of the same block.
 */
494UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
495{
496 UCodeBlock* cb;
497 Int i;
498 UInstr* u_in;
499 BBCC* BBCC_node;
500 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
501 Int CC_size = -1; /* Shut gcc warnings up */
502 Addr instr_addr = orig_addr;
503 UInt instr_size, data_size = INVALID_DATA_SIZE;
504 Int helper = -1; /* Shut gcc warnings up */
505 UInt stack_used;
506 Bool BB_seen_before = False;
507 Bool prev_instr_was_Jcond = False;
508 Addr BBCC_ptr0, BBCC_ptr;
509
510 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
 511 * if it's the first time it's been seen), and point to start of the
 512 * BBCC array. */
njn4294fd42002-06-05 14:41:10 +0000513 BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000514 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
515
516 cb = VG_(allocCodeBlock)();
517 cb->nextTemp = cb_in->nextTemp;
518
519 t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
520
521 for (i = 0; i < cb_in->used; i++) {
522 u_in = &cb_in->instrs[i];
523
524 //VG_(ppUInstr)(0, u_in);
525
526 /* What this is all about: we want to instrument each x86 instruction
 527 * translation. The end of these are marked in three ways. The three
 528 * ways, and the way we instrument them, are as follows:
 529 *
 530 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
 531 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
 532 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
 533 *
 534 * We must put the instrumentation before the jumps so that it is always
 535 * executed. We don't have to put the instrumentation before the INCEIP
 536 * (it could go after) but we do so for consistency.
 537 *
 538 * Junconds are always the last instruction in a basic block. Jconds are
 539 * always the 2nd last, and must be followed by a Juncond. We check this
 540 * with various assertions.
 541 *
 542 * Note that in VG_(disBB) we patched the `extra4b' field of the first
 543 * occurring JMP in a block with the size of its x86 instruction. This
 544 * is used now.
 545 *
 546 * Note that we don't have to treat JIFZ specially; unlike JMPs, JIFZ
 547 * occurs in the middle of a BB and gets an INCEIP after it.
 548 *
 549 * The instrumentation is just a call to the appropriate helper function,
 550 * passing it the address of the instruction's CC.
 551 */
552 if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
553
554 switch (u_in->opcode) {
555
556 case INCEIP:
557 instr_size = u_in->val1;
558 goto case_for_end_of_x86_instr;
559
560 case JMP:
561 if (u_in->cond == CondAlways) {
562 vg_assert(i+1 == cb_in->used);
563
564 /* Don't instrument if previous instr was a Jcond. */
565 if (prev_instr_was_Jcond) {
566 vg_assert(0 == u_in->extra4b);
567 VG_(copyUInstr)(cb, u_in);
568 break;
569 }
/* prev_instr_was_Jcond is already False here: the True case left the
 * switch via the break just above. */
570 prev_instr_was_Jcond = False;
571
572 } else {
573 vg_assert(i+2 == cb_in->used); /* 2nd last instr in block */
574 prev_instr_was_Jcond = True;
575 }
576
577 /* Ah, the first JMP... instrument, please. */
578 instr_size = u_in->extra4b;
579 goto case_for_end_of_x86_instr;
580
581 /* Shared code that is executed at the end of an x86 translation
 582 * block, marked by either an INCEIP or an unconditional JMP. */
583 case_for_end_of_x86_instr:
584
/* IS_(read) / IS_(write): true when a LOAD/FPU_R, respectively a
 * STORE/FPU_W, has been seen since the previous x86 instruction ended. */
585#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
586
587 /* Initialise the CC in the BBCC array appropriately if it hasn't
 588 * been initialised before.
 589 * Then call appropriate sim function, passing it the CC address.
 590 * Note that CALLM_S/CALL_E aren't required here; by this point,
 591 * the checking related to them has already happened. */
592 stack_used = 0;
593
594 vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
595 vg_assert(0 != instr_addr);
596
597 /* Save the caller-save registers before we push our args */
598 uInstr1(cb, PUSH, 4, RealReg, R_EAX);
599 uInstr1(cb, PUSH, 4, RealReg, R_ECX);
600 uInstr1(cb, PUSH, 4, RealReg, R_EDX);
601
602 if (!IS_(read) && !IS_(write)) {
603 iCC* CC_ptr = (iCC*)(BBCC_ptr);
604 vg_assert(INVALID_DATA_SIZE == data_size);
605 vg_assert(INVALID_TEMPREG == t_read_addr &&
606 INVALID_TEMPREG == t_write_addr);
607 CC_size = sizeof(iCC);
608 if (!BB_seen_before)
609 init_iCC(CC_ptr, instr_addr, instr_size);
610
611 helper = VGOFF_(cachesim_log_non_mem_instr);
612
613 } else {
614 CC_type X_CC;
615 idCC* CC_ptr = (idCC*)(BBCC_ptr);
616
617 vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
618 8 == data_size || 10 == data_size);
619
620 CC_size = sizeof(idCC);
621 helper = VGOFF_(cachesim_log_mem_instr);
622
623 if (IS_(read) && !IS_(write)) {
624 X_CC = READ_CC;
625 vg_assert(INVALID_TEMPREG != t_read_addr &&
626 INVALID_TEMPREG == t_write_addr);
627 t_data_addr = t_read_addr;
628
629 } else if (!IS_(read) && IS_(write)) {
630 X_CC = WRITE_CC;
631 vg_assert(INVALID_TEMPREG == t_read_addr &&
632 INVALID_TEMPREG != t_write_addr);
633 t_data_addr = t_write_addr;
634
635 } else {
636 vg_assert(IS_(read) && IS_(write));
637 X_CC = MOD_CC;
638 vg_assert(INVALID_TEMPREG != t_read_addr &&
639 INVALID_TEMPREG != t_write_addr);
/* For a modify, the read address is the one handed to the helper. */
640 t_data_addr = t_read_addr;
641 }
642
643 if (!BB_seen_before)
644 init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
645
646 /* 2nd arg: data addr */
647 uInstr1(cb, PUSH, 4, TempReg, t_data_addr);
648 stack_used += 4;
649 }
650#undef IS_
651
652 /* 1st arg: CC addr */
653 t_CC_addr = newTemp(cb);
654 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
655 uLiteral(cb, BBCC_ptr);
656 uInstr1(cb, PUSH, 4, TempReg, t_CC_addr);
657 stack_used += 4;
658
659 /* Call function and return. */
660 uInstr1(cb, CALLM, 0, Lit16, helper);
661 uInstr1(cb, CLEAR, 0, Lit16, stack_used);
662
663 /* Restore the caller-save registers now the call is done */
664 uInstr1(cb, POP, 4, RealReg, R_EDX);
665 uInstr1(cb, POP, 4, RealReg, R_ECX);
666 uInstr1(cb, POP, 4, RealReg, R_EAX);
667
668 VG_(copyUInstr)(cb, u_in);
669
670 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
671 BBCC_ptr += CC_size;
672 instr_addr += instr_size;
673 t_CC_addr = t_read_addr = t_write_addr =
674 t_data_addr = INVALID_TEMPREG;
675 data_size = INVALID_DATA_SIZE;
676 break;
677
678
679 /* For memory-ref instrs, copy the data_addr into a temporary to be
 680 * passed to the cachesim_log_function at the end of the instruction.
 681 */
/* LOAD takes its address from val1; FPU_R, STORE and FPU_W take theirs
 * from val2. */
682 case LOAD:
683 t_read_addr = newTemp(cb);
684 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
685 data_size = u_in->size;
686 VG_(copyUInstr)(cb, u_in);
687 break;
688
689 case FPU_R:
690 t_read_addr = newTemp(cb);
691 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
692 data_size = u_in->size;
693 VG_(copyUInstr)(cb, u_in);
694 break;
695
696 /* Note that we must set t_write_addr even for mod instructions;
 697 * that's how the code above determines whether it does a write;
 698 * without it, it would think a mod instruction is a read.
 699 * As for the MOV, if it's a mod instruction it's redundant, but it's
 700 * not expensive and mod instructions are rare anyway. */
701 case STORE:
702 case FPU_W:
703 t_write_addr = newTemp(cb);
704 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
705 data_size = u_in->size;
706 VG_(copyUInstr)(cb, u_in);
707 break;
708
/* These three opcodes are deliberately not copied to the output. */
709 case NOP: case CALLM_E: case CALLM_S:
710 break;
711
712 default:
713 VG_(copyUInstr)(cb, u_in);
714 break;
715 }
716 }
717
718 /* Just check everything looks ok */
/* The bytes of cost centre consumed must equal the size computed when the
 * BBCC was created. */
719 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
720
721 VG_(freeCodeBlock)(cb_in);
722 return cb;
723}
724
725/*------------------------------------------------------------*/
726/*--- Cache simulation stuff ---*/
727/*------------------------------------------------------------*/
728
njn7cf0bd32002-06-08 13:36:03 +0000729#define MIN_LINE_SIZE 16
730
njn4f9c9342002-04-29 16:03:24 +0000731/* Total reads/writes/misses. Calculated during CC traversal at the end. */
732static CC Ir_total;
733static CC Dr_total;
734static CC Dw_total;
735
njn7cf0bd32002-06-08 13:36:03 +0000736/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
737/* Probably only works for Intel and AMD chips, and probably only for some of
738 * them.
739 */
740
sewardj07133bf2002-06-13 10:25:56 +0000741static __inline__ void cpuid(Int n, Int *a, Int *b, Int *c, Int *d)
njn7cf0bd32002-06-08 13:36:03 +0000742{
743 __asm__ __volatile__ (
744 "cpuid"
745 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
746 : "0" (n) /* input */
747 );
748}
749
sewardj07133bf2002-06-13 10:25:56 +0000750static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +0000751{
752 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000753 "warning: Pentium with %d K micro_op instruction trace cache",
754 actual_size);
njn7cf0bd32002-06-08 13:36:03 +0000755 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000756 " Simulating a %d KB cache with %d B lines",
757 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +0000758}
759
760/* Intel method is truly wretched. We have to do an insane indexing into an
761 * array of pre-defined configurations for various parts of the memory
762 * hierarchy.
763 */
764static
sewardj07133bf2002-06-13 10:25:56 +0000765Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000766{
sewardj07133bf2002-06-13 10:25:56 +0000767 UChar info[16];
768 Int i, trials;
njn7cf0bd32002-06-08 13:36:03 +0000769
770 if (level < 2) {
771 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000772 "warning: CPUID level < 2 for Intel processor (%d)",
773 level);
njn7cf0bd32002-06-08 13:36:03 +0000774 return -1;
775 }
776
sewardj07133bf2002-06-13 10:25:56 +0000777 cpuid(2, (Int*)&info[0], (Int*)&info[4],
778 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +0000779 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
780 info[0] = 0x0; /* reset AL */
781
782 if (0 != trials) {
783 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000784 "warning: non-zero CPUID trials for Intel processor (%d)",
785 trials);
njn7cf0bd32002-06-08 13:36:03 +0000786 return -1;
787 }
788
789 for (i = 0; i < 16; i++) {
790
791 switch (info[i]) {
792
793 case 0x0: /* ignore zeros */
794 break;
795
796 case 0x01: case 0x02: case 0x03: case 0x04: /* TLB info, ignore */
797 case 0x90: case 0x96: case 0x9b:
798 break;
799
800 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
801 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
802
803 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
804 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
805
806 case 0x22: case 0x23: case 0x25: case 0x29:
807 case 0x88: case 0x89: case 0x8a:
sewardj07133bf2002-06-13 10:25:56 +0000808 VG_(message)(Vg_DebugMsg,
809 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +0000810 break;
811
812 case 0x40:
813 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000814 "warning: L2 cache not installed, ignore L2 results.");
njn7cf0bd32002-06-08 13:36:03 +0000815 break;
816
817 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; break;
818 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; break;
819 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; break;
820 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; break;
821 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; break;
822
823 /* These are sectored, whatever that means */
824 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
825 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
826 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
827
828 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
829 * conversion to byte size is a total guess; treat the 12K and 16K
830 * cases the same since the cache byte size must be a power of two for
831 * everything to work!. Also guessing 32 bytes for the line size...
832 */
833 case 0x70: /* 12K micro-ops, 8-way */
834 *I1c = (cache_t) { 16, 8, 32 };
835 micro_ops_warn(12, 16, 32);
836 break;
837 case 0x71: /* 16K micro-ops, 8-way */
838 *I1c = (cache_t) { 16, 8, 32 };
839 micro_ops_warn(16, 16, 32);
840 break;
841 case 0x72: /* 32K micro-ops, 8-way */
842 *I1c = (cache_t) { 32, 8, 32 };
843 micro_ops_warn(32, 32, 32);
844 break;
845
846 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; break; /* sectored */
847 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; break; /* sectored */
848 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; break; /* sectored */
849 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; break; /* sectored */
850
851 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; break;
852 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; break;
853 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; break;
854 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; break;
855 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; break;
856
857 default:
858 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000859 "warning: Unknown Intel cache config value "
860 "(0x%x), ignoring\n", info[i]);
njn7cf0bd32002-06-08 13:36:03 +0000861 break;
862 }
863 }
864 return 0;
865}
866
867/* AMD method is straightforward, just extract appropriate bits from the
868 * result registers.
869 *
870 * Bits, for D1 and I1:
871 * 31..24 data L1 cache size in KBs
872 * 23..16 data L1 cache associativity (FFh=full)
873 * 15.. 8 data L1 cache lines per tag
874 * 7.. 0 data L1 cache line size in bytes
875 *
876 * Bits, for L2:
877 * 31..16 unified L2 cache size in KBs
878 * 15..12 unified L2 cache associativity (0=off, FFh=full)
879 * 11.. 8 unified L2 cache lines per tag
880 * 7.. 0 unified L2 cache line size in bytes
881 *
882 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
883 * upon this information. (Whatever that means -- njn)
884 *
885 * Returns 0 on success, non-zero on failure.
886 */
sewardj07133bf2002-06-13 10:25:56 +0000887static
888Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000889{
sewardj07133bf2002-06-13 10:25:56 +0000890 Int dummy, ext_level;
891 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +0000892
893 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
894
895 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
896 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000897 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
898 ext_level);
njn7cf0bd32002-06-08 13:36:03 +0000899 return -1;
900 }
901
902 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
903 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
904
905 D1c->size = (D1i >> 24) & 0xff;
906 D1c->assoc = (D1i >> 16) & 0xff;
907 D1c->line_size = (D1i >> 0) & 0xff;
908
909 I1c->size = (I1i >> 24) & 0xff;
910 I1c->assoc = (I1i >> 16) & 0xff;
911 I1c->line_size = (I1i >> 0) & 0xff;
912
913 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
914 L2c->assoc = (L2i >> 12) & 0xf;
915 L2c->line_size = (L2i >> 0) & 0xff;
916
917 return 0;
918}
919
/* Saved context that cpuid_SIGILL_handler() jumps back to. */
static jmp_buf cpuid_jmpbuf;

/* Signal handler: abandons whatever was executing and resumes at the point
 * where cpuid_jmpbuf was set up, making that setjmp return 1. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;     /* the signal number itself is not needed */
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
927
928static
sewardj07133bf2002-06-13 10:25:56 +0000929Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000930{
sewardj07133bf2002-06-13 10:25:56 +0000931 Int level, res, ret;
932 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +0000933 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +0000934
935 /* Install own SIGILL handler */
936 sigill_new.ksa_handler = cpuid_SIGILL_handler;
937 sigill_new.ksa_flags = 0;
938 sigill_new.ksa_restorer = NULL;
939 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
940 vg_assert(res == 0);
941
942 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
943 vg_assert(res == 0);
944
945 /* Trap for illegal instruction, in case it's a really old processor that
946 * doesn't support CPUID. */
947 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
948 cpuid(0, &level, (int*)&vendor_id[0],
949 (int*)&vendor_id[8], (int*)&vendor_id[4]);
950 vendor_id[12] = '\0';
951
952 /* Restore old SIGILL handler */
953 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
954 vg_assert(res == 0);
955
956 } else {
957 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
958
959 /* Restore old SIGILL handler */
960 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
961 vg_assert(res == 0);
962 return -1;
963 }
964
965 if (0 == level) {
966 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
967 return -1;
968 }
969
970 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
971 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
972 ret = Intel_cache_info(level, I1c, D1c, L2c);
973
974 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
975 ret = AMD_cache_info(I1c, D1c, L2c);
976
977 } else {
978 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
979 vendor_id);
980 return -1;
981 }
982
983 /* Successful! Convert sizes from KB to bytes */
984 I1c->size *= 1024;
985 D1c->size *= 1024;
986 L2c->size *= 1024;
987
988 return ret;
989}
990
991/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +0000992static
993void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +0000994{
995 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +0000996 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +0000997 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000998 "warning: %s size of %dB not a power of two; "
999 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001000 cache->size = dflt->size;
1001 }
1002
sewardj07133bf2002-06-13 10:25:56 +00001003 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001004 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001005 "warning: %s associativity of %d not a power of two; "
1006 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001007 cache->assoc = dflt->assoc;
1008 }
1009
sewardj07133bf2002-06-13 10:25:56 +00001010 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001011 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001012 "warning: %s line size of %dB not a power of two; "
1013 "defaulting to %dB",
1014 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001015 cache->line_size = dflt->line_size;
1016 }
1017
1018 /* Then check line size >= 16 -- any smaller and a single instruction could
1019 * straddle three cache lines, which breaks a simulation assertion and is
1020 * stupid anyway. */
1021 if (cache->line_size < MIN_LINE_SIZE) {
1022 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001023 "warning: %s line size of %dB too small; "
1024 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001025 cache->line_size = MIN_LINE_SIZE;
1026 }
1027
1028 /* Then check cache size > line size (causes seg faults if not). */
1029 if (cache->size <= cache->line_size) {
1030 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001031 "warning: %s cache size of %dB <= line size of %dB; "
1032 "increasing to %dB", name, cache->size, cache->line_size,
1033 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001034 cache->size = cache->line_size * 2;
1035 }
1036
1037 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1038 if (cache->assoc > (cache->size / cache->line_size)) {
1039 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001040 "warning: %s associativity > (size / line size); "
1041 "increasing size to %dB",
1042 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001043 cache->size = cache->assoc * cache->line_size;
1044 }
1045}
1046
1047/* On entry, args are undefined. Fill them with any info from the
1048 * command-line, then fill in any remaining with CPUID instruction if possible,
1049 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001050static
1051void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001052{
1053 /* Defaults are for a model 3 or 4 Athlon */
1054 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1055 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1056 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1057
1058#define CMD_LINE_DEFINED(L) \
1059 (-1 != VG_(clo_##L##_cache).size || \
1060 -1 != VG_(clo_##L##_cache).assoc || \
1061 -1 != VG_(clo_##L##_cache).line_size)
1062
sewardjb1a77a42002-07-13 13:31:20 +00001063 *I1c = VG_(clo_I1_cache);
1064 *D1c = VG_(clo_D1_cache);
1065 *L2c = VG_(clo_L2_cache);
1066
njn7cf0bd32002-06-08 13:36:03 +00001067 /* If any undefined on command-line, try CPUID */
1068 if (! CMD_LINE_DEFINED(I1) ||
1069 ! CMD_LINE_DEFINED(D1) ||
1070 ! CMD_LINE_DEFINED(L2)) {
1071
1072 /* Overwrite CPUID result for any cache defined on command-line */
1073 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1074
1075 if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
1076 if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
1077 if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
1078
1079 /* CPUID failed, use defaults for each undefined by command-line */
1080 } else {
1081 VG_(message)(Vg_DebugMsg,
1082 "Couldn't detect cache configuration, using one "
1083 "or more defaults ");
1084
1085 *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
1086 *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
1087 *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
1088 }
1089 }
1090#undef CMD_LINE_DEFINED
1091
1092 check_cache(I1c, &I1_dflt, "I1");
1093 check_cache(D1c, &D1_dflt, "D1");
1094 check_cache(L2c, &L2_dflt, "L2");
1095
1096 if (VG_(clo_verbosity) > 1) {
1097 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1098 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1099 I1c->size, I1c->assoc, I1c->line_size);
1100 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1101 D1c->size, D1c->assoc, D1c->line_size);
1102 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1103 L2c->size, L2c->assoc, L2c->line_size);
1104 }
1105}
1106
njn4f9c9342002-04-29 16:03:24 +00001107void VG_(init_cachesim)(void)
1108{
njn7cf0bd32002-06-08 13:36:03 +00001109 cache_t I1c, D1c, L2c;
1110
njn4f9c9342002-04-29 16:03:24 +00001111 /* Make sure the output file can be written. */
1112 Int fd = VG_(open_write)(OUT_FILE);
1113 if (-1 == fd) {
1114 fd = VG_(create_and_write)(OUT_FILE);
1115 if (-1 == fd) {
1116 file_err();
1117 }
1118 }
1119 VG_(close)(fd);
njne0ee0712002-05-03 16:41:05 +00001120
njn4f9c9342002-04-29 16:03:24 +00001121 initCC(&Ir_total);
1122 initCC(&Dr_total);
1123 initCC(&Dw_total);
1124
njn4294fd42002-06-05 14:41:10 +00001125 initCC(&Ir_discards);
1126 initCC(&Dr_discards);
1127 initCC(&Dw_discards);
1128
njn7cf0bd32002-06-08 13:36:03 +00001129 get_caches(&I1c, &D1c, &L2c);
1130
1131 cachesim_I1_initcache(I1c);
1132 //cachesim_I1_initcache();
1133 cachesim_D1_initcache(D1c);
1134 //cachesim_D1_initcache();
1135 cachesim_L2_initcache(L2c);
1136 //cachesim_L2_initcache();
njn4f9c9342002-04-29 16:03:24 +00001137
1138 init_BBCC_table();
1139}
1140
1141void VG_(cachesim_log_non_mem_instr)(iCC* cc)
1142{
1143 //VG_(printf)("sim I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
1144 // cc, cc->instr_addr, cc->instr_size)
1145 VGP_PUSHCC(VgpCacheSimulate);
1146 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
1147 cc->I.a++;
1148 VGP_POPCC;
1149}
1150
1151void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
1152{
1153 //VG_(printf)("sim D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
1154 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
1155 VGP_PUSHCC(VgpCacheSimulate);
1156 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
1157 cc->I.a++;
1158
1159 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
1160 cc->D.a++;
1161 VGP_POPCC;
1162}
1163
1164/*------------------------------------------------------------*/
1165/*--- Printing of output file and summary stats ---*/
1166/*------------------------------------------------------------*/
1167
njn4f9c9342002-04-29 16:03:24 +00001168static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1169 Char *first_instr_fn)
1170{
1171 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001172 Char buf[BUF_LEN], curr_file[BUF_LEN],
1173 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001174 UInt line_num;
1175
1176 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1177
njne0ee0712002-05-03 16:41:05 +00001178 /* Mark start of basic block in output, just to ease debugging */
1179 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001180
1181 VG_(strcpy)(curr_file, first_instr_fl);
1182
1183 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1184
1185 /* We pretend the CC is an iCC for getting the tag. This is ok
1186 * because both CC types have tag as their first byte. Once we know
1187 * the type, we can cast and act appropriately. */
1188
1189 Char fl_buf[FILENAME_LEN];
1190 Char fn_buf[FN_NAME_LEN];
1191
njne0ee0712002-05-03 16:41:05 +00001192 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001193 switch ( ((iCC*)BBCC_ptr)->tag ) {
1194
njn4f9c9342002-04-29 16:03:24 +00001195 case INSTR_CC:
njne0ee0712002-05-03 16:41:05 +00001196 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1197 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001198 ADD_CC_TO(iCC, I, Ir_total);
1199 BBCC_ptr += sizeof(iCC);
1200 break;
1201
1202 case READ_CC:
1203 case MOD_CC:
njne0ee0712002-05-03 16:41:05 +00001204 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1205 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001206 ADD_CC_TO(idCC, I, Ir_total);
1207 ADD_CC_TO(idCC, D, Dr_total);
1208 BBCC_ptr += sizeof(idCC);
1209 break;
1210
1211 case WRITE_CC:
njne0ee0712002-05-03 16:41:05 +00001212 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1213 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001214 ADD_CC_TO(idCC, I, Ir_total);
1215 ADD_CC_TO(idCC, D, Dw_total);
1216 BBCC_ptr += sizeof(idCC);
1217 break;
1218
njn4f9c9342002-04-29 16:03:24 +00001219 default:
1220 VG_(panic)("Unknown CC type in fprint_BBCC()\n");
1221 break;
1222 }
1223 distinct_instrs++;
1224
njne0ee0712002-05-03 16:41:05 +00001225 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1226
1227 /* Allow for filename switching in the middle of a BB; if this happens,
1228 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001229 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001230 VG_(strcpy)(curr_file, fl_buf);
1231 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1232 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1233 }
1234
njn4f9c9342002-04-29 16:03:24 +00001235 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001236 * first instruction in the BB, print warning. */
sewardj18d75132002-05-16 11:06:21 +00001237 if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001238 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001239 VG_(printf)(" filenames: BB:%s, instr:%s;"
1240 " fn_names: BB:%s, instr:%s;"
1241 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001242 first_instr_fl, fl_buf,
1243 first_instr_fn, fn_buf,
1244 line_num);
1245 }
1246
njne0ee0712002-05-03 16:41:05 +00001247 VG_(sprintf)(lbuf, "%u ", line_num);
1248 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1249 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001250 }
1251 /* If we switched filenames in the middle of the BB without switching back,
1252 * switch back now because the subsequent BB may be relying on falling under
1253 * the original file name. */
1254 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1255 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1256 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1257 }
njne0ee0712002-05-03 16:41:05 +00001258
1259 /* Mark end of basic block */
1260 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001261
1262 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
1263}
1264
1265static void fprint_BBCC_table_and_calc_totals(Int client_argc,
1266 Char** client_argv)
1267{
1268 Int fd;
1269 Char buf[BUF_LEN];
1270 file_node *curr_file_node;
1271 fn_node *curr_fn_node;
1272 BBCC *curr_BBCC;
1273 Int i,j,k;
1274
1275 VGP_PUSHCC(VgpCacheDump);
1276 fd = VG_(open_write)(OUT_FILE);
1277 if (-1 == fd) { file_err(); }
1278
1279 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001280 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1281 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1282 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1283 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1284 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1285 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001286
1287 /* "cmd:" line */
1288 VG_(strcpy)(buf, "cmd:");
1289 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1290 for (i = 0; i < client_argc; i++) {
1291 VG_(sprintf)(buf, " %s", client_argv[i]);
1292 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1293 }
1294 /* "events:" line */
1295 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1296 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1297
1298 /* Six loops here: three for the hash table arrays, and three for the
1299 * chains hanging off the hash table arrays. */
1300 for (i = 0; i < N_FILE_ENTRIES; i++) {
1301 curr_file_node = BBCC_table[i];
1302 while (curr_file_node != NULL) {
1303 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1304 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1305
1306 for (j = 0; j < N_FN_ENTRIES; j++) {
1307 curr_fn_node = curr_file_node->fns[j];
1308 while (curr_fn_node != NULL) {
1309 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1310 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1311
1312 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1313 curr_BBCC = curr_fn_node->BBCCs[k];
1314 while (curr_BBCC != NULL) {
1315 fprint_BBCC(fd, curr_BBCC,
1316
1317 curr_file_node->filename,
1318 curr_fn_node->fn_name);
1319
1320 curr_BBCC = curr_BBCC->next;
1321 }
1322 }
1323 curr_fn_node = curr_fn_node->next;
1324 }
1325 }
1326 curr_file_node = curr_file_node->next;
1327 }
1328 }
1329
njn4294fd42002-06-05 14:41:10 +00001330 /* Print stats from any discarded basic blocks */
1331 if (0 != Ir_discards.a) {
1332
1333 VG_(sprintf)(buf, "fl=(discarded)\n");
1334 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1335 VG_(sprintf)(buf, "fn=(discarded)\n");
1336 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1337
1338 /* Use 0 as line number */
1339 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1340 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1341 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1342 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1343 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1344
1345 Ir_total.a += Ir_discards.a;
1346 Ir_total.m1 += Ir_discards.m1;
1347 Ir_total.m2 += Ir_discards.m2;
1348 Dr_total.a += Dr_discards.a;
1349 Dr_total.m1 += Dr_discards.m1;
1350 Dr_total.m2 += Dr_discards.m2;
1351 Dw_total.a += Dw_discards.a;
1352 Dw_total.m1 += Dw_discards.m1;
1353 Dw_total.m2 += Dw_discards.m2;
1354 }
1355
njn4f9c9342002-04-29 16:03:24 +00001356 /* Summary stats must come after rest of table, since we calculate them
1357 * during traversal. */
1358 VG_(sprintf)(buf, "summary: "
1359 "%llu %llu %llu "
1360 "%llu %llu %llu "
1361 "%llu %llu %llu\n",
1362 Ir_total.a, Ir_total.m1, Ir_total.m2,
1363 Dr_total.a, Dr_total.m1, Dr_total.m2,
1364 Dw_total.a, Dw_total.m1, Dw_total.m2);
1365 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1366 VG_(close)(fd);
1367}
1368
1369/* Adds commas to ULong, right justifying in a field field_width wide, returns
1370 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001371static
njn4f9c9342002-04-29 16:03:24 +00001372Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1373{
1374 int len, n_commas, i, j, new_len, space;
1375
1376 VG_(sprintf)(buf, "%lu", n);
1377 len = VG_(strlen)(buf);
1378 n_commas = (len - 1) / 3;
1379 new_len = len + n_commas;
1380 space = field_width - new_len;
1381
1382 /* Allow for printing a number in a field_width smaller than it's size */
1383 if (space < 0) space = 0;
1384
1385 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1386 * of three. */
1387 for (j = -1, i = len ; i >= 0; i--) {
1388 buf[i + n_commas + space] = buf[i];
1389
1390 if (3 == ++j) {
1391 j = 0;
1392 n_commas--;
1393 buf[i + n_commas + space] = ',';
1394 }
1395 }
1396 /* Right justify in field. */
1397 for (i = 0; i < space; i++) buf[i] = ' ';
1398 return new_len;
1399}
1400
sewardj4f29ddf2002-05-03 22:29:04 +00001401static
njn4f9c9342002-04-29 16:03:24 +00001402void percentify(Int n, Int pow, Int field_width, char buf[])
1403{
1404 int i, len, space;
1405
1406 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1407 len = VG_(strlen)(buf);
1408 space = field_width - len;
1409 i = len;
1410
1411 /* Right justify in field */
1412 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1413 for (i = 0; i < space; i++) buf[i] = ' ';
1414}
1415
njn7cf0bd32002-06-08 13:36:03 +00001416void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
njn4f9c9342002-04-29 16:03:24 +00001417{
1418 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001419 ULong L2_total_m, L2_total_mr, L2_total_mw,
1420 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001421 char buf1[RESULTS_BUF_LEN],
1422 buf2[RESULTS_BUF_LEN],
1423 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001424 Int l1, l2, l3;
1425 Int p;
1426
1427 fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
1428
njn7cf0bd32002-06-08 13:36:03 +00001429 if (VG_(clo_verbosity) == 0)
1430 return;
1431
njn4f9c9342002-04-29 16:03:24 +00001432 /* I cache results. Use the I_refs value to determine the first column
1433 * width. */
1434 l1 = commify(Ir_total.a, 0, buf1);
1435 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1436
1437 commify(Ir_total.m1, l1, buf1);
1438 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1439
1440 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001441 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001442
1443 p = 100;
1444
1445 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1446 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1447
1448 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1449 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1450 VG_(message)(Vg_UserMsg, "");
1451
1452 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1453 * width of columns 2 & 3. */
1454 D_total.a = Dr_total.a + Dw_total.a;
1455 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1456 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1457
njn1d021fa2002-05-02 13:56:34 +00001458 commify( D_total.a, l1, buf1);
1459 l2 = commify(Dr_total.a, 0, buf2);
1460 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001461 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1462 buf1, buf2, buf3);
1463
1464 commify( D_total.m1, l1, buf1);
1465 commify(Dr_total.m1, l2, buf2);
1466 commify(Dw_total.m1, l3, buf3);
1467 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1468 buf1, buf2, buf3);
1469
1470 commify( D_total.m2, l1, buf1);
1471 commify(Dr_total.m2, l2, buf2);
1472 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001473 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001474 buf1, buf2, buf3);
1475
1476 p = 10;
1477
1478 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1479 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1480 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1481 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1482
1483 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1484 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1485 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1486 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1487 VG_(message)(Vg_UserMsg, "");
1488
1489 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001490
1491 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1492 L2_total_r = Dr_total.m1 + Ir_total.m1;
1493 L2_total_w = Dw_total.m1;
1494 commify(L2_total, l1, buf1);
1495 commify(L2_total_r, l2, buf2);
1496 commify(L2_total_w, l3, buf3);
1497 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1498 buf1, buf2, buf3);
1499
njn4f9c9342002-04-29 16:03:24 +00001500 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1501 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1502 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001503 commify(L2_total_m, l1, buf1);
1504 commify(L2_total_mr, l2, buf2);
1505 commify(L2_total_mw, l3, buf3);
1506 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1507 buf1, buf2, buf3);
1508
1509 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1510 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1511 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1512 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1513
1514
1515 /* Hash table stats */
1516 if (VG_(clo_verbosity) > 1) {
1517 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1518 file_line_debug_BBs + no_debug_BBs;
1519
1520 VG_(message)(Vg_DebugMsg, "");
1521 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1522 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1523 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1524 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1525 full_debug_BBs * 100 / BB_lookups,
1526 full_debug_BBs);
1527 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1528 file_line_debug_BBs * 100 / BB_lookups,
1529 file_line_debug_BBs);
1530 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1531 fn_name_debug_BBs * 100 / BB_lookups,
1532 fn_name_debug_BBs);
1533 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1534 no_debug_BBs * 100 / BB_lookups,
1535 no_debug_BBs);
1536 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1537 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1538 }
1539 VGP_POPCC;
1540}
1541
sewardj18d75132002-05-16 11:06:21 +00001542
njn4294fd42002-06-05 14:41:10 +00001543/* Called when a translation is invalidated due to self-modifying code or
1544 * unloaded of a shared object.
1545 *
1546 * Finds the BBCC in the table, removes it, adds the counts to the discard
1547 * counters, and then frees the BBCC. */
sewardj18d75132002-05-16 11:06:21 +00001548void VG_(cachesim_notify_discard) ( TTEntry* tte )
1549{
njn4294fd42002-06-05 14:41:10 +00001550 BBCC *BBCC_node;
1551 Addr BBCC_ptr0, BBCC_ptr;
1552 Bool BB_seen_before;
1553
sewardj83205b32002-06-14 11:08:07 +00001554 if (0)
njn4294fd42002-06-05 14:41:10 +00001555 VG_(printf)( "cachesim_notify_discard: %p for %d\n",
1556 tte->orig_addr, (Int)tte->orig_size);
1557
1558 /* 2nd arg won't be used since BB should have been seen before (assertions
1559 * ensure this). */
1560 BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
1561 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1562
1563 vg_assert(True == BB_seen_before);
1564
1565 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1566
1567 /* We pretend the CC is an iCC for getting the tag. This is ok
1568 * because both CC types have tag as their first byte. Once we know
1569 * the type, we can cast and act appropriately. */
1570
1571 switch ( ((iCC*)BBCC_ptr)->tag ) {
1572
1573 case INSTR_CC:
1574 ADD_CC_TO(iCC, I, Ir_discards);
1575 BBCC_ptr += sizeof(iCC);
1576 break;
1577
1578 case READ_CC:
1579 case MOD_CC:
1580 ADD_CC_TO(idCC, I, Ir_discards);
1581 ADD_CC_TO(idCC, D, Dr_discards);
1582 BBCC_ptr += sizeof(idCC);
1583 break;
1584
1585 case WRITE_CC:
1586 ADD_CC_TO(idCC, I, Ir_discards);
1587 ADD_CC_TO(idCC, D, Dw_discards);
1588 BBCC_ptr += sizeof(idCC);
1589 break;
1590
1591 default:
1592 VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
1593 break;
1594 }
1595 }
1596
1597 VG_(free)(VG_AR_PRIVATE, BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001598}
1599
1600/*--------------------------------------------------------------------*/
1601/*--- end vg_cachesim.c ---*/
1602/*--------------------------------------------------------------------*/