blob: 86e72754dd977ac9588bae53edd3713bd663d879 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
3/*--- The cache simulation framework: instrumentation, recording ---*/
4/*--- and results printing. ---*/
5/*--- vg_cachesim.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of Valgrind, an x86 protected-mode emulator
10 designed for debugging and profiling binaries on x86-Unixes.
11
sewardj3c23d432002-06-01 23:43:49 +000012 Copyright (C) 2002 Nicholas Nethercote
13 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file LICENSE.
31*/
32
njn4f9c9342002-04-29 16:03:24 +000033#include "vg_include.h"
34
35#include "vg_cachesim_L2.c"
36#include "vg_cachesim_I1.c"
37#include "vg_cachesim_D1.c"
38
39
40/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
njn7cf0bd32002-06-08 13:36:03 +000041#define MAX_x86_INSTR_SIZE 16
njn4f9c9342002-04-29 16:03:24 +000042
43/* Size of various buffers used for storing strings */
njn7cf0bd32002-06-08 13:36:03 +000044#define FILENAME_LEN 256
45#define FN_NAME_LEN 256
46#define BUF_LEN 512
47#define COMMIFY_BUF_LEN 128
48#define RESULTS_BUF_LEN 128
49#define LINE_BUF_LEN 64
50
sewardj07133bf2002-06-13 10:25:56 +000051
njn7cf0bd32002-06-08 13:36:03 +000052/*------------------------------------------------------------*/
53/*--- Generic utility stuff ---*/
54/*------------------------------------------------------------*/
55
sewardj07133bf2002-06-13 10:25:56 +000056Int VG_(log2) ( Int x )
njn7cf0bd32002-06-08 13:36:03 +000057{
sewardj07133bf2002-06-13 10:25:56 +000058 Int i;
njn7cf0bd32002-06-08 13:36:03 +000059 /* Any more than 32 and we overflow anyway... */
60 for (i = 0; i < 32; i++) {
61 if (1 << i == x) return i;
62 }
63 return -1;
64}
njn4f9c9342002-04-29 16:03:24 +000065
sewardj07133bf2002-06-13 10:25:56 +000066
njn4f9c9342002-04-29 16:03:24 +000067/*------------------------------------------------------------*/
68/*--- Output file related stuff ---*/
69/*------------------------------------------------------------*/
70
71#define OUT_FILE "cachegrind.out"
72
73static void file_err()
74{
75 VG_(message)(Vg_UserMsg,
njn7cf0bd32002-06-08 13:36:03 +000076 "error: can't open cache simulation output file `%s'",
njn4f9c9342002-04-29 16:03:24 +000077 OUT_FILE );
78 VG_(exit)(1);
79}
80
81/*------------------------------------------------------------*/
82/*--- Cost center types, operations ---*/
83/*------------------------------------------------------------*/
84
85typedef struct _CC CC;
86struct _CC {
87 ULong a;
88 ULong m1;
89 ULong m2;
90};
91
92static __inline__ void initCC(CC* cc) {
93 cc->a = 0;
94 cc->m1 = 0;
95 cc->m2 = 0;
96}
97
/* Kind of cost centre; the value is stored in the 'tag' byte of an
 * iCC (INSTR_CC) or idCC (READ_CC / WRITE_CC / MOD_CC). */
njn4f9c9342002-04-29 16:03:24 +000098typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
99
100/* Instruction-level cost-centres. The typedefs for these structs are in
101 * vg_include.c
102 *
103 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +0000104 *
njne0ee0712002-05-03 16:41:05 +0000105 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +0000106 */
/* Cost centre for an x86 instruction with no data access: only
 * instruction (I) counters are kept. */
107struct _iCC {
108 /* word 1 */
109 UChar tag;
110 UChar instr_size;
njne0ee0712002-05-03 16:41:05 +0000111 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000112
113 /* words 2+ */
114 Addr instr_addr;
115 CC I;
116};
117
/* Cost centre for an x86 instruction that reads and/or writes memory:
 * instruction (I) counters plus data (D) counters, and the size of the
 * data access. */
118struct _idCC {
119 /* word 1 */
120 UChar tag;
121 UChar instr_size;
122 UChar data_size;
njne0ee0712002-05-03 16:41:05 +0000123 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000124
125 /* words 2+ */
126 Addr instr_addr;
127 CC I;
128 CC D;
129};
130
131static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
132{
133 cc->tag = INSTR_CC;
134 cc->instr_size = instr_size;
135 cc->instr_addr = instr_addr;
136 initCC(&cc->I);
137}
138
139static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
140 UInt instr_size, UInt data_size)
141{
142 cc->tag = X_CC;
143 cc->instr_size = instr_size;
144 cc->data_size = data_size;
145 cc->instr_addr = instr_addr;
146 initCC(&cc->I);
147 initCC(&cc->D);
148}
149
/* Add the three counters (a, m1, m2) of member `cc` of the cost centre at
 * BBCC_ptr -- viewed as a pointer to `CC_type` -- into `total`.
 *
 * BBCC_ptr is not a macro parameter: it is picked up from the scope in
 * which the macro is used.  The macro parameter named CC_type is a struct
 * type name here and shadows the CC_type enum typedef inside the macro.
 *
 * NOTE(review): the expansion is three separate statements with no
 * do { } while (0) wrapper, so it must not be used as the sole body of an
 * unbraced if/else/loop. */
njn4294fd42002-06-05 14:41:10 +0000150#define ADD_CC_TO(CC_type, cc, total) \
151 total.a += ((CC_type*)BBCC_ptr)->cc.a; \
152 total.m1 += ((CC_type*)BBCC_ptr)->cc.m1; \
153 total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
154
njn95114da2002-06-05 09:39:31 +0000155/* If 1, address of each instruction is printed as a comment after its counts
156 * in cachegrind.out */
157#define PRINT_INSTR_ADDRS 0
158
njne0ee0712002-05-03 16:41:05 +0000159static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000160{
njn95114da2002-06-05 09:39:31 +0000161#if PRINT_INSTR_ADDRS
162 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
163 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
164#else
njne0ee0712002-05-03 16:41:05 +0000165 VG_(sprintf)(buf, "%llu %llu %llu\n",
166 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000167#endif
njn4f9c9342002-04-29 16:03:24 +0000168}
169
njne0ee0712002-05-03 16:41:05 +0000170static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000171{
njn95114da2002-06-05 09:39:31 +0000172#if PRINT_INSTR_ADDRS
173 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
174 cc->I.a, cc->I.m1, cc->I.m2,
175 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
176#else
njne0ee0712002-05-03 16:41:05 +0000177 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
178 cc->I.a, cc->I.m1, cc->I.m2,
179 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000180#endif
njn4f9c9342002-04-29 16:03:24 +0000181}
182
njne0ee0712002-05-03 16:41:05 +0000183static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000184{
njn95114da2002-06-05 09:39:31 +0000185#if PRINT_INSTR_ADDRS
186 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
187 cc->I.a, cc->I.m1, cc->I.m2,
188 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
189#else
njne0ee0712002-05-03 16:41:05 +0000190 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
191 cc->I.a, cc->I.m1, cc->I.m2,
192 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000193#endif
njn4f9c9342002-04-29 16:03:24 +0000194}
195
196/*------------------------------------------------------------*/
197/*--- BBCC hash table stuff ---*/
198/*------------------------------------------------------------*/
199
200/* The table of BBCCs is of the form hash(filename, hash(fn_name,
201 * hash(BBCCs))). Each hash table is separately chained. The sizes below work
202 * fairly well for Konqueror. */
203
204#define N_FILE_ENTRIES 251
205#define N_FN_ENTRIES 53
206#define N_BBCC_ENTRIES 37
207
208/* The cost centres for a basic block are stored in a contiguous array.
209 * They are distinguishable by their tag field. */
/* Per-basic-block record: the block's original address, the byte size of
 * the trailing cost-centre array, and the link to the next BBCC in the
 * same hash chain.  The cost centres themselves live in `array`, which is
 * allocated together with the struct (see new_BBCC). */
210typedef struct _BBCC BBCC;
211struct _BBCC {
212 Addr orig_addr;
213 UInt array_size; /* byte-size of variable length array */
214 BBCC* next;
215 Addr array[0]; /* variable length array */
216};
217
/* Per-function node: owns a copy of the function name and a hash table of
 * BBCC chains indexed by block address modulo N_BBCC_ENTRIES; `next`
 * links nodes that share a bucket in the owning file_node. */
218typedef struct _fn_node fn_node;
219struct _fn_node {
220 Char* fn_name;
221 BBCC* BBCCs[N_BBCC_ENTRIES];
222 fn_node* next;
223};
224
/* Per-file node: owns a copy of the file name and a hash table of fn_node
 * chains indexed by hashed function name; `next` links nodes that share a
 * bucket in BBCC_table. */
225typedef struct _file_node file_node;
226struct _file_node {
227 Char* filename;
228 fn_node* fns[N_FN_ENTRIES];
229 file_node* next;
230};
231
232/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000233static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000234
sewardj4f29ddf2002-05-03 22:29:04 +0000235static Int distinct_files = 0;
236static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000237
sewardj4f29ddf2002-05-03 22:29:04 +0000238static Int distinct_instrs = 0;
239static Int full_debug_BBs = 0;
240static Int file_line_debug_BBs = 0;
241static Int fn_name_debug_BBs = 0;
242static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000243
sewardj4f29ddf2002-05-03 22:29:04 +0000244static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000245
njn4294fd42002-06-05 14:41:10 +0000246static CC Ir_discards;
247static CC Dr_discards;
248static CC Dw_discards;
249
njn4f9c9342002-04-29 16:03:24 +0000250static void init_BBCC_table()
251{
252 Int i;
253 for (i = 0; i < N_FILE_ENTRIES; i++)
254 BBCC_table[i] = NULL;
255}
256
njne0ee0712002-05-03 16:41:05 +0000257static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
258 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000259{
njn4f9c9342002-04-29 16:03:24 +0000260 Bool found1, found2, no_demangle = False;
261
262 found1 = VG_(what_line_is_this)(instr_addr, filename,
njne0ee0712002-05-03 16:41:05 +0000263 FILENAME_LEN, line_num);
njn4f9c9342002-04-29 16:03:24 +0000264 found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
265
266 if (!found1 && !found2) {
267 no_debug_BBs++;
268 VG_(strcpy)(filename, "???");
269 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000270 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000271
272 } else if ( found1 && found2) {
273 full_debug_BBs++;
274
275 } else if ( found1 && !found2) {
276 file_line_debug_BBs++;
277 VG_(strcpy)(fn_name, "???");
278
279 } else /*(!found1 && found2)*/ {
280 fn_name_debug_BBs++;
281 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000282 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000283 }
284}
285
286/* Forward declaration. */
287static Int compute_BBCC_array_size(UCodeBlock* cb);
288
289static __inline__
290file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
291{
292 Int i;
293 file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
294 new->filename = VG_(strdup)(VG_AR_PRIVATE, filename);
295 for (i = 0; i < N_FN_ENTRIES; i++) {
296 new->fns[i] = NULL;
297 }
298 new->next = next;
299 return new;
300}
301
302static __inline__
303fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
304{
305 Int i;
306 fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
307 new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
308 for (i = 0; i < N_BBCC_ENTRIES; i++) {
309 new->BBCCs[i] = NULL;
310 }
311 new->next = next;
312 return new;
313}
314
315static __inline__
316BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
317{
318 Int BBCC_array_size = compute_BBCC_array_size(cb);
319 BBCC* new;
320
321 new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
322 new->orig_addr = bb_orig_addr;
323 new->array_size = BBCC_array_size;
324 new->next = next;
325
326 return new;
327}
328
329#define HASH_CONSTANT 256
330
331static UInt hash(Char *s, UInt table_size)
332{
333 int hash_value = 0;
334 for ( ; *s; s++)
335 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
336 return hash_value;
337}
338
339/* Do a three step traversal: by filename, then fn_name, then instr_addr.
340 * In all cases prepends new nodes to their chain. Returns a pointer to the
341 * cost centre. Also sets BB_seen_before by reference.
342 */
343static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000344 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000345{
346 file_node *curr_file_node;
347 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000348 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000349 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
350 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000351 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000352
njne0ee0712002-05-03 16:41:05 +0000353 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000354
355 VGP_PUSHCC(VgpCacheGetBBCC);
356 filename_hash = hash(filename, N_FILE_ENTRIES);
357 curr_file_node = BBCC_table[filename_hash];
358 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000359 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000360 curr_file_node = curr_file_node->next;
361 }
362 if (NULL == curr_file_node) {
363 BBCC_table[filename_hash] = curr_file_node =
364 new_file_node(filename, BBCC_table[filename_hash]);
365 distinct_files++;
366 }
367
368 fnname_hash = hash(fn_name, N_FN_ENTRIES);
369 curr_fn_node = curr_file_node->fns[fnname_hash];
370 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000371 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000372 curr_fn_node = curr_fn_node->next;
373 }
374 if (NULL == curr_fn_node) {
375 curr_file_node->fns[fnname_hash] = curr_fn_node =
376 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
377 distinct_fns++;
378 }
379
380 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000381 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000382 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
383 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000384 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000385 curr_BBCC = curr_BBCC->next;
386 }
387 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000388
389 vg_assert(False == remove);
390
njn4f9c9342002-04-29 16:03:24 +0000391 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
392 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
393 *BB_seen_before = False;
394
395 } else {
396 vg_assert(bb_orig_addr == curr_BBCC->orig_addr);
397 vg_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000398 if (VG_(clo_verbosity) > 2) {
399 VG_(message)(Vg_DebugMsg,
400 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000401 }
402 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000403
404 if (True == remove) {
405 // Remove curr_BBCC from chain; it will be used and free'd by the
406 // caller.
407 *prev_BBCC_next_ptr = curr_BBCC->next;
408
409 } else {
410 BB_retranslations++;
411 }
njn4f9c9342002-04-29 16:03:24 +0000412 }
413 VGP_POPCC;
414 return curr_BBCC;
415}
416
417/*------------------------------------------------------------*/
418/*--- Cache simulation instrumentation phase ---*/
419/*------------------------------------------------------------*/
420
421#define uInstr1 VG_(newUInstr1)
422#define uInstr2 VG_(newUInstr2)
423#define uInstr3 VG_(newUInstr3)
424#define dis VG_(disassemble)
425#define uLiteral VG_(setLiteralField)
426#define newTemp VG_(getNewTemp)
427
428static Int compute_BBCC_array_size(UCodeBlock* cb)
429{
430 UInstr* u_in;
431 Int i, CC_size, BBCC_size = 0;
432 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
433
434 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
435
436 for (i = 0; i < cb->used; i++) {
sewardjfc3e5d32002-04-30 10:18:48 +0000437 /* VG_(ppUInstr)(0, &cb->instrs[i]); */
njn4f9c9342002-04-29 16:03:24 +0000438
439 u_in = &cb->instrs[i];
440 switch(u_in->opcode) {
441
442 case INCEIP:
443 goto case_for_end_of_instr;
444
445 case JMP:
446 if (u_in->cond != CondAlways) break;
447
448 goto case_for_end_of_instr;
449
450 case_for_end_of_instr:
451
452 CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W
453 ? sizeof(idCC) : sizeof(iCC));
454
455 BBCC_size += CC_size;
456 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
457 break;
458
459 case LOAD:
460 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000461 /* Also, a STORE can come after a LOAD for bts/btr/btc */
sewardjfc3e5d32002-04-30 10:18:48 +0000462 vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */
463 !is_FPU_R && !is_FPU_W);
njn4f9c9342002-04-29 16:03:24 +0000464 is_LOAD = True;
465 break;
466
467 case STORE:
468 /* Multiple STOREs are possible for 'pushal' */
469 vg_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
470 is_STORE = True;
471 break;
472
473 case FPU_R:
474 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
475 is_FPU_R = True;
476 break;
477
478 case FPU_W:
479 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
480 is_FPU_W = True;
481 break;
482
483 default:
484 break;
485 }
486 }
487
488 return BBCC_size;
489}
490
491/* Use this rather than eg. -1 because it's stored as a UInt. */
492#define INVALID_DATA_SIZE 999999
493
/* Instrument one basic block for cache simulation.
 *
 * cb_in     - the UCode for the basic block; it is freed before returning.
 * orig_addr - address of the block's first x86 instruction; used both as
 *             the BBCC lookup key and as the starting instruction address.
 *
 * A new code block is built containing cb_in's instructions plus, at the
 * end of each x86 instruction, a call to one of the two logging helpers
 * (non-memory or memory), passing the address of that instruction's cost
 * centre and, for memory instructions, the data address.  The cost centres
 * live in the block's BBCC array and are initialised here only when the
 * block has not been seen before.
 *
 * Returns the new, instrumented code block. */
494UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
495{
496 UCodeBlock* cb;
497 Int i;
498 UInstr* u_in;
499 BBCC* BBCC_node;
500 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
501 Int CC_size = -1; /* Shut gcc warnings up */
502 Addr instr_addr = orig_addr;
503 UInt instr_size, data_size = INVALID_DATA_SIZE;
504 Int helper = -1; /* Shut gcc warnings up */
505 UInt stack_used;
506 Bool BB_seen_before = False;
507 Bool prev_instr_was_Jcond = False;
508 Addr BBCC_ptr0, BBCC_ptr;
509
510 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
511 * if it's the first time it's been seen), and point to start of the
512 * BBCC array. */
njn4294fd42002-06-05 14:41:10 +0000513 BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000514 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
515
/* The instrumented code goes into a fresh block whose temp numbering
 * continues from cb_in's. */
516 cb = VG_(allocCodeBlock)();
517 cb->nextTemp = cb_in->nextTemp;
518
519 t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
520
521 for (i = 0; i < cb_in->used; i++) {
522 u_in = &cb_in->instrs[i];
523
524 //VG_(ppUInstr)(0, u_in);
525
526 /* What this is all about: we want to instrument each x86 instruction
527 * translation. The end of these are marked in three ways. The three
528 * ways, and the way we instrument them, are as follows:
529 *
530 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
531 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
532 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
533 *
534 * We must put the instrumentation before the jumps so that it is always
535 * executed. We don't have to put the instrumentation before the INCEIP
536 * (it could go after) but we do so for consistency.
537 *
538 * Junconds are always the last instruction in a basic block. Jconds are
539 * always the 2nd last, and must be followed by a Juncond. We check this
540 * with various assertions.
541 *
542 * Note that in VG_(disBB) we patched the `extra4b' field of the first
543 * occurring JMP in a block with the size of its x86 instruction. This
544 * is used now.
545 *
546 * Note that we don't have to treat JIFZ specially; unlike JMPs, JIFZ
547 * occurs in the middle of a BB and gets an INCEIP after it.
548 *
549 * The instrumentation is just a call to the appropriate helper function,
550 * passing it the address of the instruction's CC.
551 */
552 if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
553
554 switch (u_in->opcode) {
555
556 case INCEIP:
557 instr_size = u_in->val1;
558 goto case_for_end_of_x86_instr;
559
560 case JMP:
561 if (u_in->cond == CondAlways) {
562 vg_assert(i+1 == cb_in->used);
563
564 /* Don't instrument if previous instr was a Jcond. */
565 if (prev_instr_was_Jcond) {
566 vg_assert(0 == u_in->extra4b);
567 VG_(copyUInstr)(cb, u_in);
568 break;
569 }
/* Already False here: the True case left the switch just above. */
570 prev_instr_was_Jcond = False;
571
572 } else {
573 vg_assert(i+2 == cb_in->used); /* 2nd last instr in block */
574 prev_instr_was_Jcond = True;
575 }
576
577 /* Ah, the first JMP... instrument, please. */
578 instr_size = u_in->extra4b;
579 goto case_for_end_of_x86_instr;
580
581 /* Shared code that is executed at the end of an x86 translation
582 * block, marked by either an INCEIP or an unconditional JMP. */
583 case_for_end_of_x86_instr:
584
/* IS_(read) / IS_(write): has a read / write address temp been recorded
 * for the x86 instruction now ending? */
585#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
586
587 /* Initialise the CC in the BBCC array appropriately if it hasn't
588 * been initialised before.
589 * Then call appropriate sim function, passing it the CC address.
590 * Note that CALLM_S/CALLM_E aren't required here; by this point,
591 * the checking related to them has already happened. */
592 stack_used = 0;
593
594 vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
595 vg_assert(0 != instr_addr);
596
597 /* Save the caller-save registers before we push our args */
598 uInstr1(cb, PUSH, 4, RealReg, R_EAX);
599 uInstr1(cb, PUSH, 4, RealReg, R_ECX);
600 uInstr1(cb, PUSH, 4, RealReg, R_EDX);
601
/* No memory reference recorded: this instruction gets an iCC and the
 * non-memory helper, which takes the CC address only. */
602 if (!IS_(read) && !IS_(write)) {
603 iCC* CC_ptr = (iCC*)(BBCC_ptr);
604 vg_assert(INVALID_DATA_SIZE == data_size);
605 vg_assert(INVALID_TEMPREG == t_read_addr &&
606 INVALID_TEMPREG == t_write_addr);
607 CC_size = sizeof(iCC);
608 if (!BB_seen_before)
609 init_iCC(CC_ptr, instr_addr, instr_size);
610
611 helper = VGOFF_(cachesim_log_non_mem_instr);
612
613 } else {
614 CC_type X_CC;
615 idCC* CC_ptr = (idCC*)(BBCC_ptr);
616
617 vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
618 8 == data_size || 10 == data_size);
619
620 CC_size = sizeof(idCC);
621 helper = VGOFF_(cachesim_log_mem_instr);
622
623 if (IS_(read) && !IS_(write)) {
624 X_CC = READ_CC;
625 vg_assert(INVALID_TEMPREG != t_read_addr &&
626 INVALID_TEMPREG == t_write_addr);
627 t_data_addr = t_read_addr;
628
629 } else if (!IS_(read) && IS_(write)) {
630 X_CC = WRITE_CC;
631 vg_assert(INVALID_TEMPREG == t_read_addr &&
632 INVALID_TEMPREG != t_write_addr);
633 t_data_addr = t_write_addr;
634
635 } else {
636 vg_assert(IS_(read) && IS_(write));
637 X_CC = MOD_CC;
638 vg_assert(INVALID_TEMPREG != t_read_addr &&
639 INVALID_TEMPREG != t_write_addr);
/* Read-modify-write: the read address is the one handed to the helper. */
640 t_data_addr = t_read_addr;
641 }
642
643 if (!BB_seen_before)
644 init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
645
646 /* 2nd arg: data addr */
647 uInstr1(cb, PUSH, 4, TempReg, t_data_addr);
648 stack_used += 4;
649 }
650#undef IS_
651
652 /* 1st arg: CC addr */
/* The CC's address inside the BBCC array is emitted as a literal. */
653 t_CC_addr = newTemp(cb);
654 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
655 uLiteral(cb, BBCC_ptr);
656 uInstr1(cb, PUSH, 4, TempReg, t_CC_addr);
657 stack_used += 4;
658
659 /* Call function and return. */
660 uInstr1(cb, CALLM, 0, Lit16, helper);
661 uInstr1(cb, CLEAR, 0, Lit16, stack_used);
662
663 /* Restore the caller-save registers now the call is done */
664 uInstr1(cb, POP, 4, RealReg, R_EDX);
665 uInstr1(cb, POP, 4, RealReg, R_ECX);
666 uInstr1(cb, POP, 4, RealReg, R_EAX);
667
/* The end marker itself (INCEIP or JMP) follows the instrumentation. */
668 VG_(copyUInstr)(cb, u_in);
669
670 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
671 BBCC_ptr += CC_size;
672 instr_addr += instr_size;
673 t_CC_addr = t_read_addr = t_write_addr =
674 t_data_addr = INVALID_TEMPREG;
675 data_size = INVALID_DATA_SIZE;
676 break;
677
678
679 /* For memory-ref instrs, copy the data_addr into a temporary to be
680 * passed to the cachesim_log_function at the end of the instruction.
681 */
/* LOAD takes its address from val1; FPU_R, STORE and FPU_W from val2. */
682 case LOAD:
683 t_read_addr = newTemp(cb);
684 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
685 data_size = u_in->size;
686 VG_(copyUInstr)(cb, u_in);
687 break;
688
689 case FPU_R:
690 t_read_addr = newTemp(cb);
691 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
692 data_size = u_in->size;
693 VG_(copyUInstr)(cb, u_in);
694 break;
695
696 /* Note that we must set t_write_addr even for mod instructions;
697 * that's how the code above determines whether it does a write;
698 * without it, it would think a mod instruction is a read.
699 * As for the MOV, if it's a mod instruction it's redundant, but it's
700 * not expensive and mod instructions are rare anyway. */
701 case STORE:
702 case FPU_W:
703 t_write_addr = newTemp(cb);
704 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
705 data_size = u_in->size;
706 VG_(copyUInstr)(cb, u_in);
707 break;
708
/* NOP, CALLM_E and CALLM_S are not copied into the output block. */
709 case NOP: case CALLM_E: case CALLM_S:
710 break;
711
712 default:
713 VG_(copyUInstr)(cb, u_in);
714 break;
715 }
716 }
717
718 /* Just check everything looks ok */
/* i.e. the bytes walked here equal the size computed by the counting pass
 * when the BBCC was created. */
719 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
720
721 VG_(freeCodeBlock)(cb_in);
722 return cb;
723}
724
725/*------------------------------------------------------------*/
726/*--- Cache simulation stuff ---*/
727/*------------------------------------------------------------*/
728
njn7cf0bd32002-06-08 13:36:03 +0000729#define MIN_LINE_SIZE 16
730
njn4f9c9342002-04-29 16:03:24 +0000731/* Total reads/writes/misses. Calculated during CC traversal at the end. */
732static CC Ir_total;
733static CC Dr_total;
734static CC Dw_total;
735
njn7cf0bd32002-06-08 13:36:03 +0000736/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
737/* Probably only works for Intel and AMD chips, and probably only for some of
738 * them.
739 */
740
sewardj07133bf2002-06-13 10:25:56 +0000741static __inline__ void cpuid(Int n, Int *a, Int *b, Int *c, Int *d)
njn7cf0bd32002-06-08 13:36:03 +0000742{
743 __asm__ __volatile__ (
744 "cpuid"
745 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
746 : "0" (n) /* input */
747 );
748}
749
sewardj07133bf2002-06-13 10:25:56 +0000750static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +0000751{
752 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000753 "warning: Pentium with %d K micro_op instruction trace cache",
754 actual_size);
njn7cf0bd32002-06-08 13:36:03 +0000755 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000756 " Simulating a %d KB cache with %d B lines",
757 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +0000758}
759
760/* Intel method is truly wretched. We have to do an insane indexing into an
761 * array of pre-defined configurations for various parts of the memory
762 * hierarchy.
763 */
764static
sewardj07133bf2002-06-13 10:25:56 +0000765Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000766{
sewardj07133bf2002-06-13 10:25:56 +0000767 UChar info[16];
768 Int i, trials;
njn7cf0bd32002-06-08 13:36:03 +0000769
770 if (level < 2) {
771 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000772 "warning: CPUID level < 2 for Intel processor (%d)",
773 level);
njn7cf0bd32002-06-08 13:36:03 +0000774 return -1;
775 }
776
sewardj07133bf2002-06-13 10:25:56 +0000777 cpuid(2, (Int*)&info[0], (Int*)&info[4],
778 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +0000779 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
780 info[0] = 0x0; /* reset AL */
781
782 if (0 != trials) {
783 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000784 "warning: non-zero CPUID trials for Intel processor (%d)",
785 trials);
njn7cf0bd32002-06-08 13:36:03 +0000786 return -1;
787 }
788
789 for (i = 0; i < 16; i++) {
790
791 switch (info[i]) {
792
793 case 0x0: /* ignore zeros */
794 break;
795
796 case 0x01: case 0x02: case 0x03: case 0x04: /* TLB info, ignore */
797 case 0x90: case 0x96: case 0x9b:
798 break;
799
800 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
801 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
802
803 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
804 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
805
806 case 0x22: case 0x23: case 0x25: case 0x29:
807 case 0x88: case 0x89: case 0x8a:
sewardj07133bf2002-06-13 10:25:56 +0000808 VG_(message)(Vg_DebugMsg,
809 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +0000810 break;
811
812 case 0x40:
813 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000814 "warning: L2 cache not installed, ignore L2 results.");
njn7cf0bd32002-06-08 13:36:03 +0000815 break;
816
817 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; break;
818 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; break;
819 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; break;
820 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; break;
821 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; break;
822
823 /* These are sectored, whatever that means */
824 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
825 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
826 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
827
828 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
829 * conversion to byte size is a total guess; treat the 12K and 16K
830 * cases the same since the cache byte size must be a power of two for
831 * everything to work!. Also guessing 32 bytes for the line size...
832 */
833 case 0x70: /* 12K micro-ops, 8-way */
834 *I1c = (cache_t) { 16, 8, 32 };
835 micro_ops_warn(12, 16, 32);
836 break;
837 case 0x71: /* 16K micro-ops, 8-way */
838 *I1c = (cache_t) { 16, 8, 32 };
839 micro_ops_warn(16, 16, 32);
840 break;
841 case 0x72: /* 32K micro-ops, 8-way */
842 *I1c = (cache_t) { 32, 8, 32 };
843 micro_ops_warn(32, 32, 32);
844 break;
845
846 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; break; /* sectored */
847 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; break; /* sectored */
848 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; break; /* sectored */
849 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; break; /* sectored */
850
851 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; break;
852 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; break;
853 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; break;
854 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; break;
855 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; break;
856
857 default:
858 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +0000859 "warning: Unknown Intel cache config value "
860 "(0x%x), ignoring\n", info[i]);
njn7cf0bd32002-06-08 13:36:03 +0000861 break;
862 }
863 }
864 return 0;
865}
866
867/* AMD method is straightforward, just extract appropriate bits from the
868 * result registers.
869 *
870 * Bits, for D1 and I1:
871 * 31..24 data L1 cache size in KBs
872 * 23..16 data L1 cache associativity (FFh=full)
873 * 15.. 8 data L1 cache lines per tag
874 * 7.. 0 data L1 cache line size in bytes
875 *
876 * Bits, for L2:
877 * 31..16 unified L2 cache size in KBs
878 * 15..12 unified L2 cache associativity (0=off, FFh=full)
879 * 11.. 8 unified L2 cache lines per tag
880 * 7.. 0 unified L2 cache line size in bytes
881 *
882 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
883 * upon this information. (Whatever that means -- njn)
884 *
885 * Returns 0 on success, non-zero on failure.
886 */
sewardj07133bf2002-06-13 10:25:56 +0000887static
888Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000889{
sewardj07133bf2002-06-13 10:25:56 +0000890 Int dummy, ext_level;
891 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +0000892
893 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
894
895 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
896 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000897 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
898 ext_level);
njn7cf0bd32002-06-08 13:36:03 +0000899 return -1;
900 }
901
902 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
903 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
904
905 D1c->size = (D1i >> 24) & 0xff;
906 D1c->assoc = (D1i >> 16) & 0xff;
907 D1c->line_size = (D1i >> 0) & 0xff;
908
909 I1c->size = (I1i >> 24) & 0xff;
910 I1c->assoc = (I1i >> 16) & 0xff;
911 I1c->line_size = (I1i >> 0) & 0xff;
912
913 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
914 L2c->assoc = (L2i >> 12) & 0xf;
915 L2c->line_size = (L2i >> 0) & 0xff;
916
917 return 0;
918}
919
/* Jump buffer shared between get_caches_from_CPUID(), which arms it with
 * __builtin_setjmp(), and the SIGILL handler below, which jumps back to it. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler installed while probing for CPUID support.  It never
 * returns to the faulting instruction: control resumes at the
 * __builtin_setjmp() call, which then yields 1. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;   /* which signal arrived is irrelevant here */
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
927
928static
sewardj07133bf2002-06-13 10:25:56 +0000929Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +0000930{
sewardj07133bf2002-06-13 10:25:56 +0000931 Int level, res, ret;
932 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +0000933 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +0000934
935 /* Install own SIGILL handler */
936 sigill_new.ksa_handler = cpuid_SIGILL_handler;
937 sigill_new.ksa_flags = 0;
938 sigill_new.ksa_restorer = NULL;
939 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
940 vg_assert(res == 0);
941
942 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
943 vg_assert(res == 0);
944
945 /* Trap for illegal instruction, in case it's a really old processor that
946 * doesn't support CPUID. */
947 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
948 cpuid(0, &level, (int*)&vendor_id[0],
949 (int*)&vendor_id[8], (int*)&vendor_id[4]);
950 vendor_id[12] = '\0';
951
952 /* Restore old SIGILL handler */
953 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
954 vg_assert(res == 0);
955
956 } else {
957 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
958
959 /* Restore old SIGILL handler */
960 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
961 vg_assert(res == 0);
962 return -1;
963 }
964
965 if (0 == level) {
966 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
967 return -1;
968 }
969
970 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
971 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
972 ret = Intel_cache_info(level, I1c, D1c, L2c);
973
974 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
975 ret = AMD_cache_info(I1c, D1c, L2c);
976
977 } else {
978 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
979 vendor_id);
980 return -1;
981 }
982
983 /* Successful! Convert sizes from KB to bytes */
984 I1c->size *= 1024;
985 D1c->size *= 1024;
986 L2c->size *= 1024;
987
988 return ret;
989}
990
991/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +0000992static
993void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +0000994{
995 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +0000996 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +0000997 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +0000998 "warning: %s size of %dB not a power of two; "
999 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001000 cache->size = dflt->size;
1001 }
1002
sewardj07133bf2002-06-13 10:25:56 +00001003 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001004 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001005 "warning: %s associativity of %d not a power of two; "
1006 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001007 cache->assoc = dflt->assoc;
1008 }
1009
sewardj07133bf2002-06-13 10:25:56 +00001010 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001011 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001012 "warning: %s line size of %dB not a power of two; "
1013 "defaulting to %dB",
1014 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001015 cache->line_size = dflt->line_size;
1016 }
1017
1018 /* Then check line size >= 16 -- any smaller and a single instruction could
1019 * straddle three cache lines, which breaks a simulation assertion and is
1020 * stupid anyway. */
1021 if (cache->line_size < MIN_LINE_SIZE) {
1022 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001023 "warning: %s line size of %dB too small; "
1024 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001025 cache->line_size = MIN_LINE_SIZE;
1026 }
1027
1028 /* Then check cache size > line size (causes seg faults if not). */
1029 if (cache->size <= cache->line_size) {
1030 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001031 "warning: %s cache size of %dB <= line size of %dB; "
1032 "increasing to %dB", name, cache->size, cache->line_size,
1033 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001034 cache->size = cache->line_size * 2;
1035 }
1036
1037 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1038 if (cache->assoc > (cache->size / cache->line_size)) {
1039 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001040 "warning: %s associativity > (size / line size); "
1041 "increasing size to %dB",
1042 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001043 cache->size = cache->assoc * cache->line_size;
1044 }
1045}
1046
1047/* On entry, args are undefined. Fill them with any info from the
1048 * command-line, then fill in any remaining with CPUID instruction if possible,
1049 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001050static
1051void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001052{
1053 /* Defaults are for a model 3 or 4 Athlon */
1054 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1055 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1056 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1057
1058#define CMD_LINE_DEFINED(L) \
1059 (-1 != VG_(clo_##L##_cache).size || \
1060 -1 != VG_(clo_##L##_cache).assoc || \
1061 -1 != VG_(clo_##L##_cache).line_size)
1062
1063 /* If any undefined on command-line, try CPUID */
1064 if (! CMD_LINE_DEFINED(I1) ||
1065 ! CMD_LINE_DEFINED(D1) ||
1066 ! CMD_LINE_DEFINED(L2)) {
1067
1068 /* Overwrite CPUID result for any cache defined on command-line */
1069 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1070
1071 if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
1072 if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
1073 if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
1074
1075 /* CPUID failed, use defaults for each undefined by command-line */
1076 } else {
1077 VG_(message)(Vg_DebugMsg,
1078 "Couldn't detect cache configuration, using one "
1079 "or more defaults ");
1080
1081 *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
1082 *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
1083 *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
1084 }
1085 }
1086#undef CMD_LINE_DEFINED
1087
1088 check_cache(I1c, &I1_dflt, "I1");
1089 check_cache(D1c, &D1_dflt, "D1");
1090 check_cache(L2c, &L2_dflt, "L2");
1091
1092 if (VG_(clo_verbosity) > 1) {
1093 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1094 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1095 I1c->size, I1c->assoc, I1c->line_size);
1096 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1097 D1c->size, D1c->assoc, D1c->line_size);
1098 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1099 L2c->size, L2c->assoc, L2c->line_size);
1100 }
1101}
1102
njn4f9c9342002-04-29 16:03:24 +00001103void VG_(init_cachesim)(void)
1104{
njn7cf0bd32002-06-08 13:36:03 +00001105 cache_t I1c, D1c, L2c;
1106
njn4f9c9342002-04-29 16:03:24 +00001107 /* Make sure the output file can be written. */
1108 Int fd = VG_(open_write)(OUT_FILE);
1109 if (-1 == fd) {
1110 fd = VG_(create_and_write)(OUT_FILE);
1111 if (-1 == fd) {
1112 file_err();
1113 }
1114 }
1115 VG_(close)(fd);
njne0ee0712002-05-03 16:41:05 +00001116
njn4f9c9342002-04-29 16:03:24 +00001117 initCC(&Ir_total);
1118 initCC(&Dr_total);
1119 initCC(&Dw_total);
1120
njn4294fd42002-06-05 14:41:10 +00001121 initCC(&Ir_discards);
1122 initCC(&Dr_discards);
1123 initCC(&Dw_discards);
1124
njn7cf0bd32002-06-08 13:36:03 +00001125 get_caches(&I1c, &D1c, &L2c);
1126
1127 cachesim_I1_initcache(I1c);
1128 //cachesim_I1_initcache();
1129 cachesim_D1_initcache(D1c);
1130 //cachesim_D1_initcache();
1131 cachesim_L2_initcache(L2c);
1132 //cachesim_L2_initcache();
njn4f9c9342002-04-29 16:03:24 +00001133
1134 init_BBCC_table();
1135}
1136
1137void VG_(cachesim_log_non_mem_instr)(iCC* cc)
1138{
1139 //VG_(printf)("sim I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
1140 // cc, cc->instr_addr, cc->instr_size)
1141 VGP_PUSHCC(VgpCacheSimulate);
1142 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
1143 cc->I.a++;
1144 VGP_POPCC;
1145}
1146
1147void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
1148{
1149 //VG_(printf)("sim D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
1150 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
1151 VGP_PUSHCC(VgpCacheSimulate);
1152 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
1153 cc->I.a++;
1154
1155 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
1156 cc->D.a++;
1157 VGP_POPCC;
1158}
1159
1160/*------------------------------------------------------------*/
1161/*--- Printing of output file and summary stats ---*/
1162/*------------------------------------------------------------*/
1163
njn4f9c9342002-04-29 16:03:24 +00001164static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1165 Char *first_instr_fn)
1166{
1167 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001168 Char buf[BUF_LEN], curr_file[BUF_LEN],
1169 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001170 UInt line_num;
1171
1172 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1173
njne0ee0712002-05-03 16:41:05 +00001174 /* Mark start of basic block in output, just to ease debugging */
1175 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001176
1177 VG_(strcpy)(curr_file, first_instr_fl);
1178
1179 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1180
1181 /* We pretend the CC is an iCC for getting the tag. This is ok
1182 * because both CC types have tag as their first byte. Once we know
1183 * the type, we can cast and act appropriately. */
1184
1185 Char fl_buf[FILENAME_LEN];
1186 Char fn_buf[FN_NAME_LEN];
1187
njne0ee0712002-05-03 16:41:05 +00001188 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001189 switch ( ((iCC*)BBCC_ptr)->tag ) {
1190
njn4f9c9342002-04-29 16:03:24 +00001191 case INSTR_CC:
njne0ee0712002-05-03 16:41:05 +00001192 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1193 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001194 ADD_CC_TO(iCC, I, Ir_total);
1195 BBCC_ptr += sizeof(iCC);
1196 break;
1197
1198 case READ_CC:
1199 case MOD_CC:
njne0ee0712002-05-03 16:41:05 +00001200 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1201 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001202 ADD_CC_TO(idCC, I, Ir_total);
1203 ADD_CC_TO(idCC, D, Dr_total);
1204 BBCC_ptr += sizeof(idCC);
1205 break;
1206
1207 case WRITE_CC:
njne0ee0712002-05-03 16:41:05 +00001208 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1209 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001210 ADD_CC_TO(idCC, I, Ir_total);
1211 ADD_CC_TO(idCC, D, Dw_total);
1212 BBCC_ptr += sizeof(idCC);
1213 break;
1214
njn4f9c9342002-04-29 16:03:24 +00001215 default:
1216 VG_(panic)("Unknown CC type in fprint_BBCC()\n");
1217 break;
1218 }
1219 distinct_instrs++;
1220
njne0ee0712002-05-03 16:41:05 +00001221 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1222
1223 /* Allow for filename switching in the middle of a BB; if this happens,
1224 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001225 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001226 VG_(strcpy)(curr_file, fl_buf);
1227 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1228 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1229 }
1230
njn4f9c9342002-04-29 16:03:24 +00001231 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001232 * first instruction in the BB, print warning. */
sewardj18d75132002-05-16 11:06:21 +00001233 if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001234 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001235 VG_(printf)(" filenames: BB:%s, instr:%s;"
1236 " fn_names: BB:%s, instr:%s;"
1237 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001238 first_instr_fl, fl_buf,
1239 first_instr_fn, fn_buf,
1240 line_num);
1241 }
1242
njne0ee0712002-05-03 16:41:05 +00001243 VG_(sprintf)(lbuf, "%u ", line_num);
1244 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1245 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001246 }
1247 /* If we switched filenames in the middle of the BB without switching back,
1248 * switch back now because the subsequent BB may be relying on falling under
1249 * the original file name. */
1250 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1251 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1252 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1253 }
njne0ee0712002-05-03 16:41:05 +00001254
1255 /* Mark end of basic block */
1256 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001257
1258 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
1259}
1260
1261static void fprint_BBCC_table_and_calc_totals(Int client_argc,
1262 Char** client_argv)
1263{
1264 Int fd;
1265 Char buf[BUF_LEN];
1266 file_node *curr_file_node;
1267 fn_node *curr_fn_node;
1268 BBCC *curr_BBCC;
1269 Int i,j,k;
1270
1271 VGP_PUSHCC(VgpCacheDump);
1272 fd = VG_(open_write)(OUT_FILE);
1273 if (-1 == fd) { file_err(); }
1274
1275 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001276 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1277 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1278 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1279 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1280 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1281 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001282
1283 /* "cmd:" line */
1284 VG_(strcpy)(buf, "cmd:");
1285 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1286 for (i = 0; i < client_argc; i++) {
1287 VG_(sprintf)(buf, " %s", client_argv[i]);
1288 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1289 }
1290 /* "events:" line */
1291 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1292 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1293
1294 /* Six loops here: three for the hash table arrays, and three for the
1295 * chains hanging off the hash table arrays. */
1296 for (i = 0; i < N_FILE_ENTRIES; i++) {
1297 curr_file_node = BBCC_table[i];
1298 while (curr_file_node != NULL) {
1299 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1300 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1301
1302 for (j = 0; j < N_FN_ENTRIES; j++) {
1303 curr_fn_node = curr_file_node->fns[j];
1304 while (curr_fn_node != NULL) {
1305 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1306 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1307
1308 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1309 curr_BBCC = curr_fn_node->BBCCs[k];
1310 while (curr_BBCC != NULL) {
1311 fprint_BBCC(fd, curr_BBCC,
1312
1313 curr_file_node->filename,
1314 curr_fn_node->fn_name);
1315
1316 curr_BBCC = curr_BBCC->next;
1317 }
1318 }
1319 curr_fn_node = curr_fn_node->next;
1320 }
1321 }
1322 curr_file_node = curr_file_node->next;
1323 }
1324 }
1325
njn4294fd42002-06-05 14:41:10 +00001326 /* Print stats from any discarded basic blocks */
1327 if (0 != Ir_discards.a) {
1328
1329 VG_(sprintf)(buf, "fl=(discarded)\n");
1330 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1331 VG_(sprintf)(buf, "fn=(discarded)\n");
1332 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1333
1334 /* Use 0 as line number */
1335 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1336 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1337 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1338 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1339 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1340
1341 Ir_total.a += Ir_discards.a;
1342 Ir_total.m1 += Ir_discards.m1;
1343 Ir_total.m2 += Ir_discards.m2;
1344 Dr_total.a += Dr_discards.a;
1345 Dr_total.m1 += Dr_discards.m1;
1346 Dr_total.m2 += Dr_discards.m2;
1347 Dw_total.a += Dw_discards.a;
1348 Dw_total.m1 += Dw_discards.m1;
1349 Dw_total.m2 += Dw_discards.m2;
1350 }
1351
njn4f9c9342002-04-29 16:03:24 +00001352 /* Summary stats must come after rest of table, since we calculate them
1353 * during traversal. */
1354 VG_(sprintf)(buf, "summary: "
1355 "%llu %llu %llu "
1356 "%llu %llu %llu "
1357 "%llu %llu %llu\n",
1358 Ir_total.a, Ir_total.m1, Ir_total.m2,
1359 Dr_total.a, Dr_total.m1, Dr_total.m2,
1360 Dw_total.a, Dw_total.m1, Dw_total.m2);
1361 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1362 VG_(close)(fd);
1363}
1364
1365/* Adds commas to ULong, right justifying in a field field_width wide, returns
1366 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001367static
njn4f9c9342002-04-29 16:03:24 +00001368Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1369{
1370 int len, n_commas, i, j, new_len, space;
1371
1372 VG_(sprintf)(buf, "%lu", n);
1373 len = VG_(strlen)(buf);
1374 n_commas = (len - 1) / 3;
1375 new_len = len + n_commas;
1376 space = field_width - new_len;
1377
1378 /* Allow for printing a number in a field_width smaller than it's size */
1379 if (space < 0) space = 0;
1380
1381 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1382 * of three. */
1383 for (j = -1, i = len ; i >= 0; i--) {
1384 buf[i + n_commas + space] = buf[i];
1385
1386 if (3 == ++j) {
1387 j = 0;
1388 n_commas--;
1389 buf[i + n_commas + space] = ',';
1390 }
1391 }
1392 /* Right justify in field. */
1393 for (i = 0; i < space; i++) buf[i] = ' ';
1394 return new_len;
1395}
1396
sewardj4f29ddf2002-05-03 22:29:04 +00001397static
njn4f9c9342002-04-29 16:03:24 +00001398void percentify(Int n, Int pow, Int field_width, char buf[])
1399{
1400 int i, len, space;
1401
1402 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1403 len = VG_(strlen)(buf);
1404 space = field_width - len;
1405 i = len;
1406
1407 /* Right justify in field */
1408 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1409 for (i = 0; i < space; i++) buf[i] = ' ';
1410}
1411
njn7cf0bd32002-06-08 13:36:03 +00001412void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
njn4f9c9342002-04-29 16:03:24 +00001413{
1414 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001415 ULong L2_total_m, L2_total_mr, L2_total_mw,
1416 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001417 char buf1[RESULTS_BUF_LEN],
1418 buf2[RESULTS_BUF_LEN],
1419 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001420 Int l1, l2, l3;
1421 Int p;
1422
1423 fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
1424
njn7cf0bd32002-06-08 13:36:03 +00001425 if (VG_(clo_verbosity) == 0)
1426 return;
1427
njn4f9c9342002-04-29 16:03:24 +00001428 /* I cache results. Use the I_refs value to determine the first column
1429 * width. */
1430 l1 = commify(Ir_total.a, 0, buf1);
1431 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1432
1433 commify(Ir_total.m1, l1, buf1);
1434 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1435
1436 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001437 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001438
1439 p = 100;
1440
1441 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1442 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1443
1444 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1445 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1446 VG_(message)(Vg_UserMsg, "");
1447
1448 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1449 * width of columns 2 & 3. */
1450 D_total.a = Dr_total.a + Dw_total.a;
1451 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1452 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1453
njn1d021fa2002-05-02 13:56:34 +00001454 commify( D_total.a, l1, buf1);
1455 l2 = commify(Dr_total.a, 0, buf2);
1456 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001457 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1458 buf1, buf2, buf3);
1459
1460 commify( D_total.m1, l1, buf1);
1461 commify(Dr_total.m1, l2, buf2);
1462 commify(Dw_total.m1, l3, buf3);
1463 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1464 buf1, buf2, buf3);
1465
1466 commify( D_total.m2, l1, buf1);
1467 commify(Dr_total.m2, l2, buf2);
1468 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001469 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001470 buf1, buf2, buf3);
1471
1472 p = 10;
1473
1474 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1475 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1476 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1477 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1478
1479 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1480 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1481 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1482 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1483 VG_(message)(Vg_UserMsg, "");
1484
1485 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001486
1487 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1488 L2_total_r = Dr_total.m1 + Ir_total.m1;
1489 L2_total_w = Dw_total.m1;
1490 commify(L2_total, l1, buf1);
1491 commify(L2_total_r, l2, buf2);
1492 commify(L2_total_w, l3, buf3);
1493 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1494 buf1, buf2, buf3);
1495
njn4f9c9342002-04-29 16:03:24 +00001496 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1497 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1498 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001499 commify(L2_total_m, l1, buf1);
1500 commify(L2_total_mr, l2, buf2);
1501 commify(L2_total_mw, l3, buf3);
1502 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1503 buf1, buf2, buf3);
1504
1505 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1506 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1507 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1508 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1509
1510
1511 /* Hash table stats */
1512 if (VG_(clo_verbosity) > 1) {
1513 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1514 file_line_debug_BBs + no_debug_BBs;
1515
1516 VG_(message)(Vg_DebugMsg, "");
1517 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1518 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1519 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1520 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1521 full_debug_BBs * 100 / BB_lookups,
1522 full_debug_BBs);
1523 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1524 file_line_debug_BBs * 100 / BB_lookups,
1525 file_line_debug_BBs);
1526 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1527 fn_name_debug_BBs * 100 / BB_lookups,
1528 fn_name_debug_BBs);
1529 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1530 no_debug_BBs * 100 / BB_lookups,
1531 no_debug_BBs);
1532 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1533 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1534 }
1535 VGP_POPCC;
1536}
1537
sewardj18d75132002-05-16 11:06:21 +00001538
njn4294fd42002-06-05 14:41:10 +00001539/* Called when a translation is invalidated due to self-modifying code or
1540 * unloaded of a shared object.
1541 *
1542 * Finds the BBCC in the table, removes it, adds the counts to the discard
1543 * counters, and then frees the BBCC. */
sewardj18d75132002-05-16 11:06:21 +00001544void VG_(cachesim_notify_discard) ( TTEntry* tte )
1545{
njn4294fd42002-06-05 14:41:10 +00001546 BBCC *BBCC_node;
1547 Addr BBCC_ptr0, BBCC_ptr;
1548 Bool BB_seen_before;
1549
1550 VG_(printf)( "cachesim_notify_discard: %p for %d\n",
1551 tte->orig_addr, (Int)tte->orig_size);
1552
1553 /* 2nd arg won't be used since BB should have been seen before (assertions
1554 * ensure this). */
1555 BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
1556 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1557
1558 vg_assert(True == BB_seen_before);
1559
1560 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1561
1562 /* We pretend the CC is an iCC for getting the tag. This is ok
1563 * because both CC types have tag as their first byte. Once we know
1564 * the type, we can cast and act appropriately. */
1565
1566 switch ( ((iCC*)BBCC_ptr)->tag ) {
1567
1568 case INSTR_CC:
1569 ADD_CC_TO(iCC, I, Ir_discards);
1570 BBCC_ptr += sizeof(iCC);
1571 break;
1572
1573 case READ_CC:
1574 case MOD_CC:
1575 ADD_CC_TO(idCC, I, Ir_discards);
1576 ADD_CC_TO(idCC, D, Dr_discards);
1577 BBCC_ptr += sizeof(idCC);
1578 break;
1579
1580 case WRITE_CC:
1581 ADD_CC_TO(idCC, I, Ir_discards);
1582 ADD_CC_TO(idCC, D, Dw_discards);
1583 BBCC_ptr += sizeof(idCC);
1584 break;
1585
1586 default:
1587 VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
1588 break;
1589 }
1590 }
1591
1592 VG_(free)(VG_AR_PRIVATE, BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001593}
1594
1595/*--------------------------------------------------------------------*/
1596/*--- end vg_cachesim.c ---*/
1597/*--------------------------------------------------------------------*/