blob: b794e6125cc484adb92dc3c7344a5f74f88004bb [file] [log] [blame]
sewardj18d75132002-05-16 11:06:21 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
3/*--- The cache simulation framework: instrumentation, recording ---*/
4/*--- and results printing. ---*/
5/*--- vg_cachesim.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of Valgrind, an x86 protected-mode emulator
10 designed for debugging and profiling binaries on x86-Unixes.
11
12 Copyright (C) 2000-2002 Julian Seward
13 jseward@acm.org
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file LICENSE.
31*/
32
njn4f9c9342002-04-29 16:03:24 +000033#include "vg_include.h"
34
35#include "vg_cachesim_L2.c"
36#include "vg_cachesim_I1.c"
37#include "vg_cachesim_D1.c"
38
39
/* Upper bound on the byte length of one x86 instruction; the instrumenter
 * asserts that every instruction size it sees lies in 1..MAX_x86_INSTR_SIZE. */
40/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
41#define MAX_x86_INSTR_SIZE 16
42
43/* Size of various buffers used for storing strings */
/* FILENAME_LEN / FN_NAME_LEN: source-file and function names from debug info.
 * BUF_LEN: general formatting buffer for lines of the output file.
 * COMMIFY_BUF_LEN: declared size of the buffer parameter of commify().
 * RESULTS_BUF_LEN: buffers used when printing the summary; these are handed
 *    to commify(), so this value should not be smaller than COMMIFY_BUF_LEN.
 * LINE_BUF_LEN: buffer for a formatted line number. */
44#define FILENAME_LEN 256
45#define FN_NAME_LEN 256
46#define BUF_LEN 512
47#define COMMIFY_BUF_LEN 128
njne0ee0712002-05-03 16:41:05 +000048#define RESULTS_BUF_LEN 128
49#define LINE_BUF_LEN 64
njn4f9c9342002-04-29 16:03:24 +000050
51/*------------------------------------------------------------*/
52/*--- Output file related stuff ---*/
53/*------------------------------------------------------------*/
54
55#define OUT_FILE "cachegrind.out"
56
57static void file_err()
58{
59 VG_(message)(Vg_UserMsg,
60 "FATAL: can't open cache simulation output file `%s'",
61 OUT_FILE );
62 VG_(exit)(1);
63}
64
65/*------------------------------------------------------------*/
66/*--- Cost center types, operations ---*/
67/*------------------------------------------------------------*/
68
/* A cost centre: three 64-bit event counters for one kind of reference.
 * The summary output reports `a' as the number of references, `m1' as
 * first-level (I1/D1) misses and `m2' as L2 misses. */
69typedef struct _CC CC;
70struct _CC {
71 ULong a;    /* accesses: incremented once per logged reference */
72 ULong m1;   /* handed by address to the cachesim_*_doref routines */
73 ULong m2;   /* handed by address to the cachesim_*_doref routines */
74};
75
76static __inline__ void initCC(CC* cc) {
77 cc->a = 0;
78 cc->m1 = 0;
79 cc->m2 = 0;
80}
81
82
/* Tag values stored in the first byte of every cost centre.  INSTR_CC marks
 * an iCC (instruction only); READ_CC, WRITE_CC and MOD_CC mark an idCC whose
 * instruction reads, writes, or both reads and writes memory. */
83typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
84
85/* Instruction-level cost-centres. The typedefs for these structs are in
86 * vg_include.h
87 *
88 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +000089 *
njne0ee0712002-05-03 16:41:05 +000090 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +000091 */
/* Cost centre for an x86 instruction that makes no data reference.
 * Filled in by init_iCC() and updated by VG_(cachesim_log_non_mem_instr). */
92struct _iCC {
93 /* word 1 */
94 UChar tag;          /* always INSTR_CC; must stay the first byte */
95 UChar instr_size;   /* instruction length in bytes */
njne0ee0712002-05-03 16:41:05 +000096 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +000097
98 /* words 2+ */
99 Addr instr_addr;    /* address of the original instruction */
100 CC I;              /* instruction-fetch counters */
101};
102
/* Cost centre for an x86 instruction that also references data memory.
 * Filled in by init_idCC() and updated by VG_(cachesim_log_mem_instr). */
103struct _idCC {
104 /* word 1 */
105 UChar tag;         /* READ_CC, WRITE_CC or MOD_CC; must stay the first byte */
106 UChar instr_size;  /* instruction length in bytes */
107 UChar data_size;   /* size in bytes of the data reference */
njne0ee0712002-05-03 16:41:05 +0000108 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000109
110 /* words 2+ */
111 Addr instr_addr;   /* address of the original instruction */
112 CC I;              /* instruction-fetch counters */
113 CC D;              /* data-reference counters */
114};
115
116static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
117{
118 cc->tag = INSTR_CC;
119 cc->instr_size = instr_size;
120 cc->instr_addr = instr_addr;
121 initCC(&cc->I);
122}
123
124static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
125 UInt instr_size, UInt data_size)
126{
127 cc->tag = X_CC;
128 cc->instr_size = instr_size;
129 cc->data_size = data_size;
130 cc->instr_addr = instr_addr;
131 initCC(&cc->I);
132 initCC(&cc->D);
133}
134
njne0ee0712002-05-03 16:41:05 +0000135static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000136{
njne0ee0712002-05-03 16:41:05 +0000137 VG_(sprintf)(buf, "%llu %llu %llu\n",
138 cc->I.a, cc->I.m1, cc->I.m2);
njn4f9c9342002-04-29 16:03:24 +0000139}
140
njne0ee0712002-05-03 16:41:05 +0000141static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000142{
njne0ee0712002-05-03 16:41:05 +0000143 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
144 cc->I.a, cc->I.m1, cc->I.m2,
145 cc->D.a, cc->D.m1, cc->D.m2);
njn4f9c9342002-04-29 16:03:24 +0000146}
147
njne0ee0712002-05-03 16:41:05 +0000148static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000149{
njne0ee0712002-05-03 16:41:05 +0000150 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
151 cc->I.a, cc->I.m1, cc->I.m2,
152 cc->D.a, cc->D.m1, cc->D.m2);
njn4f9c9342002-04-29 16:03:24 +0000153}
154
155/*------------------------------------------------------------*/
156/*--- BBCC hash table stuff ---*/
157/*------------------------------------------------------------*/
158
159/* The table of BBCCs is of the form hash(filename, hash(fn_name,
160 * hash(BBCCs))). Each hash table is separately chained. The sizes below work
161 * fairly well for Konqueror. */
162
/* Bucket counts of the three nested hash tables: the top-level table keyed
 * by filename, the per-file table keyed by function name, and the
 * per-function table keyed by basic-block address. */
163#define N_FILE_ENTRIES 251
164#define N_FN_ENTRIES 53
165#define N_BBCC_ENTRIES 37
166
167/* The cost centres for a basic block are stored in a contiguous array.
168 * They are distinguishable by their tag field. */
/* Per-basic-block record.  The header is followed in the same allocation by
 * `array_size' bytes holding the block's iCC/idCC cost centres back to back;
 * new_BBCC() allocates sizeof(BBCC) + array_size bytes. */
169typedef struct _BBCC BBCC;
170struct _BBCC {
171 Addr orig_addr;   /* address of the original basic block; the lookup key */
172 UInt array_size; /* byte-size of variable length array */
173 BBCC* next;       /* next record in the same hash chain */
174 Addr array[0]; /* variable length array */
175};
176
/* One function within a file: its name, a hash table of the basic blocks
 * attributed to it (bucket = block address % N_BBCC_ENTRIES), and the link
 * to the next function in the same bucket of the owning file node. */
177typedef struct _fn_node fn_node;
178struct _fn_node {
179 Char* fn_name;                 /* private copy made with VG_(strdup) */
180 BBCC* BBCCs[N_BBCC_ENTRIES];
181 fn_node* next;
182};
183
/* One source file: its name, a hash table of the functions seen in it
 * (bucket = hash(fn_name, N_FN_ENTRIES)), and the link to the next file in
 * the same bucket of BBCC_table. */
184typedef struct _file_node file_node;
185struct _file_node {
186 Char* filename;                /* private copy made with VG_(strdup) */
187 fn_node* fns[N_FN_ENTRIES];
188 file_node* next;
189};
190
191/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
/* Top-level hash table, bucket = hash(filename, N_FILE_ENTRIES). */
sewardj4f29ddf2002-05-03 22:29:04 +0000192static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000193
/* Number of file nodes / function nodes created so far by get_BBCC(). */
sewardj4f29ddf2002-05-03 22:29:04 +0000194static Int distinct_files = 0;
195static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000196
/* distinct_instrs counts cost centres visited by fprint_BBCC().  The four
 * *_debug_BBs counters are bumped by get_debug_info() according to which of
 * the file/line and function-name lookups succeeded.  NOTE(review):
 * get_debug_info() is called both per basic block and per instruction while
 * dumping, so these counters count lookups rather than strictly BBs. */
sewardj4f29ddf2002-05-03 22:29:04 +0000197static Int distinct_instrs = 0;
198static Int full_debug_BBs = 0;
199static Int file_line_debug_BBs = 0;
200static Int fn_name_debug_BBs = 0;
201static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000202
/* Number of times get_BBCC() found an existing record for a basic block. */
sewardj4f29ddf2002-05-03 22:29:04 +0000203static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000204
205static void init_BBCC_table()
206{
207 Int i;
208 for (i = 0; i < N_FILE_ENTRIES; i++)
209 BBCC_table[i] = NULL;
210}
211
njne0ee0712002-05-03 16:41:05 +0000212static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
213 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000214{
njn4f9c9342002-04-29 16:03:24 +0000215 Bool found1, found2, no_demangle = False;
216
217 found1 = VG_(what_line_is_this)(instr_addr, filename,
njne0ee0712002-05-03 16:41:05 +0000218 FILENAME_LEN, line_num);
njn4f9c9342002-04-29 16:03:24 +0000219 found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
220
221 if (!found1 && !found2) {
222 no_debug_BBs++;
223 VG_(strcpy)(filename, "???");
224 VG_(strcpy)(fn_name, "???");
225
226 } else if ( found1 && found2) {
227 full_debug_BBs++;
228
229 } else if ( found1 && !found2) {
230 file_line_debug_BBs++;
231 VG_(strcpy)(fn_name, "???");
232
233 } else /*(!found1 && found2)*/ {
234 fn_name_debug_BBs++;
235 VG_(strcpy)(filename, "???");
236 }
237}
238
239/* Forward declaration. */
240static Int compute_BBCC_array_size(UCodeBlock* cb);
241
242static __inline__
243file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
244{
245 Int i;
246 file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
247 new->filename = VG_(strdup)(VG_AR_PRIVATE, filename);
248 for (i = 0; i < N_FN_ENTRIES; i++) {
249 new->fns[i] = NULL;
250 }
251 new->next = next;
252 return new;
253}
254
255static __inline__
256fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
257{
258 Int i;
259 fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
260 new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
261 for (i = 0; i < N_BBCC_ENTRIES; i++) {
262 new->BBCCs[i] = NULL;
263 }
264 new->next = next;
265 return new;
266}
267
268static __inline__
269BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
270{
271 Int BBCC_array_size = compute_BBCC_array_size(cb);
272 BBCC* new;
273
274 new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
275 new->orig_addr = bb_orig_addr;
276 new->array_size = BBCC_array_size;
277 new->next = next;
278
279 return new;
280}
281
/* Multiplier applied to the running value for each character hashed. */
#define HASH_CONSTANT   256

/* Hash the NUL-terminated string s to a bucket index below table_size.
 * Each character is folded in as (HASH_CONSTANT * h + c) % table_size;
 * the empty string hashes to 0. */
static UInt hash(Char *s, UInt table_size)
{
   UInt acc = 0;

   while (*s != '\0') {
      acc = (HASH_CONSTANT * acc + *s) % table_size;
      s++;
   }
   return acc;
}
291
292/* Do a three step traversal: by filename, then fn_name, then instr_addr.
293 * In all cases prepends new nodes to their chain. Returns a pointer to the
294 * cost centre. Also sets BB_seen_before by reference.
295 */
296static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
297 Bool *BB_seen_before)
298{
299 file_node *curr_file_node;
300 fn_node *curr_fn_node;
301 BBCC *curr_BBCC;
302 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
303 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000304 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000305
njne0ee0712002-05-03 16:41:05 +0000306 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000307
308 VGP_PUSHCC(VgpCacheGetBBCC);
309 filename_hash = hash(filename, N_FILE_ENTRIES);
310 curr_file_node = BBCC_table[filename_hash];
311 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000312 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000313 curr_file_node = curr_file_node->next;
314 }
315 if (NULL == curr_file_node) {
316 BBCC_table[filename_hash] = curr_file_node =
317 new_file_node(filename, BBCC_table[filename_hash]);
318 distinct_files++;
319 }
320
321 fnname_hash = hash(fn_name, N_FN_ENTRIES);
322 curr_fn_node = curr_file_node->fns[fnname_hash];
323 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000324 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000325 curr_fn_node = curr_fn_node->next;
326 }
327 if (NULL == curr_fn_node) {
328 curr_file_node->fns[fnname_hash] = curr_fn_node =
329 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
330 distinct_fns++;
331 }
332
333 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
334 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
335 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
336 curr_BBCC = curr_BBCC->next;
337 }
338 if (curr_BBCC == NULL) {
339 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
340 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
341 *BB_seen_before = False;
342
343 } else {
344 vg_assert(bb_orig_addr == curr_BBCC->orig_addr);
345 vg_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000346 if (VG_(clo_verbosity) > 2) {
347 VG_(message)(Vg_DebugMsg,
348 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000349 }
350 *BB_seen_before = True;
351 BB_retranslations++;
352 }
353 VGP_POPCC;
354 return curr_BBCC;
355}
356
357/*------------------------------------------------------------*/
358/*--- Cache simulation instrumentation phase ---*/
359/*------------------------------------------------------------*/
360
/* Short local aliases for the core's UCode construction helpers, used by the
 * instrumentation code below. */
361#define uInstr1 VG_(newUInstr1)
362#define uInstr2 VG_(newUInstr2)
363#define uInstr3 VG_(newUInstr3)
364#define dis VG_(disassemble)
365#define uLiteral VG_(setLiteralField)
366#define newTemp VG_(getNewTemp)
367
368static Int compute_BBCC_array_size(UCodeBlock* cb)
369{
370 UInstr* u_in;
371 Int i, CC_size, BBCC_size = 0;
372 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
373
374 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
375
376 for (i = 0; i < cb->used; i++) {
sewardjfc3e5d32002-04-30 10:18:48 +0000377 /* VG_(ppUInstr)(0, &cb->instrs[i]); */
njn4f9c9342002-04-29 16:03:24 +0000378
379 u_in = &cb->instrs[i];
380 switch(u_in->opcode) {
381
382 case INCEIP:
383 goto case_for_end_of_instr;
384
385 case JMP:
386 if (u_in->cond != CondAlways) break;
387
388 goto case_for_end_of_instr;
389
390 case_for_end_of_instr:
391
392 CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W
393 ? sizeof(idCC) : sizeof(iCC));
394
395 BBCC_size += CC_size;
396 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
397 break;
398
399 case LOAD:
400 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000401 /* Also, a STORE can come after a LOAD for bts/btr/btc */
sewardjfc3e5d32002-04-30 10:18:48 +0000402 vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */
403 !is_FPU_R && !is_FPU_W);
njn4f9c9342002-04-29 16:03:24 +0000404 is_LOAD = True;
405 break;
406
407 case STORE:
408 /* Multiple STOREs are possible for 'pushal' */
409 vg_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
410 is_STORE = True;
411 break;
412
413 case FPU_R:
414 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
415 is_FPU_R = True;
416 break;
417
418 case FPU_W:
419 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
420 is_FPU_W = True;
421 break;
422
423 default:
424 break;
425 }
426 }
427
428 return BBCC_size;
429}
430
431/* Use this rather than eg. -1 because it's stored as a UInt. */
432#define INVALID_DATA_SIZE 999999
433
434UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
435{
436 UCodeBlock* cb;
437 Int i;
438 UInstr* u_in;
439 BBCC* BBCC_node;
440 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
441 Int CC_size = -1; /* Shut gcc warnings up */
442 Addr instr_addr = orig_addr;
443 UInt instr_size, data_size = INVALID_DATA_SIZE;
444 Int helper = -1; /* Shut gcc warnings up */
445 UInt stack_used;
446 Bool BB_seen_before = False;
447 Bool prev_instr_was_Jcond = False;
448 Addr BBCC_ptr0, BBCC_ptr;
449
450 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
451 * if it's the first time it's been seen), and point to start of the
452 * BBCC array. */
453 BBCC_node = get_BBCC(orig_addr, cb_in, &BB_seen_before);
454 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
455
456 cb = VG_(allocCodeBlock)();
457 cb->nextTemp = cb_in->nextTemp;
458
459 t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
460
461 for (i = 0; i < cb_in->used; i++) {
462 u_in = &cb_in->instrs[i];
463
464 //VG_(ppUInstr)(0, u_in);
465
466 /* What this is all about: we want to instrument each x86 instruction
467 * translation. The end of these are marked in three ways. The three
468 * ways, and the way we instrument them, are as follows:
469 *
470 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
471 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
472 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
473 *
474 * We must put the instrumentation before the jumps so that it is always
475 * executed. We don't have to put the instrumentation before the INCEIP
476 * (it could go after) but we do so for consistency.
477 *
478 * Junconds are always the last instruction in a basic block. Jconds are
479 * always the 2nd last, and must be followed by a Jcond. We check this
480 * with various assertions.
481 *
482 * Note that in VG_(disBB) we patched the `extra4b' field of the first
483 * occurring JMP in a block with the size of its x86 instruction. This
484 * is used now.
485 *
486 * Note that we don't have to treat JIFZ specially; unlike JMPs, JIFZ
487 * occurs in the middle of a BB and gets an INCEIP after it.
488 *
489 * The instrumentation is just a call to the appropriate helper function,
490 * passing it the address of the instruction's CC.
491 */
492 if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
493
494 switch (u_in->opcode) {
495
496 case INCEIP:
497 instr_size = u_in->val1;
498 goto case_for_end_of_x86_instr;
499
500 case JMP:
501 if (u_in->cond == CondAlways) {
502 vg_assert(i+1 == cb_in->used);
503
504 /* Don't instrument if previous instr was a Jcond. */
505 if (prev_instr_was_Jcond) {
506 vg_assert(0 == u_in->extra4b);
507 VG_(copyUInstr)(cb, u_in);
508 break;
509 }
510 prev_instr_was_Jcond = False;
511
512 } else {
513 vg_assert(i+2 == cb_in->used); /* 2nd last instr in block */
514 prev_instr_was_Jcond = True;
515 }
516
517 /* Ah, the first JMP... instrument, please. */
518 instr_size = u_in->extra4b;
519 goto case_for_end_of_x86_instr;
520
521 /* Shared code that is executed at the end of an x86 translation
522 * block, marked by either an INCEIP or an unconditional JMP. */
523 case_for_end_of_x86_instr:
524
525#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
526
527 /* Initialise the CC in the BBCC array appropriately if it hasn't
528 * been initialised before.
529 * Then call appropriate sim function, passing it the CC address.
530 * Note that CALLM_S/CALL_E aren't required here; by this point,
531 * the checking related to them has already happened. */
532 stack_used = 0;
533
534 vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
535 vg_assert(0 != instr_addr);
536
537 /* Save the caller-save registers before we push our args */
538 uInstr1(cb, PUSH, 4, RealReg, R_EAX);
539 uInstr1(cb, PUSH, 4, RealReg, R_ECX);
540 uInstr1(cb, PUSH, 4, RealReg, R_EDX);
541
542 if (!IS_(read) && !IS_(write)) {
543 iCC* CC_ptr = (iCC*)(BBCC_ptr);
544 vg_assert(INVALID_DATA_SIZE == data_size);
545 vg_assert(INVALID_TEMPREG == t_read_addr &&
546 INVALID_TEMPREG == t_write_addr);
547 CC_size = sizeof(iCC);
548 if (!BB_seen_before)
549 init_iCC(CC_ptr, instr_addr, instr_size);
550
551 helper = VGOFF_(cachesim_log_non_mem_instr);
552
553 } else {
554 CC_type X_CC;
555 idCC* CC_ptr = (idCC*)(BBCC_ptr);
556
557 vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
558 8 == data_size || 10 == data_size);
559
560 CC_size = sizeof(idCC);
561 helper = VGOFF_(cachesim_log_mem_instr);
562
563 if (IS_(read) && !IS_(write)) {
564 X_CC = READ_CC;
565 vg_assert(INVALID_TEMPREG != t_read_addr &&
566 INVALID_TEMPREG == t_write_addr);
567 t_data_addr = t_read_addr;
568
569 } else if (!IS_(read) && IS_(write)) {
570 X_CC = WRITE_CC;
571 vg_assert(INVALID_TEMPREG == t_read_addr &&
572 INVALID_TEMPREG != t_write_addr);
573 t_data_addr = t_write_addr;
574
575 } else {
576 vg_assert(IS_(read) && IS_(write));
577 X_CC = MOD_CC;
578 vg_assert(INVALID_TEMPREG != t_read_addr &&
579 INVALID_TEMPREG != t_write_addr);
580 t_data_addr = t_read_addr;
581 }
582
583 if (!BB_seen_before)
584 init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
585
586 /* 2nd arg: data addr */
587 uInstr1(cb, PUSH, 4, TempReg, t_data_addr);
588 stack_used += 4;
589 }
590#undef IS_
591
592 /* 1st arg: CC addr */
593 t_CC_addr = newTemp(cb);
594 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
595 uLiteral(cb, BBCC_ptr);
596 uInstr1(cb, PUSH, 4, TempReg, t_CC_addr);
597 stack_used += 4;
598
599 /* Call function and return. */
600 uInstr1(cb, CALLM, 0, Lit16, helper);
601 uInstr1(cb, CLEAR, 0, Lit16, stack_used);
602
603 /* Restore the caller-save registers now the call is done */
604 uInstr1(cb, POP, 4, RealReg, R_EDX);
605 uInstr1(cb, POP, 4, RealReg, R_ECX);
606 uInstr1(cb, POP, 4, RealReg, R_EAX);
607
608 VG_(copyUInstr)(cb, u_in);
609
610 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
611 BBCC_ptr += CC_size;
612 instr_addr += instr_size;
613 t_CC_addr = t_read_addr = t_write_addr =
614 t_data_addr = INVALID_TEMPREG;
615 data_size = INVALID_DATA_SIZE;
616 break;
617
618
619 /* For memory-ref instrs, copy the data_addr into a temporary to be
620 * passed to the cachesim_log_function at the end of the instruction.
621 */
622 case LOAD:
623 t_read_addr = newTemp(cb);
624 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
625 data_size = u_in->size;
626 VG_(copyUInstr)(cb, u_in);
627 break;
628
629 case FPU_R:
630 t_read_addr = newTemp(cb);
631 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
632 data_size = u_in->size;
633 VG_(copyUInstr)(cb, u_in);
634 break;
635
636 /* Note that we must set t_write_addr even for mod instructions;
637 * that's how the code above determines whether it does a write;
638 * without it, it would think a mod instruction is a read.
639 * As for the MOV, if it's a mod instruction it's redundant, but it's
640 * not expensive and mod instructions are rare anyway. */
641 case STORE:
642 case FPU_W:
643 t_write_addr = newTemp(cb);
644 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
645 data_size = u_in->size;
646 VG_(copyUInstr)(cb, u_in);
647 break;
648
649 case NOP: case CALLM_E: case CALLM_S:
650 break;
651
652 default:
653 VG_(copyUInstr)(cb, u_in);
654 break;
655 }
656 }
657
658 /* Just check everything looks ok */
659 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
660
661 VG_(freeCodeBlock)(cb_in);
662 return cb;
663}
664
665/*------------------------------------------------------------*/
666/*--- Cache simulation stuff ---*/
667/*------------------------------------------------------------*/
668
669/* Total reads/writes/misses. Calculated during CC traversal at the end. */
670static CC Ir_total;
671static CC Dr_total;
672static CC Dw_total;
673
674void VG_(init_cachesim)(void)
675{
676 /* Make sure the output file can be written. */
677 Int fd = VG_(open_write)(OUT_FILE);
678 if (-1 == fd) {
679 fd = VG_(create_and_write)(OUT_FILE);
680 if (-1 == fd) {
681 file_err();
682 }
683 }
684 VG_(close)(fd);
njne0ee0712002-05-03 16:41:05 +0000685
njn4f9c9342002-04-29 16:03:24 +0000686 initCC(&Ir_total);
687 initCC(&Dr_total);
688 initCC(&Dw_total);
689
690 cachesim_I1_initcache();
691 cachesim_D1_initcache();
692 cachesim_L2_initcache();
693
694 init_BBCC_table();
695}
696
697void VG_(cachesim_log_non_mem_instr)(iCC* cc)
698{
699 //VG_(printf)("sim I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
700 // cc, cc->instr_addr, cc->instr_size)
701 VGP_PUSHCC(VgpCacheSimulate);
702 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
703 cc->I.a++;
704 VGP_POPCC;
705}
706
707void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
708{
709 //VG_(printf)("sim D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
710 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
711 VGP_PUSHCC(VgpCacheSimulate);
712 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
713 cc->I.a++;
714
715 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
716 cc->D.a++;
717 VGP_POPCC;
718}
719
720/*------------------------------------------------------------*/
721/*--- Printing of output file and summary stats ---*/
722/*------------------------------------------------------------*/
723
njn4f9c9342002-04-29 16:03:24 +0000724static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
725 Char *first_instr_fn)
726{
727 Addr BBCC_ptr0, BBCC_ptr;
njne0ee0712002-05-03 16:41:05 +0000728 Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +0000729 UInt line_num;
730
731 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
732
njne0ee0712002-05-03 16:41:05 +0000733 /* Mark start of basic block in output, just to ease debugging */
734 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +0000735
736 VG_(strcpy)(curr_file, first_instr_fl);
737
738 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
739
740 /* We pretend the CC is an iCC for getting the tag. This is ok
741 * because both CC types have tag as their first byte. Once we know
742 * the type, we can cast and act appropriately. */
743
744 Char fl_buf[FILENAME_LEN];
745 Char fn_buf[FN_NAME_LEN];
746
njne0ee0712002-05-03 16:41:05 +0000747 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +0000748 switch ( ((iCC*)BBCC_ptr)->tag ) {
749
750#define ADD_CC_TO(CC_type, cc, total) \
751 total.a += ((CC_type*)BBCC_ptr)->cc.a; \
752 total.m1 += ((CC_type*)BBCC_ptr)->cc.m1; \
753 total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
754
755 case INSTR_CC:
njne0ee0712002-05-03 16:41:05 +0000756 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
757 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +0000758 ADD_CC_TO(iCC, I, Ir_total);
759 BBCC_ptr += sizeof(iCC);
760 break;
761
762 case READ_CC:
763 case MOD_CC:
njne0ee0712002-05-03 16:41:05 +0000764 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
765 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +0000766 ADD_CC_TO(idCC, I, Ir_total);
767 ADD_CC_TO(idCC, D, Dr_total);
768 BBCC_ptr += sizeof(idCC);
769 break;
770
771 case WRITE_CC:
njne0ee0712002-05-03 16:41:05 +0000772 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
773 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +0000774 ADD_CC_TO(idCC, I, Ir_total);
775 ADD_CC_TO(idCC, D, Dw_total);
776 BBCC_ptr += sizeof(idCC);
777 break;
778
779#undef ADD_CC_TO
780
781 default:
782 VG_(panic)("Unknown CC type in fprint_BBCC()\n");
783 break;
784 }
785 distinct_instrs++;
786
njne0ee0712002-05-03 16:41:05 +0000787 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
788
789 /* Allow for filename switching in the middle of a BB; if this happens,
790 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +0000791 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +0000792 VG_(strcpy)(curr_file, fl_buf);
793 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
794 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
795 }
796
njn4f9c9342002-04-29 16:03:24 +0000797 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +0000798 * first instruction in the BB, print warning. */
sewardj18d75132002-05-16 11:06:21 +0000799 if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +0000800 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +0000801 VG_(printf)(" filenames: BB:%s, instr:%s;"
802 " fn_names: BB:%s, instr:%s;"
803 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +0000804 first_instr_fl, fl_buf,
805 first_instr_fn, fn_buf,
806 line_num);
807 }
808
njne0ee0712002-05-03 16:41:05 +0000809 VG_(sprintf)(lbuf, "%u ", line_num);
810 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
811 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +0000812 }
813 /* If we switched filenames in the middle of the BB without switching back,
814 * switch back now because the subsequent BB may be relying on falling under
815 * the original file name. */
816 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
817 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
818 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
819 }
njne0ee0712002-05-03 16:41:05 +0000820
821 /* Mark end of basic block */
822 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +0000823
824 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
825}
826
827static void fprint_BBCC_table_and_calc_totals(Int client_argc,
828 Char** client_argv)
829{
830 Int fd;
831 Char buf[BUF_LEN];
832 file_node *curr_file_node;
833 fn_node *curr_fn_node;
834 BBCC *curr_BBCC;
835 Int i,j,k;
836
837 VGP_PUSHCC(VgpCacheDump);
838 fd = VG_(open_write)(OUT_FILE);
839 if (-1 == fd) { file_err(); }
840
841 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
842 VG_(write)(fd, (void*)I1_desc_line, VG_(strlen)(I1_desc_line));
843 VG_(write)(fd, (void*)D1_desc_line, VG_(strlen)(D1_desc_line));
844 VG_(write)(fd, (void*)L2_desc_line, VG_(strlen)(L2_desc_line));
845
846 /* "cmd:" line */
847 VG_(strcpy)(buf, "cmd:");
848 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
849 for (i = 0; i < client_argc; i++) {
850 VG_(sprintf)(buf, " %s", client_argv[i]);
851 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
852 }
853 /* "events:" line */
854 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
855 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
856
857 /* Six loops here: three for the hash table arrays, and three for the
858 * chains hanging off the hash table arrays. */
859 for (i = 0; i < N_FILE_ENTRIES; i++) {
860 curr_file_node = BBCC_table[i];
861 while (curr_file_node != NULL) {
862 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
863 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
864
865 for (j = 0; j < N_FN_ENTRIES; j++) {
866 curr_fn_node = curr_file_node->fns[j];
867 while (curr_fn_node != NULL) {
868 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
869 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
870
871 for (k = 0; k < N_BBCC_ENTRIES; k++) {
872 curr_BBCC = curr_fn_node->BBCCs[k];
873 while (curr_BBCC != NULL) {
874 fprint_BBCC(fd, curr_BBCC,
875
876 curr_file_node->filename,
877 curr_fn_node->fn_name);
878
879 curr_BBCC = curr_BBCC->next;
880 }
881 }
882 curr_fn_node = curr_fn_node->next;
883 }
884 }
885 curr_file_node = curr_file_node->next;
886 }
887 }
888
889 /* Summary stats must come after rest of table, since we calculate them
890 * during traversal. */
891 VG_(sprintf)(buf, "summary: "
892 "%llu %llu %llu "
893 "%llu %llu %llu "
894 "%llu %llu %llu\n",
895 Ir_total.a, Ir_total.m1, Ir_total.m2,
896 Dr_total.a, Dr_total.m1, Dr_total.m2,
897 Dw_total.a, Dw_total.m1, Dw_total.m2);
898 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
899 VG_(close)(fd);
900}
901
/* Format n in decimal with a comma between each group of three digits,
 * right-justified with leading spaces in a field field_width wide, and
 * store the NUL-terminated result in buf.
 *
 * If the commified number is wider than field_width it is written in full
 * with no padding.  Returns the length of the commified number itself,
 * excluding any padding.
 *
 * The caller must supply a buffer large enough for the result;
 * field_width is not checked against COMMIFY_BUF_LEN.
 */
static
Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
{
   int len, n_commas, i, j, new_len, space;

   /* n is 64 bits wide: use the same conversion as the rest of this file. */
   VG_(sprintf)(buf, "%llu", n);
   len      = VG_(strlen)(buf);
   n_commas = (len - 1) / 3;
   new_len  = len + n_commas;
   space    = field_width - new_len;

   /* Allow for printing a number in a field_width smaller than its size */
   if (space < 0) space = 0;

   /* Shift the digits right, working backwards from the terminating '\0',
    * and drop in a comma after every third digit.  j starts at -1 because
    * the '\0' is copied before the first digit.  No comma is placed in
    * front of the leading digit (i == 0); without that check a number
    * whose digit count is a multiple of three would have a comma written
    * one position before its first character, which with space == 0 is
    * buf[-1]. */
   for (j = -1, i = len; i >= 0; i--) {
      buf[i + n_commas + space] = buf[i];

      if (i > 0 && 3 == ++j) {
         j = 0;
         n_commas--;
         buf[i + n_commas + space] = ',';
      }
   }

   /* Right justify in field. */
   for (i = 0; i < space; i++) buf[i] = ' ';
   return new_len;
}
933
/* Format the fixed-point value n / pow as a percentage string
 * "<whole>.<fraction>%" into buf, right-justified with leading spaces in a
 * field field_width wide.
 *
 *   n            value scaled by pow; intended to be non-negative.
 *   pow          scale factor, expected to be a power of ten (10 gives one
 *                fractional digit, 100 gives two); must not be zero.
 *   field_width  minimum width; a result that is already wider is written
 *                in full with no padding.
 *   buf          receives the NUL-terminated result; the caller must supply
 *                enough room.
 *
 * The fractional part is zero-padded to the number of digits implied by pow,
 * so n = 105, pow = 100 yields "1.05%" rather than "1.5%".
 */
static
void percentify(Int n, Int pow, Int field_width, char buf[])
{
   Int i, len, space, n_frac_digits, t;
   Int frac = n % pow;

   if (frac < 0) frac = -frac;

   /* Number of decimal digits needed for the largest remainder, pow - 1;
    * always at least one digit. */
   n_frac_digits = 1;
   for (t = (pow - 1) / 10; t > 0; t /= 10) n_frac_digits++;

   VG_(sprintf)(buf, "%d.", n / pow);
   len = VG_(strlen)(buf);

   /* Emit the fraction right-to-left so leading zeros are kept. */
   for (i = n_frac_digits - 1; i >= 0; i--) {
      buf[len + i] = (char)('0' + frac % 10);
      frac /= 10;
   }
   len += n_frac_digits;
   buf[len++] = '%';
   buf[len]   = '\0';

   /* Never shift left: a negative amount would write before buf. */
   space = field_width - len;
   if (space < 0) space = 0;

   /* Right justify in field */
   for (i = len; i >= 0; i--) buf[i + space] = buf[i];
   for (i = 0; i < space; i++) buf[i] = ' ';
}
948
949void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
950{
951 CC D_total;
njn1d021fa2002-05-02 13:56:34 +0000952 ULong L2_total_m, L2_total_mr, L2_total_mw,
953 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +0000954 char buf1[RESULTS_BUF_LEN],
955 buf2[RESULTS_BUF_LEN],
956 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +0000957 Int l1, l2, l3;
958 Int p;
959
960 fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
961
962 /* I cache results. Use the I_refs value to determine the first column
963 * width. */
964 l1 = commify(Ir_total.a, 0, buf1);
965 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
966
967 commify(Ir_total.m1, l1, buf1);
968 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
969
970 commify(Ir_total.m2, l1, buf1);
971 VG_(message)(Vg_UserMsg, "L2 misses: %s", buf1);
972
973 p = 100;
974
975 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
976 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
977
978 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
979 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
980 VG_(message)(Vg_UserMsg, "");
981
982 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
983 * width of columns 2 & 3. */
984 D_total.a = Dr_total.a + Dw_total.a;
985 D_total.m1 = Dr_total.m1 + Dw_total.m1;
986 D_total.m2 = Dr_total.m2 + Dw_total.m2;
987
njn1d021fa2002-05-02 13:56:34 +0000988 commify( D_total.a, l1, buf1);
989 l2 = commify(Dr_total.a, 0, buf2);
990 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +0000991 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
992 buf1, buf2, buf3);
993
994 commify( D_total.m1, l1, buf1);
995 commify(Dr_total.m1, l2, buf2);
996 commify(Dw_total.m1, l3, buf3);
997 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
998 buf1, buf2, buf3);
999
1000 commify( D_total.m2, l1, buf1);
1001 commify(Dr_total.m2, l2, buf2);
1002 commify(Dw_total.m2, l3, buf3);
1003 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1004 buf1, buf2, buf3);
1005
1006 p = 10;
1007
1008 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1009 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1010 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1011 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1012
1013 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1014 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1015 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1016 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1017 VG_(message)(Vg_UserMsg, "");
1018
1019 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001020
1021 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1022 L2_total_r = Dr_total.m1 + Ir_total.m1;
1023 L2_total_w = Dw_total.m1;
1024 commify(L2_total, l1, buf1);
1025 commify(L2_total_r, l2, buf2);
1026 commify(L2_total_w, l3, buf3);
1027 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1028 buf1, buf2, buf3);
1029
njn4f9c9342002-04-29 16:03:24 +00001030 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1031 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1032 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001033 commify(L2_total_m, l1, buf1);
1034 commify(L2_total_mr, l2, buf2);
1035 commify(L2_total_mw, l3, buf3);
1036 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1037 buf1, buf2, buf3);
1038
1039 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1040 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1041 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1042 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1043
1044
1045 /* Hash table stats */
1046 if (VG_(clo_verbosity) > 1) {
1047 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1048 file_line_debug_BBs + no_debug_BBs;
1049
1050 VG_(message)(Vg_DebugMsg, "");
1051 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1052 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1053 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1054 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1055 full_debug_BBs * 100 / BB_lookups,
1056 full_debug_BBs);
1057 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1058 file_line_debug_BBs * 100 / BB_lookups,
1059 file_line_debug_BBs);
1060 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1061 fn_name_debug_BBs * 100 / BB_lookups,
1062 fn_name_debug_BBs);
1063 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1064 no_debug_BBs * 100 / BB_lookups,
1065 no_debug_BBs);
1066 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1067 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1068 }
1069 VGP_POPCC;
1070}
1071
sewardj18d75132002-05-16 11:06:21 +00001072
1073void VG_(cachesim_notify_discard) ( TTEntry* tte )
1074{
1075 VG_(printf)( "cachesim_notify_discard: %p for %d\n",
1076 tte->orig_addr, (Int)tte->orig_size);
1077}
1078
1079/*--------------------------------------------------------------------*/
1080/*--- end vg_cachesim.c ---*/
1081/*--------------------------------------------------------------------*/