blob: bfa9171fb883e544b911013825a818b83570fb4d [file] [log] [blame]
njn4f9c9342002-04-29 16:03:24 +00001/*--------------------------------------------------------------------*/
2/*--- The cache simulation framework: instrumentation, recording ---*/
3/*--- and results printing. ---*/
4/*--- vg_cachesim.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, an x86 protected-mode emulator
9 designed for debugging and profiling binaries on x86-Unixes.
10
11 Copyright (C) 2000-2002 Julian Seward
12 jseward@acm.org
13 Julian_Seward@muraroa.demon.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file LICENSE.
31*/
32
33#include <string.h>
34
35#include "vg_include.h"
36
37#include "vg_cachesim_L2.c"
38#include "vg_cachesim_I1.c"
39#include "vg_cachesim_D1.c"
40
41
/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
#define MAX_x86_INSTR_SIZE    16

/* Capacities, in bytes, of the fixed-size string buffers used in this file */
#define FILENAME_LEN          256
#define FN_NAME_LEN           256
#define BUF_LEN               512
#define COMMIFY_BUF_LEN       128
#define RESULTS_BUF_LEN       128
#define LINE_BUF_LEN          64
njn4f9c9342002-04-29 16:03:24 +000052
53/*------------------------------------------------------------*/
54/*--- Output file related stuff ---*/
55/*------------------------------------------------------------*/
56
57#define OUT_FILE "cachegrind.out"
58
59static void file_err()
60{
61 VG_(message)(Vg_UserMsg,
62 "FATAL: can't open cache simulation output file `%s'",
63 OUT_FILE );
64 VG_(exit)(1);
65}
66
67/*------------------------------------------------------------*/
68/*--- Cost center types, operations ---*/
69/*------------------------------------------------------------*/
70
71typedef struct _CC CC;
72struct _CC {
73 ULong a;
74 ULong m1;
75 ULong m2;
76};
77
78static __inline__ void initCC(CC* cc) {
79 cc->a = 0;
80 cc->m1 = 0;
81 cc->m2 = 0;
82}
83
84
/* Tag stored in the first byte of every instruction-level cost centre. */
typedef enum {
   INSTR_CC,   /* iCC:  instruction with no data reference */
   READ_CC,    /* idCC: instruction that reads memory */
   WRITE_CC,   /* idCC: instruction that writes memory */
   MOD_CC      /* idCC: instruction that both reads and writes memory */
} CC_type;
86
87/* Instruction-level cost-centres. The typedefs for these structs are in
88 * vg_include.c
89 *
90 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +000091 *
njne0ee0712002-05-03 16:41:05 +000092 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +000093 */
94struct _iCC {
95 /* word 1 */
96 UChar tag;
97 UChar instr_size;
njne0ee0712002-05-03 16:41:05 +000098 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +000099
100 /* words 2+ */
101 Addr instr_addr;
102 CC I;
103};
104
105struct _idCC {
106 /* word 1 */
107 UChar tag;
108 UChar instr_size;
109 UChar data_size;
njne0ee0712002-05-03 16:41:05 +0000110 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000111
112 /* words 2+ */
113 Addr instr_addr;
114 CC I;
115 CC D;
116};
117
118static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
119{
120 cc->tag = INSTR_CC;
121 cc->instr_size = instr_size;
122 cc->instr_addr = instr_addr;
123 initCC(&cc->I);
124}
125
126static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
127 UInt instr_size, UInt data_size)
128{
129 cc->tag = X_CC;
130 cc->instr_size = instr_size;
131 cc->data_size = data_size;
132 cc->instr_addr = instr_addr;
133 initCC(&cc->I);
134 initCC(&cc->D);
135}
136
njne0ee0712002-05-03 16:41:05 +0000137static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000138{
njne0ee0712002-05-03 16:41:05 +0000139 VG_(sprintf)(buf, "%llu %llu %llu\n",
140 cc->I.a, cc->I.m1, cc->I.m2);
njn4f9c9342002-04-29 16:03:24 +0000141}
142
njne0ee0712002-05-03 16:41:05 +0000143static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000144{
njne0ee0712002-05-03 16:41:05 +0000145 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
146 cc->I.a, cc->I.m1, cc->I.m2,
147 cc->D.a, cc->D.m1, cc->D.m2);
njn4f9c9342002-04-29 16:03:24 +0000148}
149
njne0ee0712002-05-03 16:41:05 +0000150static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000151{
njne0ee0712002-05-03 16:41:05 +0000152 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
153 cc->I.a, cc->I.m1, cc->I.m2,
154 cc->D.a, cc->D.m1, cc->D.m2);
njn4f9c9342002-04-29 16:03:24 +0000155}
156
157/*------------------------------------------------------------*/
158/*--- BBCC hash table stuff ---*/
159/*------------------------------------------------------------*/
160
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The sizes below
 * work fairly well for Konqueror. */

#define N_FILE_ENTRIES     251   /* buckets in the top-level (filename) table */
#define N_FN_ENTRIES       53    /* buckets in each file's function table */
#define N_BBCC_ENTRIES     37    /* buckets in each function's BBCC table */
168
169/* The cost centres for a basic block are stored in a contiguous array.
170 * They are distinguishable by their tag field. */
171typedef struct _BBCC BBCC;
172struct _BBCC {
173 Addr orig_addr;
174 UInt array_size; /* byte-size of variable length array */
175 BBCC* next;
176 Addr array[0]; /* variable length array */
177};
178
179typedef struct _fn_node fn_node;
180struct _fn_node {
181 Char* fn_name;
182 BBCC* BBCCs[N_BBCC_ENTRIES];
183 fn_node* next;
184};
185
186typedef struct _file_node file_node;
187struct _file_node {
188 Char* filename;
189 fn_node* fns[N_FN_ENTRIES];
190 file_node* next;
191};
192
193/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
194file_node *BBCC_table[N_FILE_ENTRIES];
195
196Int distinct_files = 0;
197Int distinct_fns = 0;
198
199Int distinct_instrs = 0;
200Int full_debug_BBs = 0;
201Int file_line_debug_BBs = 0;
202Int fn_name_debug_BBs = 0;
203Int no_debug_BBs = 0;
204
205Int BB_retranslations = 0;
206
207static void init_BBCC_table()
208{
209 Int i;
210 for (i = 0; i < N_FILE_ENTRIES; i++)
211 BBCC_table[i] = NULL;
212}
213
njne0ee0712002-05-03 16:41:05 +0000214static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
215 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000216{
njn4f9c9342002-04-29 16:03:24 +0000217 Bool found1, found2, no_demangle = False;
218
219 found1 = VG_(what_line_is_this)(instr_addr, filename,
njne0ee0712002-05-03 16:41:05 +0000220 FILENAME_LEN, line_num);
njn4f9c9342002-04-29 16:03:24 +0000221 found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
222
223 if (!found1 && !found2) {
224 no_debug_BBs++;
225 VG_(strcpy)(filename, "???");
226 VG_(strcpy)(fn_name, "???");
227
228 } else if ( found1 && found2) {
229 full_debug_BBs++;
230
231 } else if ( found1 && !found2) {
232 file_line_debug_BBs++;
233 VG_(strcpy)(fn_name, "???");
234
235 } else /*(!found1 && found2)*/ {
236 fn_name_debug_BBs++;
237 VG_(strcpy)(filename, "???");
238 }
239}
240
241/* Forward declaration. */
242static Int compute_BBCC_array_size(UCodeBlock* cb);
243
244static __inline__
245file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
246{
247 Int i;
248 file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
249 new->filename = VG_(strdup)(VG_AR_PRIVATE, filename);
250 for (i = 0; i < N_FN_ENTRIES; i++) {
251 new->fns[i] = NULL;
252 }
253 new->next = next;
254 return new;
255}
256
257static __inline__
258fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
259{
260 Int i;
261 fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
262 new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
263 for (i = 0; i < N_BBCC_ENTRIES; i++) {
264 new->BBCCs[i] = NULL;
265 }
266 new->next = next;
267 return new;
268}
269
270static __inline__
271BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
272{
273 Int BBCC_array_size = compute_BBCC_array_size(cb);
274 BBCC* new;
275
276 new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
277 new->orig_addr = bb_orig_addr;
278 new->array_size = BBCC_array_size;
279 new->next = next;
280
281 return new;
282}
283
284#define HASH_CONSTANT 256
285
286static UInt hash(Char *s, UInt table_size)
287{
288 int hash_value = 0;
289 for ( ; *s; s++)
290 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
291 return hash_value;
292}
293
294/* Do a three step traversal: by filename, then fn_name, then instr_addr.
295 * In all cases prepends new nodes to their chain. Returns a pointer to the
296 * cost centre. Also sets BB_seen_before by reference.
297 */
298static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
299 Bool *BB_seen_before)
300{
301 file_node *curr_file_node;
302 fn_node *curr_fn_node;
303 BBCC *curr_BBCC;
304 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
305 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000306 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000307
njne0ee0712002-05-03 16:41:05 +0000308 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000309
310 VGP_PUSHCC(VgpCacheGetBBCC);
311 filename_hash = hash(filename, N_FILE_ENTRIES);
312 curr_file_node = BBCC_table[filename_hash];
313 while (NULL != curr_file_node &&
314 strcmp(filename, curr_file_node->filename) != 0) {
315 curr_file_node = curr_file_node->next;
316 }
317 if (NULL == curr_file_node) {
318 BBCC_table[filename_hash] = curr_file_node =
319 new_file_node(filename, BBCC_table[filename_hash]);
320 distinct_files++;
321 }
322
323 fnname_hash = hash(fn_name, N_FN_ENTRIES);
324 curr_fn_node = curr_file_node->fns[fnname_hash];
325 while (NULL != curr_fn_node &&
326 strcmp(fn_name, curr_fn_node->fn_name) != 0) {
327 curr_fn_node = curr_fn_node->next;
328 }
329 if (NULL == curr_fn_node) {
330 curr_file_node->fns[fnname_hash] = curr_fn_node =
331 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
332 distinct_fns++;
333 }
334
335 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
336 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
337 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
338 curr_BBCC = curr_BBCC->next;
339 }
340 if (curr_BBCC == NULL) {
341 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
342 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
343 *BB_seen_before = False;
344
345 } else {
346 vg_assert(bb_orig_addr == curr_BBCC->orig_addr);
347 vg_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000348 if (VG_(clo_verbosity) > 2) {
349 VG_(message)(Vg_DebugMsg,
350 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000351 }
352 *BB_seen_before = True;
353 BB_retranslations++;
354 }
355 VGP_POPCC;
356 return curr_BBCC;
357}
358
359/*------------------------------------------------------------*/
360/*--- Cache simulation instrumentation phase ---*/
361/*------------------------------------------------------------*/
362
/* Short local aliases for the UCode construction helpers. */
#define uInstr1    VG_(newUInstr1)
#define uInstr2    VG_(newUInstr2)
#define uInstr3    VG_(newUInstr3)
#define dis        VG_(disassemble)
#define uLiteral   VG_(setLiteralField)
#define newTemp    VG_(getNewTemp)
369
370static Int compute_BBCC_array_size(UCodeBlock* cb)
371{
372 UInstr* u_in;
373 Int i, CC_size, BBCC_size = 0;
374 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
375
376 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
377
378 for (i = 0; i < cb->used; i++) {
sewardjfc3e5d32002-04-30 10:18:48 +0000379 /* VG_(ppUInstr)(0, &cb->instrs[i]); */
njn4f9c9342002-04-29 16:03:24 +0000380
381 u_in = &cb->instrs[i];
382 switch(u_in->opcode) {
383
384 case INCEIP:
385 goto case_for_end_of_instr;
386
387 case JMP:
388 if (u_in->cond != CondAlways) break;
389
390 goto case_for_end_of_instr;
391
392 case_for_end_of_instr:
393
394 CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W
395 ? sizeof(idCC) : sizeof(iCC));
396
397 BBCC_size += CC_size;
398 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
399 break;
400
401 case LOAD:
402 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000403 /* Also, a STORE can come after a LOAD for bts/btr/btc */
sewardjfc3e5d32002-04-30 10:18:48 +0000404 vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */
405 !is_FPU_R && !is_FPU_W);
njn4f9c9342002-04-29 16:03:24 +0000406 is_LOAD = True;
407 break;
408
409 case STORE:
410 /* Multiple STOREs are possible for 'pushal' */
411 vg_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
412 is_STORE = True;
413 break;
414
415 case FPU_R:
416 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
417 is_FPU_R = True;
418 break;
419
420 case FPU_W:
421 vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
422 is_FPU_W = True;
423 break;
424
425 default:
426 break;
427 }
428 }
429
430 return BBCC_size;
431}
432
433/* Use this rather than eg. -1 because it's stored as a UInt. */
434#define INVALID_DATA_SIZE 999999
435
436UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
437{
438 UCodeBlock* cb;
439 Int i;
440 UInstr* u_in;
441 BBCC* BBCC_node;
442 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
443 Int CC_size = -1; /* Shut gcc warnings up */
444 Addr instr_addr = orig_addr;
445 UInt instr_size, data_size = INVALID_DATA_SIZE;
446 Int helper = -1; /* Shut gcc warnings up */
447 UInt stack_used;
448 Bool BB_seen_before = False;
449 Bool prev_instr_was_Jcond = False;
450 Addr BBCC_ptr0, BBCC_ptr;
451
452 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
453 * if it's the first time it's been seen), and point to start of the
454 * BBCC array. */
455 BBCC_node = get_BBCC(orig_addr, cb_in, &BB_seen_before);
456 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
457
458 cb = VG_(allocCodeBlock)();
459 cb->nextTemp = cb_in->nextTemp;
460
461 t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
462
463 for (i = 0; i < cb_in->used; i++) {
464 u_in = &cb_in->instrs[i];
465
466 //VG_(ppUInstr)(0, u_in);
467
468 /* What this is all about: we want to instrument each x86 instruction
469 * translation. The end of these are marked in three ways. The three
470 * ways, and the way we instrument them, are as follows:
471 *
472 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
473 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
474 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
475 *
476 * We must put the instrumentation before the jumps so that it is always
477 * executed. We don't have to put the instrumentation before the INCEIP
478 * (it could go after) but we do so for consistency.
479 *
480 * Junconds are always the last instruction in a basic block. Jconds are
481 * always the 2nd last, and must be followed by a Jcond. We check this
482 * with various assertions.
483 *
484 * Note that in VG_(disBB) we patched the `extra4b' field of the first
485 * occurring JMP in a block with the size of its x86 instruction. This
486 * is used now.
487 *
488 * Note that we don't have to treat JIFZ specially; unlike JMPs, JIFZ
489 * occurs in the middle of a BB and gets an INCEIP after it.
490 *
491 * The instrumentation is just a call to the appropriate helper function,
492 * passing it the address of the instruction's CC.
493 */
494 if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
495
496 switch (u_in->opcode) {
497
498 case INCEIP:
499 instr_size = u_in->val1;
500 goto case_for_end_of_x86_instr;
501
502 case JMP:
503 if (u_in->cond == CondAlways) {
504 vg_assert(i+1 == cb_in->used);
505
506 /* Don't instrument if previous instr was a Jcond. */
507 if (prev_instr_was_Jcond) {
508 vg_assert(0 == u_in->extra4b);
509 VG_(copyUInstr)(cb, u_in);
510 break;
511 }
512 prev_instr_was_Jcond = False;
513
514 } else {
515 vg_assert(i+2 == cb_in->used); /* 2nd last instr in block */
516 prev_instr_was_Jcond = True;
517 }
518
519 /* Ah, the first JMP... instrument, please. */
520 instr_size = u_in->extra4b;
521 goto case_for_end_of_x86_instr;
522
523 /* Shared code that is executed at the end of an x86 translation
524 * block, marked by either an INCEIP or an unconditional JMP. */
525 case_for_end_of_x86_instr:
526
527#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
528
529 /* Initialise the CC in the BBCC array appropriately if it hasn't
530 * been initialised before.
531 * Then call appropriate sim function, passing it the CC address.
532 * Note that CALLM_S/CALL_E aren't required here; by this point,
533 * the checking related to them has already happened. */
534 stack_used = 0;
535
536 vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
537 vg_assert(0 != instr_addr);
538
539 /* Save the caller-save registers before we push our args */
540 uInstr1(cb, PUSH, 4, RealReg, R_EAX);
541 uInstr1(cb, PUSH, 4, RealReg, R_ECX);
542 uInstr1(cb, PUSH, 4, RealReg, R_EDX);
543
544 if (!IS_(read) && !IS_(write)) {
545 iCC* CC_ptr = (iCC*)(BBCC_ptr);
546 vg_assert(INVALID_DATA_SIZE == data_size);
547 vg_assert(INVALID_TEMPREG == t_read_addr &&
548 INVALID_TEMPREG == t_write_addr);
549 CC_size = sizeof(iCC);
550 if (!BB_seen_before)
551 init_iCC(CC_ptr, instr_addr, instr_size);
552
553 helper = VGOFF_(cachesim_log_non_mem_instr);
554
555 } else {
556 CC_type X_CC;
557 idCC* CC_ptr = (idCC*)(BBCC_ptr);
558
559 vg_assert(4 == data_size || 2 == data_size || 1 == data_size ||
560 8 == data_size || 10 == data_size);
561
562 CC_size = sizeof(idCC);
563 helper = VGOFF_(cachesim_log_mem_instr);
564
565 if (IS_(read) && !IS_(write)) {
566 X_CC = READ_CC;
567 vg_assert(INVALID_TEMPREG != t_read_addr &&
568 INVALID_TEMPREG == t_write_addr);
569 t_data_addr = t_read_addr;
570
571 } else if (!IS_(read) && IS_(write)) {
572 X_CC = WRITE_CC;
573 vg_assert(INVALID_TEMPREG == t_read_addr &&
574 INVALID_TEMPREG != t_write_addr);
575 t_data_addr = t_write_addr;
576
577 } else {
578 vg_assert(IS_(read) && IS_(write));
579 X_CC = MOD_CC;
580 vg_assert(INVALID_TEMPREG != t_read_addr &&
581 INVALID_TEMPREG != t_write_addr);
582 t_data_addr = t_read_addr;
583 }
584
585 if (!BB_seen_before)
586 init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
587
588 /* 2nd arg: data addr */
589 uInstr1(cb, PUSH, 4, TempReg, t_data_addr);
590 stack_used += 4;
591 }
592#undef IS_
593
594 /* 1st arg: CC addr */
595 t_CC_addr = newTemp(cb);
596 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
597 uLiteral(cb, BBCC_ptr);
598 uInstr1(cb, PUSH, 4, TempReg, t_CC_addr);
599 stack_used += 4;
600
601 /* Call function and return. */
602 uInstr1(cb, CALLM, 0, Lit16, helper);
603 uInstr1(cb, CLEAR, 0, Lit16, stack_used);
604
605 /* Restore the caller-save registers now the call is done */
606 uInstr1(cb, POP, 4, RealReg, R_EDX);
607 uInstr1(cb, POP, 4, RealReg, R_ECX);
608 uInstr1(cb, POP, 4, RealReg, R_EAX);
609
610 VG_(copyUInstr)(cb, u_in);
611
612 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
613 BBCC_ptr += CC_size;
614 instr_addr += instr_size;
615 t_CC_addr = t_read_addr = t_write_addr =
616 t_data_addr = INVALID_TEMPREG;
617 data_size = INVALID_DATA_SIZE;
618 break;
619
620
621 /* For memory-ref instrs, copy the data_addr into a temporary to be
622 * passed to the cachesim_log_function at the end of the instruction.
623 */
624 case LOAD:
625 t_read_addr = newTemp(cb);
626 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
627 data_size = u_in->size;
628 VG_(copyUInstr)(cb, u_in);
629 break;
630
631 case FPU_R:
632 t_read_addr = newTemp(cb);
633 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
634 data_size = u_in->size;
635 VG_(copyUInstr)(cb, u_in);
636 break;
637
638 /* Note that we must set t_write_addr even for mod instructions;
639 * that's how the code above determines whether it does a write;
640 * without it, it would think a mod instruction is a read.
641 * As for the MOV, if it's a mod instruction it's redundant, but it's
642 * not expensive and mod instructions are rare anyway. */
643 case STORE:
644 case FPU_W:
645 t_write_addr = newTemp(cb);
646 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
647 data_size = u_in->size;
648 VG_(copyUInstr)(cb, u_in);
649 break;
650
651 case NOP: case CALLM_E: case CALLM_S:
652 break;
653
654 default:
655 VG_(copyUInstr)(cb, u_in);
656 break;
657 }
658 }
659
660 /* Just check everything looks ok */
661 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
662
663 VG_(freeCodeBlock)(cb_in);
664 return cb;
665}
666
667/*------------------------------------------------------------*/
668/*--- Cache simulation stuff ---*/
669/*------------------------------------------------------------*/
670
671/* Total reads/writes/misses. Calculated during CC traversal at the end. */
672static CC Ir_total;
673static CC Dr_total;
674static CC Dw_total;
675
676void VG_(init_cachesim)(void)
677{
678 /* Make sure the output file can be written. */
679 Int fd = VG_(open_write)(OUT_FILE);
680 if (-1 == fd) {
681 fd = VG_(create_and_write)(OUT_FILE);
682 if (-1 == fd) {
683 file_err();
684 }
685 }
686 VG_(close)(fd);
njne0ee0712002-05-03 16:41:05 +0000687
njn4f9c9342002-04-29 16:03:24 +0000688 initCC(&Ir_total);
689 initCC(&Dr_total);
690 initCC(&Dw_total);
691
692 cachesim_I1_initcache();
693 cachesim_D1_initcache();
694 cachesim_L2_initcache();
695
696 init_BBCC_table();
697}
698
699void VG_(cachesim_log_non_mem_instr)(iCC* cc)
700{
701 //VG_(printf)("sim I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
702 // cc, cc->instr_addr, cc->instr_size)
703 VGP_PUSHCC(VgpCacheSimulate);
704 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
705 cc->I.a++;
706 VGP_POPCC;
707}
708
709void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
710{
711 //VG_(printf)("sim D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
712 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
713 VGP_PUSHCC(VgpCacheSimulate);
714 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
715 cc->I.a++;
716
717 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
718 cc->D.a++;
719 VGP_POPCC;
720}
721
722/*------------------------------------------------------------*/
723/*--- Printing of output file and summary stats ---*/
724/*------------------------------------------------------------*/
725
njn4f9c9342002-04-29 16:03:24 +0000726static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
727 Char *first_instr_fn)
728{
729 Addr BBCC_ptr0, BBCC_ptr;
njne0ee0712002-05-03 16:41:05 +0000730 Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +0000731 UInt line_num;
732
733 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
734
njne0ee0712002-05-03 16:41:05 +0000735 /* Mark start of basic block in output, just to ease debugging */
736 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +0000737
738 VG_(strcpy)(curr_file, first_instr_fl);
739
740 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
741
742 /* We pretend the CC is an iCC for getting the tag. This is ok
743 * because both CC types have tag as their first byte. Once we know
744 * the type, we can cast and act appropriately. */
745
746 Char fl_buf[FILENAME_LEN];
747 Char fn_buf[FN_NAME_LEN];
748
njne0ee0712002-05-03 16:41:05 +0000749 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +0000750 switch ( ((iCC*)BBCC_ptr)->tag ) {
751
752#define ADD_CC_TO(CC_type, cc, total) \
753 total.a += ((CC_type*)BBCC_ptr)->cc.a; \
754 total.m1 += ((CC_type*)BBCC_ptr)->cc.m1; \
755 total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
756
757 case INSTR_CC:
njne0ee0712002-05-03 16:41:05 +0000758 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
759 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +0000760 ADD_CC_TO(iCC, I, Ir_total);
761 BBCC_ptr += sizeof(iCC);
762 break;
763
764 case READ_CC:
765 case MOD_CC:
njne0ee0712002-05-03 16:41:05 +0000766 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
767 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +0000768 ADD_CC_TO(idCC, I, Ir_total);
769 ADD_CC_TO(idCC, D, Dr_total);
770 BBCC_ptr += sizeof(idCC);
771 break;
772
773 case WRITE_CC:
njne0ee0712002-05-03 16:41:05 +0000774 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
775 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +0000776 ADD_CC_TO(idCC, I, Ir_total);
777 ADD_CC_TO(idCC, D, Dw_total);
778 BBCC_ptr += sizeof(idCC);
779 break;
780
781#undef ADD_CC_TO
782
783 default:
784 VG_(panic)("Unknown CC type in fprint_BBCC()\n");
785 break;
786 }
787 distinct_instrs++;
788
njne0ee0712002-05-03 16:41:05 +0000789 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
790
791 /* Allow for filename switching in the middle of a BB; if this happens,
792 * must print the new filename with the function name. */
793 if (0 != strcmp(fl_buf, curr_file)) {
794 VG_(strcpy)(curr_file, fl_buf);
795 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
796 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
797 }
798
njn4f9c9342002-04-29 16:03:24 +0000799 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +0000800 * first instruction in the BB, print warning. */
njn4f9c9342002-04-29 16:03:24 +0000801 if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
802 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +0000803 VG_(printf)(" filenames: BB:%s, instr:%s;"
804 " fn_names: BB:%s, instr:%s;"
805 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +0000806 first_instr_fl, fl_buf,
807 first_instr_fn, fn_buf,
808 line_num);
809 }
810
njne0ee0712002-05-03 16:41:05 +0000811 VG_(sprintf)(lbuf, "%u ", line_num);
812 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
813 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +0000814 }
815 /* If we switched filenames in the middle of the BB without switching back,
816 * switch back now because the subsequent BB may be relying on falling under
817 * the original file name. */
818 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
819 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
820 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
821 }
njne0ee0712002-05-03 16:41:05 +0000822
823 /* Mark end of basic block */
824 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +0000825
826 vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
827}
828
829static void fprint_BBCC_table_and_calc_totals(Int client_argc,
830 Char** client_argv)
831{
832 Int fd;
833 Char buf[BUF_LEN];
834 file_node *curr_file_node;
835 fn_node *curr_fn_node;
836 BBCC *curr_BBCC;
837 Int i,j,k;
838
839 VGP_PUSHCC(VgpCacheDump);
840 fd = VG_(open_write)(OUT_FILE);
841 if (-1 == fd) { file_err(); }
842
843 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
844 VG_(write)(fd, (void*)I1_desc_line, VG_(strlen)(I1_desc_line));
845 VG_(write)(fd, (void*)D1_desc_line, VG_(strlen)(D1_desc_line));
846 VG_(write)(fd, (void*)L2_desc_line, VG_(strlen)(L2_desc_line));
847
848 /* "cmd:" line */
849 VG_(strcpy)(buf, "cmd:");
850 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
851 for (i = 0; i < client_argc; i++) {
852 VG_(sprintf)(buf, " %s", client_argv[i]);
853 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
854 }
855 /* "events:" line */
856 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
857 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
858
859 /* Six loops here: three for the hash table arrays, and three for the
860 * chains hanging off the hash table arrays. */
861 for (i = 0; i < N_FILE_ENTRIES; i++) {
862 curr_file_node = BBCC_table[i];
863 while (curr_file_node != NULL) {
864 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
865 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
866
867 for (j = 0; j < N_FN_ENTRIES; j++) {
868 curr_fn_node = curr_file_node->fns[j];
869 while (curr_fn_node != NULL) {
870 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
871 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
872
873 for (k = 0; k < N_BBCC_ENTRIES; k++) {
874 curr_BBCC = curr_fn_node->BBCCs[k];
875 while (curr_BBCC != NULL) {
876 fprint_BBCC(fd, curr_BBCC,
877
878 curr_file_node->filename,
879 curr_fn_node->fn_name);
880
881 curr_BBCC = curr_BBCC->next;
882 }
883 }
884 curr_fn_node = curr_fn_node->next;
885 }
886 }
887 curr_file_node = curr_file_node->next;
888 }
889 }
890
891 /* Summary stats must come after rest of table, since we calculate them
892 * during traversal. */
893 VG_(sprintf)(buf, "summary: "
894 "%llu %llu %llu "
895 "%llu %llu %llu "
896 "%llu %llu %llu\n",
897 Ir_total.a, Ir_total.m1, Ir_total.m2,
898 Dr_total.a, Dr_total.m1, Dr_total.m2,
899 Dw_total.a, Dw_total.m1, Dw_total.m2);
900 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
901 VG_(close)(fd);
902}
903
904/* Adds commas to ULong, right justifying in a field field_width wide, returns
905 * the string in buf. */
906Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
907{
908 int len, n_commas, i, j, new_len, space;
909
910 VG_(sprintf)(buf, "%lu", n);
911 len = VG_(strlen)(buf);
912 n_commas = (len - 1) / 3;
913 new_len = len + n_commas;
914 space = field_width - new_len;
915
916 /* Allow for printing a number in a field_width smaller than it's size */
917 if (space < 0) space = 0;
918
919 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
920 * of three. */
921 for (j = -1, i = len ; i >= 0; i--) {
922 buf[i + n_commas + space] = buf[i];
923
924 if (3 == ++j) {
925 j = 0;
926 n_commas--;
927 buf[i + n_commas + space] = ',';
928 }
929 }
930 /* Right justify in field. */
931 for (i = 0; i < space; i++) buf[i] = ' ';
932 return new_len;
933}
934
935void percentify(Int n, Int pow, Int field_width, char buf[])
936{
937 int i, len, space;
938
939 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
940 len = VG_(strlen)(buf);
941 space = field_width - len;
942 i = len;
943
944 /* Right justify in field */
945 for ( ; i >= 0; i--) buf[i + space] = buf[i];
946 for (i = 0; i < space; i++) buf[i] = ' ';
947}
948
949void VG_(show_cachesim_results)(Int client_argc, Char** client_argv)
950{
951 CC D_total;
njn1d021fa2002-05-02 13:56:34 +0000952 ULong L2_total_m, L2_total_mr, L2_total_mw,
953 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +0000954 char buf1[RESULTS_BUF_LEN],
955 buf2[RESULTS_BUF_LEN],
956 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +0000957 Int l1, l2, l3;
958 Int p;
959
960 fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
961
962 /* I cache results. Use the I_refs value to determine the first column
963 * width. */
964 l1 = commify(Ir_total.a, 0, buf1);
965 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
966
967 commify(Ir_total.m1, l1, buf1);
968 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
969
970 commify(Ir_total.m2, l1, buf1);
971 VG_(message)(Vg_UserMsg, "L2 misses: %s", buf1);
972
973 p = 100;
974
975 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
976 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
977
978 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
979 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
980 VG_(message)(Vg_UserMsg, "");
981
982 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
983 * width of columns 2 & 3. */
984 D_total.a = Dr_total.a + Dw_total.a;
985 D_total.m1 = Dr_total.m1 + Dw_total.m1;
986 D_total.m2 = Dr_total.m2 + Dw_total.m2;
987
njn1d021fa2002-05-02 13:56:34 +0000988 commify( D_total.a, l1, buf1);
989 l2 = commify(Dr_total.a, 0, buf2);
990 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +0000991 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
992 buf1, buf2, buf3);
993
994 commify( D_total.m1, l1, buf1);
995 commify(Dr_total.m1, l2, buf2);
996 commify(Dw_total.m1, l3, buf3);
997 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
998 buf1, buf2, buf3);
999
1000 commify( D_total.m2, l1, buf1);
1001 commify(Dr_total.m2, l2, buf2);
1002 commify(Dw_total.m2, l3, buf3);
1003 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1004 buf1, buf2, buf3);
1005
1006 p = 10;
1007
1008 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1009 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1010 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1011 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1012
1013 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1014 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1015 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1016 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1017 VG_(message)(Vg_UserMsg, "");
1018
1019 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001020
1021 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1022 L2_total_r = Dr_total.m1 + Ir_total.m1;
1023 L2_total_w = Dw_total.m1;
1024 commify(L2_total, l1, buf1);
1025 commify(L2_total_r, l2, buf2);
1026 commify(L2_total_w, l3, buf3);
1027 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1028 buf1, buf2, buf3);
1029
njn4f9c9342002-04-29 16:03:24 +00001030 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1031 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1032 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001033 commify(L2_total_m, l1, buf1);
1034 commify(L2_total_mr, l2, buf2);
1035 commify(L2_total_mw, l3, buf3);
1036 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1037 buf1, buf2, buf3);
1038
1039 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1040 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1041 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1042 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1043
1044
1045 /* Hash table stats */
1046 if (VG_(clo_verbosity) > 1) {
1047 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1048 file_line_debug_BBs + no_debug_BBs;
1049
1050 VG_(message)(Vg_DebugMsg, "");
1051 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1052 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1053 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1054 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1055 full_debug_BBs * 100 / BB_lookups,
1056 full_debug_BBs);
1057 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1058 file_line_debug_BBs * 100 / BB_lookups,
1059 file_line_debug_BBs);
1060 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1061 fn_name_debug_BBs * 100 / BB_lookups,
1062 fn_name_debug_BBs);
1063 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1064 no_debug_BBs * 100 / BB_lookups,
1065 no_debug_BBs);
1066 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1067 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1068 }
1069 VGP_POPCC;
1070}
1071