blob: 13450d936b1738dc3355c41ec34e135ad87149a5 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
njnc9539842002-10-02 13:26:35 +00009 This file is part of Cachegrind, a Valgrind skin for cache
10 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
sewardj3c23d432002-06-01 23:43:49 +000012 Copyright (C) 2002 Nicholas Nethercote
13 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
/* Parameters of one cache, as used by the cache simulation. */
typedef struct {
   int size;        /* bytes */
   int assoc;       /* associativity */
   int line_size;   /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000042
njn25cac76cb2002-09-23 11:21:57 +000043#include "cg_sim_L2.c"
44#include "cg_sim_I1.c"
45#include "cg_sim_D1.c"
njn4f9c9342002-04-29 16:03:24 +000046
njn25e49d8e72002-09-23 09:36:25 +000047/*------------------------------------------------------------*/
48/*--- Constants ---*/
49/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000050
/* Longest possible x86 instruction, in bytes, according to the IA-32 Intel
 * Architecture Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE    16

/* Data sizes of FPU_R, STORE and FPU_W accesses larger than this are
 * clamped to it when a basic block is instrumented. */
#define MIN_LINE_SIZE         16

/* Size of various buffers used for storing strings */
#define FILENAME_LEN          256
#define FN_NAME_LEN           256
#define BUF_LEN               512
#define COMMIFY_BUF_LEN       128
#define RESULTS_BUF_LEN       128
#define LINE_BUF_LEN          64
63
64/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000065/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000066/*------------------------------------------------------------*/
67
njn25e49d8e72002-09-23 09:36:25 +000068typedef
69 enum {
70 VgpGetBBCC = VgpFini+1,
71 VgpCacheSimulate,
72 VgpCacheResults
73 }
74 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000075
njn4f9c9342002-04-29 16:03:24 +000076/*------------------------------------------------------------*/
77/*--- Output file related stuff ---*/
78/*------------------------------------------------------------*/
79
njn25e49d8e72002-09-23 09:36:25 +000080Char cachegrind_out_file[FILENAME_LEN];
njn4f9c9342002-04-29 16:03:24 +000081
82static void file_err()
83{
84 VG_(message)(Vg_UserMsg,
njn7cf0bd32002-06-08 13:36:03 +000085 "error: can't open cache simulation output file `%s'",
njn25e49d8e72002-09-23 09:36:25 +000086 cachegrind_out_file );
njn4f9c9342002-04-29 16:03:24 +000087 VG_(exit)(1);
88}
89
90/*------------------------------------------------------------*/
91/*--- Cost center types, operations ---*/
92/*------------------------------------------------------------*/
93
94typedef struct _CC CC;
95struct _CC {
96 ULong a;
97 ULong m1;
98 ULong m2;
99};
100
101static __inline__ void initCC(CC* cc) {
102 cc->a = 0;
103 cc->m1 = 0;
104 cc->m2 = 0;
105}
106
/* Tag values stored in the 'tag' field of a cost centre; they identify
 * which kind of instruction the cost centre belongs to. */
typedef enum {
   InstrCC,       /* eg. mov %eax,   %ebx                      */
   ReadCC,        /* eg. mov (%ecx), %esi                      */
   WriteCC,       /* eg. mov %eax,   (%edx)                    */
   ModCC,         /* eg. incl (%eax) (read+write one addr)     */
   ReadWriteCC    /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                     (read+write two different addrs)          */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000116
/* Instruction-level cost-centres.  The typedefs for these structs are in
 * vg_include.c
 *
 * WARNING: the 'tag' field *must* be the first byte of all the CC types
 * (iCC, idCC and iddCC).
 *
 * This is because we use it to work out what kind of CC we're dealing with.
 */
njn25e49d8e72002-09-23 09:36:25 +0000124typedef
125 struct {
126 /* word 1 */
127 UChar tag;
128 UChar instr_size;
129 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000130
njn25e49d8e72002-09-23 09:36:25 +0000131 /* words 2+ */
132 Addr instr_addr;
133 CC I;
134 }
135 iCC;
njn4f9c9342002-04-29 16:03:24 +0000136
njn25e49d8e72002-09-23 09:36:25 +0000137typedef
138 struct _idCC {
139 /* word 1 */
140 UChar tag;
141 UChar instr_size;
142 UChar data_size;
143 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000144
njn25e49d8e72002-09-23 09:36:25 +0000145 /* words 2+ */
146 Addr instr_addr;
147 CC I;
148 CC D;
149 }
150 idCC;
151
152typedef
153 struct _iddCC {
154 /* word 1 */
155 UChar tag;
156 UChar instr_size;
157 UChar data_size;
158 /* 1 byte padding */
159
160 /* words 2+ */
161 Addr instr_addr;
162 CC I;
163 CC Da;
164 CC Db;
165 }
166 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000167
168static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
169{
njn25e49d8e72002-09-23 09:36:25 +0000170 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000171 cc->instr_size = instr_size;
172 cc->instr_addr = instr_addr;
173 initCC(&cc->I);
174}
175
176static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
177 UInt instr_size, UInt data_size)
178{
179 cc->tag = X_CC;
180 cc->instr_size = instr_size;
181 cc->data_size = data_size;
182 cc->instr_addr = instr_addr;
183 initCC(&cc->I);
184 initCC(&cc->D);
185}
186
njn25e49d8e72002-09-23 09:36:25 +0000187static void init_iddCC(iddCC* cc, Addr instr_addr,
188 UInt instr_size, UInt data_size)
189{
190 cc->tag = ReadWriteCC;
191 cc->instr_size = instr_size;
192 cc->data_size = data_size;
193 cc->instr_addr = instr_addr;
194 initCC(&cc->I);
195 initCC(&cc->Da);
196 initCC(&cc->Db);
197}
198
/* Add the three counters of one cost centre onto a running total.
 *
 *   CC_type  struct type to view the current cost centre as (iCC, idCC or
 *            iddCC)
 *   cc       name of the CC member inside that struct (eg. I, D, Da, Db)
 *   total    CC lvalue that receives the sums
 *
 * The macro reads a variable called BBCC_ptr from the scope in which it is
 * expanded; that variable must hold the address of the cost centre.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement (safe inside an unbraced if/else),
 * and 'total' is parenthesised so any lvalue expression may be passed. */
#define ADD_CC_TO(CC_type, cc, total)                  \
   do {                                                \
      (total).a  += ((CC_type*)BBCC_ptr)->cc.a;        \
      (total).m1 += ((CC_type*)BBCC_ptr)->cc.m1;       \
      (total).m2 += ((CC_type*)BBCC_ptr)->cc.m2;       \
   } while (0)
203
/* Set to 1 to have the sprint_*CC() functions append each instruction's
 * address, as a trailing "# <hex>" comment, after its counts in
 * cachegrind.out.  Set to 0 to emit the counts only. */
#define PRINT_INSTR_ADDRS 0
207
njne0ee0712002-05-03 16:41:05 +0000208static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000209{
njn95114da2002-06-05 09:39:31 +0000210#if PRINT_INSTR_ADDRS
211 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
212 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
213#else
njne0ee0712002-05-03 16:41:05 +0000214 VG_(sprintf)(buf, "%llu %llu %llu\n",
215 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000216#endif
njn4f9c9342002-04-29 16:03:24 +0000217}
218
njne0ee0712002-05-03 16:41:05 +0000219static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000220{
njn95114da2002-06-05 09:39:31 +0000221#if PRINT_INSTR_ADDRS
222 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
223 cc->I.a, cc->I.m1, cc->I.m2,
224 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
225#else
njne0ee0712002-05-03 16:41:05 +0000226 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
227 cc->I.a, cc->I.m1, cc->I.m2,
228 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000229#endif
njn4f9c9342002-04-29 16:03:24 +0000230}
231
njne0ee0712002-05-03 16:41:05 +0000232static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000233{
njn95114da2002-06-05 09:39:31 +0000234#if PRINT_INSTR_ADDRS
235 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
236 cc->I.a, cc->I.m1, cc->I.m2,
237 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
238#else
njne0ee0712002-05-03 16:41:05 +0000239 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
240 cc->I.a, cc->I.m1, cc->I.m2,
241 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000242#endif
njn4f9c9342002-04-29 16:03:24 +0000243}
244
njn25e49d8e72002-09-23 09:36:25 +0000245static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
246{
247#if PRINT_INSTR_ADDRS
248 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
249 cc->I.a, cc->I.m1, cc->I.m2,
250 cc->Da.a, cc->Da.m1, cc->Da.m2,
251 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
252#else
253 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
254 cc->I.a, cc->I.m1, cc->I.m2,
255 cc->Da.a, cc->Da.m1, cc->Da.m2,
256 cc->Db.a, cc->Db.m1, cc->Db.m2);
257#endif
258}
259
260
njn4f9c9342002-04-29 16:03:24 +0000261/*------------------------------------------------------------*/
262/*--- BBCC hash table stuff ---*/
263/*------------------------------------------------------------*/
264
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The sizes below
 * work fairly well for Konqueror. */

#define N_FILE_ENTRIES        251   /* buckets in the top-level, per-filename table */
#define N_FN_ENTRIES          53    /* buckets in each file's per-function table    */
#define N_BBCC_ENTRIES        37    /* buckets in each function's BBCC table        */
272
273/* The cost centres for a basic block are stored in a contiguous array.
274 * They are distinguishable by their tag field. */
275typedef struct _BBCC BBCC;
276struct _BBCC {
277 Addr orig_addr;
278 UInt array_size; /* byte-size of variable length array */
279 BBCC* next;
280 Addr array[0]; /* variable length array */
281};
282
283typedef struct _fn_node fn_node;
284struct _fn_node {
285 Char* fn_name;
286 BBCC* BBCCs[N_BBCC_ENTRIES];
287 fn_node* next;
288};
289
290typedef struct _file_node file_node;
291struct _file_node {
292 Char* filename;
293 fn_node* fns[N_FN_ENTRIES];
294 file_node* next;
295};
296
297/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000298static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000299
sewardj4f29ddf2002-05-03 22:29:04 +0000300static Int distinct_files = 0;
301static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000302
sewardj4f29ddf2002-05-03 22:29:04 +0000303static Int distinct_instrs = 0;
304static Int full_debug_BBs = 0;
305static Int file_line_debug_BBs = 0;
306static Int fn_name_debug_BBs = 0;
307static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000308
sewardj4f29ddf2002-05-03 22:29:04 +0000309static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000310
njn4294fd42002-06-05 14:41:10 +0000311static CC Ir_discards;
312static CC Dr_discards;
313static CC Dw_discards;
314
njn4f9c9342002-04-29 16:03:24 +0000315static void init_BBCC_table()
316{
317 Int i;
318 for (i = 0; i < N_FILE_ENTRIES; i++)
319 BBCC_table[i] = NULL;
320}
321
njne0ee0712002-05-03 16:41:05 +0000322static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
323 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000324{
njn25e49d8e72002-09-23 09:36:25 +0000325 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000326
njn25e49d8e72002-09-23 09:36:25 +0000327 found1 = VG_(get_filename_linenum)(instr_addr, filename,
328 FILENAME_LEN, line_num);
329 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000330
331 if (!found1 && !found2) {
332 no_debug_BBs++;
333 VG_(strcpy)(filename, "???");
334 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000335 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000336
337 } else if ( found1 && found2) {
338 full_debug_BBs++;
339
340 } else if ( found1 && !found2) {
341 file_line_debug_BBs++;
342 VG_(strcpy)(fn_name, "???");
343
344 } else /*(!found1 && found2)*/ {
345 fn_name_debug_BBs++;
346 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000347 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000348 }
349}
350
351/* Forward declaration. */
352static Int compute_BBCC_array_size(UCodeBlock* cb);
353
354static __inline__
355file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
356{
357 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000358 file_node* new = VG_(malloc)(sizeof(file_node));
359 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000360 for (i = 0; i < N_FN_ENTRIES; i++) {
361 new->fns[i] = NULL;
362 }
363 new->next = next;
364 return new;
365}
366
367static __inline__
368fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
369{
370 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000371 fn_node* new = VG_(malloc)(sizeof(fn_node));
372 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000373 for (i = 0; i < N_BBCC_ENTRIES; i++) {
374 new->BBCCs[i] = NULL;
375 }
376 new->next = next;
377 return new;
378}
379
380static __inline__
381BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
382{
383 Int BBCC_array_size = compute_BBCC_array_size(cb);
384 BBCC* new;
385
njn25e49d8e72002-09-23 09:36:25 +0000386 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000387 new->orig_addr = bb_orig_addr;
388 new->array_size = BBCC_array_size;
389 new->next = next;
390
391 return new;
392}
393
394#define HASH_CONSTANT 256
395
396static UInt hash(Char *s, UInt table_size)
397{
398 int hash_value = 0;
399 for ( ; *s; s++)
400 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
401 return hash_value;
402}
403
404/* Do a three step traversal: by filename, then fn_name, then instr_addr.
405 * In all cases prepends new nodes to their chain. Returns a pointer to the
406 * cost centre. Also sets BB_seen_before by reference.
407 */
408static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000409 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000410{
411 file_node *curr_file_node;
412 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000413 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000414 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
415 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000416 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000417
njne0ee0712002-05-03 16:41:05 +0000418 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000419
njn25e49d8e72002-09-23 09:36:25 +0000420 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000421 filename_hash = hash(filename, N_FILE_ENTRIES);
422 curr_file_node = BBCC_table[filename_hash];
423 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000424 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000425 curr_file_node = curr_file_node->next;
426 }
427 if (NULL == curr_file_node) {
428 BBCC_table[filename_hash] = curr_file_node =
429 new_file_node(filename, BBCC_table[filename_hash]);
430 distinct_files++;
431 }
432
433 fnname_hash = hash(fn_name, N_FN_ENTRIES);
434 curr_fn_node = curr_file_node->fns[fnname_hash];
435 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000436 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000437 curr_fn_node = curr_fn_node->next;
438 }
439 if (NULL == curr_fn_node) {
440 curr_file_node->fns[fnname_hash] = curr_fn_node =
441 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
442 distinct_fns++;
443 }
444
445 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000446 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000447 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
448 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000449 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000450 curr_BBCC = curr_BBCC->next;
451 }
452 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000453
njne427a662002-10-02 11:08:25 +0000454 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000455
njn4f9c9342002-04-29 16:03:24 +0000456 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
457 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
458 *BB_seen_before = False;
459
460 } else {
njne427a662002-10-02 11:08:25 +0000461 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
462 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000463 if (VG_(clo_verbosity) > 2) {
464 VG_(message)(Vg_DebugMsg,
465 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000466 }
467 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000468
469 if (True == remove) {
470 // Remove curr_BBCC from chain; it will be used and free'd by the
471 // caller.
472 *prev_BBCC_next_ptr = curr_BBCC->next;
473
474 } else {
475 BB_retranslations++;
476 }
njn4f9c9342002-04-29 16:03:24 +0000477 }
njn25e49d8e72002-09-23 09:36:25 +0000478 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000479 return curr_BBCC;
480}
481
482/*------------------------------------------------------------*/
483/*--- Cache simulation instrumentation phase ---*/
484/*------------------------------------------------------------*/
485
// SSS: do something about all these...
/* Short local aliases for the core's UCode-building functions. */
#define uInstr1   VG_(new_UInstr1)
#define uInstr2   VG_(new_UInstr2)
#define uInstr3   VG_(new_UInstr3)
#define uLiteral  VG_(set_lit_field)
#define uCCall    VG_(set_ccall_fields)
#define newTemp   VG_(get_new_temp)
njn4f9c9342002-04-29 16:03:24 +0000493
494static Int compute_BBCC_array_size(UCodeBlock* cb)
495{
496 UInstr* u_in;
497 Int i, CC_size, BBCC_size = 0;
498 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000499 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000500
501 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000502 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000503
504 for (i = 0; i < cb->used; i++) {
njn4f9c9342002-04-29 16:03:24 +0000505 u_in = &cb->instrs[i];
506 switch(u_in->opcode) {
507
508 case INCEIP:
509 goto case_for_end_of_instr;
510
511 case JMP:
512 if (u_in->cond != CondAlways) break;
513
514 goto case_for_end_of_instr;
515
516 case_for_end_of_instr:
517
njn25e49d8e72002-09-23 09:36:25 +0000518 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
519 t_read != t_write)
520 CC_size = sizeof(iddCC);
521 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
522 CC_size = sizeof(idCC);
523 else
524 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000525
526 BBCC_size += CC_size;
527 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
528 break;
529
530 case LOAD:
531 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000532 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000533 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000534 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000535 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000536 is_LOAD = True;
537 break;
538
539 case STORE:
540 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000541 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000542 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000543 is_STORE = True;
544 break;
545
546 case FPU_R:
njne427a662002-10-02 11:08:25 +0000547 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000548 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000549 is_FPU_R = True;
550 break;
551
552 case FPU_W:
njne427a662002-10-02 11:08:25 +0000553 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000554 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000555 is_FPU_W = True;
556 break;
557
558 default:
559 break;
560 }
561 }
562
563 return BBCC_size;
564}
565
njn25e49d8e72002-09-23 09:36:25 +0000566static __attribute__ ((regparm (1)))
567void log_1I_0D_cache_access(iCC* cc)
568{
569 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
570 // cc, cc->instr_addr, cc->instr_size)
571 VGP_PUSHCC(VgpCacheSimulate);
572 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
573 cc->I.a++;
574 VGP_POPCC(VgpCacheSimulate);
575}
576
577/* Difference between this function and log_1I_0D_cache_access() is that
578 this one can be passed any kind of CC, not just an iCC. So we have to
579 be careful to make sure we don't make any assumptions about CC layout.
580 (As it stands, they would be safe, but this will avoid potential heartache
581 if anyone else changes CC layout.)
582 Note that we only do the switch for the JIFZ version because if we always
583 called this switching version, things would run about 5% slower. */
584static __attribute__ ((regparm (1)))
585void log_1I_0D_cache_access_JIFZ(iCC* cc)
586{
587 UChar instr_size;
588 Addr instr_addr;
589 CC* I;
590
591 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
592 // cc, cc->instr_addr, cc->instr_size)
593 VGP_PUSHCC(VgpCacheSimulate);
594
595 switch(cc->tag) {
596 case InstrCC:
597 instr_size = cc->instr_size;
598 instr_addr = cc->instr_addr;
599 I = &(cc->I);
600 break;
601 case ReadCC:
602 case WriteCC:
603 case ModCC:
604 instr_size = ((idCC*)cc)->instr_size;
605 instr_addr = ((idCC*)cc)->instr_addr;
606 I = &( ((idCC*)cc)->I );
607 break;
608 case ReadWriteCC:
609 instr_size = ((iddCC*)cc)->instr_size;
610 instr_addr = ((iddCC*)cc)->instr_addr;
611 I = &( ((iddCC*)cc)->I );
612 break;
613 default:
njne427a662002-10-02 11:08:25 +0000614 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000615 break;
616 }
617 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
618 I->a++;
619 VGP_POPCC(VgpCacheSimulate);
620}
621
622__attribute__ ((regparm (2))) static
623void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
624{
625 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
626 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
627 VGP_PUSHCC(VgpCacheSimulate);
628 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
629 cc->D.a++;
630 VGP_POPCC(VgpCacheSimulate);
631}
632
633__attribute__ ((regparm (2))) static
634void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
635{
636 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
637 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
638 VGP_PUSHCC(VgpCacheSimulate);
639 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
640 cc->I.a++;
641
642 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
643 cc->D.a++;
644 VGP_POPCC(VgpCacheSimulate);
645}
646
647__attribute__ ((regparm (3))) static
648void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
649{
650 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
651 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
652 VGP_PUSHCC(VgpCacheSimulate);
653 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
654 cc->Da.a++;
655 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
656 cc->Db.a++;
657 VGP_POPCC(VgpCacheSimulate);
658}
659
660__attribute__ ((regparm (3))) static
661void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
662{
663 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
664 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
665 VGP_PUSHCC(VgpCacheSimulate);
666 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
667 cc->I.a++;
668
669 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
670 cc->Da.a++;
671 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
672 cc->Db.a++;
673 VGP_POPCC(VgpCacheSimulate);
674}
675
676UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
677{
678/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000679#define INVALID_DATA_SIZE 999999
680
njn4f9c9342002-04-29 16:03:24 +0000681 UCodeBlock* cb;
682 Int i;
683 UInstr* u_in;
684 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000685 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
686 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000687 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000688 Addr x86_instr_addr = orig_addr;
689 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
690 Addr helper;
691 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000692 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000693 Bool BB_seen_before = False;
694 Bool instrumented_Jcond = False;
695 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000696 Addr BBCC_ptr0, BBCC_ptr;
697
698 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
699 * if it's the first time it's been seen), and point to start of the
700 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000701 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000702 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
703
njn4ba5a792002-09-30 10:23:54 +0000704 cb = VG_(alloc_UCodeBlock)();
njn4f9c9342002-04-29 16:03:24 +0000705 cb->nextTemp = cb_in->nextTemp;
706
njn25e49d8e72002-09-23 09:36:25 +0000707 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
708 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000709
710 for (i = 0; i < cb_in->used; i++) {
711 u_in = &cb_in->instrs[i];
712
njn4f9c9342002-04-29 16:03:24 +0000713 /* What this is all about: we want to instrument each x86 instruction
714 * translation. The end of these are marked in three ways. The three
715 * ways, and the way we instrument them, are as follows:
716 *
717 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
718 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
719 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
720 *
njn25e49d8e72002-09-23 09:36:25 +0000721 * The last UInstr in a basic block is always a Juncond. Jconds,
722 * when they appear, are always second last. We check this with
723 * various assertions.
724 *
725 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000726 * executed. We don't have to put the instrumentation before the INCEIP
727 * (it could go after) but we do so for consistency.
728 *
njn25e49d8e72002-09-23 09:36:25 +0000729 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
730 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000731 *
njn25e49d8e72002-09-23 09:36:25 +0000732 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000733 *
734 * The instrumentation is just a call to the appropriate helper function,
735 * passing it the address of the instruction's CC.
736 */
njne427a662002-10-02 11:08:25 +0000737 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000738
739 switch (u_in->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000740 case NOP: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000741 break;
742
njn4f9c9342002-04-29 16:03:24 +0000743 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000744 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000745 */
746 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000747 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000748 t_read_addr = newTemp(cb);
749 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
750 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000751 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000752 break;
753
754 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000755 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000756 t_read_addr = newTemp(cb);
757 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000758 data_size = ( u_in->size <= MIN_LINE_SIZE
759 ? u_in->size
760 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000761 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000762 break;
763
764 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000765 * That's how the code above determines whether it does a write.
766 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000767 * As for the MOV, if it's a mod instruction it's redundant, but it's
768 * not expensive and mod instructions are rare anyway. */
769 case STORE:
770 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000771 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000772 t_write_addr = newTemp(cb);
773 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000774 /* 28 and 108 B data-sized instructions will be done
775 * inaccurately but they're very rare and this avoids errors
776 * from hitting more than two cache lines in the simulation. */
777 data_size = ( u_in->size <= MIN_LINE_SIZE
778 ? u_in->size
779 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000780 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000781 break;
782
njn25e49d8e72002-09-23 09:36:25 +0000783
784 /* For rep-prefixed instructions, log a single I-cache access
785 * before the UCode loop that implements the repeated part, which
786 * is where the multiple D-cache accesses are logged. */
787 case JIFZ:
788 has_rep_prefix = True;
789
790 /* Setup 1st and only arg: CC addr */
791 t_CC_addr = newTemp(cb);
792 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
793 uLiteral(cb, BBCC_ptr);
794
795 /* Call helper */
796 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
797 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000798 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000799 break;
800
801
802 /* INCEIP: insert instrumentation */
803 case INCEIP:
804 x86_instr_size = u_in->val1;
805 goto instrument_x86_instr;
806
807 /* JMP: insert instrumentation if the first JMP */
808 case JMP:
809 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000810 sk_assert(CondAlways == u_in->cond);
811 sk_assert(i+1 == cb_in->used);
njn4ba5a792002-09-30 10:23:54 +0000812 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000813 instrumented_Jcond = False; /* reset */
814 break;
815 }
816 /* The first JMP... instrument. */
817 if (CondAlways != u_in->cond) {
njne427a662002-10-02 11:08:25 +0000818 sk_assert(i+2 == cb_in->used);
njn25e49d8e72002-09-23 09:36:25 +0000819 instrumented_Jcond = True;
820 } else {
njne427a662002-10-02 11:08:25 +0000821 sk_assert(i+1 == cb_in->used);
njn25e49d8e72002-09-23 09:36:25 +0000822 }
823
824 /* Get x86 instr size from final JMP. */
825 x86_instr_size = LAST_UINSTR(cb_in).extra4b;
826 goto instrument_x86_instr;
827
828
829 /* Code executed at the end of each x86 instruction. */
830 instrument_x86_instr:
831
832 /* Initialise the CC in the BBCC array appropriately if it
833 * hasn't been initialised before. Then call appropriate sim
834 * function, passing it the CC address. */
835 stack_used = 0;
836
njne427a662002-10-02 11:08:25 +0000837 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000838 x86_instr_size <= MAX_x86_INSTR_SIZE);
839
840#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
841
842 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000843 sk_assert(INVALID_DATA_SIZE == data_size);
844 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000845 INVALID_TEMPREG == t_read &&
846 INVALID_TEMPREG == t_write_addr &&
847 INVALID_TEMPREG == t_write);
848 CC_size = sizeof(iCC);
849 if (!BB_seen_before)
850 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
851 helper = ( has_rep_prefix
852 ? (Addr)0 /* no extra log needed */
853 : (Addr) & log_1I_0D_cache_access
854 );
855 argc = 1;
856
857 } else {
njne427a662002-10-02 11:08:25 +0000858 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000859 8 == data_size || 10 == data_size ||
860 MIN_LINE_SIZE == data_size);
861
862 if (IS_(read) && !IS_(write)) {
863 CC_size = sizeof(idCC);
864 /* If it uses 'rep', we've already logged the I-cache
865 * access at the JIFZ UInstr (see JIFZ case below) so
866 * don't do it here */
867 helper = ( has_rep_prefix
868 ? (Addr) & log_0I_1D_cache_access
869 : (Addr) & log_1I_1D_cache_access
870 );
871 argc = 2;
872 if (!BB_seen_before)
873 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
874 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000875 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000876 INVALID_TEMPREG != t_read &&
877 INVALID_TEMPREG == t_write_addr &&
878 INVALID_TEMPREG == t_write);
879 t_data_addr1 = t_read_addr;
880
881 } else if (!IS_(read) && IS_(write)) {
882 CC_size = sizeof(idCC);
883 helper = ( has_rep_prefix
884 ? (Addr) & log_0I_1D_cache_access
885 : (Addr) & log_1I_1D_cache_access
886 );
887 argc = 2;
888 if (!BB_seen_before)
889 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
890 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000891 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000892 INVALID_TEMPREG == t_read &&
893 INVALID_TEMPREG != t_write_addr &&
894 INVALID_TEMPREG != t_write);
895 t_data_addr1 = t_write_addr;
896
897 } else {
njne427a662002-10-02 11:08:25 +0000898 sk_assert(IS_(read) && IS_(write));
899 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000900 INVALID_TEMPREG != t_read &&
901 INVALID_TEMPREG != t_write_addr &&
902 INVALID_TEMPREG != t_write);
903 if (t_read == t_write) {
904 CC_size = sizeof(idCC);
905 helper = ( has_rep_prefix
906 ? (Addr) & log_0I_1D_cache_access
907 : (Addr) & log_1I_1D_cache_access
908 );
909 argc = 2;
910 if (!BB_seen_before)
911 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
912 x86_instr_size, data_size);
913 t_data_addr1 = t_read_addr;
914 } else {
915 CC_size = sizeof(iddCC);
916 helper = ( has_rep_prefix
917 ? (Addr) & log_0I_2D_cache_access
918 : (Addr) & log_1I_2D_cache_access
919 );
920 argc = 3;
921 if (!BB_seen_before)
922 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
923 x86_instr_size, data_size);
924 t_data_addr1 = t_read_addr;
925 t_data_addr2 = t_write_addr;
926 }
927 }
928#undef IS_
929 }
930
931 /* Call the helper, if necessary */
932 if ((Addr)0 != helper) {
933
934 /* Setup 1st arg: CC addr */
935 t_CC_addr = newTemp(cb);
936 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
937 uLiteral(cb, BBCC_ptr);
938
939 /* Call the helper */
940 if (1 == argc)
941 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
942 else if (2 == argc)
943 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
944 TempReg, t_data_addr1);
945 else if (3 == argc)
946 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
947 TempReg, t_data_addr1,
948 TempReg, t_data_addr2);
949 else
njne427a662002-10-02 11:08:25 +0000950 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +0000951
952 uCCall(cb, helper, argc, argc, False);
953 }
954
955 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +0000956 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000957
958 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
959 BBCC_ptr += CC_size;
960 x86_instr_addr += x86_instr_size;
961 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
962 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
963 data_size = INVALID_DATA_SIZE;
964 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000965 break;
966
967 default:
njn4ba5a792002-09-30 10:23:54 +0000968 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000969 break;
970 }
971 }
972
973 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +0000974 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +0000975
njn4ba5a792002-09-30 10:23:54 +0000976 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000977 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000978
979#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000980}
981
982/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000983/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000984/*------------------------------------------------------------*/
985
986/* Total reads/writes/misses. Calculated during CC traversal at the end. */
987static CC Ir_total;
988static CC Dr_total;
989static CC Dw_total;
990
njn25e49d8e72002-09-23 09:36:25 +0000991#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
992
993static cache_t clo_I1_cache = UNDEFINED_CACHE;
994static cache_t clo_D1_cache = UNDEFINED_CACHE;
995static cache_t clo_L2_cache = UNDEFINED_CACHE;
996
njn7cf0bd32002-06-08 13:36:03 +0000997/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
998/* Probably only works for Intel and AMD chips, and probably only for some of
999 * them.
1000 */
1001
sewardj07133bf2002-06-13 10:25:56 +00001002static __inline__ void cpuid(Int n, Int *a, Int *b, Int *c, Int *d)
njn7cf0bd32002-06-08 13:36:03 +00001003{
1004 __asm__ __volatile__ (
1005 "cpuid"
1006 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1007 : "0" (n) /* input */
1008 );
1009}
1010
sewardj07133bf2002-06-13 10:25:56 +00001011static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001012{
1013 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001014 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001015 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001016 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001017 " Simulating a %d KB cache with %d B lines",
1018 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001019}
1020
1021/* Intel method is truly wretched. We have to do an insane indexing into an
1022 * array of pre-defined configurations for various parts of the memory
1023 * hierarchy.
1024 */
1025static
sewardj07133bf2002-06-13 10:25:56 +00001026Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001027{
sewardj07133bf2002-06-13 10:25:56 +00001028 UChar info[16];
1029 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001030 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001031
1032 if (level < 2) {
1033 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001034 "warning: CPUID level < 2 for Intel processor (%d)",
1035 level);
njn7cf0bd32002-06-08 13:36:03 +00001036 return -1;
1037 }
1038
sewardj07133bf2002-06-13 10:25:56 +00001039 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1040 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001041 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1042 info[0] = 0x0; /* reset AL */
1043
1044 if (0 != trials) {
1045 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001046 "warning: non-zero CPUID trials for Intel processor (%d)",
1047 trials);
njn7cf0bd32002-06-08 13:36:03 +00001048 return -1;
1049 }
1050
1051 for (i = 0; i < 16; i++) {
1052
1053 switch (info[i]) {
1054
1055 case 0x0: /* ignore zeros */
1056 break;
1057
njn25e49d8e72002-09-23 09:36:25 +00001058 /* TLB info, ignore */
1059 case 0x01: case 0x02: case 0x03: case 0x04:
1060 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njn7cf0bd32002-06-08 13:36:03 +00001061 break;
1062
1063 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1064 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
1065
1066 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1067 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
1068
njn25e49d8e72002-09-23 09:36:25 +00001069 /* IA-64 info -- panic! */
1070 case 0x10: case 0x15: case 0x1a:
1071 case 0x88: case 0x89: case 0x8a: case 0x8d:
1072 case 0x90: case 0x96: case 0x9b:
1073 VG_(message)(Vg_DebugMsg,
1074 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001075 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001076
njn7cf0bd32002-06-08 13:36:03 +00001077 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001078 VG_(message)(Vg_DebugMsg,
1079 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001080 break;
1081
njn25e49d8e72002-09-23 09:36:25 +00001082 /* These are sectored, whatever that means */
1083 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1084 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1085
1086 /* If a P6 core, this means "no L2 cache".
1087 If a P4 core, this means "no L3 cache".
1088 We don't know what core it is, so don't issue a warning. To detect
1089 a missing L2 cache, we use 'L2_found'. */
1090 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001091 break;
1092
njn25e49d8e72002-09-23 09:36:25 +00001093 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1094 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1095 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1096 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1097 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001098
1099 /* These are sectored, whatever that means */
1100 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1101 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1102 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1103
1104 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1105 * conversion to byte size is a total guess; treat the 12K and 16K
1106 * cases the same since the cache byte size must be a power of two for
1107 * everything to work!. Also guessing 32 bytes for the line size...
1108 */
1109 case 0x70: /* 12K micro-ops, 8-way */
1110 *I1c = (cache_t) { 16, 8, 32 };
1111 micro_ops_warn(12, 16, 32);
1112 break;
1113 case 0x71: /* 16K micro-ops, 8-way */
1114 *I1c = (cache_t) { 16, 8, 32 };
1115 micro_ops_warn(16, 16, 32);
1116 break;
1117 case 0x72: /* 32K micro-ops, 8-way */
1118 *I1c = (cache_t) { 32, 8, 32 };
1119 micro_ops_warn(32, 32, 32);
1120 break;
1121
njn25e49d8e72002-09-23 09:36:25 +00001122 /* These are sectored, whatever that means */
1123 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1124 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1125 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1126 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1127 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001128
njn25e49d8e72002-09-23 09:36:25 +00001129 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1130 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1131 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1132 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1133 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001134
1135 default:
1136 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001137 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001138 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001139 break;
1140 }
1141 }
njn25e49d8e72002-09-23 09:36:25 +00001142
1143 if (!L2_found)
1144 VG_(message)(Vg_DebugMsg,
1145 "warning: L2 cache not installed, ignore L2 results.");
1146
njn7cf0bd32002-06-08 13:36:03 +00001147 return 0;
1148}
1149
1150/* AMD method is straightforward, just extract appropriate bits from the
1151 * result registers.
1152 *
1153 * Bits, for D1 and I1:
1154 * 31..24 data L1 cache size in KBs
1155 * 23..16 data L1 cache associativity (FFh=full)
1156 * 15.. 8 data L1 cache lines per tag
1157 * 7.. 0 data L1 cache line size in bytes
1158 *
1159 * Bits, for L2:
1160 * 31..16 unified L2 cache size in KBs
1161 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1162 * 11.. 8 unified L2 cache lines per tag
1163 * 7.. 0 unified L2 cache line size in bytes
1164 *
1165 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1166 * upon this information. (Whatever that means -- njn)
1167 *
njn25e49d8e72002-09-23 09:36:25 +00001168 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1169 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1170 * so we detect that.
1171 *
njn7cf0bd32002-06-08 13:36:03 +00001172 * Returns 0 on success, non-zero on failure.
1173 */
sewardj07133bf2002-06-13 10:25:56 +00001174static
1175Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001176{
njn25e49d8e72002-09-23 09:36:25 +00001177 Int dummy, model, ext_level;
sewardj07133bf2002-06-13 10:25:56 +00001178 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001179
1180 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1181
1182 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1183 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001184 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1185 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001186 return -1;
1187 }
1188
1189 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1190 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1191
njn25e49d8e72002-09-23 09:36:25 +00001192 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1193 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1194
1195 /* Check for Duron bug */
1196 if (model == 0x630) {
1197 VG_(message)(Vg_UserMsg,
1198 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1199 L2i = (64 << 16) | (L2i & 0xffff);
1200 }
1201
njn7cf0bd32002-06-08 13:36:03 +00001202 D1c->size = (D1i >> 24) & 0xff;
1203 D1c->assoc = (D1i >> 16) & 0xff;
1204 D1c->line_size = (D1i >> 0) & 0xff;
1205
1206 I1c->size = (I1i >> 24) & 0xff;
1207 I1c->assoc = (I1i >> 16) & 0xff;
1208 I1c->line_size = (I1i >> 0) & 0xff;
1209
1210 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1211 L2c->assoc = (L2i >> 12) & 0xf;
1212 L2c->line_size = (L2i >> 0) & 0xff;
1213
1214 return 0;
1215}
1216
1217static jmp_buf cpuid_jmpbuf;
1218
1219static
1220void cpuid_SIGILL_handler(int signum)
1221{
1222 __builtin_longjmp(cpuid_jmpbuf, 1);
1223}
1224
1225static
sewardj07133bf2002-06-13 10:25:56 +00001226Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001227{
sewardj07133bf2002-06-13 10:25:56 +00001228 Int level, res, ret;
1229 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001230 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001231
1232 /* Install own SIGILL handler */
1233 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1234 sigill_new.ksa_flags = 0;
1235 sigill_new.ksa_restorer = NULL;
1236 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001237 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001238
1239 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001240 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001241
1242 /* Trap for illegal instruction, in case it's a really old processor that
1243 * doesn't support CPUID. */
1244 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1245 cpuid(0, &level, (int*)&vendor_id[0],
1246 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1247 vendor_id[12] = '\0';
1248
1249 /* Restore old SIGILL handler */
1250 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001251 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001252
1253 } else {
1254 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1255
1256 /* Restore old SIGILL handler */
1257 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001258 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001259 return -1;
1260 }
1261
1262 if (0 == level) {
1263 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1264 return -1;
1265 }
1266
1267 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1268 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1269 ret = Intel_cache_info(level, I1c, D1c, L2c);
1270
1271 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1272 ret = AMD_cache_info(I1c, D1c, L2c);
1273
1274 } else {
1275 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1276 vendor_id);
1277 return -1;
1278 }
1279
1280 /* Successful! Convert sizes from KB to bytes */
1281 I1c->size *= 1024;
1282 D1c->size *= 1024;
1283 L2c->size *= 1024;
1284
1285 return ret;
1286}
1287
1288/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001289static
1290void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001291{
1292 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001293 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001294 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001295 "warning: %s size of %dB not a power of two; "
1296 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001297 cache->size = dflt->size;
1298 }
1299
sewardj07133bf2002-06-13 10:25:56 +00001300 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001301 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001302 "warning: %s associativity of %d not a power of two; "
1303 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001304 cache->assoc = dflt->assoc;
1305 }
1306
sewardj07133bf2002-06-13 10:25:56 +00001307 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001308 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001309 "warning: %s line size of %dB not a power of two; "
1310 "defaulting to %dB",
1311 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001312 cache->line_size = dflt->line_size;
1313 }
1314
1315 /* Then check line size >= 16 -- any smaller and a single instruction could
1316 * straddle three cache lines, which breaks a simulation assertion and is
1317 * stupid anyway. */
1318 if (cache->line_size < MIN_LINE_SIZE) {
1319 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001320 "warning: %s line size of %dB too small; "
1321 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001322 cache->line_size = MIN_LINE_SIZE;
1323 }
1324
1325 /* Then check cache size > line size (causes seg faults if not). */
1326 if (cache->size <= cache->line_size) {
1327 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001328 "warning: %s cache size of %dB <= line size of %dB; "
1329 "increasing to %dB", name, cache->size, cache->line_size,
1330 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001331 cache->size = cache->line_size * 2;
1332 }
1333
1334 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1335 if (cache->assoc > (cache->size / cache->line_size)) {
1336 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001337 "warning: %s associativity > (size / line size); "
1338 "increasing size to %dB",
1339 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001340 cache->size = cache->assoc * cache->line_size;
1341 }
1342}
1343
1344/* On entry, args are undefined. Fill them with any info from the
1345 * command-line, then fill in any remaining with CPUID instruction if possible,
1346 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001347static
1348void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001349{
1350 /* Defaults are for a model 3 or 4 Athlon */
1351 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1352 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1353 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1354
njn25e49d8e72002-09-23 09:36:25 +00001355#define CMD_LINE_DEFINED(L) \
1356 (-1 != clo_##L##_cache.size || \
1357 -1 != clo_##L##_cache.assoc || \
1358 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001359
njn25e49d8e72002-09-23 09:36:25 +00001360 *I1c = clo_I1_cache;
1361 *D1c = clo_D1_cache;
1362 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001363
njn7cf0bd32002-06-08 13:36:03 +00001364 /* If any undefined on command-line, try CPUID */
1365 if (! CMD_LINE_DEFINED(I1) ||
1366 ! CMD_LINE_DEFINED(D1) ||
1367 ! CMD_LINE_DEFINED(L2)) {
1368
1369 /* Overwrite CPUID result for any cache defined on command-line */
1370 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1371
njn25e49d8e72002-09-23 09:36:25 +00001372 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1373 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1374 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001375
1376 /* CPUID failed, use defaults for each undefined by command-line */
1377 } else {
1378 VG_(message)(Vg_DebugMsg,
1379 "Couldn't detect cache configuration, using one "
1380 "or more defaults ");
1381
njn25e49d8e72002-09-23 09:36:25 +00001382 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1383 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1384 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001385 }
1386 }
1387#undef CMD_LINE_DEFINED
1388
1389 check_cache(I1c, &I1_dflt, "I1");
1390 check_cache(D1c, &D1_dflt, "D1");
1391 check_cache(L2c, &L2_dflt, "L2");
1392
1393 if (VG_(clo_verbosity) > 1) {
1394 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1395 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1396 I1c->size, I1c->assoc, I1c->line_size);
1397 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1398 D1c->size, D1c->assoc, D1c->line_size);
1399 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1400 L2c->size, L2c->assoc, L2c->line_size);
1401 }
1402}
1403
njn4f9c9342002-04-29 16:03:24 +00001404/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001405/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001406/*------------------------------------------------------------*/
1407
njn4f9c9342002-04-29 16:03:24 +00001408static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1409 Char *first_instr_fn)
1410{
1411 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001412 Char buf[BUF_LEN], curr_file[BUF_LEN],
1413 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001414 UInt line_num;
1415
1416 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1417
njne0ee0712002-05-03 16:41:05 +00001418 /* Mark start of basic block in output, just to ease debugging */
1419 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001420
1421 VG_(strcpy)(curr_file, first_instr_fl);
1422
1423 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1424
1425 /* We pretend the CC is an iCC for getting the tag. This is ok
1426 * because both CC types have tag as their first byte. Once we know
1427 * the type, we can cast and act appropriately. */
1428
1429 Char fl_buf[FILENAME_LEN];
1430 Char fn_buf[FN_NAME_LEN];
1431
njne0ee0712002-05-03 16:41:05 +00001432 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001433 switch ( ((iCC*)BBCC_ptr)->tag ) {
1434
njn25e49d8e72002-09-23 09:36:25 +00001435 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001436 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1437 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001438 ADD_CC_TO(iCC, I, Ir_total);
1439 BBCC_ptr += sizeof(iCC);
1440 break;
1441
njn25e49d8e72002-09-23 09:36:25 +00001442 case ReadCC:
1443 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001444 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1445 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001446 ADD_CC_TO(idCC, I, Ir_total);
1447 ADD_CC_TO(idCC, D, Dr_total);
1448 BBCC_ptr += sizeof(idCC);
1449 break;
1450
njn25e49d8e72002-09-23 09:36:25 +00001451 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001452 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1453 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001454 ADD_CC_TO(idCC, I, Ir_total);
1455 ADD_CC_TO(idCC, D, Dw_total);
1456 BBCC_ptr += sizeof(idCC);
1457 break;
1458
njn25e49d8e72002-09-23 09:36:25 +00001459 case ReadWriteCC:
1460 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1461 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1462 ADD_CC_TO(iddCC, I, Ir_total);
1463 ADD_CC_TO(iddCC, Da, Dr_total);
1464 ADD_CC_TO(iddCC, Db, Dw_total);
1465 BBCC_ptr += sizeof(iddCC);
1466 break;
1467
njn4f9c9342002-04-29 16:03:24 +00001468 default:
njne427a662002-10-02 11:08:25 +00001469 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001470 break;
1471 }
1472 distinct_instrs++;
1473
njne0ee0712002-05-03 16:41:05 +00001474 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1475
1476 /* Allow for filename switching in the middle of a BB; if this happens,
1477 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001478 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001479 VG_(strcpy)(curr_file, fl_buf);
1480 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1481 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1482 }
1483
njn4f9c9342002-04-29 16:03:24 +00001484 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001485 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001486 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001487 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001488 VG_(printf)(" filenames: BB:%s, instr:%s;"
1489 " fn_names: BB:%s, instr:%s;"
1490 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001491 first_instr_fl, fl_buf,
1492 first_instr_fn, fn_buf,
1493 line_num);
1494 }
1495
njne0ee0712002-05-03 16:41:05 +00001496 VG_(sprintf)(lbuf, "%u ", line_num);
1497 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1498 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001499 }
1500 /* If we switched filenames in the middle of the BB without switching back,
1501 * switch back now because the subsequent BB may be relying on falling under
1502 * the original file name. */
1503 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1504 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1505 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1506 }
njne0ee0712002-05-03 16:41:05 +00001507
1508 /* Mark end of basic block */
1509 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001510
njne427a662002-10-02 11:08:25 +00001511 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001512}
1513
njn25e49d8e72002-09-23 09:36:25 +00001514static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001515{
1516 Int fd;
1517 Char buf[BUF_LEN];
1518 file_node *curr_file_node;
1519 fn_node *curr_fn_node;
1520 BBCC *curr_BBCC;
1521 Int i,j,k;
1522
njn25e49d8e72002-09-23 09:36:25 +00001523 VGP_PUSHCC(VgpCacheResults);
1524 fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
njn4f9c9342002-04-29 16:03:24 +00001525 if (-1 == fd) { file_err(); }
1526
1527 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001528 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1529 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1530 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1531 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1532 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1533 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001534
1535 /* "cmd:" line */
1536 VG_(strcpy)(buf, "cmd:");
1537 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001538 for (i = 0; i < VG_(client_argc); i++) {
1539 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001540 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1541 }
1542 /* "events:" line */
1543 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1544 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1545
1546 /* Six loops here: three for the hash table arrays, and three for the
1547 * chains hanging off the hash table arrays. */
1548 for (i = 0; i < N_FILE_ENTRIES; i++) {
1549 curr_file_node = BBCC_table[i];
1550 while (curr_file_node != NULL) {
1551 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1552 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1553
1554 for (j = 0; j < N_FN_ENTRIES; j++) {
1555 curr_fn_node = curr_file_node->fns[j];
1556 while (curr_fn_node != NULL) {
1557 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1558 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1559
1560 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1561 curr_BBCC = curr_fn_node->BBCCs[k];
1562 while (curr_BBCC != NULL) {
1563 fprint_BBCC(fd, curr_BBCC,
1564
1565 curr_file_node->filename,
1566 curr_fn_node->fn_name);
1567
1568 curr_BBCC = curr_BBCC->next;
1569 }
1570 }
1571 curr_fn_node = curr_fn_node->next;
1572 }
1573 }
1574 curr_file_node = curr_file_node->next;
1575 }
1576 }
1577
njn4294fd42002-06-05 14:41:10 +00001578 /* Print stats from any discarded basic blocks */
1579 if (0 != Ir_discards.a) {
1580
1581 VG_(sprintf)(buf, "fl=(discarded)\n");
1582 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1583 VG_(sprintf)(buf, "fn=(discarded)\n");
1584 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1585
1586 /* Use 0 as line number */
1587 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1588 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1589 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1590 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1591 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1592
1593 Ir_total.a += Ir_discards.a;
1594 Ir_total.m1 += Ir_discards.m1;
1595 Ir_total.m2 += Ir_discards.m2;
1596 Dr_total.a += Dr_discards.a;
1597 Dr_total.m1 += Dr_discards.m1;
1598 Dr_total.m2 += Dr_discards.m2;
1599 Dw_total.a += Dw_discards.a;
1600 Dw_total.m1 += Dw_discards.m1;
1601 Dw_total.m2 += Dw_discards.m2;
1602 }
1603
njn4f9c9342002-04-29 16:03:24 +00001604 /* Summary stats must come after rest of table, since we calculate them
1605 * during traversal. */
1606 VG_(sprintf)(buf, "summary: "
1607 "%llu %llu %llu "
1608 "%llu %llu %llu "
1609 "%llu %llu %llu\n",
1610 Ir_total.a, Ir_total.m1, Ir_total.m2,
1611 Dr_total.a, Dr_total.m1, Dr_total.m2,
1612 Dw_total.a, Dw_total.m1, Dw_total.m2);
1613 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1614 VG_(close)(fd);
1615}
1616
1617/* Adds commas to ULong, right justifying in a field field_width wide, returns
1618 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001619static
njn4f9c9342002-04-29 16:03:24 +00001620Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1621{
1622 int len, n_commas, i, j, new_len, space;
1623
1624 VG_(sprintf)(buf, "%lu", n);
1625 len = VG_(strlen)(buf);
1626 n_commas = (len - 1) / 3;
1627 new_len = len + n_commas;
1628 space = field_width - new_len;
1629
1630 /* Allow for printing a number in a field_width smaller than it's size */
1631 if (space < 0) space = 0;
1632
1633 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1634 * of three. */
1635 for (j = -1, i = len ; i >= 0; i--) {
1636 buf[i + n_commas + space] = buf[i];
1637
1638 if (3 == ++j) {
1639 j = 0;
1640 n_commas--;
1641 buf[i + n_commas + space] = ',';
1642 }
1643 }
1644 /* Right justify in field. */
1645 for (i = 0; i < space; i++) buf[i] = ' ';
1646 return new_len;
1647}
1648
sewardj4f29ddf2002-05-03 22:29:04 +00001649static
njn4f9c9342002-04-29 16:03:24 +00001650void percentify(Int n, Int pow, Int field_width, char buf[])
1651{
1652 int i, len, space;
1653
1654 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1655 len = VG_(strlen)(buf);
1656 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001657 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001658 i = len;
1659
1660 /* Right justify in field */
1661 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1662 for (i = 0; i < space; i++) buf[i] = ' ';
1663}
1664
njn25e49d8e72002-09-23 09:36:25 +00001665void SK_(fini)(void)
njn4f9c9342002-04-29 16:03:24 +00001666{
1667 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001668 ULong L2_total_m, L2_total_mr, L2_total_mw,
1669 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001670 char buf1[RESULTS_BUF_LEN],
1671 buf2[RESULTS_BUF_LEN],
1672 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001673 Int l1, l2, l3;
1674 Int p;
1675
njn25e49d8e72002-09-23 09:36:25 +00001676 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001677
njn7cf0bd32002-06-08 13:36:03 +00001678 if (VG_(clo_verbosity) == 0)
1679 return;
1680
njn4f9c9342002-04-29 16:03:24 +00001681 /* I cache results. Use the I_refs value to determine the first column
1682 * width. */
1683 l1 = commify(Ir_total.a, 0, buf1);
1684 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1685
1686 commify(Ir_total.m1, l1, buf1);
1687 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1688
1689 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001690 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001691
1692 p = 100;
1693
njn25e49d8e72002-09-23 09:36:25 +00001694 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001695 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1696 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1697
1698 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1699 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1700 VG_(message)(Vg_UserMsg, "");
1701
1702 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1703 * width of columns 2 & 3. */
1704 D_total.a = Dr_total.a + Dw_total.a;
1705 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1706 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1707
njn1d021fa2002-05-02 13:56:34 +00001708 commify( D_total.a, l1, buf1);
1709 l2 = commify(Dr_total.a, 0, buf2);
1710 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001711 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1712 buf1, buf2, buf3);
1713
1714 commify( D_total.m1, l1, buf1);
1715 commify(Dr_total.m1, l2, buf2);
1716 commify(Dw_total.m1, l3, buf3);
1717 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1718 buf1, buf2, buf3);
1719
1720 commify( D_total.m2, l1, buf1);
1721 commify(Dr_total.m2, l2, buf2);
1722 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001723 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001724 buf1, buf2, buf3);
1725
1726 p = 10;
1727
njn25e49d8e72002-09-23 09:36:25 +00001728 if (0 == D_total.a) D_total.a = 1;
1729 if (0 == Dr_total.a) Dr_total.a = 1;
1730 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001731 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1732 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1733 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1734 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1735
1736 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1737 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1738 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1739 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1740 VG_(message)(Vg_UserMsg, "");
1741
1742 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001743
1744 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1745 L2_total_r = Dr_total.m1 + Ir_total.m1;
1746 L2_total_w = Dw_total.m1;
1747 commify(L2_total, l1, buf1);
1748 commify(L2_total_r, l2, buf2);
1749 commify(L2_total_w, l3, buf3);
1750 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1751 buf1, buf2, buf3);
1752
njn4f9c9342002-04-29 16:03:24 +00001753 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1754 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1755 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001756 commify(L2_total_m, l1, buf1);
1757 commify(L2_total_mr, l2, buf2);
1758 commify(L2_total_mw, l3, buf3);
1759 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1760 buf1, buf2, buf3);
1761
1762 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1763 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1764 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1765 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1766
1767
1768 /* Hash table stats */
1769 if (VG_(clo_verbosity) > 1) {
1770 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1771 file_line_debug_BBs + no_debug_BBs;
1772
1773 VG_(message)(Vg_DebugMsg, "");
1774 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1775 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1776 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1777 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1778 full_debug_BBs * 100 / BB_lookups,
1779 full_debug_BBs);
1780 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1781 file_line_debug_BBs * 100 / BB_lookups,
1782 file_line_debug_BBs);
1783 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1784 fn_name_debug_BBs * 100 / BB_lookups,
1785 fn_name_debug_BBs);
1786 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1787 no_debug_BBs * 100 / BB_lookups,
1788 no_debug_BBs);
1789 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1790 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1791 }
njn25e49d8e72002-09-23 09:36:25 +00001792 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001793}
1794
sewardj18d75132002-05-16 11:06:21 +00001795
njn4294fd42002-06-05 14:41:10 +00001796/* Called when a translation is invalidated due to self-modifying code or
1797 * unloaded of a shared object.
1798 *
1799 * Finds the BBCC in the table, removes it, adds the counts to the discard
1800 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001801void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001802{
njn4294fd42002-06-05 14:41:10 +00001803 BBCC *BBCC_node;
1804 Addr BBCC_ptr0, BBCC_ptr;
1805 Bool BB_seen_before;
1806
sewardj83205b32002-06-14 11:08:07 +00001807 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001808 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001809
1810 /* 2nd arg won't be used since BB should have been seen before (assertions
1811 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001812 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001813 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1814
njne427a662002-10-02 11:08:25 +00001815 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001816
1817 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1818
1819 /* We pretend the CC is an iCC for getting the tag. This is ok
1820 * because both CC types have tag as their first byte. Once we know
1821 * the type, we can cast and act appropriately. */
1822
1823 switch ( ((iCC*)BBCC_ptr)->tag ) {
1824
njn25e49d8e72002-09-23 09:36:25 +00001825 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001826 ADD_CC_TO(iCC, I, Ir_discards);
1827 BBCC_ptr += sizeof(iCC);
1828 break;
1829
njn25e49d8e72002-09-23 09:36:25 +00001830 case ReadCC:
1831 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001832 ADD_CC_TO(idCC, I, Ir_discards);
1833 ADD_CC_TO(idCC, D, Dr_discards);
1834 BBCC_ptr += sizeof(idCC);
1835 break;
1836
njn25e49d8e72002-09-23 09:36:25 +00001837 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001838 ADD_CC_TO(idCC, I, Ir_discards);
1839 ADD_CC_TO(idCC, D, Dw_discards);
1840 BBCC_ptr += sizeof(idCC);
1841 break;
1842
njn25e49d8e72002-09-23 09:36:25 +00001843 case ReadWriteCC:
1844 ADD_CC_TO(iddCC, I, Ir_discards);
1845 ADD_CC_TO(iddCC, Da, Dr_discards);
1846 ADD_CC_TO(iddCC, Db, Dw_discards);
1847 BBCC_ptr += sizeof(iddCC);
1848 break;
1849
njn4294fd42002-06-05 14:41:10 +00001850 default:
njne427a662002-10-02 11:08:25 +00001851 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001852 break;
1853 }
1854 }
njn25e49d8e72002-09-23 09:36:25 +00001855 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001856}
1857
1858/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001859/*--- Command line processing ---*/
1860/*--------------------------------------------------------------------*/
1861
1862static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1863{
1864 int i1, i2, i3;
1865 int i;
1866 char *opt = VG_(strdup)(orig_opt);
1867
1868 i = i1 = opt_len;
1869
1870 /* Option looks like "--I1=65536,2,64".
1871 * Find commas, replace with NULs to make three independent
1872 * strings, then extract numbers. Yuck. */
1873 while (VG_(isdigit)(opt[i])) i++;
1874 if (',' == opt[i]) {
1875 opt[i++] = '\0';
1876 i2 = i;
1877 } else goto bad;
1878 while (VG_(isdigit)(opt[i])) i++;
1879 if (',' == opt[i]) {
1880 opt[i++] = '\0';
1881 i3 = i;
1882 } else goto bad;
1883 while (VG_(isdigit)(opt[i])) i++;
1884 if ('\0' != opt[i]) goto bad;
1885
1886 cache->size = (Int)VG_(atoll)(opt + i1);
1887 cache->assoc = (Int)VG_(atoll)(opt + i2);
1888 cache->line_size = (Int)VG_(atoll)(opt + i3);
1889
1890 VG_(free)(opt);
1891
1892 return;
1893
1894 bad:
1895 VG_(bad_option)(orig_opt);
1896}
1897
1898Bool SK_(process_cmd_line_option)(Char* arg)
1899{
1900 /* 5 is length of "--I1=" */
1901 if (0 == VG_(strncmp)(arg, "--I1=", 5))
1902 parse_cache_opt(&clo_I1_cache, arg, 5);
1903 else if (0 == VG_(strncmp)(arg, "--D1=", 5))
1904 parse_cache_opt(&clo_D1_cache, arg, 5);
1905 else if (0 == VG_(strncmp)(arg, "--L2=", 5))
1906 parse_cache_opt(&clo_L2_cache, arg, 5);
1907 else
1908 return False;
1909
1910 return True;
1911}
1912
1913Char* SK_(usage)(void)
1914{
1915 return
1916" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1917" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1918" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n";
1919}
1920
1921/*--------------------------------------------------------------------*/
1922/*--- Setup ---*/
1923/*--------------------------------------------------------------------*/
1924
1925void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* not_used)
1926{
1927 needs->name = "cachegrind";
1928 needs->description = "an I1/D1/L2 cache profiler";
njne427a662002-10-02 11:08:25 +00001929 needs->bug_reports_to = "njn25@cam.ac.uk";
njn25e49d8e72002-09-23 09:36:25 +00001930
1931 needs->basic_block_discards = True;
1932 needs->command_line_options = True;
1933
1934 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
1935 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
1936 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
1937 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
1938 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
1939 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
1940}
1941
1942void SK_(post_clo_init)(void)
1943{
1944 cache_t I1c, D1c, L2c;
1945 Int fd;
1946
1947 /* Set output file name: cachegrind.<pid>.out */
1948 VG_(sprintf)(cachegrind_out_file, "cachegrind.out.%d", VG_(getpid)());
1949
1950 /* Make sure the output file can be written. */
1951 fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1952 if (-1 == fd) {
1953 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY,
1954 VKI_S_IRUSR|VKI_S_IWUSR);
1955 if (-1 == fd) {
1956 file_err();
1957 }
1958 }
1959 VG_(close)(fd);
1960
1961 initCC(&Ir_total);
1962 initCC(&Dr_total);
1963 initCC(&Dw_total);
1964
1965 initCC(&Ir_discards);
1966 initCC(&Dr_discards);
1967 initCC(&Dw_discards);
1968
1969 get_caches(&I1c, &D1c, &L2c);
1970
1971 cachesim_I1_initcache(I1c);
1972 cachesim_D1_initcache(D1c);
1973 cachesim_L2_initcache(L2c);
1974
1975 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
1976 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1977 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
1978
1979 init_BBCC_table();
1980}
1981
#if 0
/* Disabled sanity-check hooks; nothing in this block is compiled. */

Bool SK_(cheap_sanity_check)(void)
{
   return True;
}

extern TTEntry* vg_tt;

/* Visits the first 200191 entries of vg_tt and calls get_BBCC() with
 * remove=True for every entry whose orig_addr is neither 1 nor 3, printing
 * a dot per lookup.
 * NOTE(review): 1 and 3 are presumably marker values for unused slots --
 * confirm against the translation-table code. */
Bool SK_(expensive_sanity_check)(void)
{
   Bool seen;
   Int  slot;

   for (slot = 0; slot < 200191; slot++) {
      if (vg_tt[slot].orig_addr == (Addr)1 ||
          vg_tt[slot].orig_addr == (Addr)3)
         continue;

      VG_(printf)(".");
      get_BBCC(vg_tt[slot].orig_addr, NULL, /*remove=*/True, &seen);
   }
   return True;
}
#endif
2001
2002/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002003/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002004/*--------------------------------------------------------------------*/