blob: 9ad903753998843034064f3186d5e33a19381218 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
njnc9539842002-10-02 13:26:35 +00009 This file is part of Cachegrind, a Valgrind skin for cache
10 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
njn0e1b5142003-04-15 14:58:06 +000012 Copyright (C) 2002-2003 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000013 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
njn27f1a382002-11-08 15:48:16 +000036VG_DETERMINE_INTERFACE_VERSION
37
/* For cache simulation: describes one cache by its total size,
   associativity and line size. */
typedef struct {
   int size;        /* bytes */
   int assoc;
   int line_size;   /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000044
njn25cac76cb2002-09-23 11:21:57 +000045#include "cg_sim_L2.c"
46#include "cg_sim_I1.c"
47#include "cg_sim_D1.c"
njn4f9c9342002-04-29 16:03:24 +000048
njn25e49d8e72002-09-23 09:36:25 +000049/*------------------------------------------------------------*/
50/*--- Constants ---*/
51/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000052
/* Instruction size bound, according to the IA-32 Intel Architecture
   Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE     16

#define MIN_LINE_SIZE          16

/* Sizes of the various buffers used for storing strings. */
#define FILENAME_LEN           256
#define FN_NAME_LEN            256
#define BUF_LEN                512
#define COMMIFY_BUF_LEN        128
#define RESULTS_BUF_LEN        128
#define LINE_BUF_LEN           64
65
66/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000067/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000068/*------------------------------------------------------------*/
69
njn25e49d8e72002-09-23 09:36:25 +000070typedef
71 enum {
72 VgpGetBBCC = VgpFini+1,
73 VgpCacheSimulate,
74 VgpCacheResults
75 }
76 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000077
njn4f9c9342002-04-29 16:03:24 +000078/*------------------------------------------------------------*/
79/*--- Output file related stuff ---*/
80/*------------------------------------------------------------*/
81
njn13f02932003-04-30 20:23:58 +000082static Char* cachegrind_out_file;
njn4f9c9342002-04-29 16:03:24 +000083
sewardj0744b6c2002-12-11 00:45:42 +000084static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000085{
86 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000087 "error: can't open cache simulation output file `%s'",
88 cachegrind_out_file );
89 VG_(message)(Vg_UserMsg,
90 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000091}
92
93/*------------------------------------------------------------*/
94/*--- Cost center types, operations ---*/
95/*------------------------------------------------------------*/
96
97typedef struct _CC CC;
98struct _CC {
99 ULong a;
100 ULong m1;
101 ULong m2;
102};
103
104static __inline__ void initCC(CC* cc) {
105 cc->a = 0;
106 cc->m1 = 0;
107 cc->m2 = 0;
108}
109
/* Kinds of instruction-level cost centre; stored in the 'tag' field. */
typedef enum {
   InstrCC,       /* eg. mov %eax,   %ebx                      */
   ReadCC,        /* eg. mov (%ecx), %esi                      */
   WriteCC,       /* eg. mov %eax,   (%edx)                    */
   ModCC,         /* eg. incl (%eax) (read+write one addr)     */
   ReadWriteCC    /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                     (read+write two different addrs)          */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000119
njn7e1b3b22003-07-04 11:44:39 +0000120/* Instruction-level cost-centres.
njn4f9c9342002-04-29 16:03:24 +0000121 *
122 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +0000123 *
njne0ee0712002-05-03 16:41:05 +0000124 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +0000125 */
njn25e49d8e72002-09-23 09:36:25 +0000126typedef
127 struct {
128 /* word 1 */
129 UChar tag;
130 UChar instr_size;
131 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000132
njn25e49d8e72002-09-23 09:36:25 +0000133 /* words 2+ */
134 Addr instr_addr;
135 CC I;
136 }
137 iCC;
njn4f9c9342002-04-29 16:03:24 +0000138
njn25e49d8e72002-09-23 09:36:25 +0000139typedef
140 struct _idCC {
141 /* word 1 */
142 UChar tag;
143 UChar instr_size;
144 UChar data_size;
145 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000146
njn25e49d8e72002-09-23 09:36:25 +0000147 /* words 2+ */
148 Addr instr_addr;
149 CC I;
150 CC D;
151 }
152 idCC;
153
154typedef
155 struct _iddCC {
156 /* word 1 */
157 UChar tag;
158 UChar instr_size;
159 UChar data_size;
160 /* 1 byte padding */
161
162 /* words 2+ */
163 Addr instr_addr;
164 CC I;
165 CC Da;
166 CC Db;
167 }
168 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000169
170static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
171{
njn25e49d8e72002-09-23 09:36:25 +0000172 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000173 cc->instr_size = instr_size;
174 cc->instr_addr = instr_addr;
175 initCC(&cc->I);
176}
177
178static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
179 UInt instr_size, UInt data_size)
180{
181 cc->tag = X_CC;
182 cc->instr_size = instr_size;
183 cc->data_size = data_size;
184 cc->instr_addr = instr_addr;
185 initCC(&cc->I);
186 initCC(&cc->D);
187}
188
njn25e49d8e72002-09-23 09:36:25 +0000189static void init_iddCC(iddCC* cc, Addr instr_addr,
190 UInt instr_size, UInt data_size)
191{
192 cc->tag = ReadWriteCC;
193 cc->instr_size = instr_size;
194 cc->data_size = data_size;
195 cc->instr_addr = instr_addr;
196 initCC(&cc->I);
197 initCC(&cc->Da);
198 initCC(&cc->Db);
199}
200
/* Add the a/m1/m2 counters of member 'cc' of the CC_type object located at
 * BBCC_ptr into 'total'.
 *
 * BBCC_ptr is NOT a macro parameter: it is taken from the scope in which
 * the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so that the three additions act
 * as one statement (safe as the body of an unbraced if/else).  Invoke it
 * with a trailing semicolon. */
#define ADD_CC_TO(CC_type, cc, total)                                    \
   do {                                                                  \
      (total).a  += ((CC_type*)BBCC_ptr)->cc.a;                          \
      (total).m1 += ((CC_type*)BBCC_ptr)->cc.m1;                         \
      (total).m2 += ((CC_type*)BBCC_ptr)->cc.m2;                         \
   } while (0)
205
/* Set to 1 to have the sprint_*CC() functions append each instruction's
 * address, as a trailing "# <addr>" comment, after its counts in
 * cachegrind.out. */
#define PRINT_INSTR_ADDRS 0
209
njne0ee0712002-05-03 16:41:05 +0000210static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000211{
njn95114da2002-06-05 09:39:31 +0000212#if PRINT_INSTR_ADDRS
213 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
214 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
215#else
njne0ee0712002-05-03 16:41:05 +0000216 VG_(sprintf)(buf, "%llu %llu %llu\n",
217 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000218#endif
njn4f9c9342002-04-29 16:03:24 +0000219}
220
njne0ee0712002-05-03 16:41:05 +0000221static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000222{
njn95114da2002-06-05 09:39:31 +0000223#if PRINT_INSTR_ADDRS
224 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
225 cc->I.a, cc->I.m1, cc->I.m2,
226 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
227#else
njne0ee0712002-05-03 16:41:05 +0000228 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
229 cc->I.a, cc->I.m1, cc->I.m2,
230 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000231#endif
njn4f9c9342002-04-29 16:03:24 +0000232}
233
njne0ee0712002-05-03 16:41:05 +0000234static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000235{
njn95114da2002-06-05 09:39:31 +0000236#if PRINT_INSTR_ADDRS
237 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
238 cc->I.a, cc->I.m1, cc->I.m2,
239 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
240#else
njne0ee0712002-05-03 16:41:05 +0000241 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
242 cc->I.a, cc->I.m1, cc->I.m2,
243 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000244#endif
njn4f9c9342002-04-29 16:03:24 +0000245}
246
njn25e49d8e72002-09-23 09:36:25 +0000247static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
248{
249#if PRINT_INSTR_ADDRS
250 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
251 cc->I.a, cc->I.m1, cc->I.m2,
252 cc->Da.a, cc->Da.m1, cc->Da.m2,
253 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
254#else
255 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
256 cc->I.a, cc->I.m1, cc->I.m2,
257 cc->Da.a, cc->Da.m1, cc->Da.m2,
258 cc->Db.a, cc->Db.m1, cc->Db.m2);
259#endif
260}
261
262
njn4f9c9342002-04-29 16:03:24 +0000263/*------------------------------------------------------------*/
264/*--- BBCC hash table stuff ---*/
265/*------------------------------------------------------------*/
266
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The sizes below
 * work fairly well for Konqueror. */

#define N_FILE_ENTRIES        251    /* buckets in the per-filename table  */
#define N_FN_ENTRIES           53    /* buckets in each file's fn table    */
#define N_BBCC_ENTRIES         37    /* buckets in each fn's BBCC table    */
274
275/* The cost centres for a basic block are stored in a contiguous array.
276 * They are distinguishable by their tag field. */
277typedef struct _BBCC BBCC;
278struct _BBCC {
279 Addr orig_addr;
280 UInt array_size; /* byte-size of variable length array */
281 BBCC* next;
282 Addr array[0]; /* variable length array */
283};
284
285typedef struct _fn_node fn_node;
286struct _fn_node {
287 Char* fn_name;
288 BBCC* BBCCs[N_BBCC_ENTRIES];
289 fn_node* next;
290};
291
292typedef struct _file_node file_node;
293struct _file_node {
294 Char* filename;
295 fn_node* fns[N_FN_ENTRIES];
296 file_node* next;
297};
298
299/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000300static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000301
sewardj4f29ddf2002-05-03 22:29:04 +0000302static Int distinct_files = 0;
303static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000304
sewardj4f29ddf2002-05-03 22:29:04 +0000305static Int distinct_instrs = 0;
306static Int full_debug_BBs = 0;
307static Int file_line_debug_BBs = 0;
308static Int fn_name_debug_BBs = 0;
309static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000310
sewardj4f29ddf2002-05-03 22:29:04 +0000311static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000312
njn4294fd42002-06-05 14:41:10 +0000313static CC Ir_discards;
314static CC Dr_discards;
315static CC Dw_discards;
316
njn4f9c9342002-04-29 16:03:24 +0000317static void init_BBCC_table()
318{
319 Int i;
320 for (i = 0; i < N_FILE_ENTRIES; i++)
321 BBCC_table[i] = NULL;
322}
323
njne0ee0712002-05-03 16:41:05 +0000324static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
325 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000326{
njn25e49d8e72002-09-23 09:36:25 +0000327 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000328
njn25e49d8e72002-09-23 09:36:25 +0000329 found1 = VG_(get_filename_linenum)(instr_addr, filename,
330 FILENAME_LEN, line_num);
331 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000332
333 if (!found1 && !found2) {
334 no_debug_BBs++;
335 VG_(strcpy)(filename, "???");
336 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000337 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000338
339 } else if ( found1 && found2) {
340 full_debug_BBs++;
341
342 } else if ( found1 && !found2) {
343 file_line_debug_BBs++;
344 VG_(strcpy)(fn_name, "???");
345
346 } else /*(!found1 && found2)*/ {
347 fn_name_debug_BBs++;
348 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000349 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000350 }
351}
352
353/* Forward declaration. */
354static Int compute_BBCC_array_size(UCodeBlock* cb);
355
356static __inline__
357file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
358{
359 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000360 file_node* new = VG_(malloc)(sizeof(file_node));
361 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000362 for (i = 0; i < N_FN_ENTRIES; i++) {
363 new->fns[i] = NULL;
364 }
365 new->next = next;
366 return new;
367}
368
369static __inline__
370fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
371{
372 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000373 fn_node* new = VG_(malloc)(sizeof(fn_node));
374 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000375 for (i = 0; i < N_BBCC_ENTRIES; i++) {
376 new->BBCCs[i] = NULL;
377 }
378 new->next = next;
379 return new;
380}
381
382static __inline__
383BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
384{
385 Int BBCC_array_size = compute_BBCC_array_size(cb);
386 BBCC* new;
387
njn25e49d8e72002-09-23 09:36:25 +0000388 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000389 new->orig_addr = bb_orig_addr;
390 new->array_size = BBCC_array_size;
391 new->next = next;
392
393 return new;
394}
395
396#define HASH_CONSTANT 256
397
398static UInt hash(Char *s, UInt table_size)
399{
400 int hash_value = 0;
401 for ( ; *s; s++)
402 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
403 return hash_value;
404}
405
406/* Do a three step traversal: by filename, then fn_name, then instr_addr.
407 * In all cases prepends new nodes to their chain. Returns a pointer to the
408 * cost centre. Also sets BB_seen_before by reference.
409 */
410static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000411 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000412{
413 file_node *curr_file_node;
414 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000415 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000416 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
417 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000418 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000419
njne0ee0712002-05-03 16:41:05 +0000420 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000421
njn25e49d8e72002-09-23 09:36:25 +0000422 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000423 filename_hash = hash(filename, N_FILE_ENTRIES);
424 curr_file_node = BBCC_table[filename_hash];
425 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000426 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000427 curr_file_node = curr_file_node->next;
428 }
429 if (NULL == curr_file_node) {
430 BBCC_table[filename_hash] = curr_file_node =
431 new_file_node(filename, BBCC_table[filename_hash]);
432 distinct_files++;
433 }
434
435 fnname_hash = hash(fn_name, N_FN_ENTRIES);
436 curr_fn_node = curr_file_node->fns[fnname_hash];
437 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000438 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000439 curr_fn_node = curr_fn_node->next;
440 }
441 if (NULL == curr_fn_node) {
442 curr_file_node->fns[fnname_hash] = curr_fn_node =
443 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
444 distinct_fns++;
445 }
446
447 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000448 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000449 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
450 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000451 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000452 curr_BBCC = curr_BBCC->next;
453 }
454 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000455
njne427a662002-10-02 11:08:25 +0000456 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000457
njn4f9c9342002-04-29 16:03:24 +0000458 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
459 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
460 *BB_seen_before = False;
461
462 } else {
njne427a662002-10-02 11:08:25 +0000463 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
464 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000465 if (VG_(clo_verbosity) > 2) {
466 VG_(message)(Vg_DebugMsg,
467 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000468 }
469 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000470
471 if (True == remove) {
472 // Remove curr_BBCC from chain; it will be used and free'd by the
473 // caller.
474 *prev_BBCC_next_ptr = curr_BBCC->next;
475
476 } else {
477 BB_retranslations++;
478 }
njn4f9c9342002-04-29 16:03:24 +0000479 }
njn25e49d8e72002-09-23 09:36:25 +0000480 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000481 return curr_BBCC;
482}
483
484/*------------------------------------------------------------*/
485/*--- Cache simulation instrumentation phase ---*/
486/*------------------------------------------------------------*/
487
njn4f9c9342002-04-29 16:03:24 +0000488static Int compute_BBCC_array_size(UCodeBlock* cb)
489{
490 UInstr* u_in;
491 Int i, CC_size, BBCC_size = 0;
492 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000493 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000494
495 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000496 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000497
njn810086f2002-11-14 12:42:47 +0000498 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
499 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000500 switch(u_in->opcode) {
501
502 case INCEIP:
503 goto case_for_end_of_instr;
504
505 case JMP:
506 if (u_in->cond != CondAlways) break;
507
508 goto case_for_end_of_instr;
509
510 case_for_end_of_instr:
511
njn25e49d8e72002-09-23 09:36:25 +0000512 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
513 t_read != t_write)
514 CC_size = sizeof(iddCC);
515 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
516 CC_size = sizeof(idCC);
517 else
518 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000519
520 BBCC_size += CC_size;
521 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
522 break;
523
524 case LOAD:
525 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000526 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000527 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000528 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000529 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000530 is_LOAD = True;
531 break;
532
533 case STORE:
534 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000535 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000536 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000537 is_STORE = True;
538 break;
539
sewardj3949d102003-03-28 17:21:29 +0000540 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000541 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000542 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000543 case FPU_R:
njne427a662002-10-02 11:08:25 +0000544 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000545 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000546 is_FPU_R = True;
547 break;
548
njn21f805d2003-08-25 16:15:40 +0000549 case SSE2a_MemRd:
550 case SSE2a1_MemRd:
551 sk_assert(u_in->size == 4 || u_in->size == 16);
552 t_read = u_in->val3;
553 is_FPU_R = True;
554 break;
555
556 case SSE3a_MemRd:
557 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
558 t_read = u_in->val3;
559 is_FPU_R = True;
560 break;
561
562 case SSE3ag_MemRd_RegWr:
563 sk_assert(u_in->size == 4 || u_in->size == 8);
564 t_read = u_in->val1;
565 is_FPU_R = True;
566 break;
567
sewardj3949d102003-03-28 17:21:29 +0000568 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000569 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000570 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000571 case FPU_W:
njne427a662002-10-02 11:08:25 +0000572 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000573 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000574 is_FPU_W = True;
575 break;
576
njn21f805d2003-08-25 16:15:40 +0000577 case SSE2a_MemWr:
578 sk_assert(u_in->size == 4 || u_in->size == 16);
579 t_write = u_in->val3;
580 is_FPU_W = True;
581 break;
582
583 case SSE3a_MemWr:
584 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
585 t_write = u_in->val3;
586 is_FPU_W = True;
587 break;
588
njn4f9c9342002-04-29 16:03:24 +0000589 default:
590 break;
591 }
592 }
593
594 return BBCC_size;
595}
596
njn25e49d8e72002-09-23 09:36:25 +0000597static __attribute__ ((regparm (1)))
598void log_1I_0D_cache_access(iCC* cc)
599{
600 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
601 // cc, cc->instr_addr, cc->instr_size)
602 VGP_PUSHCC(VgpCacheSimulate);
603 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
604 cc->I.a++;
605 VGP_POPCC(VgpCacheSimulate);
606}
607
608/* Difference between this function and log_1I_0D_cache_access() is that
609 this one can be passed any kind of CC, not just an iCC. So we have to
610 be careful to make sure we don't make any assumptions about CC layout.
611 (As it stands, they would be safe, but this will avoid potential heartache
612 if anyone else changes CC layout.)
613 Note that we only do the switch for the JIFZ version because if we always
614 called this switching version, things would run about 5% slower. */
615static __attribute__ ((regparm (1)))
616void log_1I_0D_cache_access_JIFZ(iCC* cc)
617{
618 UChar instr_size;
619 Addr instr_addr;
620 CC* I;
621
622 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
623 // cc, cc->instr_addr, cc->instr_size)
624 VGP_PUSHCC(VgpCacheSimulate);
625
626 switch(cc->tag) {
627 case InstrCC:
628 instr_size = cc->instr_size;
629 instr_addr = cc->instr_addr;
630 I = &(cc->I);
631 break;
632 case ReadCC:
633 case WriteCC:
634 case ModCC:
635 instr_size = ((idCC*)cc)->instr_size;
636 instr_addr = ((idCC*)cc)->instr_addr;
637 I = &( ((idCC*)cc)->I );
638 break;
639 case ReadWriteCC:
640 instr_size = ((iddCC*)cc)->instr_size;
641 instr_addr = ((iddCC*)cc)->instr_addr;
642 I = &( ((iddCC*)cc)->I );
643 break;
644 default:
njne427a662002-10-02 11:08:25 +0000645 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000646 break;
647 }
648 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
649 I->a++;
650 VGP_POPCC(VgpCacheSimulate);
651}
652
653__attribute__ ((regparm (2))) static
654void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
655{
656 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
657 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
658 VGP_PUSHCC(VgpCacheSimulate);
659 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
660 cc->D.a++;
661 VGP_POPCC(VgpCacheSimulate);
662}
663
664__attribute__ ((regparm (2))) static
665void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
666{
667 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
668 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
669 VGP_PUSHCC(VgpCacheSimulate);
670 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
671 cc->I.a++;
672
673 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
674 cc->D.a++;
675 VGP_POPCC(VgpCacheSimulate);
676}
677
678__attribute__ ((regparm (3))) static
679void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
680{
681 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
682 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
683 VGP_PUSHCC(VgpCacheSimulate);
684 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
685 cc->Da.a++;
686 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
687 cc->Db.a++;
688 VGP_POPCC(VgpCacheSimulate);
689}
690
691__attribute__ ((regparm (3))) static
692void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
693{
694 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
695 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
696 VGP_PUSHCC(VgpCacheSimulate);
697 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
698 cc->I.a++;
699
700 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
701 cc->Da.a++;
702 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
703 cc->Db.a++;
704 VGP_POPCC(VgpCacheSimulate);
705}
706
707UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
708{
709/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000710#define INVALID_DATA_SIZE 999999
711
njn4f9c9342002-04-29 16:03:24 +0000712 UCodeBlock* cb;
713 Int i;
714 UInstr* u_in;
715 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000716 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
717 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000718 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000719 Addr x86_instr_addr = orig_addr;
720 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
721 Addr helper;
722 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000723 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000724 Bool BB_seen_before = False;
725 Bool instrumented_Jcond = False;
726 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000727 Addr BBCC_ptr0, BBCC_ptr;
728
729 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
730 * if it's the first time it's been seen), and point to start of the
731 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000732 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000733 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
734
njn810086f2002-11-14 12:42:47 +0000735 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000736
njn25e49d8e72002-09-23 09:36:25 +0000737 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
738 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000739
njn810086f2002-11-14 12:42:47 +0000740 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
741 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000742
njn4f9c9342002-04-29 16:03:24 +0000743 /* What this is all about: we want to instrument each x86 instruction
744 * translation. The end of these are marked in three ways. The three
745 * ways, and the way we instrument them, are as follows:
746 *
747 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
748 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
749 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
750 *
njn25e49d8e72002-09-23 09:36:25 +0000751 * The last UInstr in a basic block is always a Juncond. Jconds,
752 * when they appear, are always second last. We check this with
753 * various assertions.
754 *
755 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000756 * executed. We don't have to put the instrumentation before the INCEIP
757 * (it could go after) but we do so for consistency.
758 *
njn25e49d8e72002-09-23 09:36:25 +0000759 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
760 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000761 *
njn25e49d8e72002-09-23 09:36:25 +0000762 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000763 *
764 * The instrumentation is just a call to the appropriate helper function,
765 * passing it the address of the instruction's CC.
766 */
njne427a662002-10-02 11:08:25 +0000767 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000768
769 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000770 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000771 break;
772
njn4f9c9342002-04-29 16:03:24 +0000773 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000774 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000775 */
776 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000777 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000778 t_read_addr = newTemp(cb);
779 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
780 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000781 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000782 break;
783
sewardj3949d102003-03-28 17:21:29 +0000784 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000785 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000786 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000787 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000788 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000789 t_read_addr = newTemp(cb);
790 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000791 data_size = ( u_in->size <= MIN_LINE_SIZE
792 ? u_in->size
793 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000794 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000795 break;
796
njn21f805d2003-08-25 16:15:40 +0000797 case SSE2a_MemRd:
798 case SSE2a1_MemRd:
799 sk_assert(u_in->size == 4 || u_in->size == 16);
800 t_read = u_in->val3;
801 t_read_addr = newTemp(cb);
802 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
803 data_size = u_in->size;
804 VG_(copy_UInstr)(cb, u_in);
805 break;
806
807 case SSE3a_MemRd:
808 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
809 t_read = u_in->val3;
810 t_read_addr = newTemp(cb);
811 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
812 data_size = u_in->size;
813 VG_(copy_UInstr)(cb, u_in);
814 break;
815
816 case SSE3ag_MemRd_RegWr:
817 sk_assert(u_in->size == 4 || u_in->size == 8);
818 t_read = u_in->val1;
819 t_read_addr = newTemp(cb);
820 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
821 data_size = u_in->size;
822 VG_(copy_UInstr)(cb, u_in);
823 break;
824
njn4f9c9342002-04-29 16:03:24 +0000825 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000826 * That's how the code above determines whether it does a write.
827 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000828 * As for the MOV, if it's a mod instruction it's redundant, but it's
829 * not expensive and mod instructions are rare anyway. */
sewardj3949d102003-03-28 17:21:29 +0000830 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000831 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000832 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000833 case STORE:
834 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000835 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000836 t_write_addr = newTemp(cb);
837 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000838 /* 28 and 108 B data-sized instructions will be done
839 * inaccurately but they're very rare and this avoids errors
840 * from hitting more than two cache lines in the simulation. */
841 data_size = ( u_in->size <= MIN_LINE_SIZE
842 ? u_in->size
843 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000844 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000845 break;
846
njn21f805d2003-08-25 16:15:40 +0000847 case SSE2a_MemWr:
848 sk_assert(u_in->size == 4 || u_in->size == 16);
849 /* fall through */
850 case SSE3a_MemWr:
851 sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
852 t_write = u_in->val3;
853 t_write_addr = newTemp(cb);
854 uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
855 data_size = u_in->size;
856 VG_(copy_UInstr)(cb, u_in);
857 break;
njn25e49d8e72002-09-23 09:36:25 +0000858
859 /* For rep-prefixed instructions, log a single I-cache access
860 * before the UCode loop that implements the repeated part, which
861 * is where the multiple D-cache accesses are logged. */
862 case JIFZ:
863 has_rep_prefix = True;
864
865 /* Setup 1st and only arg: CC addr */
866 t_CC_addr = newTemp(cb);
867 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
868 uLiteral(cb, BBCC_ptr);
869
870 /* Call helper */
871 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
872 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000873 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000874 break;
875
876
877 /* INCEIP: insert instrumentation */
878 case INCEIP:
879 x86_instr_size = u_in->val1;
880 goto instrument_x86_instr;
881
882 /* JMP: insert instrumentation if the first JMP */
883 case JMP:
884 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000885 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000886 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000887 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000888 instrumented_Jcond = False; /* reset */
889 break;
890 }
891 /* The first JMP... instrument. */
892 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000893 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000894 instrumented_Jcond = True;
895 } else {
njn810086f2002-11-14 12:42:47 +0000896 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000897 }
898
899 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +0000900 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
901
njn25e49d8e72002-09-23 09:36:25 +0000902 goto instrument_x86_instr;
903
904
905 /* Code executed at the end of each x86 instruction. */
906 instrument_x86_instr:
907
908 /* Initialise the CC in the BBCC array appropriately if it
909 * hasn't been initialised before. Then call appropriate sim
910 * function, passing it the CC address. */
911 stack_used = 0;
912
njne427a662002-10-02 11:08:25 +0000913 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000914 x86_instr_size <= MAX_x86_INSTR_SIZE);
915
916#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
917
918 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000919 sk_assert(INVALID_DATA_SIZE == data_size);
920 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000921 INVALID_TEMPREG == t_read &&
922 INVALID_TEMPREG == t_write_addr &&
923 INVALID_TEMPREG == t_write);
924 CC_size = sizeof(iCC);
925 if (!BB_seen_before)
926 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
927 helper = ( has_rep_prefix
928 ? (Addr)0 /* no extra log needed */
929 : (Addr) & log_1I_0D_cache_access
930 );
931 argc = 1;
932
933 } else {
njne427a662002-10-02 11:08:25 +0000934 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000935 8 == data_size || 10 == data_size ||
936 MIN_LINE_SIZE == data_size);
937
938 if (IS_(read) && !IS_(write)) {
939 CC_size = sizeof(idCC);
940 /* If it uses 'rep', we've already logged the I-cache
941 * access at the JIFZ UInstr (see JIFZ case below) so
942 * don't do it here */
943 helper = ( has_rep_prefix
944 ? (Addr) & log_0I_1D_cache_access
945 : (Addr) & log_1I_1D_cache_access
946 );
947 argc = 2;
948 if (!BB_seen_before)
949 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
950 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000951 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000952 INVALID_TEMPREG != t_read &&
953 INVALID_TEMPREG == t_write_addr &&
954 INVALID_TEMPREG == t_write);
955 t_data_addr1 = t_read_addr;
956
957 } else if (!IS_(read) && IS_(write)) {
958 CC_size = sizeof(idCC);
959 helper = ( has_rep_prefix
960 ? (Addr) & log_0I_1D_cache_access
961 : (Addr) & log_1I_1D_cache_access
962 );
963 argc = 2;
964 if (!BB_seen_before)
965 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
966 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000967 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000968 INVALID_TEMPREG == t_read &&
969 INVALID_TEMPREG != t_write_addr &&
970 INVALID_TEMPREG != t_write);
971 t_data_addr1 = t_write_addr;
972
973 } else {
njne427a662002-10-02 11:08:25 +0000974 sk_assert(IS_(read) && IS_(write));
975 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000976 INVALID_TEMPREG != t_read &&
977 INVALID_TEMPREG != t_write_addr &&
978 INVALID_TEMPREG != t_write);
979 if (t_read == t_write) {
980 CC_size = sizeof(idCC);
981 helper = ( has_rep_prefix
982 ? (Addr) & log_0I_1D_cache_access
983 : (Addr) & log_1I_1D_cache_access
984 );
985 argc = 2;
986 if (!BB_seen_before)
987 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
988 x86_instr_size, data_size);
989 t_data_addr1 = t_read_addr;
990 } else {
991 CC_size = sizeof(iddCC);
992 helper = ( has_rep_prefix
993 ? (Addr) & log_0I_2D_cache_access
994 : (Addr) & log_1I_2D_cache_access
995 );
996 argc = 3;
997 if (!BB_seen_before)
998 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
999 x86_instr_size, data_size);
1000 t_data_addr1 = t_read_addr;
1001 t_data_addr2 = t_write_addr;
1002 }
1003 }
1004#undef IS_
1005 }
1006
1007 /* Call the helper, if necessary */
1008 if ((Addr)0 != helper) {
1009
1010 /* Setup 1st arg: CC addr */
1011 t_CC_addr = newTemp(cb);
1012 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
1013 uLiteral(cb, BBCC_ptr);
1014
1015 /* Call the helper */
1016 if (1 == argc)
1017 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
1018 else if (2 == argc)
1019 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
1020 TempReg, t_data_addr1);
1021 else if (3 == argc)
1022 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
1023 TempReg, t_data_addr1,
1024 TempReg, t_data_addr2);
1025 else
njne427a662002-10-02 11:08:25 +00001026 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +00001027
1028 uCCall(cb, helper, argc, argc, False);
1029 }
1030
1031 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +00001032 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +00001033
1034 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
1035 BBCC_ptr += CC_size;
1036 x86_instr_addr += x86_instr_size;
1037 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
1038 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
1039 data_size = INVALID_DATA_SIZE;
1040 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +00001041 break;
1042
1043 default:
njn4ba5a792002-09-30 10:23:54 +00001044 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +00001045 break;
1046 }
1047 }
1048
1049 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +00001050 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001051
njn4ba5a792002-09-30 10:23:54 +00001052 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +00001053 return cb;
njn25e49d8e72002-09-23 09:36:25 +00001054
1055#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +00001056}
1057
1058/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001059/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +00001060/*------------------------------------------------------------*/
1061
1062/* Total reads/writes/misses. Calculated during CC traversal at the end. */
1063static CC Ir_total;
1064static CC Dr_total;
1065static CC Dw_total;
1066
njn25e49d8e72002-09-23 09:36:25 +00001067#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
1068
1069static cache_t clo_I1_cache = UNDEFINED_CACHE;
1070static cache_t clo_D1_cache = UNDEFINED_CACHE;
1071static cache_t clo_L2_cache = UNDEFINED_CACHE;
1072
njn7cf0bd32002-06-08 13:36:03 +00001073/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
1074/* Probably only works for Intel and AMD chips, and probably only for some of
1075 * them.
1076 */
1077
sewardj05bcdcb2003-05-18 10:05:38 +00001078static __inline__ void cpuid(Int n, UInt *a, UInt *b, UInt *c, UInt *d)
njn7cf0bd32002-06-08 13:36:03 +00001079{
1080 __asm__ __volatile__ (
1081 "cpuid"
1082 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1083 : "0" (n) /* input */
1084 );
1085}
1086
sewardj07133bf2002-06-13 10:25:56 +00001087static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001088{
1089 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001090 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001091 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001092 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001093 " Simulating a %d KB cache with %d B lines",
1094 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001095}
1096
1097/* Intel method is truly wretched. We have to do an insane indexing into an
1098 * array of pre-defined configurations for various parts of the memory
1099 * hierarchy.
1100 */
1101static
sewardj07133bf2002-06-13 10:25:56 +00001102Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001103{
sewardj07133bf2002-06-13 10:25:56 +00001104 UChar info[16];
1105 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001106 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001107
1108 if (level < 2) {
1109 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001110 "warning: CPUID level < 2 for Intel processor (%d)",
1111 level);
njn7cf0bd32002-06-08 13:36:03 +00001112 return -1;
1113 }
1114
sewardj07133bf2002-06-13 10:25:56 +00001115 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1116 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001117 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1118 info[0] = 0x0; /* reset AL */
1119
1120 if (0 != trials) {
1121 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001122 "warning: non-zero CPUID trials for Intel processor (%d)",
1123 trials);
njn7cf0bd32002-06-08 13:36:03 +00001124 return -1;
1125 }
1126
1127 for (i = 0; i < 16; i++) {
1128
1129 switch (info[i]) {
1130
1131 case 0x0: /* ignore zeros */
1132 break;
1133
njn25e49d8e72002-09-23 09:36:25 +00001134 /* TLB info, ignore */
1135 case 0x01: case 0x02: case 0x03: case 0x04:
1136 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njne864e2f2003-09-29 15:55:44 +00001137 case 0xb0: case 0xb3:
njn7cf0bd32002-06-08 13:36:03 +00001138 break;
1139
1140 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1141 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001142 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001143
1144 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1145 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
njne864e2f2003-09-29 15:55:44 +00001146 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
njn7cf0bd32002-06-08 13:36:03 +00001147
njn25e49d8e72002-09-23 09:36:25 +00001148 /* IA-64 info -- panic! */
1149 case 0x10: case 0x15: case 0x1a:
1150 case 0x88: case 0x89: case 0x8a: case 0x8d:
1151 case 0x90: case 0x96: case 0x9b:
1152 VG_(message)(Vg_DebugMsg,
1153 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001154 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001155
njn7cf0bd32002-06-08 13:36:03 +00001156 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001157 VG_(message)(Vg_DebugMsg,
1158 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001159 break;
1160
njn25e49d8e72002-09-23 09:36:25 +00001161 /* These are sectored, whatever that means */
1162 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1163 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1164
1165 /* If a P6 core, this means "no L2 cache".
1166 If a P4 core, this means "no L3 cache".
1167 We don't know what core it is, so don't issue a warning. To detect
1168 a missing L2 cache, we use 'L2_found'. */
1169 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001170 break;
1171
njn25e49d8e72002-09-23 09:36:25 +00001172 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1173 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1174 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1175 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1176 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001177
1178 /* These are sectored, whatever that means */
1179 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1180 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1181 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1182
1183 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1184 * conversion to byte size is a total guess; treat the 12K and 16K
1185 * cases the same since the cache byte size must be a power of two for
1186 * everything to work!. Also guessing 32 bytes for the line size...
1187 */
1188 case 0x70: /* 12K micro-ops, 8-way */
1189 *I1c = (cache_t) { 16, 8, 32 };
1190 micro_ops_warn(12, 16, 32);
1191 break;
1192 case 0x71: /* 16K micro-ops, 8-way */
1193 *I1c = (cache_t) { 16, 8, 32 };
1194 micro_ops_warn(16, 16, 32);
1195 break;
1196 case 0x72: /* 32K micro-ops, 8-way */
1197 *I1c = (cache_t) { 32, 8, 32 };
1198 micro_ops_warn(32, 32, 32);
1199 break;
1200
njn25e49d8e72002-09-23 09:36:25 +00001201 /* These are sectored, whatever that means */
1202 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1203 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1204 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1205 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1206 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001207
njn25e49d8e72002-09-23 09:36:25 +00001208 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1209 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1210 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1211 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1212 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn607adfc2003-09-30 14:15:44 +00001213 case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
1214 case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001215
1216 default:
1217 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001218 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001219 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001220 break;
1221 }
1222 }
njn25e49d8e72002-09-23 09:36:25 +00001223
1224 if (!L2_found)
1225 VG_(message)(Vg_DebugMsg,
1226 "warning: L2 cache not installed, ignore L2 results.");
1227
njn7cf0bd32002-06-08 13:36:03 +00001228 return 0;
1229}
1230
1231/* AMD method is straightforward, just extract appropriate bits from the
1232 * result registers.
1233 *
1234 * Bits, for D1 and I1:
1235 * 31..24 data L1 cache size in KBs
1236 * 23..16 data L1 cache associativity (FFh=full)
1237 * 15.. 8 data L1 cache lines per tag
1238 * 7.. 0 data L1 cache line size in bytes
1239 *
1240 * Bits, for L2:
1241 * 31..16 unified L2 cache size in KBs
1242 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1243 * 11.. 8 unified L2 cache lines per tag
1244 * 7.. 0 unified L2 cache line size in bytes
1245 *
1246 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1247 * upon this information. (Whatever that means -- njn)
1248 *
njn25e49d8e72002-09-23 09:36:25 +00001249 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1250 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1251 * so we detect that.
1252 *
njn7cf0bd32002-06-08 13:36:03 +00001253 * Returns 0 on success, non-zero on failure.
1254 */
sewardj07133bf2002-06-13 10:25:56 +00001255static
1256Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001257{
sewardj05bcdcb2003-05-18 10:05:38 +00001258 UInt ext_level;
1259 Int dummy, model;
sewardj07133bf2002-06-13 10:25:56 +00001260 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001261
1262 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1263
1264 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1265 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001266 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1267 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001268 return -1;
1269 }
1270
1271 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1272 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1273
njn25e49d8e72002-09-23 09:36:25 +00001274 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1275 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1276
1277 /* Check for Duron bug */
1278 if (model == 0x630) {
1279 VG_(message)(Vg_UserMsg,
1280 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1281 L2i = (64 << 16) | (L2i & 0xffff);
1282 }
1283
njn7cf0bd32002-06-08 13:36:03 +00001284 D1c->size = (D1i >> 24) & 0xff;
1285 D1c->assoc = (D1i >> 16) & 0xff;
1286 D1c->line_size = (D1i >> 0) & 0xff;
1287
1288 I1c->size = (I1i >> 24) & 0xff;
1289 I1c->assoc = (I1i >> 16) & 0xff;
1290 I1c->line_size = (I1i >> 0) & 0xff;
1291
1292 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1293 L2c->assoc = (L2i >> 12) & 0xf;
1294 L2c->line_size = (L2i >> 0) & 0xff;
1295
1296 return 0;
1297}
1298
/* Jump buffer shared between the CPUID probe and its SIGILL handler. */
static jmp_buf cpuid_jmpbuf;

/* Signal handler: abandon whatever was executing and resume at the point
 * recorded in cpuid_jmpbuf.  'signum' is not examined. */
static void cpuid_SIGILL_handler(int signum)
{
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1306
1307static
sewardj07133bf2002-06-13 10:25:56 +00001308Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001309{
sewardj07133bf2002-06-13 10:25:56 +00001310 Int level, res, ret;
1311 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001312 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001313
1314 /* Install own SIGILL handler */
1315 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1316 sigill_new.ksa_flags = 0;
1317 sigill_new.ksa_restorer = NULL;
1318 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001319 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001320
1321 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001322 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001323
1324 /* Trap for illegal instruction, in case it's a really old processor that
1325 * doesn't support CPUID. */
1326 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1327 cpuid(0, &level, (int*)&vendor_id[0],
1328 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1329 vendor_id[12] = '\0';
1330
1331 /* Restore old SIGILL handler */
1332 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001333 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001334
1335 } else {
1336 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1337
1338 /* Restore old SIGILL handler */
1339 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001340 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001341 return -1;
1342 }
1343
1344 if (0 == level) {
1345 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1346 return -1;
1347 }
1348
1349 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1350 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1351 ret = Intel_cache_info(level, I1c, D1c, L2c);
1352
1353 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1354 ret = AMD_cache_info(I1c, D1c, L2c);
1355
1356 } else {
1357 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1358 vendor_id);
1359 return -1;
1360 }
1361
1362 /* Successful! Convert sizes from KB to bytes */
1363 I1c->size *= 1024;
1364 D1c->size *= 1024;
1365 L2c->size *= 1024;
1366
1367 return ret;
1368}
1369
1370/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001371static
1372void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001373{
1374 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001375 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001376 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001377 "warning: %s size of %dB not a power of two; "
1378 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001379 cache->size = dflt->size;
1380 }
1381
sewardj07133bf2002-06-13 10:25:56 +00001382 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001383 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001384 "warning: %s associativity of %d not a power of two; "
1385 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001386 cache->assoc = dflt->assoc;
1387 }
1388
sewardj07133bf2002-06-13 10:25:56 +00001389 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001390 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001391 "warning: %s line size of %dB not a power of two; "
1392 "defaulting to %dB",
1393 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001394 cache->line_size = dflt->line_size;
1395 }
1396
1397 /* Then check line size >= 16 -- any smaller and a single instruction could
1398 * straddle three cache lines, which breaks a simulation assertion and is
1399 * stupid anyway. */
1400 if (cache->line_size < MIN_LINE_SIZE) {
1401 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001402 "warning: %s line size of %dB too small; "
1403 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001404 cache->line_size = MIN_LINE_SIZE;
1405 }
1406
1407 /* Then check cache size > line size (causes seg faults if not). */
1408 if (cache->size <= cache->line_size) {
1409 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001410 "warning: %s cache size of %dB <= line size of %dB; "
1411 "increasing to %dB", name, cache->size, cache->line_size,
1412 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001413 cache->size = cache->line_size * 2;
1414 }
1415
1416 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1417 if (cache->assoc > (cache->size / cache->line_size)) {
1418 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001419 "warning: %s associativity > (size / line size); "
1420 "increasing size to %dB",
1421 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001422 cache->size = cache->assoc * cache->line_size;
1423 }
1424}
1425
1426/* On entry, args are undefined. Fill them with any info from the
1427 * command-line, then fill in any remaining with CPUID instruction if possible,
1428 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001429static
1430void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001431{
1432 /* Defaults are for a model 3 or 4 Athlon */
1433 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1434 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1435 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1436
njn25e49d8e72002-09-23 09:36:25 +00001437#define CMD_LINE_DEFINED(L) \
1438 (-1 != clo_##L##_cache.size || \
1439 -1 != clo_##L##_cache.assoc || \
1440 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001441
njn25e49d8e72002-09-23 09:36:25 +00001442 *I1c = clo_I1_cache;
1443 *D1c = clo_D1_cache;
1444 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001445
njn7cf0bd32002-06-08 13:36:03 +00001446 /* If any undefined on command-line, try CPUID */
1447 if (! CMD_LINE_DEFINED(I1) ||
1448 ! CMD_LINE_DEFINED(D1) ||
1449 ! CMD_LINE_DEFINED(L2)) {
1450
1451 /* Overwrite CPUID result for any cache defined on command-line */
1452 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1453
njn25e49d8e72002-09-23 09:36:25 +00001454 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1455 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1456 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001457
1458 /* CPUID failed, use defaults for each undefined by command-line */
1459 } else {
1460 VG_(message)(Vg_DebugMsg,
1461 "Couldn't detect cache configuration, using one "
1462 "or more defaults ");
1463
njn25e49d8e72002-09-23 09:36:25 +00001464 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1465 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1466 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001467 }
1468 }
1469#undef CMD_LINE_DEFINED
1470
1471 check_cache(I1c, &I1_dflt, "I1");
1472 check_cache(D1c, &D1_dflt, "D1");
1473 check_cache(L2c, &L2_dflt, "L2");
1474
1475 if (VG_(clo_verbosity) > 1) {
1476 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1477 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1478 I1c->size, I1c->assoc, I1c->line_size);
1479 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1480 D1c->size, D1c->assoc, D1c->line_size);
1481 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1482 L2c->size, L2c->assoc, L2c->line_size);
1483 }
1484}
1485
njn4f9c9342002-04-29 16:03:24 +00001486/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001487/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001488/*------------------------------------------------------------*/
1489
njn4f9c9342002-04-29 16:03:24 +00001490static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1491 Char *first_instr_fn)
1492{
1493 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001494 Char buf[BUF_LEN], curr_file[BUF_LEN],
1495 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001496 UInt line_num;
1497
1498 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1499
njne0ee0712002-05-03 16:41:05 +00001500 /* Mark start of basic block in output, just to ease debugging */
1501 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001502
1503 VG_(strcpy)(curr_file, first_instr_fl);
1504
1505 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1506
1507 /* We pretend the CC is an iCC for getting the tag. This is ok
1508 * because both CC types have tag as their first byte. Once we know
1509 * the type, we can cast and act appropriately. */
1510
1511 Char fl_buf[FILENAME_LEN];
1512 Char fn_buf[FN_NAME_LEN];
1513
njne0ee0712002-05-03 16:41:05 +00001514 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001515 switch ( ((iCC*)BBCC_ptr)->tag ) {
1516
njn25e49d8e72002-09-23 09:36:25 +00001517 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001518 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1519 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001520 ADD_CC_TO(iCC, I, Ir_total);
1521 BBCC_ptr += sizeof(iCC);
1522 break;
1523
njn25e49d8e72002-09-23 09:36:25 +00001524 case ReadCC:
1525 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001526 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1527 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001528 ADD_CC_TO(idCC, I, Ir_total);
1529 ADD_CC_TO(idCC, D, Dr_total);
1530 BBCC_ptr += sizeof(idCC);
1531 break;
1532
njn25e49d8e72002-09-23 09:36:25 +00001533 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001534 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1535 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001536 ADD_CC_TO(idCC, I, Ir_total);
1537 ADD_CC_TO(idCC, D, Dw_total);
1538 BBCC_ptr += sizeof(idCC);
1539 break;
1540
njn25e49d8e72002-09-23 09:36:25 +00001541 case ReadWriteCC:
1542 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1543 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1544 ADD_CC_TO(iddCC, I, Ir_total);
1545 ADD_CC_TO(iddCC, Da, Dr_total);
1546 ADD_CC_TO(iddCC, Db, Dw_total);
1547 BBCC_ptr += sizeof(iddCC);
1548 break;
1549
njn4f9c9342002-04-29 16:03:24 +00001550 default:
njne427a662002-10-02 11:08:25 +00001551 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001552 break;
1553 }
1554 distinct_instrs++;
1555
njne0ee0712002-05-03 16:41:05 +00001556 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1557
1558 /* Allow for filename switching in the middle of a BB; if this happens,
1559 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001560 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001561 VG_(strcpy)(curr_file, fl_buf);
1562 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1563 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1564 }
1565
njn4f9c9342002-04-29 16:03:24 +00001566 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001567 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001568 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001569 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001570 VG_(printf)(" filenames: BB:%s, instr:%s;"
1571 " fn_names: BB:%s, instr:%s;"
1572 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001573 first_instr_fl, fl_buf,
1574 first_instr_fn, fn_buf,
1575 line_num);
1576 }
1577
njne0ee0712002-05-03 16:41:05 +00001578 VG_(sprintf)(lbuf, "%u ", line_num);
1579 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1580 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001581 }
1582 /* If we switched filenames in the middle of the BB without switching back,
1583 * switch back now because the subsequent BB may be relying on falling under
1584 * the original file name. */
1585 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1586 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1587 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1588 }
njne0ee0712002-05-03 16:41:05 +00001589
1590 /* Mark end of basic block */
1591 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001592
njne427a662002-10-02 11:08:25 +00001593 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001594}
1595
njn25e49d8e72002-09-23 09:36:25 +00001596static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001597{
1598 Int fd;
1599 Char buf[BUF_LEN];
1600 file_node *curr_file_node;
1601 fn_node *curr_fn_node;
1602 BBCC *curr_BBCC;
1603 Int i,j,k;
1604
njn25e49d8e72002-09-23 09:36:25 +00001605 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001606
njndb918dd2003-07-22 20:45:11 +00001607 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001608 VKI_S_IRUSR|VKI_S_IWUSR);
1609 if (-1 == fd) {
sewardj0744b6c2002-12-11 00:45:42 +00001610 /* If the file can't be opened for whatever reason (conflict
1611 between multiple cachegrinded processes?), give up now. */
1612 file_err();
1613 return;
1614 }
njn4f9c9342002-04-29 16:03:24 +00001615
1616 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001617 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1618 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1619 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1620 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1621 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1622 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001623
1624 /* "cmd:" line */
1625 VG_(strcpy)(buf, "cmd:");
1626 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001627 for (i = 0; i < VG_(client_argc); i++) {
1628 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001629 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1630 }
1631 /* "events:" line */
1632 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1633 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1634
1635 /* Six loops here: three for the hash table arrays, and three for the
1636 * chains hanging off the hash table arrays. */
1637 for (i = 0; i < N_FILE_ENTRIES; i++) {
1638 curr_file_node = BBCC_table[i];
1639 while (curr_file_node != NULL) {
1640 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1641 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1642
1643 for (j = 0; j < N_FN_ENTRIES; j++) {
1644 curr_fn_node = curr_file_node->fns[j];
1645 while (curr_fn_node != NULL) {
1646 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1647 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1648
1649 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1650 curr_BBCC = curr_fn_node->BBCCs[k];
1651 while (curr_BBCC != NULL) {
1652 fprint_BBCC(fd, curr_BBCC,
1653
1654 curr_file_node->filename,
1655 curr_fn_node->fn_name);
1656
1657 curr_BBCC = curr_BBCC->next;
1658 }
1659 }
1660 curr_fn_node = curr_fn_node->next;
1661 }
1662 }
1663 curr_file_node = curr_file_node->next;
1664 }
1665 }
1666
njn4294fd42002-06-05 14:41:10 +00001667 /* Print stats from any discarded basic blocks */
1668 if (0 != Ir_discards.a) {
1669
1670 VG_(sprintf)(buf, "fl=(discarded)\n");
1671 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1672 VG_(sprintf)(buf, "fn=(discarded)\n");
1673 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1674
1675 /* Use 0 as line number */
1676 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1677 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1678 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1679 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1680 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1681
1682 Ir_total.a += Ir_discards.a;
1683 Ir_total.m1 += Ir_discards.m1;
1684 Ir_total.m2 += Ir_discards.m2;
1685 Dr_total.a += Dr_discards.a;
1686 Dr_total.m1 += Dr_discards.m1;
1687 Dr_total.m2 += Dr_discards.m2;
1688 Dw_total.a += Dw_discards.a;
1689 Dw_total.m1 += Dw_discards.m1;
1690 Dw_total.m2 += Dw_discards.m2;
1691 }
1692
njn4f9c9342002-04-29 16:03:24 +00001693 /* Summary stats must come after rest of table, since we calculate them
1694 * during traversal. */
1695 VG_(sprintf)(buf, "summary: "
1696 "%llu %llu %llu "
1697 "%llu %llu %llu "
1698 "%llu %llu %llu\n",
1699 Ir_total.a, Ir_total.m1, Ir_total.m2,
1700 Dr_total.a, Dr_total.m1, Dr_total.m2,
1701 Dw_total.a, Dw_total.m1, Dw_total.m2);
1702 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1703 VG_(close)(fd);
1704}
1705
/* Returns the number of characters needed to print n in decimal with a
 * comma between each group of three digits (e.g. 1234567 -> "1,234,567"
 * -> 9).
 *
 * Zero is treated as the single digit "0".  Counting it as zero digits
 * would make the unsigned expression (digits - 1) / 3 wrap around and
 * yield an enormous bogus width. */
static UInt ULong_width(ULong n)
{
   UInt digits = 0;

   /* do/while so that n == 0 still counts one digit */
   do {
      n /= 10;
      digits++;
   } while (n > 0);

   return digits + (digits - 1) / 3;   // add space for commas
}
1715
/* Formats the fixed-point value n/ex as a percentage string in buf,
 * right-justified in a field of field_width characters.
 *
 *   n           - the percentage scaled by ex (e.g. n=105, ex=100 is 1.05%);
 *                 expected to be non-negative
 *   ex          - the scale: a power of ten, which fixes the number of
 *                 fractional digits printed (10 -> one, 100 -> two); must be
 *                 non-zero
 *   field_width - minimum width; shorter results are padded on the left
 *                 with spaces, longer results are left as they are
 *   buf         - output buffer; must be large enough for the padded result
 *                 plus the terminating NUL
 *
 * The fractional part is zero-padded to the full number of digits implied
 * by ex.  Printing it with a bare "%d" would drop leading zeros, so that
 * e.g. 1.05% would wrongly come out as "1.5%". */
static
void percentify(Int n, Int ex, Int field_width, char buf[])
{
   Int whole = n / ex;
   Int frac  = n % ex;
   Int place;
   int i, len, space;

   VG_(sprintf)(buf, "%d.", whole);
   len = VG_(strlen)(buf);

   /* Emit one '0' for every decimal place that frac is too small to fill. */
   for (place = ex / 10; place > 1 && frac < place; place /= 10)
      buf[len++] = '0';

   VG_(sprintf)(buf + len, "%d%%", frac);
   len = VG_(strlen)(buf);

   space = field_width - len;
   if (space < 0) space = 0;     /* Allow for v. small field_width */

   /* Right justify in field: shift the string (including its NUL) to the
    * right, working backwards so nothing is overwritten before it is moved,
    * then fill the gap with spaces. */
   for (i = len; i >= 0; i--) buf[i + space] = buf[i];
   for (i = 0; i < space; i++) buf[i] = ' ';
}
1731
njn7d9f94d2003-04-22 21:41:40 +00001732void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001733{
njn607adfc2003-09-30 14:15:44 +00001734 static char buf1[RESULTS_BUF_LEN],
1735 buf2[RESULTS_BUF_LEN],
1736 buf3[RESULTS_BUF_LEN],
1737 fmt [RESULTS_BUF_LEN];
1738
njn4f9c9342002-04-29 16:03:24 +00001739 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001740 ULong L2_total_m, L2_total_mr, L2_total_mw,
1741 L2_total, L2_total_r, L2_total_w;
njn4f9c9342002-04-29 16:03:24 +00001742 Int l1, l2, l3;
1743 Int p;
1744
njn25e49d8e72002-09-23 09:36:25 +00001745 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001746
njn7cf0bd32002-06-08 13:36:03 +00001747 if (VG_(clo_verbosity) == 0)
1748 return;
1749
njn4f9c9342002-04-29 16:03:24 +00001750 /* I cache results. Use the I_refs value to determine the first column
1751 * width. */
njn607adfc2003-09-30 14:15:44 +00001752 l1 = ULong_width(Ir_total.a);
1753 l2 = ULong_width(Dr_total.a);
1754 l3 = ULong_width(Dw_total.a);
njn4f9c9342002-04-29 16:03:24 +00001755
njn607adfc2003-09-30 14:15:44 +00001756 /* Make format string, getting width right for numbers */
1757 VG_(sprintf)(fmt, "%%s %%,%dld", l1);
1758
1759 VG_(message)(Vg_UserMsg, fmt, "I refs: ", Ir_total.a);
1760 VG_(message)(Vg_UserMsg, fmt, "I1 misses: ", Ir_total.m1);
1761 VG_(message)(Vg_UserMsg, fmt, "L2i misses: ", Ir_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001762
1763 p = 100;
1764
njn25e49d8e72002-09-23 09:36:25 +00001765 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001766 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1767 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1768
1769 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1770 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1771 VG_(message)(Vg_UserMsg, "");
1772
1773 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1774 * width of columns 2 & 3. */
1775 D_total.a = Dr_total.a + Dw_total.a;
1776 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1777 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1778
njn607adfc2003-09-30 14:15:44 +00001779 /* Make format string, getting width right for numbers */
1780 VG_(sprintf)(fmt, "%%s %%,%dld (%%,%dld rd + %%,%dld wr)", l1, l2, l3);
njn4f9c9342002-04-29 16:03:24 +00001781
njn607adfc2003-09-30 14:15:44 +00001782 VG_(message)(Vg_UserMsg, fmt, "D refs: ",
1783 D_total.a, Dr_total.a, Dw_total.a);
1784 VG_(message)(Vg_UserMsg, fmt, "D1 misses: ",
1785 D_total.m1, Dr_total.m1, Dw_total.m1);
1786 VG_(message)(Vg_UserMsg, fmt, "L2d misses: ",
1787 D_total.m2, Dr_total.m2, Dw_total.m2);
njn4f9c9342002-04-29 16:03:24 +00001788
1789 p = 10;
1790
njn25e49d8e72002-09-23 09:36:25 +00001791 if (0 == D_total.a) D_total.a = 1;
1792 if (0 == Dr_total.a) Dr_total.a = 1;
1793 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001794 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1795 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1796 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1797 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1798
1799 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1800 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1801 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1802 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1803 VG_(message)(Vg_UserMsg, "");
1804
1805 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001806
1807 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1808 L2_total_r = Dr_total.m1 + Ir_total.m1;
1809 L2_total_w = Dw_total.m1;
njn607adfc2003-09-30 14:15:44 +00001810 VG_(message)(Vg_UserMsg, fmt, "L2 refs: ",
1811 L2_total, L2_total_r, L2_total_w);
njn1d021fa2002-05-02 13:56:34 +00001812
njn4f9c9342002-04-29 16:03:24 +00001813 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1814 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1815 L2_total_mw = Dw_total.m2;
njn607adfc2003-09-30 14:15:44 +00001816 VG_(message)(Vg_UserMsg, fmt, "L2 misses: ",
1817 L2_total_m, L2_total_mr, L2_total_mw);
njn4f9c9342002-04-29 16:03:24 +00001818
1819 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1820 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1821 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1822 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1823
1824
1825 /* Hash table stats */
1826 if (VG_(clo_verbosity) > 1) {
1827 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1828 file_line_debug_BBs + no_debug_BBs;
1829
1830 VG_(message)(Vg_DebugMsg, "");
1831 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1832 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1833 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1834 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1835 full_debug_BBs * 100 / BB_lookups,
1836 full_debug_BBs);
1837 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1838 file_line_debug_BBs * 100 / BB_lookups,
1839 file_line_debug_BBs);
1840 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1841 fn_name_debug_BBs * 100 / BB_lookups,
1842 fn_name_debug_BBs);
1843 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1844 no_debug_BBs * 100 / BB_lookups,
1845 no_debug_BBs);
1846 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1847 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1848 }
njn25e49d8e72002-09-23 09:36:25 +00001849 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001850}
1851
sewardj18d75132002-05-16 11:06:21 +00001852
njn4294fd42002-06-05 14:41:10 +00001853/* Called when a translation is invalidated due to self-modifying code or
1854 * unloaded of a shared object.
1855 *
1856 * Finds the BBCC in the table, removes it, adds the counts to the discard
1857 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001858void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001859{
njn4294fd42002-06-05 14:41:10 +00001860 BBCC *BBCC_node;
1861 Addr BBCC_ptr0, BBCC_ptr;
1862 Bool BB_seen_before;
1863
sewardj83205b32002-06-14 11:08:07 +00001864 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001865 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001866
1867 /* 2nd arg won't be used since BB should have been seen before (assertions
1868 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001869 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001870 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1871
njne427a662002-10-02 11:08:25 +00001872 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001873
1874 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1875
1876 /* We pretend the CC is an iCC for getting the tag. This is ok
1877 * because both CC types have tag as their first byte. Once we know
1878 * the type, we can cast and act appropriately. */
1879
1880 switch ( ((iCC*)BBCC_ptr)->tag ) {
1881
njn25e49d8e72002-09-23 09:36:25 +00001882 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001883 ADD_CC_TO(iCC, I, Ir_discards);
1884 BBCC_ptr += sizeof(iCC);
1885 break;
1886
njn25e49d8e72002-09-23 09:36:25 +00001887 case ReadCC:
1888 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001889 ADD_CC_TO(idCC, I, Ir_discards);
1890 ADD_CC_TO(idCC, D, Dr_discards);
1891 BBCC_ptr += sizeof(idCC);
1892 break;
1893
njn25e49d8e72002-09-23 09:36:25 +00001894 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001895 ADD_CC_TO(idCC, I, Ir_discards);
1896 ADD_CC_TO(idCC, D, Dw_discards);
1897 BBCC_ptr += sizeof(idCC);
1898 break;
1899
njn25e49d8e72002-09-23 09:36:25 +00001900 case ReadWriteCC:
1901 ADD_CC_TO(iddCC, I, Ir_discards);
1902 ADD_CC_TO(iddCC, Da, Dr_discards);
1903 ADD_CC_TO(iddCC, Db, Dw_discards);
1904 BBCC_ptr += sizeof(iddCC);
1905 break;
1906
njn4294fd42002-06-05 14:41:10 +00001907 default:
njne427a662002-10-02 11:08:25 +00001908 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001909 break;
1910 }
1911 }
njn25e49d8e72002-09-23 09:36:25 +00001912 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001913}
1914
1915/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001916/*--- Command line processing ---*/
1917/*--------------------------------------------------------------------*/
1918
1919static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1920{
1921 int i1, i2, i3;
1922 int i;
1923 char *opt = VG_(strdup)(orig_opt);
1924
1925 i = i1 = opt_len;
1926
1927 /* Option looks like "--I1=65536,2,64".
1928 * Find commas, replace with NULs to make three independent
1929 * strings, then extract numbers. Yuck. */
1930 while (VG_(isdigit)(opt[i])) i++;
1931 if (',' == opt[i]) {
1932 opt[i++] = '\0';
1933 i2 = i;
1934 } else goto bad;
1935 while (VG_(isdigit)(opt[i])) i++;
1936 if (',' == opt[i]) {
1937 opt[i++] = '\0';
1938 i3 = i;
1939 } else goto bad;
1940 while (VG_(isdigit)(opt[i])) i++;
1941 if ('\0' != opt[i]) goto bad;
1942
1943 cache->size = (Int)VG_(atoll)(opt + i1);
1944 cache->assoc = (Int)VG_(atoll)(opt + i2);
1945 cache->line_size = (Int)VG_(atoll)(opt + i3);
1946
1947 VG_(free)(opt);
1948
1949 return;
1950
1951 bad:
1952 VG_(bad_option)(orig_opt);
1953}
1954
1955Bool SK_(process_cmd_line_option)(Char* arg)
1956{
1957 /* 5 is length of "--I1=" */
njn39c86652003-05-21 10:13:39 +00001958 if (VG_CLO_STREQN(5, arg, "--I1="))
njn25e49d8e72002-09-23 09:36:25 +00001959 parse_cache_opt(&clo_I1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001960 else if (VG_CLO_STREQN(5, arg, "--D1="))
njn25e49d8e72002-09-23 09:36:25 +00001961 parse_cache_opt(&clo_D1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001962 else if (VG_CLO_STREQN(5, arg, "--L2="))
njn25e49d8e72002-09-23 09:36:25 +00001963 parse_cache_opt(&clo_L2_cache, arg, 5);
1964 else
1965 return False;
1966
1967 return True;
1968}
1969
njn3e884182003-04-15 13:03:23 +00001970void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001971{
njn3e884182003-04-15 13:03:23 +00001972 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001973" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1974" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001975" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1976 );
1977}
1978
1979void SK_(print_debug_usage)(void)
1980{
1981 VG_(printf)(
1982" (none)\n"
1983 );
njn25e49d8e72002-09-23 09:36:25 +00001984}
1985
1986/*--------------------------------------------------------------------*/
1987/*--- Setup ---*/
1988/*--------------------------------------------------------------------*/
1989
njn810086f2002-11-14 12:42:47 +00001990void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001991{
njn13f02932003-04-30 20:23:58 +00001992 Char* base_dir = NULL;
njn607adfc2003-09-30 14:15:44 +00001993
njn810086f2002-11-14 12:42:47 +00001994 VG_(details_name) ("Cachegrind");
1995 VG_(details_version) (NULL);
1996 VG_(details_description) ("an I1/D1/L2 cache profiler");
1997 VG_(details_copyright_author)(
njn0e1b5142003-04-15 14:58:06 +00001998 "Copyright (C) 2002-2003, and GNU GPL'd, by Nicholas Nethercote.");
njn810086f2002-11-14 12:42:47 +00001999 VG_(details_bug_reports_to) ("njn25@cam.ac.uk");
sewardj78210aa2002-12-01 02:55:46 +00002000 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00002001
njn810086f2002-11-14 12:42:47 +00002002 VG_(needs_basic_block_discards)();
2003 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00002004
2005 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
2006 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
2007 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
2008 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
2009 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
2010 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00002011
njn99ccf082003-09-30 13:51:23 +00002012 /* Get working directory */
2013 sk_assert( VG_(getcwd_alloc)(&base_dir) );
2014
njn13f02932003-04-30 20:23:58 +00002015 /* Block is big enough for dir name + cachegrind.out.<pid> */
2016 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
2017 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
2018 base_dir, VG_(getpid)());
njn99ccf082003-09-30 13:51:23 +00002019 VG_(free)(base_dir);
njn25e49d8e72002-09-23 09:36:25 +00002020}
2021
2022void SK_(post_clo_init)(void)
2023{
2024 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00002025
2026 initCC(&Ir_total);
2027 initCC(&Dr_total);
2028 initCC(&Dw_total);
2029
2030 initCC(&Ir_discards);
2031 initCC(&Dr_discards);
2032 initCC(&Dw_discards);
2033
2034 get_caches(&I1c, &D1c, &L2c);
2035
2036 cachesim_I1_initcache(I1c);
2037 cachesim_D1_initcache(D1c);
2038 cachesim_L2_initcache(L2c);
2039
2040 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
2041 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
2042 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
2043
2044 init_BBCC_table();
2045}
2046
#if 0
/* Disabled sanity checks, kept for reference only (never compiled). */

/* Nothing cheap to check. */
Bool SK_(cheap_sanity_check)(void) { return True; }

extern TTEntry* vg_tt;

/* Walks the translation table and calls get_BBCC() with remove=True for
 * every entry whose orig_addr is neither 1 nor 3.
 * NOTE(review): 200191 is presumably the size of vg_tt, and 1 and 3
 * presumably mark unused entries -- confirm before re-enabling. */
Bool SK_(expensive_sanity_check)(void)
{
   Int  i;
   Bool dummy;

   for (i = 0; i < 200191; i++) {
      if (vg_tt[i].orig_addr == (Addr)1 || vg_tt[i].orig_addr == (Addr)3)
         continue;

      VG_(printf)(".");
      get_BBCC(vg_tt[i].orig_addr, NULL, /*remove=*/True, &dummy);
   }
   return True;
}
#endif
2066
2067/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002068/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002069/*--------------------------------------------------------------------*/