blob: e378501e0459fbffc94ef78a94722511a7f1ba18 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
njnc9539842002-10-02 13:26:35 +00009 This file is part of Cachegrind, a Valgrind skin for cache
10 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
njn0e1b5142003-04-15 14:58:06 +000012 Copyright (C) 2002-2003 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000013 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
njn27f1a382002-11-08 15:48:16 +000036VG_DETERMINE_INTERFACE_VERSION
37
/* Configuration of one simulated cache.  Declared before the cg_sim_*.c
 * files are included below. */
typedef struct {
   int size;         /* total capacity, in bytes */
   int assoc;        /* associativity */
   int line_size;    /* length of one line, in bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000044
njn25cac76cb2002-09-23 11:21:57 +000045#include "cg_sim_L2.c"
46#include "cg_sim_I1.c"
47#include "cg_sim_D1.c"
njn4f9c9342002-04-29 16:03:24 +000048
njn25e49d8e72002-09-23 09:36:25 +000049/*------------------------------------------------------------*/
50/*--- Constants ---*/
51/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000052
/* Largest x86 instruction length in bytes, according to the IA-32 Intel
 * Architecture Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE   16

/* Data sizes passed to the simulator are clamped to this many bytes. */
#define MIN_LINE_SIZE        16

/* Sizes (in bytes) of the various buffers used for storing strings. */
#define FILENAME_LEN         256
#define FN_NAME_LEN          256
#define BUF_LEN              512
#define COMMIFY_BUF_LEN      128
#define RESULTS_BUF_LEN      128
#define LINE_BUF_LEN         64
65
66/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000067/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000068/*------------------------------------------------------------*/
69
njn25e49d8e72002-09-23 09:36:25 +000070typedef
71 enum {
72 VgpGetBBCC = VgpFini+1,
73 VgpCacheSimulate,
74 VgpCacheResults
75 }
76 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000077
njn4f9c9342002-04-29 16:03:24 +000078/*------------------------------------------------------------*/
79/*--- Output file related stuff ---*/
80/*------------------------------------------------------------*/
81
njn13f02932003-04-30 20:23:58 +000082static Char* cachegrind_out_file;
njn4f9c9342002-04-29 16:03:24 +000083
sewardj0744b6c2002-12-11 00:45:42 +000084static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000085{
86 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000087 "error: can't open cache simulation output file `%s'",
88 cachegrind_out_file );
89 VG_(message)(Vg_UserMsg,
90 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000091}
92
93/*------------------------------------------------------------*/
94/*--- Cost center types, operations ---*/
95/*------------------------------------------------------------*/
96
97typedef struct _CC CC;
98struct _CC {
99 ULong a;
100 ULong m1;
101 ULong m2;
102};
103
104static __inline__ void initCC(CC* cc) {
105 cc->a = 0;
106 cc->m1 = 0;
107 cc->m2 = 0;
108}
109
/* Tag values stored in the first byte of every instruction-level cost
 * centre, identifying which struct layout follows. */
typedef enum {
   InstrCC     = 0,   /* eg. mov %eax,   %ebx                           */
   ReadCC      = 1,   /* eg. mov (%ecx), %esi                           */
   WriteCC     = 2,   /* eg. mov %eax,   (%edx)                         */
   ModCC       = 3,   /* eg. incl (%eax)  (read+write one addr)         */
   ReadWriteCC = 4    /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                         (read+write two different addrs)               */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000119
njn7e1b3b22003-07-04 11:44:39 +0000120/* Instruction-level cost-centres.
njn4f9c9342002-04-29 16:03:24 +0000121 *
122 * WARNING: the 'tag' field *must* be the first byte of both CC types.
njn4f9c9342002-04-29 16:03:24 +0000123 *
njne0ee0712002-05-03 16:41:05 +0000124 * This is because we use it to work out what kind of CC we're dealing with.
njn4f9c9342002-04-29 16:03:24 +0000125 */
njn25e49d8e72002-09-23 09:36:25 +0000126typedef
127 struct {
128 /* word 1 */
129 UChar tag;
130 UChar instr_size;
131 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000132
njn25e49d8e72002-09-23 09:36:25 +0000133 /* words 2+ */
134 Addr instr_addr;
135 CC I;
136 }
137 iCC;
njn4f9c9342002-04-29 16:03:24 +0000138
njn25e49d8e72002-09-23 09:36:25 +0000139typedef
140 struct _idCC {
141 /* word 1 */
142 UChar tag;
143 UChar instr_size;
144 UChar data_size;
145 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000146
njn25e49d8e72002-09-23 09:36:25 +0000147 /* words 2+ */
148 Addr instr_addr;
149 CC I;
150 CC D;
151 }
152 idCC;
153
154typedef
155 struct _iddCC {
156 /* word 1 */
157 UChar tag;
158 UChar instr_size;
159 UChar data_size;
160 /* 1 byte padding */
161
162 /* words 2+ */
163 Addr instr_addr;
164 CC I;
165 CC Da;
166 CC Db;
167 }
168 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000169
170static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
171{
njn25e49d8e72002-09-23 09:36:25 +0000172 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000173 cc->instr_size = instr_size;
174 cc->instr_addr = instr_addr;
175 initCC(&cc->I);
176}
177
178static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
179 UInt instr_size, UInt data_size)
180{
181 cc->tag = X_CC;
182 cc->instr_size = instr_size;
183 cc->data_size = data_size;
184 cc->instr_addr = instr_addr;
185 initCC(&cc->I);
186 initCC(&cc->D);
187}
188
njn25e49d8e72002-09-23 09:36:25 +0000189static void init_iddCC(iddCC* cc, Addr instr_addr,
190 UInt instr_size, UInt data_size)
191{
192 cc->tag = ReadWriteCC;
193 cc->instr_size = instr_size;
194 cc->data_size = data_size;
195 cc->instr_addr = instr_addr;
196 initCC(&cc->I);
197 initCC(&cc->Da);
198 initCC(&cc->Db);
199}
200
/* Add the three counters of member 'cc' of the cost centre at BBCC_ptr
 * (viewed as a 'cc_struct', eg. iCC/idCC/iddCC) into 'total'.
 *
 * A variable named BBCC_ptr must be in scope at the point of use.
 * The expansion is three full statements including the final ';' (kept
 * for compatibility with existing uses), so it must not be used as the
 * unbraced body of an if/else/loop.  'total' is parenthesised so that
 * any lvalue expression may be passed. */
#define ADD_CC_TO(cc_struct, cc, total)                    \
   (total).a  += ((cc_struct*)BBCC_ptr)->cc.a;             \
   (total).m1 += ((cc_struct*)BBCC_ptr)->cc.m1;            \
   (total).m2 += ((cc_struct*)BBCC_ptr)->cc.m2;
205
/* Debugging switch for the sprint_*CC functions: when set to 1, each
 * instruction's address is appended as a "# <hex addr>" comment after its
 * counts in cachegrind.out. */
#define PRINT_INSTR_ADDRS 0
209
njne0ee0712002-05-03 16:41:05 +0000210static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000211{
njn95114da2002-06-05 09:39:31 +0000212#if PRINT_INSTR_ADDRS
213 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
214 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
215#else
njne0ee0712002-05-03 16:41:05 +0000216 VG_(sprintf)(buf, "%llu %llu %llu\n",
217 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000218#endif
njn4f9c9342002-04-29 16:03:24 +0000219}
220
njne0ee0712002-05-03 16:41:05 +0000221static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000222{
njn95114da2002-06-05 09:39:31 +0000223#if PRINT_INSTR_ADDRS
224 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
225 cc->I.a, cc->I.m1, cc->I.m2,
226 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
227#else
njne0ee0712002-05-03 16:41:05 +0000228 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
229 cc->I.a, cc->I.m1, cc->I.m2,
230 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000231#endif
njn4f9c9342002-04-29 16:03:24 +0000232}
233
njne0ee0712002-05-03 16:41:05 +0000234static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000235{
njn95114da2002-06-05 09:39:31 +0000236#if PRINT_INSTR_ADDRS
237 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
238 cc->I.a, cc->I.m1, cc->I.m2,
239 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
240#else
njne0ee0712002-05-03 16:41:05 +0000241 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
242 cc->I.a, cc->I.m1, cc->I.m2,
243 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000244#endif
njn4f9c9342002-04-29 16:03:24 +0000245}
246
njn25e49d8e72002-09-23 09:36:25 +0000247static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
248{
249#if PRINT_INSTR_ADDRS
250 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
251 cc->I.a, cc->I.m1, cc->I.m2,
252 cc->Da.a, cc->Da.m1, cc->Da.m2,
253 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
254#else
255 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
256 cc->I.a, cc->I.m1, cc->I.m2,
257 cc->Da.a, cc->Da.m1, cc->Da.m2,
258 cc->Db.a, cc->Db.m1, cc->Db.m2);
259#endif
260}
261
262
njn4f9c9342002-04-29 16:03:24 +0000263/*------------------------------------------------------------*/
264/*--- BBCC hash table stuff ---*/
265/*------------------------------------------------------------*/
266
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The bucket
 * counts below work fairly well for Konqueror. */

#define N_FILE_ENTRIES   251   /* buckets in the top-level (filename) table */
#define N_FN_ENTRIES     53    /* buckets in each per-file (fn_name) table  */
#define N_BBCC_ENTRIES   37    /* buckets in each per-function BBCC table   */
274
275/* The cost centres for a basic block are stored in a contiguous array.
276 * They are distinguishable by their tag field. */
277typedef struct _BBCC BBCC;
278struct _BBCC {
279 Addr orig_addr;
280 UInt array_size; /* byte-size of variable length array */
281 BBCC* next;
282 Addr array[0]; /* variable length array */
283};
284
285typedef struct _fn_node fn_node;
286struct _fn_node {
287 Char* fn_name;
288 BBCC* BBCCs[N_BBCC_ENTRIES];
289 fn_node* next;
290};
291
292typedef struct _file_node file_node;
293struct _file_node {
294 Char* filename;
295 fn_node* fns[N_FN_ENTRIES];
296 file_node* next;
297};
298
299/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000300static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000301
sewardj4f29ddf2002-05-03 22:29:04 +0000302static Int distinct_files = 0;
303static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000304
sewardj4f29ddf2002-05-03 22:29:04 +0000305static Int distinct_instrs = 0;
306static Int full_debug_BBs = 0;
307static Int file_line_debug_BBs = 0;
308static Int fn_name_debug_BBs = 0;
309static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000310
sewardj4f29ddf2002-05-03 22:29:04 +0000311static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000312
njn4294fd42002-06-05 14:41:10 +0000313static CC Ir_discards;
314static CC Dr_discards;
315static CC Dw_discards;
316
njn4f9c9342002-04-29 16:03:24 +0000317static void init_BBCC_table()
318{
319 Int i;
320 for (i = 0; i < N_FILE_ENTRIES; i++)
321 BBCC_table[i] = NULL;
322}
323
njne0ee0712002-05-03 16:41:05 +0000324static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
325 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000326{
njn25e49d8e72002-09-23 09:36:25 +0000327 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000328
njn25e49d8e72002-09-23 09:36:25 +0000329 found1 = VG_(get_filename_linenum)(instr_addr, filename,
330 FILENAME_LEN, line_num);
331 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000332
333 if (!found1 && !found2) {
334 no_debug_BBs++;
335 VG_(strcpy)(filename, "???");
336 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000337 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000338
339 } else if ( found1 && found2) {
340 full_debug_BBs++;
341
342 } else if ( found1 && !found2) {
343 file_line_debug_BBs++;
344 VG_(strcpy)(fn_name, "???");
345
346 } else /*(!found1 && found2)*/ {
347 fn_name_debug_BBs++;
348 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000349 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000350 }
351}
352
353/* Forward declaration. */
354static Int compute_BBCC_array_size(UCodeBlock* cb);
355
356static __inline__
357file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
358{
359 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000360 file_node* new = VG_(malloc)(sizeof(file_node));
361 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000362 for (i = 0; i < N_FN_ENTRIES; i++) {
363 new->fns[i] = NULL;
364 }
365 new->next = next;
366 return new;
367}
368
369static __inline__
370fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
371{
372 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000373 fn_node* new = VG_(malloc)(sizeof(fn_node));
374 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000375 for (i = 0; i < N_BBCC_ENTRIES; i++) {
376 new->BBCCs[i] = NULL;
377 }
378 new->next = next;
379 return new;
380}
381
382static __inline__
383BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
384{
385 Int BBCC_array_size = compute_BBCC_array_size(cb);
386 BBCC* new;
387
njn25e49d8e72002-09-23 09:36:25 +0000388 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000389 new->orig_addr = bb_orig_addr;
390 new->array_size = BBCC_array_size;
391 new->next = next;
392
393 return new;
394}
395
396#define HASH_CONSTANT 256
397
398static UInt hash(Char *s, UInt table_size)
399{
400 int hash_value = 0;
401 for ( ; *s; s++)
402 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
403 return hash_value;
404}
405
406/* Do a three step traversal: by filename, then fn_name, then instr_addr.
407 * In all cases prepends new nodes to their chain. Returns a pointer to the
408 * cost centre. Also sets BB_seen_before by reference.
409 */
410static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000411 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000412{
413 file_node *curr_file_node;
414 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000415 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000416 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
417 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000418 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000419
njne0ee0712002-05-03 16:41:05 +0000420 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000421
njn25e49d8e72002-09-23 09:36:25 +0000422 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000423 filename_hash = hash(filename, N_FILE_ENTRIES);
424 curr_file_node = BBCC_table[filename_hash];
425 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000426 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000427 curr_file_node = curr_file_node->next;
428 }
429 if (NULL == curr_file_node) {
430 BBCC_table[filename_hash] = curr_file_node =
431 new_file_node(filename, BBCC_table[filename_hash]);
432 distinct_files++;
433 }
434
435 fnname_hash = hash(fn_name, N_FN_ENTRIES);
436 curr_fn_node = curr_file_node->fns[fnname_hash];
437 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000438 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000439 curr_fn_node = curr_fn_node->next;
440 }
441 if (NULL == curr_fn_node) {
442 curr_file_node->fns[fnname_hash] = curr_fn_node =
443 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
444 distinct_fns++;
445 }
446
447 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000448 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000449 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
450 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000451 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000452 curr_BBCC = curr_BBCC->next;
453 }
454 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000455
njne427a662002-10-02 11:08:25 +0000456 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000457
njn4f9c9342002-04-29 16:03:24 +0000458 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
459 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
460 *BB_seen_before = False;
461
462 } else {
njne427a662002-10-02 11:08:25 +0000463 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
464 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000465 if (VG_(clo_verbosity) > 2) {
466 VG_(message)(Vg_DebugMsg,
467 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000468 }
469 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000470
471 if (True == remove) {
472 // Remove curr_BBCC from chain; it will be used and free'd by the
473 // caller.
474 *prev_BBCC_next_ptr = curr_BBCC->next;
475
476 } else {
477 BB_retranslations++;
478 }
njn4f9c9342002-04-29 16:03:24 +0000479 }
njn25e49d8e72002-09-23 09:36:25 +0000480 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000481 return curr_BBCC;
482}
483
484/*------------------------------------------------------------*/
485/*--- Cache simulation instrumentation phase ---*/
486/*------------------------------------------------------------*/
487
njn4f9c9342002-04-29 16:03:24 +0000488static Int compute_BBCC_array_size(UCodeBlock* cb)
489{
490 UInstr* u_in;
491 Int i, CC_size, BBCC_size = 0;
492 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000493 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000494
495 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000496 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000497
njn810086f2002-11-14 12:42:47 +0000498 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
499 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000500 switch(u_in->opcode) {
501
502 case INCEIP:
503 goto case_for_end_of_instr;
504
505 case JMP:
506 if (u_in->cond != CondAlways) break;
507
508 goto case_for_end_of_instr;
509
510 case_for_end_of_instr:
511
njn25e49d8e72002-09-23 09:36:25 +0000512 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
513 t_read != t_write)
514 CC_size = sizeof(iddCC);
515 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
516 CC_size = sizeof(idCC);
517 else
518 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000519
520 BBCC_size += CC_size;
521 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
522 break;
523
524 case LOAD:
525 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000526 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000527 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000528 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000529 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000530 is_LOAD = True;
531 break;
532
533 case STORE:
534 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000535 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000536 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000537 is_STORE = True;
538 break;
539
sewardj3949d102003-03-28 17:21:29 +0000540 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000541 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000542 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000543 case FPU_R:
njne427a662002-10-02 11:08:25 +0000544 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000545 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000546 is_FPU_R = True;
547 break;
548
sewardj3949d102003-03-28 17:21:29 +0000549 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000550 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000551 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000552 case FPU_W:
njne427a662002-10-02 11:08:25 +0000553 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000554 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000555 is_FPU_W = True;
556 break;
557
558 default:
559 break;
560 }
561 }
562
563 return BBCC_size;
564}
565
njn25e49d8e72002-09-23 09:36:25 +0000566static __attribute__ ((regparm (1)))
567void log_1I_0D_cache_access(iCC* cc)
568{
569 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
570 // cc, cc->instr_addr, cc->instr_size)
571 VGP_PUSHCC(VgpCacheSimulate);
572 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
573 cc->I.a++;
574 VGP_POPCC(VgpCacheSimulate);
575}
576
577/* Difference between this function and log_1I_0D_cache_access() is that
578 this one can be passed any kind of CC, not just an iCC. So we have to
579 be careful to make sure we don't make any assumptions about CC layout.
580 (As it stands, they would be safe, but this will avoid potential heartache
581 if anyone else changes CC layout.)
582 Note that we only do the switch for the JIFZ version because if we always
583 called this switching version, things would run about 5% slower. */
584static __attribute__ ((regparm (1)))
585void log_1I_0D_cache_access_JIFZ(iCC* cc)
586{
587 UChar instr_size;
588 Addr instr_addr;
589 CC* I;
590
591 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
592 // cc, cc->instr_addr, cc->instr_size)
593 VGP_PUSHCC(VgpCacheSimulate);
594
595 switch(cc->tag) {
596 case InstrCC:
597 instr_size = cc->instr_size;
598 instr_addr = cc->instr_addr;
599 I = &(cc->I);
600 break;
601 case ReadCC:
602 case WriteCC:
603 case ModCC:
604 instr_size = ((idCC*)cc)->instr_size;
605 instr_addr = ((idCC*)cc)->instr_addr;
606 I = &( ((idCC*)cc)->I );
607 break;
608 case ReadWriteCC:
609 instr_size = ((iddCC*)cc)->instr_size;
610 instr_addr = ((iddCC*)cc)->instr_addr;
611 I = &( ((iddCC*)cc)->I );
612 break;
613 default:
njne427a662002-10-02 11:08:25 +0000614 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000615 break;
616 }
617 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
618 I->a++;
619 VGP_POPCC(VgpCacheSimulate);
620}
621
622__attribute__ ((regparm (2))) static
623void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
624{
625 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
626 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
627 VGP_PUSHCC(VgpCacheSimulate);
628 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
629 cc->D.a++;
630 VGP_POPCC(VgpCacheSimulate);
631}
632
633__attribute__ ((regparm (2))) static
634void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
635{
636 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
637 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
638 VGP_PUSHCC(VgpCacheSimulate);
639 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
640 cc->I.a++;
641
642 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
643 cc->D.a++;
644 VGP_POPCC(VgpCacheSimulate);
645}
646
647__attribute__ ((regparm (3))) static
648void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
649{
650 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
651 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
652 VGP_PUSHCC(VgpCacheSimulate);
653 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
654 cc->Da.a++;
655 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
656 cc->Db.a++;
657 VGP_POPCC(VgpCacheSimulate);
658}
659
660__attribute__ ((regparm (3))) static
661void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
662{
663 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
664 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
665 VGP_PUSHCC(VgpCacheSimulate);
666 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
667 cc->I.a++;
668
669 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
670 cc->Da.a++;
671 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
672 cc->Db.a++;
673 VGP_POPCC(VgpCacheSimulate);
674}
675
676UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
677{
678/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000679#define INVALID_DATA_SIZE 999999
680
njn4f9c9342002-04-29 16:03:24 +0000681 UCodeBlock* cb;
682 Int i;
683 UInstr* u_in;
684 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000685 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
686 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000687 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000688 Addr x86_instr_addr = orig_addr;
689 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
690 Addr helper;
691 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000692 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000693 Bool BB_seen_before = False;
694 Bool instrumented_Jcond = False;
695 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000696 Addr BBCC_ptr0, BBCC_ptr;
697
698 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
699 * if it's the first time it's been seen), and point to start of the
700 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000701 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000702 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
703
njn810086f2002-11-14 12:42:47 +0000704 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000705
njn25e49d8e72002-09-23 09:36:25 +0000706 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
707 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000708
njn810086f2002-11-14 12:42:47 +0000709 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
710 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000711
njn4f9c9342002-04-29 16:03:24 +0000712 /* What this is all about: we want to instrument each x86 instruction
713 * translation. The end of these are marked in three ways. The three
714 * ways, and the way we instrument them, are as follows:
715 *
716 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
717 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
718 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
719 *
njn25e49d8e72002-09-23 09:36:25 +0000720 * The last UInstr in a basic block is always a Juncond. Jconds,
721 * when they appear, are always second last. We check this with
722 * various assertions.
723 *
724 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000725 * executed. We don't have to put the instrumentation before the INCEIP
726 * (it could go after) but we do so for consistency.
727 *
njn25e49d8e72002-09-23 09:36:25 +0000728 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
729 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000730 *
njn25e49d8e72002-09-23 09:36:25 +0000731 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000732 *
733 * The instrumentation is just a call to the appropriate helper function,
734 * passing it the address of the instruction's CC.
735 */
njne427a662002-10-02 11:08:25 +0000736 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000737
738 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000739 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000740 break;
741
njn4f9c9342002-04-29 16:03:24 +0000742 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000743 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000744 */
745 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000746 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000747 t_read_addr = newTemp(cb);
748 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
749 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000750 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000751 break;
752
sewardj3949d102003-03-28 17:21:29 +0000753 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000754 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000755 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000756 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000757 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000758 t_read_addr = newTemp(cb);
759 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000760 data_size = ( u_in->size <= MIN_LINE_SIZE
761 ? u_in->size
762 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000763 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000764 break;
765
766 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000767 * That's how the code above determines whether it does a write.
768 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000769 * As for the MOV, if it's a mod instruction it's redundant, but it's
770 * not expensive and mod instructions are rare anyway. */
sewardj3949d102003-03-28 17:21:29 +0000771 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000772 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000773 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000774 case STORE:
775 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000776 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000777 t_write_addr = newTemp(cb);
778 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000779 /* 28 and 108 B data-sized instructions will be done
780 * inaccurately but they're very rare and this avoids errors
781 * from hitting more than two cache lines in the simulation. */
782 data_size = ( u_in->size <= MIN_LINE_SIZE
783 ? u_in->size
784 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000785 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000786 break;
787
njn25e49d8e72002-09-23 09:36:25 +0000788
789 /* For rep-prefixed instructions, log a single I-cache access
790 * before the UCode loop that implements the repeated part, which
791 * is where the multiple D-cache accesses are logged. */
792 case JIFZ:
793 has_rep_prefix = True;
794
795 /* Setup 1st and only arg: CC addr */
796 t_CC_addr = newTemp(cb);
797 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
798 uLiteral(cb, BBCC_ptr);
799
800 /* Call helper */
801 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
802 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000803 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000804 break;
805
806
807 /* INCEIP: insert instrumentation */
808 case INCEIP:
809 x86_instr_size = u_in->val1;
810 goto instrument_x86_instr;
811
812 /* JMP: insert instrumentation if the first JMP */
813 case JMP:
814 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000815 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000816 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000817 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000818 instrumented_Jcond = False; /* reset */
819 break;
820 }
821 /* The first JMP... instrument. */
822 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000823 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000824 instrumented_Jcond = True;
825 } else {
njn810086f2002-11-14 12:42:47 +0000826 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000827 }
828
829 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +0000830 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
831
njn25e49d8e72002-09-23 09:36:25 +0000832 goto instrument_x86_instr;
833
834
835 /* Code executed at the end of each x86 instruction. */
836 instrument_x86_instr:
837
838 /* Initialise the CC in the BBCC array appropriately if it
839 * hasn't been initialised before. Then call appropriate sim
840 * function, passing it the CC address. */
841 stack_used = 0;
842
njne427a662002-10-02 11:08:25 +0000843 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000844 x86_instr_size <= MAX_x86_INSTR_SIZE);
845
846#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
847
848 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000849 sk_assert(INVALID_DATA_SIZE == data_size);
850 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000851 INVALID_TEMPREG == t_read &&
852 INVALID_TEMPREG == t_write_addr &&
853 INVALID_TEMPREG == t_write);
854 CC_size = sizeof(iCC);
855 if (!BB_seen_before)
856 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
857 helper = ( has_rep_prefix
858 ? (Addr)0 /* no extra log needed */
859 : (Addr) & log_1I_0D_cache_access
860 );
861 argc = 1;
862
863 } else {
njne427a662002-10-02 11:08:25 +0000864 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000865 8 == data_size || 10 == data_size ||
866 MIN_LINE_SIZE == data_size);
867
868 if (IS_(read) && !IS_(write)) {
869 CC_size = sizeof(idCC);
870 /* If it uses 'rep', we've already logged the I-cache
871 * access at the JIFZ UInstr (see JIFZ case below) so
872 * don't do it here */
873 helper = ( has_rep_prefix
874 ? (Addr) & log_0I_1D_cache_access
875 : (Addr) & log_1I_1D_cache_access
876 );
877 argc = 2;
878 if (!BB_seen_before)
879 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
880 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000881 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000882 INVALID_TEMPREG != t_read &&
883 INVALID_TEMPREG == t_write_addr &&
884 INVALID_TEMPREG == t_write);
885 t_data_addr1 = t_read_addr;
886
887 } else if (!IS_(read) && IS_(write)) {
888 CC_size = sizeof(idCC);
889 helper = ( has_rep_prefix
890 ? (Addr) & log_0I_1D_cache_access
891 : (Addr) & log_1I_1D_cache_access
892 );
893 argc = 2;
894 if (!BB_seen_before)
895 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
896 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000897 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000898 INVALID_TEMPREG == t_read &&
899 INVALID_TEMPREG != t_write_addr &&
900 INVALID_TEMPREG != t_write);
901 t_data_addr1 = t_write_addr;
902
903 } else {
njne427a662002-10-02 11:08:25 +0000904 sk_assert(IS_(read) && IS_(write));
905 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000906 INVALID_TEMPREG != t_read &&
907 INVALID_TEMPREG != t_write_addr &&
908 INVALID_TEMPREG != t_write);
909 if (t_read == t_write) {
910 CC_size = sizeof(idCC);
911 helper = ( has_rep_prefix
912 ? (Addr) & log_0I_1D_cache_access
913 : (Addr) & log_1I_1D_cache_access
914 );
915 argc = 2;
916 if (!BB_seen_before)
917 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
918 x86_instr_size, data_size);
919 t_data_addr1 = t_read_addr;
920 } else {
921 CC_size = sizeof(iddCC);
922 helper = ( has_rep_prefix
923 ? (Addr) & log_0I_2D_cache_access
924 : (Addr) & log_1I_2D_cache_access
925 );
926 argc = 3;
927 if (!BB_seen_before)
928 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
929 x86_instr_size, data_size);
930 t_data_addr1 = t_read_addr;
931 t_data_addr2 = t_write_addr;
932 }
933 }
934#undef IS_
935 }
936
937 /* Call the helper, if necessary */
938 if ((Addr)0 != helper) {
939
940 /* Setup 1st arg: CC addr */
941 t_CC_addr = newTemp(cb);
942 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
943 uLiteral(cb, BBCC_ptr);
944
945 /* Call the helper */
946 if (1 == argc)
947 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
948 else if (2 == argc)
949 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
950 TempReg, t_data_addr1);
951 else if (3 == argc)
952 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
953 TempReg, t_data_addr1,
954 TempReg, t_data_addr2);
955 else
njne427a662002-10-02 11:08:25 +0000956 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +0000957
958 uCCall(cb, helper, argc, argc, False);
959 }
960
961 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +0000962 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000963
964 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
965 BBCC_ptr += CC_size;
966 x86_instr_addr += x86_instr_size;
967 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
968 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
969 data_size = INVALID_DATA_SIZE;
970 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000971 break;
972
973 default:
njn4ba5a792002-09-30 10:23:54 +0000974 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000975 break;
976 }
977 }
978
979 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +0000980 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +0000981
njn4ba5a792002-09-30 10:23:54 +0000982 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000983 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000984
985#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000986}
987
988/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000989/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000990/*------------------------------------------------------------*/
991
992/* Total reads/writes/misses. Calculated during CC traversal at the end. */
993static CC Ir_total;
994static CC Dr_total;
995static CC Dw_total;
996
njn25e49d8e72002-09-23 09:36:25 +0000997#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
998
999static cache_t clo_I1_cache = UNDEFINED_CACHE;
1000static cache_t clo_D1_cache = UNDEFINED_CACHE;
1001static cache_t clo_L2_cache = UNDEFINED_CACHE;
1002
njn7cf0bd32002-06-08 13:36:03 +00001003/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
1004/* Probably only works for Intel and AMD chips, and probably only for some of
1005 * them.
1006 */
1007
sewardj05bcdcb2003-05-18 10:05:38 +00001008static __inline__ void cpuid(Int n, UInt *a, UInt *b, UInt *c, UInt *d)
njn7cf0bd32002-06-08 13:36:03 +00001009{
1010 __asm__ __volatile__ (
1011 "cpuid"
1012 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1013 : "0" (n) /* input */
1014 );
1015}
1016
sewardj07133bf2002-06-13 10:25:56 +00001017static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001018{
1019 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001020 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001021 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001022 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001023 " Simulating a %d KB cache with %d B lines",
1024 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001025}
1026
1027/* Intel method is truly wretched. We have to do an insane indexing into an
1028 * array of pre-defined configurations for various parts of the memory
1029 * hierarchy.
1030 */
1031static
sewardj07133bf2002-06-13 10:25:56 +00001032Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001033{
sewardj07133bf2002-06-13 10:25:56 +00001034 UChar info[16];
1035 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001036 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001037
1038 if (level < 2) {
1039 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001040 "warning: CPUID level < 2 for Intel processor (%d)",
1041 level);
njn7cf0bd32002-06-08 13:36:03 +00001042 return -1;
1043 }
1044
sewardj07133bf2002-06-13 10:25:56 +00001045 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1046 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001047 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1048 info[0] = 0x0; /* reset AL */
1049
1050 if (0 != trials) {
1051 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001052 "warning: non-zero CPUID trials for Intel processor (%d)",
1053 trials);
njn7cf0bd32002-06-08 13:36:03 +00001054 return -1;
1055 }
1056
1057 for (i = 0; i < 16; i++) {
1058
1059 switch (info[i]) {
1060
1061 case 0x0: /* ignore zeros */
1062 break;
1063
njn25e49d8e72002-09-23 09:36:25 +00001064 /* TLB info, ignore */
1065 case 0x01: case 0x02: case 0x03: case 0x04:
1066 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njn7cf0bd32002-06-08 13:36:03 +00001067 break;
1068
1069 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1070 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
1071
1072 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1073 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
1074
njn25e49d8e72002-09-23 09:36:25 +00001075 /* IA-64 info -- panic! */
1076 case 0x10: case 0x15: case 0x1a:
1077 case 0x88: case 0x89: case 0x8a: case 0x8d:
1078 case 0x90: case 0x96: case 0x9b:
1079 VG_(message)(Vg_DebugMsg,
1080 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001081 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001082
njn7cf0bd32002-06-08 13:36:03 +00001083 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001084 VG_(message)(Vg_DebugMsg,
1085 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001086 break;
1087
njn25e49d8e72002-09-23 09:36:25 +00001088 /* These are sectored, whatever that means */
1089 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1090 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1091
1092 /* If a P6 core, this means "no L2 cache".
1093 If a P4 core, this means "no L3 cache".
1094 We don't know what core it is, so don't issue a warning. To detect
1095 a missing L2 cache, we use 'L2_found'. */
1096 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001097 break;
1098
njn25e49d8e72002-09-23 09:36:25 +00001099 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1100 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1101 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1102 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1103 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001104
1105 /* These are sectored, whatever that means */
1106 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1107 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1108 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1109
1110 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1111 * conversion to byte size is a total guess; treat the 12K and 16K
1112 * cases the same since the cache byte size must be a power of two for
1113 * everything to work!. Also guessing 32 bytes for the line size...
1114 */
1115 case 0x70: /* 12K micro-ops, 8-way */
1116 *I1c = (cache_t) { 16, 8, 32 };
1117 micro_ops_warn(12, 16, 32);
1118 break;
1119 case 0x71: /* 16K micro-ops, 8-way */
1120 *I1c = (cache_t) { 16, 8, 32 };
1121 micro_ops_warn(16, 16, 32);
1122 break;
1123 case 0x72: /* 32K micro-ops, 8-way */
1124 *I1c = (cache_t) { 32, 8, 32 };
1125 micro_ops_warn(32, 32, 32);
1126 break;
1127
njn25e49d8e72002-09-23 09:36:25 +00001128 /* These are sectored, whatever that means */
1129 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1130 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1131 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1132 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1133 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001134
njn25e49d8e72002-09-23 09:36:25 +00001135 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1136 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1137 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1138 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1139 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001140
1141 default:
1142 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001143 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001144 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001145 break;
1146 }
1147 }
njn25e49d8e72002-09-23 09:36:25 +00001148
1149 if (!L2_found)
1150 VG_(message)(Vg_DebugMsg,
1151 "warning: L2 cache not installed, ignore L2 results.");
1152
njn7cf0bd32002-06-08 13:36:03 +00001153 return 0;
1154}
1155
1156/* AMD method is straightforward, just extract appropriate bits from the
1157 * result registers.
1158 *
1159 * Bits, for D1 and I1:
1160 * 31..24 data L1 cache size in KBs
1161 * 23..16 data L1 cache associativity (FFh=full)
1162 * 15.. 8 data L1 cache lines per tag
1163 * 7.. 0 data L1 cache line size in bytes
1164 *
1165 * Bits, for L2:
1166 * 31..16 unified L2 cache size in KBs
1167 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1168 * 11.. 8 unified L2 cache lines per tag
1169 * 7.. 0 unified L2 cache line size in bytes
1170 *
1171 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1172 * upon this information. (Whatever that means -- njn)
1173 *
njn25e49d8e72002-09-23 09:36:25 +00001174 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1175 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1176 * so we detect that.
1177 *
njn7cf0bd32002-06-08 13:36:03 +00001178 * Returns 0 on success, non-zero on failure.
1179 */
sewardj07133bf2002-06-13 10:25:56 +00001180static
1181Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001182{
sewardj05bcdcb2003-05-18 10:05:38 +00001183 UInt ext_level;
1184 Int dummy, model;
sewardj07133bf2002-06-13 10:25:56 +00001185 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001186
1187 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1188
1189 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1190 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001191 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1192 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001193 return -1;
1194 }
1195
1196 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1197 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1198
njn25e49d8e72002-09-23 09:36:25 +00001199 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1200 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1201
1202 /* Check for Duron bug */
1203 if (model == 0x630) {
1204 VG_(message)(Vg_UserMsg,
1205 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1206 L2i = (64 << 16) | (L2i & 0xffff);
1207 }
1208
njn7cf0bd32002-06-08 13:36:03 +00001209 D1c->size = (D1i >> 24) & 0xff;
1210 D1c->assoc = (D1i >> 16) & 0xff;
1211 D1c->line_size = (D1i >> 0) & 0xff;
1212
1213 I1c->size = (I1i >> 24) & 0xff;
1214 I1c->assoc = (I1i >> 16) & 0xff;
1215 I1c->line_size = (I1i >> 0) & 0xff;
1216
1217 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1218 L2c->assoc = (L2i >> 12) & 0xf;
1219 L2c->line_size = (L2i >> 0) & 0xff;
1220
1221 return 0;
1222}
1223
/* Jump buffer armed by get_caches_from_CPUID() just before it tries to
 * execute CPUID. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler used while probing for CPUID: control is transferred back
 * to the __builtin_setjmp() point on cpuid_jmpbuf, which then sees a
 * non-zero result.  'signum' is not used. */
static
void cpuid_SIGILL_handler(int signum)
{
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1231
1232static
sewardj07133bf2002-06-13 10:25:56 +00001233Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001234{
sewardj07133bf2002-06-13 10:25:56 +00001235 Int level, res, ret;
1236 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001237 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001238
1239 /* Install own SIGILL handler */
1240 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1241 sigill_new.ksa_flags = 0;
1242 sigill_new.ksa_restorer = NULL;
1243 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001244 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001245
1246 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001247 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001248
1249 /* Trap for illegal instruction, in case it's a really old processor that
1250 * doesn't support CPUID. */
1251 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1252 cpuid(0, &level, (int*)&vendor_id[0],
1253 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1254 vendor_id[12] = '\0';
1255
1256 /* Restore old SIGILL handler */
1257 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001258 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001259
1260 } else {
1261 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1262
1263 /* Restore old SIGILL handler */
1264 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001265 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001266 return -1;
1267 }
1268
1269 if (0 == level) {
1270 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1271 return -1;
1272 }
1273
1274 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1275 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1276 ret = Intel_cache_info(level, I1c, D1c, L2c);
1277
1278 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1279 ret = AMD_cache_info(I1c, D1c, L2c);
1280
1281 } else {
1282 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1283 vendor_id);
1284 return -1;
1285 }
1286
1287 /* Successful! Convert sizes from KB to bytes */
1288 I1c->size *= 1024;
1289 D1c->size *= 1024;
1290 L2c->size *= 1024;
1291
1292 return ret;
1293}
1294
1295/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001296static
1297void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001298{
1299 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001300 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001301 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001302 "warning: %s size of %dB not a power of two; "
1303 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001304 cache->size = dflt->size;
1305 }
1306
sewardj07133bf2002-06-13 10:25:56 +00001307 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001308 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001309 "warning: %s associativity of %d not a power of two; "
1310 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001311 cache->assoc = dflt->assoc;
1312 }
1313
sewardj07133bf2002-06-13 10:25:56 +00001314 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001315 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001316 "warning: %s line size of %dB not a power of two; "
1317 "defaulting to %dB",
1318 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001319 cache->line_size = dflt->line_size;
1320 }
1321
1322 /* Then check line size >= 16 -- any smaller and a single instruction could
1323 * straddle three cache lines, which breaks a simulation assertion and is
1324 * stupid anyway. */
1325 if (cache->line_size < MIN_LINE_SIZE) {
1326 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001327 "warning: %s line size of %dB too small; "
1328 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001329 cache->line_size = MIN_LINE_SIZE;
1330 }
1331
1332 /* Then check cache size > line size (causes seg faults if not). */
1333 if (cache->size <= cache->line_size) {
1334 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001335 "warning: %s cache size of %dB <= line size of %dB; "
1336 "increasing to %dB", name, cache->size, cache->line_size,
1337 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001338 cache->size = cache->line_size * 2;
1339 }
1340
1341 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1342 if (cache->assoc > (cache->size / cache->line_size)) {
1343 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001344 "warning: %s associativity > (size / line size); "
1345 "increasing size to %dB",
1346 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001347 cache->size = cache->assoc * cache->line_size;
1348 }
1349}
1350
1351/* On entry, args are undefined. Fill them with any info from the
1352 * command-line, then fill in any remaining with CPUID instruction if possible,
1353 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001354static
1355void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001356{
1357 /* Defaults are for a model 3 or 4 Athlon */
1358 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1359 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1360 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1361
njn25e49d8e72002-09-23 09:36:25 +00001362#define CMD_LINE_DEFINED(L) \
1363 (-1 != clo_##L##_cache.size || \
1364 -1 != clo_##L##_cache.assoc || \
1365 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001366
njn25e49d8e72002-09-23 09:36:25 +00001367 *I1c = clo_I1_cache;
1368 *D1c = clo_D1_cache;
1369 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001370
njn7cf0bd32002-06-08 13:36:03 +00001371 /* If any undefined on command-line, try CPUID */
1372 if (! CMD_LINE_DEFINED(I1) ||
1373 ! CMD_LINE_DEFINED(D1) ||
1374 ! CMD_LINE_DEFINED(L2)) {
1375
1376 /* Overwrite CPUID result for any cache defined on command-line */
1377 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1378
njn25e49d8e72002-09-23 09:36:25 +00001379 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1380 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1381 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001382
1383 /* CPUID failed, use defaults for each undefined by command-line */
1384 } else {
1385 VG_(message)(Vg_DebugMsg,
1386 "Couldn't detect cache configuration, using one "
1387 "or more defaults ");
1388
njn25e49d8e72002-09-23 09:36:25 +00001389 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1390 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1391 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001392 }
1393 }
1394#undef CMD_LINE_DEFINED
1395
1396 check_cache(I1c, &I1_dflt, "I1");
1397 check_cache(D1c, &D1_dflt, "D1");
1398 check_cache(L2c, &L2_dflt, "L2");
1399
1400 if (VG_(clo_verbosity) > 1) {
1401 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1402 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1403 I1c->size, I1c->assoc, I1c->line_size);
1404 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1405 D1c->size, D1c->assoc, D1c->line_size);
1406 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1407 L2c->size, L2c->assoc, L2c->line_size);
1408 }
1409}
1410
njn4f9c9342002-04-29 16:03:24 +00001411/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001412/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001413/*------------------------------------------------------------*/
1414
njn4f9c9342002-04-29 16:03:24 +00001415static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1416 Char *first_instr_fn)
1417{
1418 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001419 Char buf[BUF_LEN], curr_file[BUF_LEN],
1420 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001421 UInt line_num;
1422
1423 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1424
njne0ee0712002-05-03 16:41:05 +00001425 /* Mark start of basic block in output, just to ease debugging */
1426 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001427
1428 VG_(strcpy)(curr_file, first_instr_fl);
1429
1430 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1431
1432 /* We pretend the CC is an iCC for getting the tag. This is ok
1433 * because both CC types have tag as their first byte. Once we know
1434 * the type, we can cast and act appropriately. */
1435
1436 Char fl_buf[FILENAME_LEN];
1437 Char fn_buf[FN_NAME_LEN];
1438
njne0ee0712002-05-03 16:41:05 +00001439 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001440 switch ( ((iCC*)BBCC_ptr)->tag ) {
1441
njn25e49d8e72002-09-23 09:36:25 +00001442 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001443 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1444 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001445 ADD_CC_TO(iCC, I, Ir_total);
1446 BBCC_ptr += sizeof(iCC);
1447 break;
1448
njn25e49d8e72002-09-23 09:36:25 +00001449 case ReadCC:
1450 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001451 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1452 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001453 ADD_CC_TO(idCC, I, Ir_total);
1454 ADD_CC_TO(idCC, D, Dr_total);
1455 BBCC_ptr += sizeof(idCC);
1456 break;
1457
njn25e49d8e72002-09-23 09:36:25 +00001458 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001459 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1460 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001461 ADD_CC_TO(idCC, I, Ir_total);
1462 ADD_CC_TO(idCC, D, Dw_total);
1463 BBCC_ptr += sizeof(idCC);
1464 break;
1465
njn25e49d8e72002-09-23 09:36:25 +00001466 case ReadWriteCC:
1467 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1468 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1469 ADD_CC_TO(iddCC, I, Ir_total);
1470 ADD_CC_TO(iddCC, Da, Dr_total);
1471 ADD_CC_TO(iddCC, Db, Dw_total);
1472 BBCC_ptr += sizeof(iddCC);
1473 break;
1474
njn4f9c9342002-04-29 16:03:24 +00001475 default:
njne427a662002-10-02 11:08:25 +00001476 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001477 break;
1478 }
1479 distinct_instrs++;
1480
njne0ee0712002-05-03 16:41:05 +00001481 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1482
1483 /* Allow for filename switching in the middle of a BB; if this happens,
1484 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001485 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001486 VG_(strcpy)(curr_file, fl_buf);
1487 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1488 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1489 }
1490
njn4f9c9342002-04-29 16:03:24 +00001491 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001492 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001493 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001494 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001495 VG_(printf)(" filenames: BB:%s, instr:%s;"
1496 " fn_names: BB:%s, instr:%s;"
1497 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001498 first_instr_fl, fl_buf,
1499 first_instr_fn, fn_buf,
1500 line_num);
1501 }
1502
njne0ee0712002-05-03 16:41:05 +00001503 VG_(sprintf)(lbuf, "%u ", line_num);
1504 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1505 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001506 }
1507 /* If we switched filenames in the middle of the BB without switching back,
1508 * switch back now because the subsequent BB may be relying on falling under
1509 * the original file name. */
1510 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1511 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1512 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1513 }
njne0ee0712002-05-03 16:41:05 +00001514
1515 /* Mark end of basic block */
1516 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001517
njne427a662002-10-02 11:08:25 +00001518 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001519}
1520
njn25e49d8e72002-09-23 09:36:25 +00001521static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001522{
1523 Int fd;
1524 Char buf[BUF_LEN];
1525 file_node *curr_file_node;
1526 fn_node *curr_fn_node;
1527 BBCC *curr_BBCC;
1528 Int i,j,k;
1529
njn25e49d8e72002-09-23 09:36:25 +00001530 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001531
njndb918dd2003-07-22 20:45:11 +00001532 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
njn13f02932003-04-30 20:23:58 +00001533 VKI_S_IRUSR|VKI_S_IWUSR);
1534 if (-1 == fd) {
sewardj0744b6c2002-12-11 00:45:42 +00001535 /* If the file can't be opened for whatever reason (conflict
1536 between multiple cachegrinded processes?), give up now. */
1537 file_err();
1538 return;
1539 }
njn4f9c9342002-04-29 16:03:24 +00001540
1541 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001542 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1543 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1544 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1545 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1546 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1547 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001548
1549 /* "cmd:" line */
1550 VG_(strcpy)(buf, "cmd:");
1551 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001552 for (i = 0; i < VG_(client_argc); i++) {
1553 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001554 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1555 }
1556 /* "events:" line */
1557 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1558 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1559
1560 /* Six loops here: three for the hash table arrays, and three for the
1561 * chains hanging off the hash table arrays. */
1562 for (i = 0; i < N_FILE_ENTRIES; i++) {
1563 curr_file_node = BBCC_table[i];
1564 while (curr_file_node != NULL) {
1565 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1566 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1567
1568 for (j = 0; j < N_FN_ENTRIES; j++) {
1569 curr_fn_node = curr_file_node->fns[j];
1570 while (curr_fn_node != NULL) {
1571 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1572 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1573
1574 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1575 curr_BBCC = curr_fn_node->BBCCs[k];
1576 while (curr_BBCC != NULL) {
1577 fprint_BBCC(fd, curr_BBCC,
1578
1579 curr_file_node->filename,
1580 curr_fn_node->fn_name);
1581
1582 curr_BBCC = curr_BBCC->next;
1583 }
1584 }
1585 curr_fn_node = curr_fn_node->next;
1586 }
1587 }
1588 curr_file_node = curr_file_node->next;
1589 }
1590 }
1591
njn4294fd42002-06-05 14:41:10 +00001592 /* Print stats from any discarded basic blocks */
1593 if (0 != Ir_discards.a) {
1594
1595 VG_(sprintf)(buf, "fl=(discarded)\n");
1596 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1597 VG_(sprintf)(buf, "fn=(discarded)\n");
1598 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1599
1600 /* Use 0 as line number */
1601 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1602 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1603 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1604 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1605 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1606
1607 Ir_total.a += Ir_discards.a;
1608 Ir_total.m1 += Ir_discards.m1;
1609 Ir_total.m2 += Ir_discards.m2;
1610 Dr_total.a += Dr_discards.a;
1611 Dr_total.m1 += Dr_discards.m1;
1612 Dr_total.m2 += Dr_discards.m2;
1613 Dw_total.a += Dw_discards.a;
1614 Dw_total.m1 += Dw_discards.m1;
1615 Dw_total.m2 += Dw_discards.m2;
1616 }
1617
njn4f9c9342002-04-29 16:03:24 +00001618 /* Summary stats must come after rest of table, since we calculate them
1619 * during traversal. */
1620 VG_(sprintf)(buf, "summary: "
1621 "%llu %llu %llu "
1622 "%llu %llu %llu "
1623 "%llu %llu %llu\n",
1624 Ir_total.a, Ir_total.m1, Ir_total.m2,
1625 Dr_total.a, Dr_total.m1, Dr_total.m2,
1626 Dw_total.a, Dw_total.m1, Dw_total.m2);
1627 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1628 VG_(close)(fd);
1629}
1630
1631/* Adds commas to ULong, right justifying in a field field_width wide, returns
1632 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001633static
njn4f9c9342002-04-29 16:03:24 +00001634Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1635{
1636 int len, n_commas, i, j, new_len, space;
1637
njne0205ff2003-04-08 00:56:14 +00001638 VG_(sprintf)(buf, "%llu", n);
njn4f9c9342002-04-29 16:03:24 +00001639 len = VG_(strlen)(buf);
1640 n_commas = (len - 1) / 3;
1641 new_len = len + n_commas;
1642 space = field_width - new_len;
1643
1644 /* Allow for printing a number in a field_width smaller than it's size */
1645 if (space < 0) space = 0;
1646
1647 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1648 * of three. */
1649 for (j = -1, i = len ; i >= 0; i--) {
1650 buf[i + n_commas + space] = buf[i];
1651
1652 if (3 == ++j) {
1653 j = 0;
1654 n_commas--;
1655 buf[i + n_commas + space] = ',';
1656 }
1657 }
1658 /* Right justify in field. */
1659 for (i = 0; i < space; i++) buf[i] = ' ';
1660 return new_len;
1661}
1662
sewardj4f29ddf2002-05-03 22:29:04 +00001663static
njn4f9c9342002-04-29 16:03:24 +00001664void percentify(Int n, Int pow, Int field_width, char buf[])
1665{
1666 int i, len, space;
1667
1668 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1669 len = VG_(strlen)(buf);
1670 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001671 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001672 i = len;
1673
1674 /* Right justify in field */
1675 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1676 for (i = 0; i < space; i++) buf[i] = ' ';
1677}
1678
njn7d9f94d2003-04-22 21:41:40 +00001679void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001680{
1681 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001682 ULong L2_total_m, L2_total_mr, L2_total_mw,
1683 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001684 char buf1[RESULTS_BUF_LEN],
1685 buf2[RESULTS_BUF_LEN],
1686 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001687 Int l1, l2, l3;
1688 Int p;
1689
njn25e49d8e72002-09-23 09:36:25 +00001690 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001691
njn7cf0bd32002-06-08 13:36:03 +00001692 if (VG_(clo_verbosity) == 0)
1693 return;
1694
njn4f9c9342002-04-29 16:03:24 +00001695 /* I cache results. Use the I_refs value to determine the first column
1696 * width. */
1697 l1 = commify(Ir_total.a, 0, buf1);
1698 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1699
1700 commify(Ir_total.m1, l1, buf1);
1701 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1702
1703 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001704 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001705
1706 p = 100;
1707
njn25e49d8e72002-09-23 09:36:25 +00001708 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001709 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1710 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1711
1712 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1713 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1714 VG_(message)(Vg_UserMsg, "");
1715
1716 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1717 * width of columns 2 & 3. */
1718 D_total.a = Dr_total.a + Dw_total.a;
1719 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1720 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1721
njn1d021fa2002-05-02 13:56:34 +00001722 commify( D_total.a, l1, buf1);
1723 l2 = commify(Dr_total.a, 0, buf2);
1724 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001725 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1726 buf1, buf2, buf3);
1727
1728 commify( D_total.m1, l1, buf1);
1729 commify(Dr_total.m1, l2, buf2);
1730 commify(Dw_total.m1, l3, buf3);
1731 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1732 buf1, buf2, buf3);
1733
1734 commify( D_total.m2, l1, buf1);
1735 commify(Dr_total.m2, l2, buf2);
1736 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001737 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001738 buf1, buf2, buf3);
1739
1740 p = 10;
1741
njn25e49d8e72002-09-23 09:36:25 +00001742 if (0 == D_total.a) D_total.a = 1;
1743 if (0 == Dr_total.a) Dr_total.a = 1;
1744 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001745 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1746 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1747 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1748 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1749
1750 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1751 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1752 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1753 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1754 VG_(message)(Vg_UserMsg, "");
1755
1756 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001757
1758 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1759 L2_total_r = Dr_total.m1 + Ir_total.m1;
1760 L2_total_w = Dw_total.m1;
1761 commify(L2_total, l1, buf1);
1762 commify(L2_total_r, l2, buf2);
1763 commify(L2_total_w, l3, buf3);
1764 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1765 buf1, buf2, buf3);
1766
njn4f9c9342002-04-29 16:03:24 +00001767 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1768 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1769 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001770 commify(L2_total_m, l1, buf1);
1771 commify(L2_total_mr, l2, buf2);
1772 commify(L2_total_mw, l3, buf3);
1773 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1774 buf1, buf2, buf3);
1775
1776 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1777 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1778 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1779 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1780
1781
1782 /* Hash table stats */
1783 if (VG_(clo_verbosity) > 1) {
1784 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1785 file_line_debug_BBs + no_debug_BBs;
1786
1787 VG_(message)(Vg_DebugMsg, "");
1788 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1789 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1790 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1791 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1792 full_debug_BBs * 100 / BB_lookups,
1793 full_debug_BBs);
1794 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1795 file_line_debug_BBs * 100 / BB_lookups,
1796 file_line_debug_BBs);
1797 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1798 fn_name_debug_BBs * 100 / BB_lookups,
1799 fn_name_debug_BBs);
1800 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1801 no_debug_BBs * 100 / BB_lookups,
1802 no_debug_BBs);
1803 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1804 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1805 }
njn25e49d8e72002-09-23 09:36:25 +00001806 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001807}
1808
sewardj18d75132002-05-16 11:06:21 +00001809
njn4294fd42002-06-05 14:41:10 +00001810/* Called when a translation is invalidated due to self-modifying code or
1811 * unloaded of a shared object.
1812 *
1813 * Finds the BBCC in the table, removes it, adds the counts to the discard
1814 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001815void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001816{
njn4294fd42002-06-05 14:41:10 +00001817 BBCC *BBCC_node;
1818 Addr BBCC_ptr0, BBCC_ptr;
1819 Bool BB_seen_before;
1820
sewardj83205b32002-06-14 11:08:07 +00001821 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001822 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001823
1824 /* 2nd arg won't be used since BB should have been seen before (assertions
1825 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001826 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001827 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1828
njne427a662002-10-02 11:08:25 +00001829 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001830
1831 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1832
1833 /* We pretend the CC is an iCC for getting the tag. This is ok
1834 * because both CC types have tag as their first byte. Once we know
1835 * the type, we can cast and act appropriately. */
1836
1837 switch ( ((iCC*)BBCC_ptr)->tag ) {
1838
njn25e49d8e72002-09-23 09:36:25 +00001839 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001840 ADD_CC_TO(iCC, I, Ir_discards);
1841 BBCC_ptr += sizeof(iCC);
1842 break;
1843
njn25e49d8e72002-09-23 09:36:25 +00001844 case ReadCC:
1845 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001846 ADD_CC_TO(idCC, I, Ir_discards);
1847 ADD_CC_TO(idCC, D, Dr_discards);
1848 BBCC_ptr += sizeof(idCC);
1849 break;
1850
njn25e49d8e72002-09-23 09:36:25 +00001851 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001852 ADD_CC_TO(idCC, I, Ir_discards);
1853 ADD_CC_TO(idCC, D, Dw_discards);
1854 BBCC_ptr += sizeof(idCC);
1855 break;
1856
njn25e49d8e72002-09-23 09:36:25 +00001857 case ReadWriteCC:
1858 ADD_CC_TO(iddCC, I, Ir_discards);
1859 ADD_CC_TO(iddCC, Da, Dr_discards);
1860 ADD_CC_TO(iddCC, Db, Dw_discards);
1861 BBCC_ptr += sizeof(iddCC);
1862 break;
1863
njn4294fd42002-06-05 14:41:10 +00001864 default:
njne427a662002-10-02 11:08:25 +00001865 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001866 break;
1867 }
1868 }
njn25e49d8e72002-09-23 09:36:25 +00001869 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001870}
1871
1872/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001873/*--- Command line processing ---*/
1874/*--------------------------------------------------------------------*/
1875
1876static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1877{
1878 int i1, i2, i3;
1879 int i;
1880 char *opt = VG_(strdup)(orig_opt);
1881
1882 i = i1 = opt_len;
1883
1884 /* Option looks like "--I1=65536,2,64".
1885 * Find commas, replace with NULs to make three independent
1886 * strings, then extract numbers. Yuck. */
1887 while (VG_(isdigit)(opt[i])) i++;
1888 if (',' == opt[i]) {
1889 opt[i++] = '\0';
1890 i2 = i;
1891 } else goto bad;
1892 while (VG_(isdigit)(opt[i])) i++;
1893 if (',' == opt[i]) {
1894 opt[i++] = '\0';
1895 i3 = i;
1896 } else goto bad;
1897 while (VG_(isdigit)(opt[i])) i++;
1898 if ('\0' != opt[i]) goto bad;
1899
1900 cache->size = (Int)VG_(atoll)(opt + i1);
1901 cache->assoc = (Int)VG_(atoll)(opt + i2);
1902 cache->line_size = (Int)VG_(atoll)(opt + i3);
1903
1904 VG_(free)(opt);
1905
1906 return;
1907
1908 bad:
1909 VG_(bad_option)(orig_opt);
1910}
1911
1912Bool SK_(process_cmd_line_option)(Char* arg)
1913{
1914 /* 5 is length of "--I1=" */
njn39c86652003-05-21 10:13:39 +00001915 if (VG_CLO_STREQN(5, arg, "--I1="))
njn25e49d8e72002-09-23 09:36:25 +00001916 parse_cache_opt(&clo_I1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001917 else if (VG_CLO_STREQN(5, arg, "--D1="))
njn25e49d8e72002-09-23 09:36:25 +00001918 parse_cache_opt(&clo_D1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001919 else if (VG_CLO_STREQN(5, arg, "--L2="))
njn25e49d8e72002-09-23 09:36:25 +00001920 parse_cache_opt(&clo_L2_cache, arg, 5);
1921 else
1922 return False;
1923
1924 return True;
1925}
1926
njn3e884182003-04-15 13:03:23 +00001927void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001928{
njn3e884182003-04-15 13:03:23 +00001929 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001930" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1931" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001932" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1933 );
1934}
1935
1936void SK_(print_debug_usage)(void)
1937{
1938 VG_(printf)(
1939" (none)\n"
1940 );
njn25e49d8e72002-09-23 09:36:25 +00001941}
1942
1943/*--------------------------------------------------------------------*/
1944/*--- Setup ---*/
1945/*--------------------------------------------------------------------*/
1946
njn810086f2002-11-14 12:42:47 +00001947void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001948{
njn13f02932003-04-30 20:23:58 +00001949 UInt buf_size = 100;
1950 Char* base_dir = NULL;
1951
njn810086f2002-11-14 12:42:47 +00001952 VG_(details_name) ("Cachegrind");
1953 VG_(details_version) (NULL);
1954 VG_(details_description) ("an I1/D1/L2 cache profiler");
1955 VG_(details_copyright_author)(
njn0e1b5142003-04-15 14:58:06 +00001956 "Copyright (C) 2002-2003, and GNU GPL'd, by Nicholas Nethercote.");
njn810086f2002-11-14 12:42:47 +00001957 VG_(details_bug_reports_to) ("njn25@cam.ac.uk");
sewardj78210aa2002-12-01 02:55:46 +00001958 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001959
njn810086f2002-11-14 12:42:47 +00001960 VG_(needs_basic_block_discards)();
1961 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001962
1963 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
1964 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
1965 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
1966 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
1967 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
1968 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00001969
1970 /* getcwd() fails if the buffer isn't big enough -- keep doubling size
1971 until it succeeds. */
1972 while (NULL == base_dir) {
1973 base_dir = VG_(malloc)(buf_size);
1974 if (NULL == VG_(getcwd)(base_dir, buf_size))
1975 buf_size *= 2;
1976 }
1977 /* Block is big enough for dir name + cachegrind.out.<pid> */
1978 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
1979 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
1980 base_dir, VG_(getpid)());
njn25e49d8e72002-09-23 09:36:25 +00001981}
1982
1983void SK_(post_clo_init)(void)
1984{
1985 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00001986
1987 initCC(&Ir_total);
1988 initCC(&Dr_total);
1989 initCC(&Dw_total);
1990
1991 initCC(&Ir_discards);
1992 initCC(&Dr_discards);
1993 initCC(&Dw_discards);
1994
1995 get_caches(&I1c, &D1c, &L2c);
1996
1997 cachesim_I1_initcache(I1c);
1998 cachesim_D1_initcache(D1c);
1999 cachesim_L2_initcache(L2c);
2000
2001 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
2002 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
2003 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
2004
2005 init_BBCC_table();
2006}
2007
/* Disabled (#if 0) sanity-check hooks, never compiled.
 *
 * SK_(cheap_sanity_check) unconditionally reports success.
 * SK_(expensive_sanity_check) walks 200191 entries of the core's vg_tt
 * table and, for every entry whose orig_addr is neither 1 nor 3, prints a
 * "." and looks the address up with get_BBCC().
 * NOTE(review): the lookup passes remove=True, so enabling this code would
 * presumably remove entries from the BBCC table -- confirm before reuse. */
2008#if 0
2009Bool SK_(cheap_sanity_check)(void) { return True; }
2010
2011extern TTEntry* vg_tt;
2012
2013Bool SK_(expensive_sanity_check)(void)
2014{
2015 Int i;
2016 Bool dummy;
2017 for (i = 0; i < 200191; i++) {
2018 if (vg_tt[i].orig_addr != (Addr)1 &&
2019 vg_tt[i].orig_addr != (Addr)3) {
2020 VG_(printf)(".");
2021 get_BBCC(vg_tt[i].orig_addr, NULL, /*remove=*/True, &dummy);
2022 }
2023 }
2024 return True;
2025}
2026#endif
2027
2028/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002029/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002030/*--------------------------------------------------------------------*/