blob: c288647942cbd8ac01f585c5b233faa80e50eb77 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
/*--------------------------------------------------------------------*/
/*--- Cachegrind: cache detection; instrumentation, recording and  ---*/
/*--- results printing.                                            ---*/
/*---                                                    cg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind skin for cache
   profiling programs.

   Copyright (C) 2002 Nicholas Nethercote
      njn25@cam.ac.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
njn27f1a382002-11-08 15:48:16 +000036VG_DETERMINE_INTERFACE_VERSION
37
/* For cache simulation: the parameters describing one cache. */
typedef struct {
   int size;       /* bytes */
   int assoc;      /* associativity (presumably number of ways -- confirm
                      against the cg_sim_* sources) */
   int line_size;  /* bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000044
njn25cac76cb2002-09-23 11:21:57 +000045#include "cg_sim_L2.c"
46#include "cg_sim_I1.c"
47#include "cg_sim_D1.c"
njn4f9c9342002-04-29 16:03:24 +000048
njn25e49d8e72002-09-23 09:36:25 +000049/*------------------------------------------------------------*/
50/*--- Constants ---*/
51/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000052
/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2.
 * Used as the upper bound when sanity-checking instruction sizes during
 * instrumentation. */
#define MAX_x86_INSTR_SIZE              16

/* Data sizes of FPU reads/writes are clamped to this value during
 * instrumentation. */
#define MIN_LINE_SIZE                   16

/* Size of various buffers used for storing strings */
#define FILENAME_LEN                    256
#define FN_NAME_LEN                     256
#define BUF_LEN                         512
#define COMMIFY_BUF_LEN                 128
#define RESULTS_BUF_LEN                 128
#define LINE_BUF_LEN                     64
65
66/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000067/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000068/*------------------------------------------------------------*/
69
njn25e49d8e72002-09-23 09:36:25 +000070typedef
71 enum {
72 VgpGetBBCC = VgpFini+1,
73 VgpCacheSimulate,
74 VgpCacheResults
75 }
76 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000077
njn4f9c9342002-04-29 16:03:24 +000078/*------------------------------------------------------------*/
79/*--- Output file related stuff ---*/
80/*------------------------------------------------------------*/
81
njn25e49d8e72002-09-23 09:36:25 +000082Char cachegrind_out_file[FILENAME_LEN];
njn4f9c9342002-04-29 16:03:24 +000083
84static void file_err()
85{
86 VG_(message)(Vg_UserMsg,
njn7cf0bd32002-06-08 13:36:03 +000087 "error: can't open cache simulation output file `%s'",
njn25e49d8e72002-09-23 09:36:25 +000088 cachegrind_out_file );
njn4f9c9342002-04-29 16:03:24 +000089 VG_(exit)(1);
90}
91
92/*------------------------------------------------------------*/
93/*--- Cost center types, operations ---*/
94/*------------------------------------------------------------*/
95
96typedef struct _CC CC;
97struct _CC {
98 ULong a;
99 ULong m1;
100 ULong m2;
101};
102
103static __inline__ void initCC(CC* cc) {
104 cc->a = 0;
105 cc->m1 = 0;
106 cc->m2 = 0;
107}
108
/* Tag values stored in the first byte of every instruction-level cost
 * centre; they identify which of the CC structs is being looked at. */
typedef enum {
   InstrCC,          /* eg. mov %eax,   %ebx                      */
   ReadCC,           /* eg. mov (%ecx), %esi                      */
   WriteCC,          /* eg. mov %eax,   (%edx)                    */
   ModCC,            /* eg. incl (%eax) (read+write one addr)     */
   ReadWriteCC,      /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                        (read+write two different addrs)          */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000118
/* Instruction-level cost-centres.  The struct types are defined directly
 * below.
 *
 * WARNING: the 'tag' field *must* be the first byte of every CC type
 * (iCC, idCC and iddCC).
 *
 * This is because we use it to work out what kind of CC we're dealing with.
 */
njn25e49d8e72002-09-23 09:36:25 +0000126typedef
127 struct {
128 /* word 1 */
129 UChar tag;
130 UChar instr_size;
131 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000132
njn25e49d8e72002-09-23 09:36:25 +0000133 /* words 2+ */
134 Addr instr_addr;
135 CC I;
136 }
137 iCC;
njn4f9c9342002-04-29 16:03:24 +0000138
njn25e49d8e72002-09-23 09:36:25 +0000139typedef
140 struct _idCC {
141 /* word 1 */
142 UChar tag;
143 UChar instr_size;
144 UChar data_size;
145 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000146
njn25e49d8e72002-09-23 09:36:25 +0000147 /* words 2+ */
148 Addr instr_addr;
149 CC I;
150 CC D;
151 }
152 idCC;
153
154typedef
155 struct _iddCC {
156 /* word 1 */
157 UChar tag;
158 UChar instr_size;
159 UChar data_size;
160 /* 1 byte padding */
161
162 /* words 2+ */
163 Addr instr_addr;
164 CC I;
165 CC Da;
166 CC Db;
167 }
168 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000169
170static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
171{
njn25e49d8e72002-09-23 09:36:25 +0000172 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000173 cc->instr_size = instr_size;
174 cc->instr_addr = instr_addr;
175 initCC(&cc->I);
176}
177
178static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
179 UInt instr_size, UInt data_size)
180{
181 cc->tag = X_CC;
182 cc->instr_size = instr_size;
183 cc->data_size = data_size;
184 cc->instr_addr = instr_addr;
185 initCC(&cc->I);
186 initCC(&cc->D);
187}
188
njn25e49d8e72002-09-23 09:36:25 +0000189static void init_iddCC(iddCC* cc, Addr instr_addr,
190 UInt instr_size, UInt data_size)
191{
192 cc->tag = ReadWriteCC;
193 cc->instr_size = instr_size;
194 cc->data_size = data_size;
195 cc->instr_addr = instr_addr;
196 initCC(&cc->I);
197 initCC(&cc->Da);
198 initCC(&cc->Db);
199}
200
/* Add the counters of member 'cc' of the cost centre at BBCC_ptr -- viewed
 * as a 'CC_type' struct -- onto the CC lvalue 'total'.
 *
 * NB: relies on a variable named BBCC_ptr being in scope at the point of
 * use.  Expands to three complete statements (each with its own ';'), so
 * it must not be used as the unbraced body of an if/else/loop. */
#define ADD_CC_TO(CC_type, cc, total)                       \
   (total).a  += ((CC_type*)BBCC_ptr)->cc.a;                \
   (total).m1 += ((CC_type*)BBCC_ptr)->cc.m1;               \
   (total).m2 += ((CC_type*)BBCC_ptr)->cc.m2;
205
/* If 1, address of each instruction is printed as a comment after its counts
 * in cachegrind.out.  Selects between the two output formats in each of the
 * sprint_*CC() functions. */
#define PRINT_INSTR_ADDRS 0
209
njne0ee0712002-05-03 16:41:05 +0000210static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000211{
njn95114da2002-06-05 09:39:31 +0000212#if PRINT_INSTR_ADDRS
213 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
214 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
215#else
njne0ee0712002-05-03 16:41:05 +0000216 VG_(sprintf)(buf, "%llu %llu %llu\n",
217 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000218#endif
njn4f9c9342002-04-29 16:03:24 +0000219}
220
njne0ee0712002-05-03 16:41:05 +0000221static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000222{
njn95114da2002-06-05 09:39:31 +0000223#if PRINT_INSTR_ADDRS
224 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
225 cc->I.a, cc->I.m1, cc->I.m2,
226 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
227#else
njne0ee0712002-05-03 16:41:05 +0000228 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
229 cc->I.a, cc->I.m1, cc->I.m2,
230 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000231#endif
njn4f9c9342002-04-29 16:03:24 +0000232}
233
njne0ee0712002-05-03 16:41:05 +0000234static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000235{
njn95114da2002-06-05 09:39:31 +0000236#if PRINT_INSTR_ADDRS
237 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
238 cc->I.a, cc->I.m1, cc->I.m2,
239 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
240#else
njne0ee0712002-05-03 16:41:05 +0000241 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
242 cc->I.a, cc->I.m1, cc->I.m2,
243 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000244#endif
njn4f9c9342002-04-29 16:03:24 +0000245}
246
njn25e49d8e72002-09-23 09:36:25 +0000247static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
248{
249#if PRINT_INSTR_ADDRS
250 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
251 cc->I.a, cc->I.m1, cc->I.m2,
252 cc->Da.a, cc->Da.m1, cc->Da.m2,
253 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
254#else
255 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
256 cc->I.a, cc->I.m1, cc->I.m2,
257 cc->Da.a, cc->Da.m1, cc->Da.m2,
258 cc->Db.a, cc->Db.m1, cc->Db.m2);
259#endif
260}
261
262
njn4f9c9342002-04-29 16:03:24 +0000263/*------------------------------------------------------------*/
264/*--- BBCC hash table stuff ---*/
265/*------------------------------------------------------------*/
266
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The sizes below work
 * fairly well for Konqueror. */

#define N_FILE_ENTRIES        251
#define N_FN_ENTRIES           53
#define N_BBCC_ENTRIES         37
274
275/* The cost centres for a basic block are stored in a contiguous array.
276 * They are distinguishable by their tag field. */
277typedef struct _BBCC BBCC;
278struct _BBCC {
279 Addr orig_addr;
280 UInt array_size; /* byte-size of variable length array */
281 BBCC* next;
282 Addr array[0]; /* variable length array */
283};
284
285typedef struct _fn_node fn_node;
286struct _fn_node {
287 Char* fn_name;
288 BBCC* BBCCs[N_BBCC_ENTRIES];
289 fn_node* next;
290};
291
292typedef struct _file_node file_node;
293struct _file_node {
294 Char* filename;
295 fn_node* fns[N_FN_ENTRIES];
296 file_node* next;
297};
298
299/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000300static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000301
sewardj4f29ddf2002-05-03 22:29:04 +0000302static Int distinct_files = 0;
303static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000304
sewardj4f29ddf2002-05-03 22:29:04 +0000305static Int distinct_instrs = 0;
306static Int full_debug_BBs = 0;
307static Int file_line_debug_BBs = 0;
308static Int fn_name_debug_BBs = 0;
309static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000310
sewardj4f29ddf2002-05-03 22:29:04 +0000311static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000312
njn4294fd42002-06-05 14:41:10 +0000313static CC Ir_discards;
314static CC Dr_discards;
315static CC Dw_discards;
316
njn4f9c9342002-04-29 16:03:24 +0000317static void init_BBCC_table()
318{
319 Int i;
320 for (i = 0; i < N_FILE_ENTRIES; i++)
321 BBCC_table[i] = NULL;
322}
323
njne0ee0712002-05-03 16:41:05 +0000324static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
325 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000326{
njn25e49d8e72002-09-23 09:36:25 +0000327 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000328
njn25e49d8e72002-09-23 09:36:25 +0000329 found1 = VG_(get_filename_linenum)(instr_addr, filename,
330 FILENAME_LEN, line_num);
331 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000332
333 if (!found1 && !found2) {
334 no_debug_BBs++;
335 VG_(strcpy)(filename, "???");
336 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000337 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000338
339 } else if ( found1 && found2) {
340 full_debug_BBs++;
341
342 } else if ( found1 && !found2) {
343 file_line_debug_BBs++;
344 VG_(strcpy)(fn_name, "???");
345
346 } else /*(!found1 && found2)*/ {
347 fn_name_debug_BBs++;
348 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000349 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000350 }
351}
352
353/* Forward declaration. */
354static Int compute_BBCC_array_size(UCodeBlock* cb);
355
356static __inline__
357file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
358{
359 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000360 file_node* new = VG_(malloc)(sizeof(file_node));
361 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000362 for (i = 0; i < N_FN_ENTRIES; i++) {
363 new->fns[i] = NULL;
364 }
365 new->next = next;
366 return new;
367}
368
369static __inline__
370fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
371{
372 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000373 fn_node* new = VG_(malloc)(sizeof(fn_node));
374 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000375 for (i = 0; i < N_BBCC_ENTRIES; i++) {
376 new->BBCCs[i] = NULL;
377 }
378 new->next = next;
379 return new;
380}
381
382static __inline__
383BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
384{
385 Int BBCC_array_size = compute_BBCC_array_size(cb);
386 BBCC* new;
387
njn25e49d8e72002-09-23 09:36:25 +0000388 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000389 new->orig_addr = bb_orig_addr;
390 new->array_size = BBCC_array_size;
391 new->next = next;
392
393 return new;
394}
395
396#define HASH_CONSTANT 256
397
398static UInt hash(Char *s, UInt table_size)
399{
400 int hash_value = 0;
401 for ( ; *s; s++)
402 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
403 return hash_value;
404}
405
406/* Do a three step traversal: by filename, then fn_name, then instr_addr.
407 * In all cases prepends new nodes to their chain. Returns a pointer to the
408 * cost centre. Also sets BB_seen_before by reference.
409 */
410static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000411 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000412{
413 file_node *curr_file_node;
414 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000415 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000416 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
417 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000418 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000419
njne0ee0712002-05-03 16:41:05 +0000420 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000421
njn25e49d8e72002-09-23 09:36:25 +0000422 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000423 filename_hash = hash(filename, N_FILE_ENTRIES);
424 curr_file_node = BBCC_table[filename_hash];
425 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000426 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000427 curr_file_node = curr_file_node->next;
428 }
429 if (NULL == curr_file_node) {
430 BBCC_table[filename_hash] = curr_file_node =
431 new_file_node(filename, BBCC_table[filename_hash]);
432 distinct_files++;
433 }
434
435 fnname_hash = hash(fn_name, N_FN_ENTRIES);
436 curr_fn_node = curr_file_node->fns[fnname_hash];
437 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000438 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000439 curr_fn_node = curr_fn_node->next;
440 }
441 if (NULL == curr_fn_node) {
442 curr_file_node->fns[fnname_hash] = curr_fn_node =
443 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
444 distinct_fns++;
445 }
446
447 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000448 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000449 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
450 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000451 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000452 curr_BBCC = curr_BBCC->next;
453 }
454 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000455
njne427a662002-10-02 11:08:25 +0000456 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000457
njn4f9c9342002-04-29 16:03:24 +0000458 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
459 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
460 *BB_seen_before = False;
461
462 } else {
njne427a662002-10-02 11:08:25 +0000463 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
464 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000465 if (VG_(clo_verbosity) > 2) {
466 VG_(message)(Vg_DebugMsg,
467 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000468 }
469 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000470
471 if (True == remove) {
472 // Remove curr_BBCC from chain; it will be used and free'd by the
473 // caller.
474 *prev_BBCC_next_ptr = curr_BBCC->next;
475
476 } else {
477 BB_retranslations++;
478 }
njn4f9c9342002-04-29 16:03:24 +0000479 }
njn25e49d8e72002-09-23 09:36:25 +0000480 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000481 return curr_BBCC;
482}
483
484/*------------------------------------------------------------*/
485/*--- Cache simulation instrumentation phase ---*/
486/*------------------------------------------------------------*/
487
njn4f9c9342002-04-29 16:03:24 +0000488static Int compute_BBCC_array_size(UCodeBlock* cb)
489{
490 UInstr* u_in;
491 Int i, CC_size, BBCC_size = 0;
492 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000493 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000494
495 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000496 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000497
498 for (i = 0; i < cb->used; i++) {
njn4f9c9342002-04-29 16:03:24 +0000499 u_in = &cb->instrs[i];
500 switch(u_in->opcode) {
501
502 case INCEIP:
503 goto case_for_end_of_instr;
504
505 case JMP:
506 if (u_in->cond != CondAlways) break;
507
508 goto case_for_end_of_instr;
509
510 case_for_end_of_instr:
511
njn25e49d8e72002-09-23 09:36:25 +0000512 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
513 t_read != t_write)
514 CC_size = sizeof(iddCC);
515 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
516 CC_size = sizeof(idCC);
517 else
518 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000519
520 BBCC_size += CC_size;
521 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
522 break;
523
524 case LOAD:
525 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000526 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000527 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000528 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000529 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000530 is_LOAD = True;
531 break;
532
533 case STORE:
534 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000535 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000536 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000537 is_STORE = True;
538 break;
539
540 case FPU_R:
njne427a662002-10-02 11:08:25 +0000541 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000542 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000543 is_FPU_R = True;
544 break;
545
546 case FPU_W:
njne427a662002-10-02 11:08:25 +0000547 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000548 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000549 is_FPU_W = True;
550 break;
551
552 default:
553 break;
554 }
555 }
556
557 return BBCC_size;
558}
559
njn25e49d8e72002-09-23 09:36:25 +0000560static __attribute__ ((regparm (1)))
561void log_1I_0D_cache_access(iCC* cc)
562{
563 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
564 // cc, cc->instr_addr, cc->instr_size)
565 VGP_PUSHCC(VgpCacheSimulate);
566 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
567 cc->I.a++;
568 VGP_POPCC(VgpCacheSimulate);
569}
570
571/* Difference between this function and log_1I_0D_cache_access() is that
572 this one can be passed any kind of CC, not just an iCC. So we have to
573 be careful to make sure we don't make any assumptions about CC layout.
574 (As it stands, they would be safe, but this will avoid potential heartache
575 if anyone else changes CC layout.)
576 Note that we only do the switch for the JIFZ version because if we always
577 called this switching version, things would run about 5% slower. */
578static __attribute__ ((regparm (1)))
579void log_1I_0D_cache_access_JIFZ(iCC* cc)
580{
581 UChar instr_size;
582 Addr instr_addr;
583 CC* I;
584
585 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
586 // cc, cc->instr_addr, cc->instr_size)
587 VGP_PUSHCC(VgpCacheSimulate);
588
589 switch(cc->tag) {
590 case InstrCC:
591 instr_size = cc->instr_size;
592 instr_addr = cc->instr_addr;
593 I = &(cc->I);
594 break;
595 case ReadCC:
596 case WriteCC:
597 case ModCC:
598 instr_size = ((idCC*)cc)->instr_size;
599 instr_addr = ((idCC*)cc)->instr_addr;
600 I = &( ((idCC*)cc)->I );
601 break;
602 case ReadWriteCC:
603 instr_size = ((iddCC*)cc)->instr_size;
604 instr_addr = ((iddCC*)cc)->instr_addr;
605 I = &( ((iddCC*)cc)->I );
606 break;
607 default:
njne427a662002-10-02 11:08:25 +0000608 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000609 break;
610 }
611 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
612 I->a++;
613 VGP_POPCC(VgpCacheSimulate);
614}
615
616__attribute__ ((regparm (2))) static
617void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
618{
619 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
620 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
621 VGP_PUSHCC(VgpCacheSimulate);
622 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
623 cc->D.a++;
624 VGP_POPCC(VgpCacheSimulate);
625}
626
627__attribute__ ((regparm (2))) static
628void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
629{
630 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
631 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
632 VGP_PUSHCC(VgpCacheSimulate);
633 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
634 cc->I.a++;
635
636 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
637 cc->D.a++;
638 VGP_POPCC(VgpCacheSimulate);
639}
640
641__attribute__ ((regparm (3))) static
642void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
643{
644 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
645 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
646 VGP_PUSHCC(VgpCacheSimulate);
647 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
648 cc->Da.a++;
649 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
650 cc->Db.a++;
651 VGP_POPCC(VgpCacheSimulate);
652}
653
654__attribute__ ((regparm (3))) static
655void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
656{
657 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
658 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
659 VGP_PUSHCC(VgpCacheSimulate);
660 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
661 cc->I.a++;
662
663 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
664 cc->Da.a++;
665 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
666 cc->Db.a++;
667 VGP_POPCC(VgpCacheSimulate);
668}
669
670UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
671{
672/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000673#define INVALID_DATA_SIZE 999999
674
njn4f9c9342002-04-29 16:03:24 +0000675 UCodeBlock* cb;
676 Int i;
677 UInstr* u_in;
678 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000679 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
680 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000681 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000682 Addr x86_instr_addr = orig_addr;
683 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
684 Addr helper;
685 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000686 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000687 Bool BB_seen_before = False;
688 Bool instrumented_Jcond = False;
689 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000690 Addr BBCC_ptr0, BBCC_ptr;
691
692 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
693 * if it's the first time it's been seen), and point to start of the
694 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000695 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000696 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
697
njn4ba5a792002-09-30 10:23:54 +0000698 cb = VG_(alloc_UCodeBlock)();
njn4f9c9342002-04-29 16:03:24 +0000699 cb->nextTemp = cb_in->nextTemp;
700
njn25e49d8e72002-09-23 09:36:25 +0000701 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
702 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000703
704 for (i = 0; i < cb_in->used; i++) {
705 u_in = &cb_in->instrs[i];
706
njn4f9c9342002-04-29 16:03:24 +0000707 /* What this is all about: we want to instrument each x86 instruction
708 * translation. The end of these are marked in three ways. The three
709 * ways, and the way we instrument them, are as follows:
710 *
711 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
712 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
713 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
714 *
njn25e49d8e72002-09-23 09:36:25 +0000715 * The last UInstr in a basic block is always a Juncond. Jconds,
716 * when they appear, are always second last. We check this with
717 * various assertions.
718 *
719 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000720 * executed. We don't have to put the instrumentation before the INCEIP
721 * (it could go after) but we do so for consistency.
722 *
njn25e49d8e72002-09-23 09:36:25 +0000723 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
724 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000725 *
njn25e49d8e72002-09-23 09:36:25 +0000726 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000727 *
728 * The instrumentation is just a call to the appropriate helper function,
729 * passing it the address of the instruction's CC.
730 */
njne427a662002-10-02 11:08:25 +0000731 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000732
733 switch (u_in->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000734 case NOP: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000735 break;
736
njn4f9c9342002-04-29 16:03:24 +0000737 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000738 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000739 */
740 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000741 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000742 t_read_addr = newTemp(cb);
743 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
744 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000745 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000746 break;
747
748 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000749 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000750 t_read_addr = newTemp(cb);
751 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000752 data_size = ( u_in->size <= MIN_LINE_SIZE
753 ? u_in->size
754 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000755 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000756 break;
757
758 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000759 * That's how the code above determines whether it does a write.
760 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000761 * As for the MOV, if it's a mod instruction it's redundant, but it's
762 * not expensive and mod instructions are rare anyway. */
763 case STORE:
764 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000765 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000766 t_write_addr = newTemp(cb);
767 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000768 /* 28 and 108 B data-sized instructions will be done
769 * inaccurately but they're very rare and this avoids errors
770 * from hitting more than two cache lines in the simulation. */
771 data_size = ( u_in->size <= MIN_LINE_SIZE
772 ? u_in->size
773 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000774 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000775 break;
776
njn25e49d8e72002-09-23 09:36:25 +0000777
778 /* For rep-prefixed instructions, log a single I-cache access
779 * before the UCode loop that implements the repeated part, which
780 * is where the multiple D-cache accesses are logged. */
781 case JIFZ:
782 has_rep_prefix = True;
783
784 /* Setup 1st and only arg: CC addr */
785 t_CC_addr = newTemp(cb);
786 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
787 uLiteral(cb, BBCC_ptr);
788
789 /* Call helper */
790 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
791 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000792 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000793 break;
794
795
796 /* INCEIP: insert instrumentation */
797 case INCEIP:
798 x86_instr_size = u_in->val1;
799 goto instrument_x86_instr;
800
801 /* JMP: insert instrumentation if the first JMP */
802 case JMP:
803 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000804 sk_assert(CondAlways == u_in->cond);
805 sk_assert(i+1 == cb_in->used);
njn4ba5a792002-09-30 10:23:54 +0000806 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000807 instrumented_Jcond = False; /* reset */
808 break;
809 }
810 /* The first JMP... instrument. */
811 if (CondAlways != u_in->cond) {
njne427a662002-10-02 11:08:25 +0000812 sk_assert(i+2 == cb_in->used);
njn25e49d8e72002-09-23 09:36:25 +0000813 instrumented_Jcond = True;
814 } else {
njne427a662002-10-02 11:08:25 +0000815 sk_assert(i+1 == cb_in->used);
njn25e49d8e72002-09-23 09:36:25 +0000816 }
817
818 /* Get x86 instr size from final JMP. */
819 x86_instr_size = LAST_UINSTR(cb_in).extra4b;
820 goto instrument_x86_instr;
821
822
823 /* Code executed at the end of each x86 instruction. */
824 instrument_x86_instr:
825
826 /* Initialise the CC in the BBCC array appropriately if it
827 * hasn't been initialised before. Then call appropriate sim
828 * function, passing it the CC address. */
829 stack_used = 0;
830
njne427a662002-10-02 11:08:25 +0000831 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000832 x86_instr_size <= MAX_x86_INSTR_SIZE);
833
834#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
835
836 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000837 sk_assert(INVALID_DATA_SIZE == data_size);
838 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000839 INVALID_TEMPREG == t_read &&
840 INVALID_TEMPREG == t_write_addr &&
841 INVALID_TEMPREG == t_write);
842 CC_size = sizeof(iCC);
843 if (!BB_seen_before)
844 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
845 helper = ( has_rep_prefix
846 ? (Addr)0 /* no extra log needed */
847 : (Addr) & log_1I_0D_cache_access
848 );
849 argc = 1;
850
851 } else {
njne427a662002-10-02 11:08:25 +0000852 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000853 8 == data_size || 10 == data_size ||
854 MIN_LINE_SIZE == data_size);
855
856 if (IS_(read) && !IS_(write)) {
857 CC_size = sizeof(idCC);
            /* If it uses 'rep', we've already logged the I-cache
             * access at the JIFZ UInstr (see JIFZ case above) so
             * don't do it here */
861 helper = ( has_rep_prefix
862 ? (Addr) & log_0I_1D_cache_access
863 : (Addr) & log_1I_1D_cache_access
864 );
865 argc = 2;
866 if (!BB_seen_before)
867 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
868 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000869 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000870 INVALID_TEMPREG != t_read &&
871 INVALID_TEMPREG == t_write_addr &&
872 INVALID_TEMPREG == t_write);
873 t_data_addr1 = t_read_addr;
874
875 } else if (!IS_(read) && IS_(write)) {
876 CC_size = sizeof(idCC);
877 helper = ( has_rep_prefix
878 ? (Addr) & log_0I_1D_cache_access
879 : (Addr) & log_1I_1D_cache_access
880 );
881 argc = 2;
882 if (!BB_seen_before)
883 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
884 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000885 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000886 INVALID_TEMPREG == t_read &&
887 INVALID_TEMPREG != t_write_addr &&
888 INVALID_TEMPREG != t_write);
889 t_data_addr1 = t_write_addr;
890
891 } else {
njne427a662002-10-02 11:08:25 +0000892 sk_assert(IS_(read) && IS_(write));
893 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000894 INVALID_TEMPREG != t_read &&
895 INVALID_TEMPREG != t_write_addr &&
896 INVALID_TEMPREG != t_write);
897 if (t_read == t_write) {
898 CC_size = sizeof(idCC);
899 helper = ( has_rep_prefix
900 ? (Addr) & log_0I_1D_cache_access
901 : (Addr) & log_1I_1D_cache_access
902 );
903 argc = 2;
904 if (!BB_seen_before)
905 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
906 x86_instr_size, data_size);
907 t_data_addr1 = t_read_addr;
908 } else {
909 CC_size = sizeof(iddCC);
910 helper = ( has_rep_prefix
911 ? (Addr) & log_0I_2D_cache_access
912 : (Addr) & log_1I_2D_cache_access
913 );
914 argc = 3;
915 if (!BB_seen_before)
916 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
917 x86_instr_size, data_size);
918 t_data_addr1 = t_read_addr;
919 t_data_addr2 = t_write_addr;
920 }
921 }
922#undef IS_
923 }
924
925 /* Call the helper, if necessary */
926 if ((Addr)0 != helper) {
927
928 /* Setup 1st arg: CC addr */
929 t_CC_addr = newTemp(cb);
930 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
931 uLiteral(cb, BBCC_ptr);
932
933 /* Call the helper */
934 if (1 == argc)
935 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
936 else if (2 == argc)
937 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
938 TempReg, t_data_addr1);
939 else if (3 == argc)
940 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
941 TempReg, t_data_addr1,
942 TempReg, t_data_addr2);
943 else
njne427a662002-10-02 11:08:25 +0000944 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +0000945
946 uCCall(cb, helper, argc, argc, False);
947 }
948
949 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +0000950 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000951
952 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
953 BBCC_ptr += CC_size;
954 x86_instr_addr += x86_instr_size;
955 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
956 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
957 data_size = INVALID_DATA_SIZE;
958 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000959 break;
960
961 default:
njn4ba5a792002-09-30 10:23:54 +0000962 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000963 break;
964 }
965 }
966
967 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +0000968 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +0000969
njn4ba5a792002-09-30 10:23:54 +0000970 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000971 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000972
973#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000974}
975
976/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000977/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000978/*------------------------------------------------------------*/
979
980/* Total reads/writes/misses. Calculated during CC traversal at the end. */
981static CC Ir_total;
982static CC Dr_total;
983static CC Dw_total;
984
njn25e49d8e72002-09-23 09:36:25 +0000985#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
986
987static cache_t clo_I1_cache = UNDEFINED_CACHE;
988static cache_t clo_D1_cache = UNDEFINED_CACHE;
989static cache_t clo_L2_cache = UNDEFINED_CACHE;
990
njn7cf0bd32002-06-08 13:36:03 +0000991/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
992/* Probably only works for Intel and AMD chips, and probably only for some of
993 * them.
994 */
995
sewardj07133bf2002-06-13 10:25:56 +0000996static __inline__ void cpuid(Int n, Int *a, Int *b, Int *c, Int *d)
njn7cf0bd32002-06-08 13:36:03 +0000997{
998 __asm__ __volatile__ (
999 "cpuid"
1000 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1001 : "0" (n) /* input */
1002 );
1003}
1004
sewardj07133bf2002-06-13 10:25:56 +00001005static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001006{
1007 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001008 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001009 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001010 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001011 " Simulating a %d KB cache with %d B lines",
1012 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001013}
1014
1015/* Intel method is truly wretched. We have to do an insane indexing into an
1016 * array of pre-defined configurations for various parts of the memory
1017 * hierarchy.
1018 */
1019static
sewardj07133bf2002-06-13 10:25:56 +00001020Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001021{
sewardj07133bf2002-06-13 10:25:56 +00001022 UChar info[16];
1023 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001024 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001025
1026 if (level < 2) {
1027 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001028 "warning: CPUID level < 2 for Intel processor (%d)",
1029 level);
njn7cf0bd32002-06-08 13:36:03 +00001030 return -1;
1031 }
1032
sewardj07133bf2002-06-13 10:25:56 +00001033 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1034 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001035 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1036 info[0] = 0x0; /* reset AL */
1037
1038 if (0 != trials) {
1039 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001040 "warning: non-zero CPUID trials for Intel processor (%d)",
1041 trials);
njn7cf0bd32002-06-08 13:36:03 +00001042 return -1;
1043 }
1044
1045 for (i = 0; i < 16; i++) {
1046
1047 switch (info[i]) {
1048
1049 case 0x0: /* ignore zeros */
1050 break;
1051
njn25e49d8e72002-09-23 09:36:25 +00001052 /* TLB info, ignore */
1053 case 0x01: case 0x02: case 0x03: case 0x04:
1054 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njn7cf0bd32002-06-08 13:36:03 +00001055 break;
1056
1057 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1058 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
1059
1060 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1061 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
1062
njn25e49d8e72002-09-23 09:36:25 +00001063 /* IA-64 info -- panic! */
1064 case 0x10: case 0x15: case 0x1a:
1065 case 0x88: case 0x89: case 0x8a: case 0x8d:
1066 case 0x90: case 0x96: case 0x9b:
1067 VG_(message)(Vg_DebugMsg,
1068 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001069 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001070
njn7cf0bd32002-06-08 13:36:03 +00001071 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001072 VG_(message)(Vg_DebugMsg,
1073 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001074 break;
1075
njn25e49d8e72002-09-23 09:36:25 +00001076 /* These are sectored, whatever that means */
1077 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1078 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1079
1080 /* If a P6 core, this means "no L2 cache".
1081 If a P4 core, this means "no L3 cache".
1082 We don't know what core it is, so don't issue a warning. To detect
1083 a missing L2 cache, we use 'L2_found'. */
1084 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001085 break;
1086
njn25e49d8e72002-09-23 09:36:25 +00001087 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1088 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1089 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1090 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1091 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001092
1093 /* These are sectored, whatever that means */
1094 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1095 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1096 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1097
1098 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1099 * conversion to byte size is a total guess; treat the 12K and 16K
1100 * cases the same since the cache byte size must be a power of two for
1101 * everything to work!. Also guessing 32 bytes for the line size...
1102 */
1103 case 0x70: /* 12K micro-ops, 8-way */
1104 *I1c = (cache_t) { 16, 8, 32 };
1105 micro_ops_warn(12, 16, 32);
1106 break;
1107 case 0x71: /* 16K micro-ops, 8-way */
1108 *I1c = (cache_t) { 16, 8, 32 };
1109 micro_ops_warn(16, 16, 32);
1110 break;
1111 case 0x72: /* 32K micro-ops, 8-way */
1112 *I1c = (cache_t) { 32, 8, 32 };
1113 micro_ops_warn(32, 32, 32);
1114 break;
1115
njn25e49d8e72002-09-23 09:36:25 +00001116 /* These are sectored, whatever that means */
1117 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1118 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1119 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1120 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1121 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001122
njn25e49d8e72002-09-23 09:36:25 +00001123 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1124 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1125 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1126 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1127 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001128
1129 default:
1130 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001131 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001132 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001133 break;
1134 }
1135 }
njn25e49d8e72002-09-23 09:36:25 +00001136
1137 if (!L2_found)
1138 VG_(message)(Vg_DebugMsg,
1139 "warning: L2 cache not installed, ignore L2 results.");
1140
njn7cf0bd32002-06-08 13:36:03 +00001141 return 0;
1142}
1143
1144/* AMD method is straightforward, just extract appropriate bits from the
1145 * result registers.
1146 *
1147 * Bits, for D1 and I1:
1148 * 31..24 data L1 cache size in KBs
1149 * 23..16 data L1 cache associativity (FFh=full)
1150 * 15.. 8 data L1 cache lines per tag
1151 * 7.. 0 data L1 cache line size in bytes
1152 *
1153 * Bits, for L2:
1154 * 31..16 unified L2 cache size in KBs
1155 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1156 * 11.. 8 unified L2 cache lines per tag
1157 * 7.. 0 unified L2 cache line size in bytes
1158 *
1159 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1160 * upon this information. (Whatever that means -- njn)
1161 *
njn25e49d8e72002-09-23 09:36:25 +00001162 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1163 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1164 * so we detect that.
1165 *
njn7cf0bd32002-06-08 13:36:03 +00001166 * Returns 0 on success, non-zero on failure.
1167 */
sewardj07133bf2002-06-13 10:25:56 +00001168static
1169Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001170{
njn25e49d8e72002-09-23 09:36:25 +00001171 Int dummy, model, ext_level;
sewardj07133bf2002-06-13 10:25:56 +00001172 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001173
1174 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1175
1176 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1177 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001178 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1179 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001180 return -1;
1181 }
1182
1183 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1184 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1185
njn25e49d8e72002-09-23 09:36:25 +00001186 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1187 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1188
1189 /* Check for Duron bug */
1190 if (model == 0x630) {
1191 VG_(message)(Vg_UserMsg,
1192 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1193 L2i = (64 << 16) | (L2i & 0xffff);
1194 }
1195
njn7cf0bd32002-06-08 13:36:03 +00001196 D1c->size = (D1i >> 24) & 0xff;
1197 D1c->assoc = (D1i >> 16) & 0xff;
1198 D1c->line_size = (D1i >> 0) & 0xff;
1199
1200 I1c->size = (I1i >> 24) & 0xff;
1201 I1c->assoc = (I1i >> 16) & 0xff;
1202 I1c->line_size = (I1i >> 0) & 0xff;
1203
1204 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1205 L2c->assoc = (L2i >> 12) & 0xf;
1206 L2c->line_size = (L2i >> 0) & 0xff;
1207
1208 return 0;
1209}
1210
/* Jump buffer armed with __builtin_setjmp() by the code that is about to
 * try the CPUID instruction. */
static jmp_buf cpuid_jmpbuf;

/* Signal handler installed for SIGILL while CPUID is probed: jumps back
 * through cpuid_jmpbuf with value 1.  The signal number is not examined. */
static
void cpuid_SIGILL_handler(int signum)
{
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1218
1219static
sewardj07133bf2002-06-13 10:25:56 +00001220Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001221{
sewardj07133bf2002-06-13 10:25:56 +00001222 Int level, res, ret;
1223 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001224 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001225
1226 /* Install own SIGILL handler */
1227 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1228 sigill_new.ksa_flags = 0;
1229 sigill_new.ksa_restorer = NULL;
1230 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001231 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001232
1233 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001234 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001235
1236 /* Trap for illegal instruction, in case it's a really old processor that
1237 * doesn't support CPUID. */
1238 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1239 cpuid(0, &level, (int*)&vendor_id[0],
1240 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1241 vendor_id[12] = '\0';
1242
1243 /* Restore old SIGILL handler */
1244 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001245 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001246
1247 } else {
1248 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1249
1250 /* Restore old SIGILL handler */
1251 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001252 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001253 return -1;
1254 }
1255
1256 if (0 == level) {
1257 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1258 return -1;
1259 }
1260
1261 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1262 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1263 ret = Intel_cache_info(level, I1c, D1c, L2c);
1264
1265 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1266 ret = AMD_cache_info(I1c, D1c, L2c);
1267
1268 } else {
1269 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1270 vendor_id);
1271 return -1;
1272 }
1273
1274 /* Successful! Convert sizes from KB to bytes */
1275 I1c->size *= 1024;
1276 D1c->size *= 1024;
1277 L2c->size *= 1024;
1278
1279 return ret;
1280}
1281
1282/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001283static
1284void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001285{
1286 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001287 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001288 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001289 "warning: %s size of %dB not a power of two; "
1290 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001291 cache->size = dflt->size;
1292 }
1293
sewardj07133bf2002-06-13 10:25:56 +00001294 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001295 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001296 "warning: %s associativity of %d not a power of two; "
1297 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001298 cache->assoc = dflt->assoc;
1299 }
1300
sewardj07133bf2002-06-13 10:25:56 +00001301 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001302 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001303 "warning: %s line size of %dB not a power of two; "
1304 "defaulting to %dB",
1305 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001306 cache->line_size = dflt->line_size;
1307 }
1308
1309 /* Then check line size >= 16 -- any smaller and a single instruction could
1310 * straddle three cache lines, which breaks a simulation assertion and is
1311 * stupid anyway. */
1312 if (cache->line_size < MIN_LINE_SIZE) {
1313 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001314 "warning: %s line size of %dB too small; "
1315 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001316 cache->line_size = MIN_LINE_SIZE;
1317 }
1318
1319 /* Then check cache size > line size (causes seg faults if not). */
1320 if (cache->size <= cache->line_size) {
1321 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001322 "warning: %s cache size of %dB <= line size of %dB; "
1323 "increasing to %dB", name, cache->size, cache->line_size,
1324 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001325 cache->size = cache->line_size * 2;
1326 }
1327
1328 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1329 if (cache->assoc > (cache->size / cache->line_size)) {
1330 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001331 "warning: %s associativity > (size / line size); "
1332 "increasing size to %dB",
1333 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001334 cache->size = cache->assoc * cache->line_size;
1335 }
1336}
1337
1338/* On entry, args are undefined. Fill them with any info from the
1339 * command-line, then fill in any remaining with CPUID instruction if possible,
1340 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001341static
1342void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001343{
1344 /* Defaults are for a model 3 or 4 Athlon */
1345 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1346 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1347 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1348
njn25e49d8e72002-09-23 09:36:25 +00001349#define CMD_LINE_DEFINED(L) \
1350 (-1 != clo_##L##_cache.size || \
1351 -1 != clo_##L##_cache.assoc || \
1352 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001353
njn25e49d8e72002-09-23 09:36:25 +00001354 *I1c = clo_I1_cache;
1355 *D1c = clo_D1_cache;
1356 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001357
njn7cf0bd32002-06-08 13:36:03 +00001358 /* If any undefined on command-line, try CPUID */
1359 if (! CMD_LINE_DEFINED(I1) ||
1360 ! CMD_LINE_DEFINED(D1) ||
1361 ! CMD_LINE_DEFINED(L2)) {
1362
1363 /* Overwrite CPUID result for any cache defined on command-line */
1364 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1365
njn25e49d8e72002-09-23 09:36:25 +00001366 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1367 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1368 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001369
1370 /* CPUID failed, use defaults for each undefined by command-line */
1371 } else {
1372 VG_(message)(Vg_DebugMsg,
1373 "Couldn't detect cache configuration, using one "
1374 "or more defaults ");
1375
njn25e49d8e72002-09-23 09:36:25 +00001376 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1377 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1378 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001379 }
1380 }
1381#undef CMD_LINE_DEFINED
1382
1383 check_cache(I1c, &I1_dflt, "I1");
1384 check_cache(D1c, &D1_dflt, "D1");
1385 check_cache(L2c, &L2_dflt, "L2");
1386
1387 if (VG_(clo_verbosity) > 1) {
1388 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1389 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1390 I1c->size, I1c->assoc, I1c->line_size);
1391 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1392 D1c->size, D1c->assoc, D1c->line_size);
1393 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1394 L2c->size, L2c->assoc, L2c->line_size);
1395 }
1396}
1397
njn4f9c9342002-04-29 16:03:24 +00001398/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001399/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001400/*------------------------------------------------------------*/
1401
njn4f9c9342002-04-29 16:03:24 +00001402static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1403 Char *first_instr_fn)
1404{
1405 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001406 Char buf[BUF_LEN], curr_file[BUF_LEN],
1407 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001408 UInt line_num;
1409
1410 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1411
njne0ee0712002-05-03 16:41:05 +00001412 /* Mark start of basic block in output, just to ease debugging */
1413 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001414
1415 VG_(strcpy)(curr_file, first_instr_fl);
1416
1417 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1418
1419 /* We pretend the CC is an iCC for getting the tag. This is ok
1420 * because both CC types have tag as their first byte. Once we know
1421 * the type, we can cast and act appropriately. */
1422
1423 Char fl_buf[FILENAME_LEN];
1424 Char fn_buf[FN_NAME_LEN];
1425
njne0ee0712002-05-03 16:41:05 +00001426 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001427 switch ( ((iCC*)BBCC_ptr)->tag ) {
1428
njn25e49d8e72002-09-23 09:36:25 +00001429 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001430 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1431 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001432 ADD_CC_TO(iCC, I, Ir_total);
1433 BBCC_ptr += sizeof(iCC);
1434 break;
1435
njn25e49d8e72002-09-23 09:36:25 +00001436 case ReadCC:
1437 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001438 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1439 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001440 ADD_CC_TO(idCC, I, Ir_total);
1441 ADD_CC_TO(idCC, D, Dr_total);
1442 BBCC_ptr += sizeof(idCC);
1443 break;
1444
njn25e49d8e72002-09-23 09:36:25 +00001445 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001446 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1447 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001448 ADD_CC_TO(idCC, I, Ir_total);
1449 ADD_CC_TO(idCC, D, Dw_total);
1450 BBCC_ptr += sizeof(idCC);
1451 break;
1452
njn25e49d8e72002-09-23 09:36:25 +00001453 case ReadWriteCC:
1454 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1455 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1456 ADD_CC_TO(iddCC, I, Ir_total);
1457 ADD_CC_TO(iddCC, Da, Dr_total);
1458 ADD_CC_TO(iddCC, Db, Dw_total);
1459 BBCC_ptr += sizeof(iddCC);
1460 break;
1461
njn4f9c9342002-04-29 16:03:24 +00001462 default:
njne427a662002-10-02 11:08:25 +00001463 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001464 break;
1465 }
1466 distinct_instrs++;
1467
njne0ee0712002-05-03 16:41:05 +00001468 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1469
1470 /* Allow for filename switching in the middle of a BB; if this happens,
1471 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001472 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001473 VG_(strcpy)(curr_file, fl_buf);
1474 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1475 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1476 }
1477
njn4f9c9342002-04-29 16:03:24 +00001478 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001479 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001480 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001481 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001482 VG_(printf)(" filenames: BB:%s, instr:%s;"
1483 " fn_names: BB:%s, instr:%s;"
1484 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001485 first_instr_fl, fl_buf,
1486 first_instr_fn, fn_buf,
1487 line_num);
1488 }
1489
njne0ee0712002-05-03 16:41:05 +00001490 VG_(sprintf)(lbuf, "%u ", line_num);
1491 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1492 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001493 }
1494 /* If we switched filenames in the middle of the BB without switching back,
1495 * switch back now because the subsequent BB may be relying on falling under
1496 * the original file name. */
1497 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1498 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1499 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1500 }
njne0ee0712002-05-03 16:41:05 +00001501
1502 /* Mark end of basic block */
1503 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001504
njne427a662002-10-02 11:08:25 +00001505 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001506}
1507
njn25e49d8e72002-09-23 09:36:25 +00001508static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001509{
1510 Int fd;
1511 Char buf[BUF_LEN];
1512 file_node *curr_file_node;
1513 fn_node *curr_fn_node;
1514 BBCC *curr_BBCC;
1515 Int i,j,k;
1516
njn25e49d8e72002-09-23 09:36:25 +00001517 VGP_PUSHCC(VgpCacheResults);
1518 fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
njn4f9c9342002-04-29 16:03:24 +00001519 if (-1 == fd) { file_err(); }
1520
1521 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001522 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1523 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1524 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1525 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1526 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1527 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001528
1529 /* "cmd:" line */
1530 VG_(strcpy)(buf, "cmd:");
1531 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001532 for (i = 0; i < VG_(client_argc); i++) {
1533 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001534 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1535 }
1536 /* "events:" line */
1537 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1538 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1539
1540 /* Six loops here: three for the hash table arrays, and three for the
1541 * chains hanging off the hash table arrays. */
1542 for (i = 0; i < N_FILE_ENTRIES; i++) {
1543 curr_file_node = BBCC_table[i];
1544 while (curr_file_node != NULL) {
1545 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1546 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1547
1548 for (j = 0; j < N_FN_ENTRIES; j++) {
1549 curr_fn_node = curr_file_node->fns[j];
1550 while (curr_fn_node != NULL) {
1551 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1552 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1553
1554 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1555 curr_BBCC = curr_fn_node->BBCCs[k];
1556 while (curr_BBCC != NULL) {
1557 fprint_BBCC(fd, curr_BBCC,
1558
1559 curr_file_node->filename,
1560 curr_fn_node->fn_name);
1561
1562 curr_BBCC = curr_BBCC->next;
1563 }
1564 }
1565 curr_fn_node = curr_fn_node->next;
1566 }
1567 }
1568 curr_file_node = curr_file_node->next;
1569 }
1570 }
1571
njn4294fd42002-06-05 14:41:10 +00001572 /* Print stats from any discarded basic blocks */
1573 if (0 != Ir_discards.a) {
1574
1575 VG_(sprintf)(buf, "fl=(discarded)\n");
1576 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1577 VG_(sprintf)(buf, "fn=(discarded)\n");
1578 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1579
1580 /* Use 0 as line number */
1581 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1582 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1583 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1584 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1585 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1586
1587 Ir_total.a += Ir_discards.a;
1588 Ir_total.m1 += Ir_discards.m1;
1589 Ir_total.m2 += Ir_discards.m2;
1590 Dr_total.a += Dr_discards.a;
1591 Dr_total.m1 += Dr_discards.m1;
1592 Dr_total.m2 += Dr_discards.m2;
1593 Dw_total.a += Dw_discards.a;
1594 Dw_total.m1 += Dw_discards.m1;
1595 Dw_total.m2 += Dw_discards.m2;
1596 }
1597
njn4f9c9342002-04-29 16:03:24 +00001598 /* Summary stats must come after rest of table, since we calculate them
1599 * during traversal. */
1600 VG_(sprintf)(buf, "summary: "
1601 "%llu %llu %llu "
1602 "%llu %llu %llu "
1603 "%llu %llu %llu\n",
1604 Ir_total.a, Ir_total.m1, Ir_total.m2,
1605 Dr_total.a, Dr_total.m1, Dr_total.m2,
1606 Dw_total.a, Dw_total.m1, Dw_total.m2);
1607 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1608 VG_(close)(fd);
1609}
1610
1611/* Adds commas to ULong, right justifying in a field field_width wide, returns
1612 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001613static
njn4f9c9342002-04-29 16:03:24 +00001614Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1615{
1616 int len, n_commas, i, j, new_len, space;
1617
1618 VG_(sprintf)(buf, "%lu", n);
1619 len = VG_(strlen)(buf);
1620 n_commas = (len - 1) / 3;
1621 new_len = len + n_commas;
1622 space = field_width - new_len;
1623
1624 /* Allow for printing a number in a field_width smaller than it's size */
1625 if (space < 0) space = 0;
1626
1627 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1628 * of three. */
1629 for (j = -1, i = len ; i >= 0; i--) {
1630 buf[i + n_commas + space] = buf[i];
1631
1632 if (3 == ++j) {
1633 j = 0;
1634 n_commas--;
1635 buf[i + n_commas + space] = ',';
1636 }
1637 }
1638 /* Right justify in field. */
1639 for (i = 0; i < space; i++) buf[i] = ' ';
1640 return new_len;
1641}
1642
sewardj4f29ddf2002-05-03 22:29:04 +00001643static
njn4f9c9342002-04-29 16:03:24 +00001644void percentify(Int n, Int pow, Int field_width, char buf[])
1645{
1646 int i, len, space;
1647
1648 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1649 len = VG_(strlen)(buf);
1650 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001651 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001652 i = len;
1653
1654 /* Right justify in field */
1655 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1656 for (i = 0; i < space; i++) buf[i] = ' ';
1657}
1658
njn25e49d8e72002-09-23 09:36:25 +00001659void SK_(fini)(void)
njn4f9c9342002-04-29 16:03:24 +00001660{
1661 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001662 ULong L2_total_m, L2_total_mr, L2_total_mw,
1663 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001664 char buf1[RESULTS_BUF_LEN],
1665 buf2[RESULTS_BUF_LEN],
1666 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001667 Int l1, l2, l3;
1668 Int p;
1669
njn25e49d8e72002-09-23 09:36:25 +00001670 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001671
njn7cf0bd32002-06-08 13:36:03 +00001672 if (VG_(clo_verbosity) == 0)
1673 return;
1674
njn4f9c9342002-04-29 16:03:24 +00001675 /* I cache results. Use the I_refs value to determine the first column
1676 * width. */
1677 l1 = commify(Ir_total.a, 0, buf1);
1678 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1679
1680 commify(Ir_total.m1, l1, buf1);
1681 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1682
1683 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001684 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001685
1686 p = 100;
1687
njn25e49d8e72002-09-23 09:36:25 +00001688 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001689 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1690 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1691
1692 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1693 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1694 VG_(message)(Vg_UserMsg, "");
1695
1696 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1697 * width of columns 2 & 3. */
1698 D_total.a = Dr_total.a + Dw_total.a;
1699 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1700 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1701
njn1d021fa2002-05-02 13:56:34 +00001702 commify( D_total.a, l1, buf1);
1703 l2 = commify(Dr_total.a, 0, buf2);
1704 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001705 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1706 buf1, buf2, buf3);
1707
1708 commify( D_total.m1, l1, buf1);
1709 commify(Dr_total.m1, l2, buf2);
1710 commify(Dw_total.m1, l3, buf3);
1711 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1712 buf1, buf2, buf3);
1713
1714 commify( D_total.m2, l1, buf1);
1715 commify(Dr_total.m2, l2, buf2);
1716 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001717 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001718 buf1, buf2, buf3);
1719
1720 p = 10;
1721
njn25e49d8e72002-09-23 09:36:25 +00001722 if (0 == D_total.a) D_total.a = 1;
1723 if (0 == Dr_total.a) Dr_total.a = 1;
1724 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001725 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1726 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1727 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1728 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1729
1730 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1731 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1732 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1733 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1734 VG_(message)(Vg_UserMsg, "");
1735
1736 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001737
1738 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1739 L2_total_r = Dr_total.m1 + Ir_total.m1;
1740 L2_total_w = Dw_total.m1;
1741 commify(L2_total, l1, buf1);
1742 commify(L2_total_r, l2, buf2);
1743 commify(L2_total_w, l3, buf3);
1744 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1745 buf1, buf2, buf3);
1746
njn4f9c9342002-04-29 16:03:24 +00001747 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1748 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1749 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001750 commify(L2_total_m, l1, buf1);
1751 commify(L2_total_mr, l2, buf2);
1752 commify(L2_total_mw, l3, buf3);
1753 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1754 buf1, buf2, buf3);
1755
1756 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1757 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1758 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1759 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1760
1761
1762 /* Hash table stats */
1763 if (VG_(clo_verbosity) > 1) {
1764 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1765 file_line_debug_BBs + no_debug_BBs;
1766
1767 VG_(message)(Vg_DebugMsg, "");
1768 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1769 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1770 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1771 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1772 full_debug_BBs * 100 / BB_lookups,
1773 full_debug_BBs);
1774 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1775 file_line_debug_BBs * 100 / BB_lookups,
1776 file_line_debug_BBs);
1777 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1778 fn_name_debug_BBs * 100 / BB_lookups,
1779 fn_name_debug_BBs);
1780 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1781 no_debug_BBs * 100 / BB_lookups,
1782 no_debug_BBs);
1783 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1784 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1785 }
njn25e49d8e72002-09-23 09:36:25 +00001786 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001787}
1788
sewardj18d75132002-05-16 11:06:21 +00001789
njn4294fd42002-06-05 14:41:10 +00001790/* Called when a translation is invalidated due to self-modifying code or
1791 * unloaded of a shared object.
1792 *
1793 * Finds the BBCC in the table, removes it, adds the counts to the discard
1794 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001795void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001796{
njn4294fd42002-06-05 14:41:10 +00001797 BBCC *BBCC_node;
1798 Addr BBCC_ptr0, BBCC_ptr;
1799 Bool BB_seen_before;
1800
sewardj83205b32002-06-14 11:08:07 +00001801 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001802 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001803
1804 /* 2nd arg won't be used since BB should have been seen before (assertions
1805 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001806 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001807 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1808
njne427a662002-10-02 11:08:25 +00001809 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001810
1811 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1812
1813 /* We pretend the CC is an iCC for getting the tag. This is ok
1814 * because both CC types have tag as their first byte. Once we know
1815 * the type, we can cast and act appropriately. */
1816
1817 switch ( ((iCC*)BBCC_ptr)->tag ) {
1818
njn25e49d8e72002-09-23 09:36:25 +00001819 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001820 ADD_CC_TO(iCC, I, Ir_discards);
1821 BBCC_ptr += sizeof(iCC);
1822 break;
1823
njn25e49d8e72002-09-23 09:36:25 +00001824 case ReadCC:
1825 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001826 ADD_CC_TO(idCC, I, Ir_discards);
1827 ADD_CC_TO(idCC, D, Dr_discards);
1828 BBCC_ptr += sizeof(idCC);
1829 break;
1830
njn25e49d8e72002-09-23 09:36:25 +00001831 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001832 ADD_CC_TO(idCC, I, Ir_discards);
1833 ADD_CC_TO(idCC, D, Dw_discards);
1834 BBCC_ptr += sizeof(idCC);
1835 break;
1836
njn25e49d8e72002-09-23 09:36:25 +00001837 case ReadWriteCC:
1838 ADD_CC_TO(iddCC, I, Ir_discards);
1839 ADD_CC_TO(iddCC, Da, Dr_discards);
1840 ADD_CC_TO(iddCC, Db, Dw_discards);
1841 BBCC_ptr += sizeof(iddCC);
1842 break;
1843
njn4294fd42002-06-05 14:41:10 +00001844 default:
njne427a662002-10-02 11:08:25 +00001845 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001846 break;
1847 }
1848 }
njn25e49d8e72002-09-23 09:36:25 +00001849 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001850}
1851
1852/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001853/*--- Command line processing ---*/
1854/*--------------------------------------------------------------------*/
1855
1856static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1857{
1858 int i1, i2, i3;
1859 int i;
1860 char *opt = VG_(strdup)(orig_opt);
1861
1862 i = i1 = opt_len;
1863
1864 /* Option looks like "--I1=65536,2,64".
1865 * Find commas, replace with NULs to make three independent
1866 * strings, then extract numbers. Yuck. */
1867 while (VG_(isdigit)(opt[i])) i++;
1868 if (',' == opt[i]) {
1869 opt[i++] = '\0';
1870 i2 = i;
1871 } else goto bad;
1872 while (VG_(isdigit)(opt[i])) i++;
1873 if (',' == opt[i]) {
1874 opt[i++] = '\0';
1875 i3 = i;
1876 } else goto bad;
1877 while (VG_(isdigit)(opt[i])) i++;
1878 if ('\0' != opt[i]) goto bad;
1879
1880 cache->size = (Int)VG_(atoll)(opt + i1);
1881 cache->assoc = (Int)VG_(atoll)(opt + i2);
1882 cache->line_size = (Int)VG_(atoll)(opt + i3);
1883
1884 VG_(free)(opt);
1885
1886 return;
1887
1888 bad:
1889 VG_(bad_option)(orig_opt);
1890}
1891
1892Bool SK_(process_cmd_line_option)(Char* arg)
1893{
1894 /* 5 is length of "--I1=" */
1895 if (0 == VG_(strncmp)(arg, "--I1=", 5))
1896 parse_cache_opt(&clo_I1_cache, arg, 5);
1897 else if (0 == VG_(strncmp)(arg, "--D1=", 5))
1898 parse_cache_opt(&clo_D1_cache, arg, 5);
1899 else if (0 == VG_(strncmp)(arg, "--L2=", 5))
1900 parse_cache_opt(&clo_L2_cache, arg, 5);
1901 else
1902 return False;
1903
1904 return True;
1905}
1906
1907Char* SK_(usage)(void)
1908{
1909 return
1910" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1911" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1912" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n";
1913}
1914
1915/*--------------------------------------------------------------------*/
1916/*--- Setup ---*/
1917/*--------------------------------------------------------------------*/
1918
njnd04b7c62002-10-03 14:05:52 +00001919void SK_(pre_clo_init)(VgDetails* details, VgNeeds* needs,
1920 VgTrackEvents* not_used)
njn25e49d8e72002-09-23 09:36:25 +00001921{
sewardj4aa62ba2002-10-05 15:49:27 +00001922 details->name = "Cachegrind";
njnd04b7c62002-10-03 14:05:52 +00001923 details->version = NULL;
1924 details->description = "an I1/D1/L2 cache profiler";
1925 details->copyright_author =
1926 "Copyright (C) 2002, and GNU GPL'd, by Nicholas Nethercote.";
1927 details->bug_reports_to = "njn25@cam.ac.uk";
njn25e49d8e72002-09-23 09:36:25 +00001928
njnd04b7c62002-10-03 14:05:52 +00001929 needs->basic_block_discards = True;
1930 needs->command_line_options = True;
njn25e49d8e72002-09-23 09:36:25 +00001931
1932 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
1933 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
1934 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
1935 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
1936 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
1937 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
1938}
1939
1940void SK_(post_clo_init)(void)
1941{
1942 cache_t I1c, D1c, L2c;
1943 Int fd;
1944
1945 /* Set output file name: cachegrind.<pid>.out */
1946 VG_(sprintf)(cachegrind_out_file, "cachegrind.out.%d", VG_(getpid)());
1947
1948 /* Make sure the output file can be written. */
1949 fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1950 if (-1 == fd) {
1951 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY,
1952 VKI_S_IRUSR|VKI_S_IWUSR);
1953 if (-1 == fd) {
1954 file_err();
1955 }
1956 }
1957 VG_(close)(fd);
1958
1959 initCC(&Ir_total);
1960 initCC(&Dr_total);
1961 initCC(&Dw_total);
1962
1963 initCC(&Ir_discards);
1964 initCC(&Dr_discards);
1965 initCC(&Dw_discards);
1966
1967 get_caches(&I1c, &D1c, &L2c);
1968
1969 cachesim_I1_initcache(I1c);
1970 cachesim_D1_initcache(D1c);
1971 cachesim_L2_initcache(L2c);
1972
1973 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
1974 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1975 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
1976
1977 init_BBCC_table();
1978}
1979
#if 0
/* Compiled out.  The expensive check walks vg_tt and, for every entry that
 * passes the orig_addr filter below, calls get_BBCC() with remove=True. */
Bool SK_(cheap_sanity_check)(void) { return True; }

extern TTEntry* vg_tt;

Bool SK_(expensive_sanity_check)(void)
{
   Int  i;
   Bool dummy;

   /* NOTE(review): 200191 is presumably the number of entries in vg_tt --
    * confirm against its definition. */
   for (i = 0; i < 200191; i++) {
      /* NOTE(review): orig_addr values 1 and 3 look like markers for slots
       * that hold no translation -- confirm. */
      if (vg_tt[i].orig_addr == (Addr)1 || vg_tt[i].orig_addr == (Addr)3)
         continue;

      VG_(printf)(".");
      get_BBCC(vg_tt[i].orig_addr, NULL, /*remove=*/True, &dummy);
   }
   return True;
}
#endif
1999
2000/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002001/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002002/*--------------------------------------------------------------------*/