blob: 2a91bc0c51339df58164d97d37f918f355569602 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
njnc9539842002-10-02 13:26:35 +00009 This file is part of Cachegrind, a Valgrind skin for cache
10 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
sewardj3c23d432002-06-01 23:43:49 +000012 Copyright (C) 2002 Nicholas Nethercote
13 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
njn27f1a382002-11-08 15:48:16 +000036VG_DETERMINE_INTERFACE_VERSION
37
/* Parameters describing one cache, for cache simulation. */
typedef struct {
   int size;        /* total size, in bytes */
   int assoc;       /* associativity */
   int line_size;   /* line size, in bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000044
njn25cac76cb2002-09-23 11:21:57 +000045#include "cg_sim_L2.c"
46#include "cg_sim_I1.c"
47#include "cg_sim_D1.c"
njn4f9c9342002-04-29 16:03:24 +000048
njn25e49d8e72002-09-23 09:36:25 +000049/*------------------------------------------------------------*/
50/*--- Constants ---*/
51/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000052
/* Largest x86 instruction size, according to the IA-32 Intel Architecture
 * Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE    16

/* Data sizes of FPU reads and of writes that exceed this value are clamped
 * to it when a basic block is instrumented. */
#define MIN_LINE_SIZE         16

/* Size of various buffers used for storing strings */
#define FILENAME_LEN          256
#define FN_NAME_LEN           256
#define BUF_LEN               512
#define COMMIFY_BUF_LEN       128
#define RESULTS_BUF_LEN       128
#define LINE_BUF_LEN           64
65
66/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000067/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000068/*------------------------------------------------------------*/
69
njn25e49d8e72002-09-23 09:36:25 +000070typedef
71 enum {
72 VgpGetBBCC = VgpFini+1,
73 VgpCacheSimulate,
74 VgpCacheResults
75 }
76 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000077
njn4f9c9342002-04-29 16:03:24 +000078/*------------------------------------------------------------*/
79/*--- Output file related stuff ---*/
80/*------------------------------------------------------------*/
81
njn25e49d8e72002-09-23 09:36:25 +000082Char cachegrind_out_file[FILENAME_LEN];
njn4f9c9342002-04-29 16:03:24 +000083
sewardj0744b6c2002-12-11 00:45:42 +000084static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000085{
86 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000087 "error: can't open cache simulation output file `%s'",
88 cachegrind_out_file );
89 VG_(message)(Vg_UserMsg,
90 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000091}
92
93/*------------------------------------------------------------*/
94/*--- Cost center types, operations ---*/
95/*------------------------------------------------------------*/
96
97typedef struct _CC CC;
98struct _CC {
99 ULong a;
100 ULong m1;
101 ULong m2;
102};
103
104static __inline__ void initCC(CC* cc) {
105 cc->a = 0;
106 cc->m1 = 0;
107 cc->m2 = 0;
108}
109
/* Kind of cost centre, stored in the 'tag' byte of each iCC/idCC/iddCC. */
typedef enum {
   InstrCC,       /* eg. mov %eax,   %ebx                      */
   ReadCC,        /* eg. mov (%ecx), %esi                      */
   WriteCC,       /* eg. mov %eax,   (%edx)                    */
   ModCC,         /* eg. incl (%eax) (read+write one addr)     */
   ReadWriteCC,   /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
                     (read+write two different addrs)          */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000119
/* Instruction-level cost-centres.  The typedefs for these structs follow
 * directly below.
 *
 * WARNING:  the 'tag' field *must* be the first byte of all three CC types
 * (iCC, idCC and iddCC).
 *
 * This is because we use it to work out what kind of CC we're dealing with.
 */
njn25e49d8e72002-09-23 09:36:25 +0000127typedef
128 struct {
129 /* word 1 */
130 UChar tag;
131 UChar instr_size;
132 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000133
njn25e49d8e72002-09-23 09:36:25 +0000134 /* words 2+ */
135 Addr instr_addr;
136 CC I;
137 }
138 iCC;
njn4f9c9342002-04-29 16:03:24 +0000139
njn25e49d8e72002-09-23 09:36:25 +0000140typedef
141 struct _idCC {
142 /* word 1 */
143 UChar tag;
144 UChar instr_size;
145 UChar data_size;
146 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000147
njn25e49d8e72002-09-23 09:36:25 +0000148 /* words 2+ */
149 Addr instr_addr;
150 CC I;
151 CC D;
152 }
153 idCC;
154
155typedef
156 struct _iddCC {
157 /* word 1 */
158 UChar tag;
159 UChar instr_size;
160 UChar data_size;
161 /* 1 byte padding */
162
163 /* words 2+ */
164 Addr instr_addr;
165 CC I;
166 CC Da;
167 CC Db;
168 }
169 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000170
171static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
172{
njn25e49d8e72002-09-23 09:36:25 +0000173 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000174 cc->instr_size = instr_size;
175 cc->instr_addr = instr_addr;
176 initCC(&cc->I);
177}
178
179static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
180 UInt instr_size, UInt data_size)
181{
182 cc->tag = X_CC;
183 cc->instr_size = instr_size;
184 cc->data_size = data_size;
185 cc->instr_addr = instr_addr;
186 initCC(&cc->I);
187 initCC(&cc->D);
188}
189
njn25e49d8e72002-09-23 09:36:25 +0000190static void init_iddCC(iddCC* cc, Addr instr_addr,
191 UInt instr_size, UInt data_size)
192{
193 cc->tag = ReadWriteCC;
194 cc->instr_size = instr_size;
195 cc->data_size = data_size;
196 cc->instr_addr = instr_addr;
197 initCC(&cc->I);
198 initCC(&cc->Da);
199 initCC(&cc->Db);
200}
201
/* Add the three counters of member 'cc' of the CC_type-typed struct found at
 * BBCC_ptr into 'total'.  BBCC_ptr is not a macro parameter: it must be in
 * scope wherever the macro is used.  The expansion is three complete
 * statements, so it must not be used as the unbraced body of an if or loop. */
#define ADD_CC_TO(CC_type, cc, total)                    \
   (total).a  += ((CC_type*)BBCC_ptr)->cc.a;             \
   (total).m1 += ((CC_type*)BBCC_ptr)->cc.m1;            \
   (total).m2 += ((CC_type*)BBCC_ptr)->cc.m2;
206
njn95114da2002-06-05 09:39:31 +0000207/* If 1, address of each instruction is printed as a comment after its counts
208 * in cachegrind.out */
209#define PRINT_INSTR_ADDRS 0
210
njne0ee0712002-05-03 16:41:05 +0000211static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000212{
njn95114da2002-06-05 09:39:31 +0000213#if PRINT_INSTR_ADDRS
214 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
215 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
216#else
njne0ee0712002-05-03 16:41:05 +0000217 VG_(sprintf)(buf, "%llu %llu %llu\n",
218 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000219#endif
njn4f9c9342002-04-29 16:03:24 +0000220}
221
njne0ee0712002-05-03 16:41:05 +0000222static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000223{
njn95114da2002-06-05 09:39:31 +0000224#if PRINT_INSTR_ADDRS
225 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
226 cc->I.a, cc->I.m1, cc->I.m2,
227 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
228#else
njne0ee0712002-05-03 16:41:05 +0000229 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
230 cc->I.a, cc->I.m1, cc->I.m2,
231 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000232#endif
njn4f9c9342002-04-29 16:03:24 +0000233}
234
njne0ee0712002-05-03 16:41:05 +0000235static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000236{
njn95114da2002-06-05 09:39:31 +0000237#if PRINT_INSTR_ADDRS
238 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
239 cc->I.a, cc->I.m1, cc->I.m2,
240 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
241#else
njne0ee0712002-05-03 16:41:05 +0000242 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
243 cc->I.a, cc->I.m1, cc->I.m2,
244 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000245#endif
njn4f9c9342002-04-29 16:03:24 +0000246}
247
njn25e49d8e72002-09-23 09:36:25 +0000248static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
249{
250#if PRINT_INSTR_ADDRS
251 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
252 cc->I.a, cc->I.m1, cc->I.m2,
253 cc->Da.a, cc->Da.m1, cc->Da.m2,
254 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
255#else
256 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
257 cc->I.a, cc->I.m1, cc->I.m2,
258 cc->Da.a, cc->Da.m1, cc->Da.m2,
259 cc->Db.a, cc->Db.m1, cc->Db.m2);
260#endif
261}
262
263
njn4f9c9342002-04-29 16:03:24 +0000264/*------------------------------------------------------------*/
265/*--- BBCC hash table stuff ---*/
266/*------------------------------------------------------------*/
267
/* The table of BBCCs is of the form hash(filename, hash(fn_name,
 * hash(BBCCs))).  Each hash table is separately chained.  The sizes below
 * (numbers of buckets at each of the three levels) work fairly well for
 * Konqueror. */

#define N_FILE_ENTRIES        251   /* buckets in BBCC_table */
#define N_FN_ENTRIES           53   /* buckets per file_node */
#define N_BBCC_ENTRIES         37   /* buckets per fn_node   */
275
276/* The cost centres for a basic block are stored in a contiguous array.
277 * They are distinguishable by their tag field. */
278typedef struct _BBCC BBCC;
279struct _BBCC {
280 Addr orig_addr;
281 UInt array_size; /* byte-size of variable length array */
282 BBCC* next;
283 Addr array[0]; /* variable length array */
284};
285
286typedef struct _fn_node fn_node;
287struct _fn_node {
288 Char* fn_name;
289 BBCC* BBCCs[N_BBCC_ENTRIES];
290 fn_node* next;
291};
292
293typedef struct _file_node file_node;
294struct _file_node {
295 Char* filename;
296 fn_node* fns[N_FN_ENTRIES];
297 file_node* next;
298};
299
300/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000301static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000302
sewardj4f29ddf2002-05-03 22:29:04 +0000303static Int distinct_files = 0;
304static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000305
sewardj4f29ddf2002-05-03 22:29:04 +0000306static Int distinct_instrs = 0;
307static Int full_debug_BBs = 0;
308static Int file_line_debug_BBs = 0;
309static Int fn_name_debug_BBs = 0;
310static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000311
sewardj4f29ddf2002-05-03 22:29:04 +0000312static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000313
njn4294fd42002-06-05 14:41:10 +0000314static CC Ir_discards;
315static CC Dr_discards;
316static CC Dw_discards;
317
njn4f9c9342002-04-29 16:03:24 +0000318static void init_BBCC_table()
319{
320 Int i;
321 for (i = 0; i < N_FILE_ENTRIES; i++)
322 BBCC_table[i] = NULL;
323}
324
njne0ee0712002-05-03 16:41:05 +0000325static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
326 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000327{
njn25e49d8e72002-09-23 09:36:25 +0000328 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000329
njn25e49d8e72002-09-23 09:36:25 +0000330 found1 = VG_(get_filename_linenum)(instr_addr, filename,
331 FILENAME_LEN, line_num);
332 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000333
334 if (!found1 && !found2) {
335 no_debug_BBs++;
336 VG_(strcpy)(filename, "???");
337 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000338 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000339
340 } else if ( found1 && found2) {
341 full_debug_BBs++;
342
343 } else if ( found1 && !found2) {
344 file_line_debug_BBs++;
345 VG_(strcpy)(fn_name, "???");
346
347 } else /*(!found1 && found2)*/ {
348 fn_name_debug_BBs++;
349 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000350 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000351 }
352}
353
354/* Forward declaration. */
355static Int compute_BBCC_array_size(UCodeBlock* cb);
356
357static __inline__
358file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
359{
360 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000361 file_node* new = VG_(malloc)(sizeof(file_node));
362 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000363 for (i = 0; i < N_FN_ENTRIES; i++) {
364 new->fns[i] = NULL;
365 }
366 new->next = next;
367 return new;
368}
369
370static __inline__
371fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
372{
373 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000374 fn_node* new = VG_(malloc)(sizeof(fn_node));
375 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000376 for (i = 0; i < N_BBCC_ENTRIES; i++) {
377 new->BBCCs[i] = NULL;
378 }
379 new->next = next;
380 return new;
381}
382
383static __inline__
384BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
385{
386 Int BBCC_array_size = compute_BBCC_array_size(cb);
387 BBCC* new;
388
njn25e49d8e72002-09-23 09:36:25 +0000389 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000390 new->orig_addr = bb_orig_addr;
391 new->array_size = BBCC_array_size;
392 new->next = next;
393
394 return new;
395}
396
397#define HASH_CONSTANT 256
398
399static UInt hash(Char *s, UInt table_size)
400{
401 int hash_value = 0;
402 for ( ; *s; s++)
403 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
404 return hash_value;
405}
406
407/* Do a three step traversal: by filename, then fn_name, then instr_addr.
408 * In all cases prepends new nodes to their chain. Returns a pointer to the
409 * cost centre. Also sets BB_seen_before by reference.
410 */
411static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000412 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000413{
414 file_node *curr_file_node;
415 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000416 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000417 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
418 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000419 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000420
njne0ee0712002-05-03 16:41:05 +0000421 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000422
njn25e49d8e72002-09-23 09:36:25 +0000423 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000424 filename_hash = hash(filename, N_FILE_ENTRIES);
425 curr_file_node = BBCC_table[filename_hash];
426 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000427 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000428 curr_file_node = curr_file_node->next;
429 }
430 if (NULL == curr_file_node) {
431 BBCC_table[filename_hash] = curr_file_node =
432 new_file_node(filename, BBCC_table[filename_hash]);
433 distinct_files++;
434 }
435
436 fnname_hash = hash(fn_name, N_FN_ENTRIES);
437 curr_fn_node = curr_file_node->fns[fnname_hash];
438 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000439 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000440 curr_fn_node = curr_fn_node->next;
441 }
442 if (NULL == curr_fn_node) {
443 curr_file_node->fns[fnname_hash] = curr_fn_node =
444 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
445 distinct_fns++;
446 }
447
448 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000449 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000450 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
451 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000452 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000453 curr_BBCC = curr_BBCC->next;
454 }
455 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000456
njne427a662002-10-02 11:08:25 +0000457 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000458
njn4f9c9342002-04-29 16:03:24 +0000459 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
460 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
461 *BB_seen_before = False;
462
463 } else {
njne427a662002-10-02 11:08:25 +0000464 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
465 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000466 if (VG_(clo_verbosity) > 2) {
467 VG_(message)(Vg_DebugMsg,
468 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000469 }
470 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000471
472 if (True == remove) {
473 // Remove curr_BBCC from chain; it will be used and free'd by the
474 // caller.
475 *prev_BBCC_next_ptr = curr_BBCC->next;
476
477 } else {
478 BB_retranslations++;
479 }
njn4f9c9342002-04-29 16:03:24 +0000480 }
njn25e49d8e72002-09-23 09:36:25 +0000481 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000482 return curr_BBCC;
483}
484
485/*------------------------------------------------------------*/
486/*--- Cache simulation instrumentation phase ---*/
487/*------------------------------------------------------------*/
488
njn4f9c9342002-04-29 16:03:24 +0000489static Int compute_BBCC_array_size(UCodeBlock* cb)
490{
491 UInstr* u_in;
492 Int i, CC_size, BBCC_size = 0;
493 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000494 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000495
496 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000497 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000498
njn810086f2002-11-14 12:42:47 +0000499 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
500 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000501 switch(u_in->opcode) {
502
503 case INCEIP:
504 goto case_for_end_of_instr;
505
506 case JMP:
507 if (u_in->cond != CondAlways) break;
508
509 goto case_for_end_of_instr;
510
511 case_for_end_of_instr:
512
njn25e49d8e72002-09-23 09:36:25 +0000513 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
514 t_read != t_write)
515 CC_size = sizeof(iddCC);
516 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
517 CC_size = sizeof(idCC);
518 else
519 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000520
521 BBCC_size += CC_size;
522 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
523 break;
524
525 case LOAD:
526 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000527 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000528 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000529 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000530 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000531 is_LOAD = True;
532 break;
533
534 case STORE:
535 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000536 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000537 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000538 is_STORE = True;
539 break;
540
541 case FPU_R:
njne427a662002-10-02 11:08:25 +0000542 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000543 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000544 is_FPU_R = True;
545 break;
546
547 case FPU_W:
njne427a662002-10-02 11:08:25 +0000548 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000549 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000550 is_FPU_W = True;
551 break;
552
553 default:
554 break;
555 }
556 }
557
558 return BBCC_size;
559}
560
njn25e49d8e72002-09-23 09:36:25 +0000561static __attribute__ ((regparm (1)))
562void log_1I_0D_cache_access(iCC* cc)
563{
564 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
565 // cc, cc->instr_addr, cc->instr_size)
566 VGP_PUSHCC(VgpCacheSimulate);
567 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
568 cc->I.a++;
569 VGP_POPCC(VgpCacheSimulate);
570}
571
572/* Difference between this function and log_1I_0D_cache_access() is that
573 this one can be passed any kind of CC, not just an iCC. So we have to
574 be careful to make sure we don't make any assumptions about CC layout.
575 (As it stands, they would be safe, but this will avoid potential heartache
576 if anyone else changes CC layout.)
577 Note that we only do the switch for the JIFZ version because if we always
578 called this switching version, things would run about 5% slower. */
579static __attribute__ ((regparm (1)))
580void log_1I_0D_cache_access_JIFZ(iCC* cc)
581{
582 UChar instr_size;
583 Addr instr_addr;
584 CC* I;
585
586 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
587 // cc, cc->instr_addr, cc->instr_size)
588 VGP_PUSHCC(VgpCacheSimulate);
589
590 switch(cc->tag) {
591 case InstrCC:
592 instr_size = cc->instr_size;
593 instr_addr = cc->instr_addr;
594 I = &(cc->I);
595 break;
596 case ReadCC:
597 case WriteCC:
598 case ModCC:
599 instr_size = ((idCC*)cc)->instr_size;
600 instr_addr = ((idCC*)cc)->instr_addr;
601 I = &( ((idCC*)cc)->I );
602 break;
603 case ReadWriteCC:
604 instr_size = ((iddCC*)cc)->instr_size;
605 instr_addr = ((iddCC*)cc)->instr_addr;
606 I = &( ((iddCC*)cc)->I );
607 break;
608 default:
njne427a662002-10-02 11:08:25 +0000609 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000610 break;
611 }
612 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
613 I->a++;
614 VGP_POPCC(VgpCacheSimulate);
615}
616
617__attribute__ ((regparm (2))) static
618void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
619{
620 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
621 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
622 VGP_PUSHCC(VgpCacheSimulate);
623 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
624 cc->D.a++;
625 VGP_POPCC(VgpCacheSimulate);
626}
627
628__attribute__ ((regparm (2))) static
629void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
630{
631 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
632 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
633 VGP_PUSHCC(VgpCacheSimulate);
634 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
635 cc->I.a++;
636
637 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
638 cc->D.a++;
639 VGP_POPCC(VgpCacheSimulate);
640}
641
642__attribute__ ((regparm (3))) static
643void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
644{
645 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
646 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
647 VGP_PUSHCC(VgpCacheSimulate);
648 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
649 cc->Da.a++;
650 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
651 cc->Db.a++;
652 VGP_POPCC(VgpCacheSimulate);
653}
654
655__attribute__ ((regparm (3))) static
656void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
657{
658 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
659 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
660 VGP_PUSHCC(VgpCacheSimulate);
661 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
662 cc->I.a++;
663
664 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
665 cc->Da.a++;
666 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
667 cc->Db.a++;
668 VGP_POPCC(VgpCacheSimulate);
669}
670
671UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
672{
673/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000674#define INVALID_DATA_SIZE 999999
675
njn4f9c9342002-04-29 16:03:24 +0000676 UCodeBlock* cb;
677 Int i;
678 UInstr* u_in;
679 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000680 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
681 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000682 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000683 Addr x86_instr_addr = orig_addr;
684 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
685 Addr helper;
686 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000687 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000688 Bool BB_seen_before = False;
689 Bool instrumented_Jcond = False;
690 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000691 Addr BBCC_ptr0, BBCC_ptr;
692
693 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
694 * if it's the first time it's been seen), and point to start of the
695 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000696 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000697 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
698
njn810086f2002-11-14 12:42:47 +0000699 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000700
njn25e49d8e72002-09-23 09:36:25 +0000701 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
702 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000703
njn810086f2002-11-14 12:42:47 +0000704 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
705 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000706
njn4f9c9342002-04-29 16:03:24 +0000707 /* What this is all about: we want to instrument each x86 instruction
708 * translation. The end of these are marked in three ways. The three
709 * ways, and the way we instrument them, are as follows:
710 *
711 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
712 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
713 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
714 *
njn25e49d8e72002-09-23 09:36:25 +0000715 * The last UInstr in a basic block is always a Juncond. Jconds,
716 * when they appear, are always second last. We check this with
717 * various assertions.
718 *
719 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000720 * executed. We don't have to put the instrumentation before the INCEIP
721 * (it could go after) but we do so for consistency.
722 *
njn25e49d8e72002-09-23 09:36:25 +0000723 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
724 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000725 *
njn25e49d8e72002-09-23 09:36:25 +0000726 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000727 *
728 * The instrumentation is just a call to the appropriate helper function,
729 * passing it the address of the instruction's CC.
730 */
njne427a662002-10-02 11:08:25 +0000731 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000732
733 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000734 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000735 break;
736
njn4f9c9342002-04-29 16:03:24 +0000737 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000738 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000739 */
740 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000741 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000742 t_read_addr = newTemp(cb);
743 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
744 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000745 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000746 break;
747
748 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000749 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000750 t_read_addr = newTemp(cb);
751 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000752 data_size = ( u_in->size <= MIN_LINE_SIZE
753 ? u_in->size
754 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000755 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000756 break;
757
758 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000759 * That's how the code above determines whether it does a write.
760 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000761 * As for the MOV, if it's a mod instruction it's redundant, but it's
762 * not expensive and mod instructions are rare anyway. */
763 case STORE:
764 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000765 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000766 t_write_addr = newTemp(cb);
767 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000768 /* 28 and 108 B data-sized instructions will be done
769 * inaccurately but they're very rare and this avoids errors
770 * from hitting more than two cache lines in the simulation. */
771 data_size = ( u_in->size <= MIN_LINE_SIZE
772 ? u_in->size
773 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000774 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000775 break;
776
njn25e49d8e72002-09-23 09:36:25 +0000777
778 /* For rep-prefixed instructions, log a single I-cache access
779 * before the UCode loop that implements the repeated part, which
780 * is where the multiple D-cache accesses are logged. */
781 case JIFZ:
782 has_rep_prefix = True;
783
784 /* Setup 1st and only arg: CC addr */
785 t_CC_addr = newTemp(cb);
786 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
787 uLiteral(cb, BBCC_ptr);
788
789 /* Call helper */
790 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
791 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000792 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000793 break;
794
795
796 /* INCEIP: insert instrumentation */
797 case INCEIP:
798 x86_instr_size = u_in->val1;
799 goto instrument_x86_instr;
800
801 /* JMP: insert instrumentation if the first JMP */
802 case JMP:
803 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000804 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000805 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000806 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000807 instrumented_Jcond = False; /* reset */
808 break;
809 }
810 /* The first JMP... instrument. */
811 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000812 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000813 instrumented_Jcond = True;
814 } else {
njn810086f2002-11-14 12:42:47 +0000815 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000816 }
817
818 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +0000819 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
820
njn25e49d8e72002-09-23 09:36:25 +0000821 goto instrument_x86_instr;
822
823
824 /* Code executed at the end of each x86 instruction. */
825 instrument_x86_instr:
826
827 /* Initialise the CC in the BBCC array appropriately if it
828 * hasn't been initialised before. Then call appropriate sim
829 * function, passing it the CC address. */
830 stack_used = 0;
831
njne427a662002-10-02 11:08:25 +0000832 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000833 x86_instr_size <= MAX_x86_INSTR_SIZE);
834
835#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
836
837 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000838 sk_assert(INVALID_DATA_SIZE == data_size);
839 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000840 INVALID_TEMPREG == t_read &&
841 INVALID_TEMPREG == t_write_addr &&
842 INVALID_TEMPREG == t_write);
843 CC_size = sizeof(iCC);
844 if (!BB_seen_before)
845 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
846 helper = ( has_rep_prefix
847 ? (Addr)0 /* no extra log needed */
848 : (Addr) & log_1I_0D_cache_access
849 );
850 argc = 1;
851
852 } else {
njne427a662002-10-02 11:08:25 +0000853 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000854 8 == data_size || 10 == data_size ||
855 MIN_LINE_SIZE == data_size);
856
857 if (IS_(read) && !IS_(write)) {
858 CC_size = sizeof(idCC);
859 /* If it uses 'rep', we've already logged the I-cache
860 * access at the JIFZ UInstr (see JIFZ case below) so
861 * don't do it here */
862 helper = ( has_rep_prefix
863 ? (Addr) & log_0I_1D_cache_access
864 : (Addr) & log_1I_1D_cache_access
865 );
866 argc = 2;
867 if (!BB_seen_before)
868 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
869 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000870 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000871 INVALID_TEMPREG != t_read &&
872 INVALID_TEMPREG == t_write_addr &&
873 INVALID_TEMPREG == t_write);
874 t_data_addr1 = t_read_addr;
875
876 } else if (!IS_(read) && IS_(write)) {
877 CC_size = sizeof(idCC);
878 helper = ( has_rep_prefix
879 ? (Addr) & log_0I_1D_cache_access
880 : (Addr) & log_1I_1D_cache_access
881 );
882 argc = 2;
883 if (!BB_seen_before)
884 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
885 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000886 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000887 INVALID_TEMPREG == t_read &&
888 INVALID_TEMPREG != t_write_addr &&
889 INVALID_TEMPREG != t_write);
890 t_data_addr1 = t_write_addr;
891
892 } else {
njne427a662002-10-02 11:08:25 +0000893 sk_assert(IS_(read) && IS_(write));
894 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000895 INVALID_TEMPREG != t_read &&
896 INVALID_TEMPREG != t_write_addr &&
897 INVALID_TEMPREG != t_write);
898 if (t_read == t_write) {
899 CC_size = sizeof(idCC);
900 helper = ( has_rep_prefix
901 ? (Addr) & log_0I_1D_cache_access
902 : (Addr) & log_1I_1D_cache_access
903 );
904 argc = 2;
905 if (!BB_seen_before)
906 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
907 x86_instr_size, data_size);
908 t_data_addr1 = t_read_addr;
909 } else {
910 CC_size = sizeof(iddCC);
911 helper = ( has_rep_prefix
912 ? (Addr) & log_0I_2D_cache_access
913 : (Addr) & log_1I_2D_cache_access
914 );
915 argc = 3;
916 if (!BB_seen_before)
917 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
918 x86_instr_size, data_size);
919 t_data_addr1 = t_read_addr;
920 t_data_addr2 = t_write_addr;
921 }
922 }
923#undef IS_
924 }
925
926 /* Call the helper, if necessary */
927 if ((Addr)0 != helper) {
928
929 /* Setup 1st arg: CC addr */
930 t_CC_addr = newTemp(cb);
931 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
932 uLiteral(cb, BBCC_ptr);
933
934 /* Call the helper */
935 if (1 == argc)
936 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
937 else if (2 == argc)
938 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
939 TempReg, t_data_addr1);
940 else if (3 == argc)
941 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
942 TempReg, t_data_addr1,
943 TempReg, t_data_addr2);
944 else
njne427a662002-10-02 11:08:25 +0000945 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +0000946
947 uCCall(cb, helper, argc, argc, False);
948 }
949
950 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +0000951 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000952
953 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
954 BBCC_ptr += CC_size;
955 x86_instr_addr += x86_instr_size;
956 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
957 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
958 data_size = INVALID_DATA_SIZE;
959 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000960 break;
961
962 default:
njn4ba5a792002-09-30 10:23:54 +0000963 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000964 break;
965 }
966 }
967
968 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +0000969 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +0000970
njn4ba5a792002-09-30 10:23:54 +0000971 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000972 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000973
974#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000975}
976
977/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000978/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000979/*------------------------------------------------------------*/
980
981/* Total reads/writes/misses. Calculated during CC traversal at the end. */
982static CC Ir_total;
983static CC Dr_total;
984static CC Dw_total;
985
njn25e49d8e72002-09-23 09:36:25 +0000986#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
987
988static cache_t clo_I1_cache = UNDEFINED_CACHE;
989static cache_t clo_D1_cache = UNDEFINED_CACHE;
990static cache_t clo_L2_cache = UNDEFINED_CACHE;
991
njn7cf0bd32002-06-08 13:36:03 +0000992/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
993/* Probably only works for Intel and AMD chips, and probably only for some of
994 * them.
995 */
996
sewardj07133bf2002-06-13 10:25:56 +0000997static __inline__ void cpuid(Int n, Int *a, Int *b, Int *c, Int *d)
njn7cf0bd32002-06-08 13:36:03 +0000998{
999 __asm__ __volatile__ (
1000 "cpuid"
1001 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1002 : "0" (n) /* input */
1003 );
1004}
1005
sewardj07133bf2002-06-13 10:25:56 +00001006static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001007{
1008 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001009 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001010 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001011 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001012 " Simulating a %d KB cache with %d B lines",
1013 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001014}
1015
1016/* Intel method is truly wretched. We have to do an insane indexing into an
1017 * array of pre-defined configurations for various parts of the memory
1018 * hierarchy.
1019 */
1020static
sewardj07133bf2002-06-13 10:25:56 +00001021Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001022{
sewardj07133bf2002-06-13 10:25:56 +00001023 UChar info[16];
1024 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001025 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001026
1027 if (level < 2) {
1028 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001029 "warning: CPUID level < 2 for Intel processor (%d)",
1030 level);
njn7cf0bd32002-06-08 13:36:03 +00001031 return -1;
1032 }
1033
sewardj07133bf2002-06-13 10:25:56 +00001034 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1035 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001036 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1037 info[0] = 0x0; /* reset AL */
1038
1039 if (0 != trials) {
1040 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001041 "warning: non-zero CPUID trials for Intel processor (%d)",
1042 trials);
njn7cf0bd32002-06-08 13:36:03 +00001043 return -1;
1044 }
1045
1046 for (i = 0; i < 16; i++) {
1047
1048 switch (info[i]) {
1049
1050 case 0x0: /* ignore zeros */
1051 break;
1052
njn25e49d8e72002-09-23 09:36:25 +00001053 /* TLB info, ignore */
1054 case 0x01: case 0x02: case 0x03: case 0x04:
1055 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njn7cf0bd32002-06-08 13:36:03 +00001056 break;
1057
1058 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1059 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
1060
1061 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1062 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
1063
njn25e49d8e72002-09-23 09:36:25 +00001064 /* IA-64 info -- panic! */
1065 case 0x10: case 0x15: case 0x1a:
1066 case 0x88: case 0x89: case 0x8a: case 0x8d:
1067 case 0x90: case 0x96: case 0x9b:
1068 VG_(message)(Vg_DebugMsg,
1069 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001070 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001071
njn7cf0bd32002-06-08 13:36:03 +00001072 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001073 VG_(message)(Vg_DebugMsg,
1074 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001075 break;
1076
njn25e49d8e72002-09-23 09:36:25 +00001077 /* These are sectored, whatever that means */
1078 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1079 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1080
1081 /* If a P6 core, this means "no L2 cache".
1082 If a P4 core, this means "no L3 cache".
1083 We don't know what core it is, so don't issue a warning. To detect
1084 a missing L2 cache, we use 'L2_found'. */
1085 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001086 break;
1087
njn25e49d8e72002-09-23 09:36:25 +00001088 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1089 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1090 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1091 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1092 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001093
1094 /* These are sectored, whatever that means */
1095 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1096 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1097 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1098
1099 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1100 * conversion to byte size is a total guess; treat the 12K and 16K
1101 * cases the same since the cache byte size must be a power of two for
1102 * everything to work!. Also guessing 32 bytes for the line size...
1103 */
1104 case 0x70: /* 12K micro-ops, 8-way */
1105 *I1c = (cache_t) { 16, 8, 32 };
1106 micro_ops_warn(12, 16, 32);
1107 break;
1108 case 0x71: /* 16K micro-ops, 8-way */
1109 *I1c = (cache_t) { 16, 8, 32 };
1110 micro_ops_warn(16, 16, 32);
1111 break;
1112 case 0x72: /* 32K micro-ops, 8-way */
1113 *I1c = (cache_t) { 32, 8, 32 };
1114 micro_ops_warn(32, 32, 32);
1115 break;
1116
njn25e49d8e72002-09-23 09:36:25 +00001117 /* These are sectored, whatever that means */
1118 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1119 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1120 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1121 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1122 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001123
njn25e49d8e72002-09-23 09:36:25 +00001124 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1125 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1126 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1127 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1128 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001129
1130 default:
1131 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001132 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001133 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001134 break;
1135 }
1136 }
njn25e49d8e72002-09-23 09:36:25 +00001137
1138 if (!L2_found)
1139 VG_(message)(Vg_DebugMsg,
1140 "warning: L2 cache not installed, ignore L2 results.");
1141
njn7cf0bd32002-06-08 13:36:03 +00001142 return 0;
1143}
1144
1145/* AMD method is straightforward, just extract appropriate bits from the
1146 * result registers.
1147 *
1148 * Bits, for D1 and I1:
1149 * 31..24 data L1 cache size in KBs
1150 * 23..16 data L1 cache associativity (FFh=full)
1151 * 15.. 8 data L1 cache lines per tag
1152 * 7.. 0 data L1 cache line size in bytes
1153 *
1154 * Bits, for L2:
1155 * 31..16 unified L2 cache size in KBs
1156 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1157 * 11.. 8 unified L2 cache lines per tag
1158 * 7.. 0 unified L2 cache line size in bytes
1159 *
1160 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1161 * upon this information. (Whatever that means -- njn)
1162 *
njn25e49d8e72002-09-23 09:36:25 +00001163 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1164 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1165 * so we detect that.
1166 *
njn7cf0bd32002-06-08 13:36:03 +00001167 * Returns 0 on success, non-zero on failure.
1168 */
sewardj07133bf2002-06-13 10:25:56 +00001169static
1170Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001171{
njn25e49d8e72002-09-23 09:36:25 +00001172 Int dummy, model, ext_level;
sewardj07133bf2002-06-13 10:25:56 +00001173 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001174
1175 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1176
1177 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1178 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001179 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1180 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001181 return -1;
1182 }
1183
1184 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1185 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1186
njn25e49d8e72002-09-23 09:36:25 +00001187 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1188 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1189
1190 /* Check for Duron bug */
1191 if (model == 0x630) {
1192 VG_(message)(Vg_UserMsg,
1193 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1194 L2i = (64 << 16) | (L2i & 0xffff);
1195 }
1196
njn7cf0bd32002-06-08 13:36:03 +00001197 D1c->size = (D1i >> 24) & 0xff;
1198 D1c->assoc = (D1i >> 16) & 0xff;
1199 D1c->line_size = (D1i >> 0) & 0xff;
1200
1201 I1c->size = (I1i >> 24) & 0xff;
1202 I1c->assoc = (I1i >> 16) & 0xff;
1203 I1c->line_size = (I1i >> 0) & 0xff;
1204
1205 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1206 L2c->assoc = (L2i >> 12) & 0xf;
1207 L2c->line_size = (L2i >> 0) & 0xff;
1208
1209 return 0;
1210}
1211
/* Jump target used to get back out of the SIGILL handler below. */
static jmp_buf cpuid_jmpbuf;

/* Signal handler: transfers control back to wherever cpuid_jmpbuf was last
 * armed with __builtin_setjmp, making that call appear to return 1.
 * Never returns to the interrupted code. */
static
void cpuid_SIGILL_handler(int signum)
{
   (void)signum;     /* the signal number is not needed */
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1219
1220static
sewardj07133bf2002-06-13 10:25:56 +00001221Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001222{
sewardj07133bf2002-06-13 10:25:56 +00001223 Int level, res, ret;
1224 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001225 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001226
1227 /* Install own SIGILL handler */
1228 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1229 sigill_new.ksa_flags = 0;
1230 sigill_new.ksa_restorer = NULL;
1231 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001232 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001233
1234 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001235 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001236
1237 /* Trap for illegal instruction, in case it's a really old processor that
1238 * doesn't support CPUID. */
1239 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1240 cpuid(0, &level, (int*)&vendor_id[0],
1241 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1242 vendor_id[12] = '\0';
1243
1244 /* Restore old SIGILL handler */
1245 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001246 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001247
1248 } else {
1249 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1250
1251 /* Restore old SIGILL handler */
1252 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001253 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001254 return -1;
1255 }
1256
1257 if (0 == level) {
1258 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1259 return -1;
1260 }
1261
1262 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1263 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1264 ret = Intel_cache_info(level, I1c, D1c, L2c);
1265
1266 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1267 ret = AMD_cache_info(I1c, D1c, L2c);
1268
1269 } else {
1270 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1271 vendor_id);
1272 return -1;
1273 }
1274
1275 /* Successful! Convert sizes from KB to bytes */
1276 I1c->size *= 1024;
1277 D1c->size *= 1024;
1278 L2c->size *= 1024;
1279
1280 return ret;
1281}
1282
1283/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001284static
1285void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001286{
1287 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001288 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001289 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001290 "warning: %s size of %dB not a power of two; "
1291 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001292 cache->size = dflt->size;
1293 }
1294
sewardj07133bf2002-06-13 10:25:56 +00001295 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001296 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001297 "warning: %s associativity of %d not a power of two; "
1298 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001299 cache->assoc = dflt->assoc;
1300 }
1301
sewardj07133bf2002-06-13 10:25:56 +00001302 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001303 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001304 "warning: %s line size of %dB not a power of two; "
1305 "defaulting to %dB",
1306 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001307 cache->line_size = dflt->line_size;
1308 }
1309
1310 /* Then check line size >= 16 -- any smaller and a single instruction could
1311 * straddle three cache lines, which breaks a simulation assertion and is
1312 * stupid anyway. */
1313 if (cache->line_size < MIN_LINE_SIZE) {
1314 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001315 "warning: %s line size of %dB too small; "
1316 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001317 cache->line_size = MIN_LINE_SIZE;
1318 }
1319
1320 /* Then check cache size > line size (causes seg faults if not). */
1321 if (cache->size <= cache->line_size) {
1322 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001323 "warning: %s cache size of %dB <= line size of %dB; "
1324 "increasing to %dB", name, cache->size, cache->line_size,
1325 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001326 cache->size = cache->line_size * 2;
1327 }
1328
1329 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1330 if (cache->assoc > (cache->size / cache->line_size)) {
1331 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001332 "warning: %s associativity > (size / line size); "
1333 "increasing size to %dB",
1334 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001335 cache->size = cache->assoc * cache->line_size;
1336 }
1337}
1338
1339/* On entry, args are undefined. Fill them with any info from the
1340 * command-line, then fill in any remaining with CPUID instruction if possible,
1341 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001342static
1343void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001344{
1345 /* Defaults are for a model 3 or 4 Athlon */
1346 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1347 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1348 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1349
njn25e49d8e72002-09-23 09:36:25 +00001350#define CMD_LINE_DEFINED(L) \
1351 (-1 != clo_##L##_cache.size || \
1352 -1 != clo_##L##_cache.assoc || \
1353 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001354
njn25e49d8e72002-09-23 09:36:25 +00001355 *I1c = clo_I1_cache;
1356 *D1c = clo_D1_cache;
1357 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001358
njn7cf0bd32002-06-08 13:36:03 +00001359 /* If any undefined on command-line, try CPUID */
1360 if (! CMD_LINE_DEFINED(I1) ||
1361 ! CMD_LINE_DEFINED(D1) ||
1362 ! CMD_LINE_DEFINED(L2)) {
1363
1364 /* Overwrite CPUID result for any cache defined on command-line */
1365 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1366
njn25e49d8e72002-09-23 09:36:25 +00001367 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1368 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1369 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001370
1371 /* CPUID failed, use defaults for each undefined by command-line */
1372 } else {
1373 VG_(message)(Vg_DebugMsg,
1374 "Couldn't detect cache configuration, using one "
1375 "or more defaults ");
1376
njn25e49d8e72002-09-23 09:36:25 +00001377 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1378 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1379 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001380 }
1381 }
1382#undef CMD_LINE_DEFINED
1383
1384 check_cache(I1c, &I1_dflt, "I1");
1385 check_cache(D1c, &D1_dflt, "D1");
1386 check_cache(L2c, &L2_dflt, "L2");
1387
1388 if (VG_(clo_verbosity) > 1) {
1389 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1390 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1391 I1c->size, I1c->assoc, I1c->line_size);
1392 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1393 D1c->size, D1c->assoc, D1c->line_size);
1394 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1395 L2c->size, L2c->assoc, L2c->line_size);
1396 }
1397}
1398
njn4f9c9342002-04-29 16:03:24 +00001399/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001400/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001401/*------------------------------------------------------------*/
1402
njn4f9c9342002-04-29 16:03:24 +00001403static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1404 Char *first_instr_fn)
1405{
1406 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001407 Char buf[BUF_LEN], curr_file[BUF_LEN],
1408 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001409 UInt line_num;
1410
1411 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1412
njne0ee0712002-05-03 16:41:05 +00001413 /* Mark start of basic block in output, just to ease debugging */
1414 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001415
1416 VG_(strcpy)(curr_file, first_instr_fl);
1417
1418 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1419
1420 /* We pretend the CC is an iCC for getting the tag. This is ok
1421 * because both CC types have tag as their first byte. Once we know
1422 * the type, we can cast and act appropriately. */
1423
1424 Char fl_buf[FILENAME_LEN];
1425 Char fn_buf[FN_NAME_LEN];
1426
njne0ee0712002-05-03 16:41:05 +00001427 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001428 switch ( ((iCC*)BBCC_ptr)->tag ) {
1429
njn25e49d8e72002-09-23 09:36:25 +00001430 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001431 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1432 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001433 ADD_CC_TO(iCC, I, Ir_total);
1434 BBCC_ptr += sizeof(iCC);
1435 break;
1436
njn25e49d8e72002-09-23 09:36:25 +00001437 case ReadCC:
1438 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001439 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1440 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001441 ADD_CC_TO(idCC, I, Ir_total);
1442 ADD_CC_TO(idCC, D, Dr_total);
1443 BBCC_ptr += sizeof(idCC);
1444 break;
1445
njn25e49d8e72002-09-23 09:36:25 +00001446 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001447 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1448 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001449 ADD_CC_TO(idCC, I, Ir_total);
1450 ADD_CC_TO(idCC, D, Dw_total);
1451 BBCC_ptr += sizeof(idCC);
1452 break;
1453
njn25e49d8e72002-09-23 09:36:25 +00001454 case ReadWriteCC:
1455 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1456 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1457 ADD_CC_TO(iddCC, I, Ir_total);
1458 ADD_CC_TO(iddCC, Da, Dr_total);
1459 ADD_CC_TO(iddCC, Db, Dw_total);
1460 BBCC_ptr += sizeof(iddCC);
1461 break;
1462
njn4f9c9342002-04-29 16:03:24 +00001463 default:
njne427a662002-10-02 11:08:25 +00001464 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001465 break;
1466 }
1467 distinct_instrs++;
1468
njne0ee0712002-05-03 16:41:05 +00001469 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1470
1471 /* Allow for filename switching in the middle of a BB; if this happens,
1472 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001473 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001474 VG_(strcpy)(curr_file, fl_buf);
1475 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1476 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1477 }
1478
njn4f9c9342002-04-29 16:03:24 +00001479 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001480 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001481 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001482 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001483 VG_(printf)(" filenames: BB:%s, instr:%s;"
1484 " fn_names: BB:%s, instr:%s;"
1485 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001486 first_instr_fl, fl_buf,
1487 first_instr_fn, fn_buf,
1488 line_num);
1489 }
1490
njne0ee0712002-05-03 16:41:05 +00001491 VG_(sprintf)(lbuf, "%u ", line_num);
1492 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1493 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001494 }
1495 /* If we switched filenames in the middle of the BB without switching back,
1496 * switch back now because the subsequent BB may be relying on falling under
1497 * the original file name. */
1498 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1499 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1500 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1501 }
njne0ee0712002-05-03 16:41:05 +00001502
1503 /* Mark end of basic block */
1504 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001505
njne427a662002-10-02 11:08:25 +00001506 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001507}
1508
njn25e49d8e72002-09-23 09:36:25 +00001509static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001510{
1511 Int fd;
1512 Char buf[BUF_LEN];
1513 file_node *curr_file_node;
1514 fn_node *curr_fn_node;
1515 BBCC *curr_BBCC;
1516 Int i,j,k;
1517
njn25e49d8e72002-09-23 09:36:25 +00001518 VGP_PUSHCC(VgpCacheResults);
1519 fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
sewardj0744b6c2002-12-11 00:45:42 +00001520 if (-1 == fd) {
1521 /* If the file can't be opened for whatever reason (conflict
1522 between multiple cachegrinded processes?), give up now. */
1523 file_err();
1524 return;
1525 }
njn4f9c9342002-04-29 16:03:24 +00001526
1527 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001528 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1529 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1530 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1531 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1532 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1533 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001534
1535 /* "cmd:" line */
1536 VG_(strcpy)(buf, "cmd:");
1537 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001538 for (i = 0; i < VG_(client_argc); i++) {
1539 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001540 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1541 }
1542 /* "events:" line */
1543 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1544 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1545
1546 /* Six loops here: three for the hash table arrays, and three for the
1547 * chains hanging off the hash table arrays. */
1548 for (i = 0; i < N_FILE_ENTRIES; i++) {
1549 curr_file_node = BBCC_table[i];
1550 while (curr_file_node != NULL) {
1551 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1552 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1553
1554 for (j = 0; j < N_FN_ENTRIES; j++) {
1555 curr_fn_node = curr_file_node->fns[j];
1556 while (curr_fn_node != NULL) {
1557 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1558 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1559
1560 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1561 curr_BBCC = curr_fn_node->BBCCs[k];
1562 while (curr_BBCC != NULL) {
1563 fprint_BBCC(fd, curr_BBCC,
1564
1565 curr_file_node->filename,
1566 curr_fn_node->fn_name);
1567
1568 curr_BBCC = curr_BBCC->next;
1569 }
1570 }
1571 curr_fn_node = curr_fn_node->next;
1572 }
1573 }
1574 curr_file_node = curr_file_node->next;
1575 }
1576 }
1577
njn4294fd42002-06-05 14:41:10 +00001578 /* Print stats from any discarded basic blocks */
1579 if (0 != Ir_discards.a) {
1580
1581 VG_(sprintf)(buf, "fl=(discarded)\n");
1582 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1583 VG_(sprintf)(buf, "fn=(discarded)\n");
1584 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1585
1586 /* Use 0 as line number */
1587 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1588 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1589 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1590 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1591 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1592
1593 Ir_total.a += Ir_discards.a;
1594 Ir_total.m1 += Ir_discards.m1;
1595 Ir_total.m2 += Ir_discards.m2;
1596 Dr_total.a += Dr_discards.a;
1597 Dr_total.m1 += Dr_discards.m1;
1598 Dr_total.m2 += Dr_discards.m2;
1599 Dw_total.a += Dw_discards.a;
1600 Dw_total.m1 += Dw_discards.m1;
1601 Dw_total.m2 += Dw_discards.m2;
1602 }
1603
njn4f9c9342002-04-29 16:03:24 +00001604 /* Summary stats must come after rest of table, since we calculate them
1605 * during traversal. */
1606 VG_(sprintf)(buf, "summary: "
1607 "%llu %llu %llu "
1608 "%llu %llu %llu "
1609 "%llu %llu %llu\n",
1610 Ir_total.a, Ir_total.m1, Ir_total.m2,
1611 Dr_total.a, Dr_total.m1, Dr_total.m2,
1612 Dw_total.a, Dw_total.m1, Dw_total.m2);
1613 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1614 VG_(close)(fd);
1615}
1616
1617/* Adds commas to ULong, right justifying in a field field_width wide, returns
1618 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001619static
njn4f9c9342002-04-29 16:03:24 +00001620Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1621{
1622 int len, n_commas, i, j, new_len, space;
1623
1624 VG_(sprintf)(buf, "%lu", n);
1625 len = VG_(strlen)(buf);
1626 n_commas = (len - 1) / 3;
1627 new_len = len + n_commas;
1628 space = field_width - new_len;
1629
1630 /* Allow for printing a number in a field_width smaller than it's size */
1631 if (space < 0) space = 0;
1632
1633 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1634 * of three. */
1635 for (j = -1, i = len ; i >= 0; i--) {
1636 buf[i + n_commas + space] = buf[i];
1637
1638 if (3 == ++j) {
1639 j = 0;
1640 n_commas--;
1641 buf[i + n_commas + space] = ',';
1642 }
1643 }
1644 /* Right justify in field. */
1645 for (i = 0; i < space; i++) buf[i] = ' ';
1646 return new_len;
1647}
1648
sewardj4f29ddf2002-05-03 22:29:04 +00001649static
njn4f9c9342002-04-29 16:03:24 +00001650void percentify(Int n, Int pow, Int field_width, char buf[])
1651{
1652 int i, len, space;
1653
1654 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1655 len = VG_(strlen)(buf);
1656 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001657 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001658 i = len;
1659
1660 /* Right justify in field */
1661 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1662 for (i = 0; i < space; i++) buf[i] = ' ';
1663}
1664
njn25e49d8e72002-09-23 09:36:25 +00001665void SK_(fini)(void)
njn4f9c9342002-04-29 16:03:24 +00001666{
1667 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001668 ULong L2_total_m, L2_total_mr, L2_total_mw,
1669 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001670 char buf1[RESULTS_BUF_LEN],
1671 buf2[RESULTS_BUF_LEN],
1672 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001673 Int l1, l2, l3;
1674 Int p;
1675
njn25e49d8e72002-09-23 09:36:25 +00001676 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001677
njn7cf0bd32002-06-08 13:36:03 +00001678 if (VG_(clo_verbosity) == 0)
1679 return;
1680
njn4f9c9342002-04-29 16:03:24 +00001681 /* I cache results. Use the I_refs value to determine the first column
1682 * width. */
1683 l1 = commify(Ir_total.a, 0, buf1);
1684 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1685
1686 commify(Ir_total.m1, l1, buf1);
1687 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1688
1689 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001690 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001691
1692 p = 100;
1693
njn25e49d8e72002-09-23 09:36:25 +00001694 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001695 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1696 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1697
1698 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1699 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1700 VG_(message)(Vg_UserMsg, "");
1701
1702 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1703 * width of columns 2 & 3. */
1704 D_total.a = Dr_total.a + Dw_total.a;
1705 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1706 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1707
njn1d021fa2002-05-02 13:56:34 +00001708 commify( D_total.a, l1, buf1);
1709 l2 = commify(Dr_total.a, 0, buf2);
1710 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001711 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1712 buf1, buf2, buf3);
1713
1714 commify( D_total.m1, l1, buf1);
1715 commify(Dr_total.m1, l2, buf2);
1716 commify(Dw_total.m1, l3, buf3);
1717 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1718 buf1, buf2, buf3);
1719
1720 commify( D_total.m2, l1, buf1);
1721 commify(Dr_total.m2, l2, buf2);
1722 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001723 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001724 buf1, buf2, buf3);
1725
1726 p = 10;
1727
njn25e49d8e72002-09-23 09:36:25 +00001728 if (0 == D_total.a) D_total.a = 1;
1729 if (0 == Dr_total.a) Dr_total.a = 1;
1730 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001731 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1732 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1733 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1734 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1735
1736 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1737 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1738 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1739 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1740 VG_(message)(Vg_UserMsg, "");
1741
1742 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001743
1744 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1745 L2_total_r = Dr_total.m1 + Ir_total.m1;
1746 L2_total_w = Dw_total.m1;
1747 commify(L2_total, l1, buf1);
1748 commify(L2_total_r, l2, buf2);
1749 commify(L2_total_w, l3, buf3);
1750 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1751 buf1, buf2, buf3);
1752
njn4f9c9342002-04-29 16:03:24 +00001753 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1754 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1755 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001756 commify(L2_total_m, l1, buf1);
1757 commify(L2_total_mr, l2, buf2);
1758 commify(L2_total_mw, l3, buf3);
1759 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1760 buf1, buf2, buf3);
1761
1762 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1763 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1764 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1765 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1766
1767
1768 /* Hash table stats */
1769 if (VG_(clo_verbosity) > 1) {
1770 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1771 file_line_debug_BBs + no_debug_BBs;
1772
1773 VG_(message)(Vg_DebugMsg, "");
1774 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1775 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1776 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1777 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1778 full_debug_BBs * 100 / BB_lookups,
1779 full_debug_BBs);
1780 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1781 file_line_debug_BBs * 100 / BB_lookups,
1782 file_line_debug_BBs);
1783 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1784 fn_name_debug_BBs * 100 / BB_lookups,
1785 fn_name_debug_BBs);
1786 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1787 no_debug_BBs * 100 / BB_lookups,
1788 no_debug_BBs);
1789 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1790 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1791 }
njn25e49d8e72002-09-23 09:36:25 +00001792 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001793}
1794
sewardj18d75132002-05-16 11:06:21 +00001795
njn4294fd42002-06-05 14:41:10 +00001796/* Called when a translation is invalidated due to self-modifying code or
1797 * unloaded of a shared object.
1798 *
1799 * Finds the BBCC in the table, removes it, adds the counts to the discard
1800 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001801void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001802{
njn4294fd42002-06-05 14:41:10 +00001803 BBCC *BBCC_node;
1804 Addr BBCC_ptr0, BBCC_ptr;
1805 Bool BB_seen_before;
1806
sewardj83205b32002-06-14 11:08:07 +00001807 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001808 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001809
1810 /* 2nd arg won't be used since BB should have been seen before (assertions
1811 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001812 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001813 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1814
njne427a662002-10-02 11:08:25 +00001815 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001816
1817 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1818
1819 /* We pretend the CC is an iCC for getting the tag. This is ok
1820 * because both CC types have tag as their first byte. Once we know
1821 * the type, we can cast and act appropriately. */
1822
1823 switch ( ((iCC*)BBCC_ptr)->tag ) {
1824
njn25e49d8e72002-09-23 09:36:25 +00001825 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001826 ADD_CC_TO(iCC, I, Ir_discards);
1827 BBCC_ptr += sizeof(iCC);
1828 break;
1829
njn25e49d8e72002-09-23 09:36:25 +00001830 case ReadCC:
1831 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001832 ADD_CC_TO(idCC, I, Ir_discards);
1833 ADD_CC_TO(idCC, D, Dr_discards);
1834 BBCC_ptr += sizeof(idCC);
1835 break;
1836
njn25e49d8e72002-09-23 09:36:25 +00001837 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001838 ADD_CC_TO(idCC, I, Ir_discards);
1839 ADD_CC_TO(idCC, D, Dw_discards);
1840 BBCC_ptr += sizeof(idCC);
1841 break;
1842
njn25e49d8e72002-09-23 09:36:25 +00001843 case ReadWriteCC:
1844 ADD_CC_TO(iddCC, I, Ir_discards);
1845 ADD_CC_TO(iddCC, Da, Dr_discards);
1846 ADD_CC_TO(iddCC, Db, Dw_discards);
1847 BBCC_ptr += sizeof(iddCC);
1848 break;
1849
njn4294fd42002-06-05 14:41:10 +00001850 default:
njne427a662002-10-02 11:08:25 +00001851 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001852 break;
1853 }
1854 }
njn25e49d8e72002-09-23 09:36:25 +00001855 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001856}
1857
1858/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001859/*--- Command line processing ---*/
1860/*--------------------------------------------------------------------*/
1861
1862static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1863{
1864 int i1, i2, i3;
1865 int i;
1866 char *opt = VG_(strdup)(orig_opt);
1867
1868 i = i1 = opt_len;
1869
1870 /* Option looks like "--I1=65536,2,64".
1871 * Find commas, replace with NULs to make three independent
1872 * strings, then extract numbers. Yuck. */
1873 while (VG_(isdigit)(opt[i])) i++;
1874 if (',' == opt[i]) {
1875 opt[i++] = '\0';
1876 i2 = i;
1877 } else goto bad;
1878 while (VG_(isdigit)(opt[i])) i++;
1879 if (',' == opt[i]) {
1880 opt[i++] = '\0';
1881 i3 = i;
1882 } else goto bad;
1883 while (VG_(isdigit)(opt[i])) i++;
1884 if ('\0' != opt[i]) goto bad;
1885
1886 cache->size = (Int)VG_(atoll)(opt + i1);
1887 cache->assoc = (Int)VG_(atoll)(opt + i2);
1888 cache->line_size = (Int)VG_(atoll)(opt + i3);
1889
1890 VG_(free)(opt);
1891
1892 return;
1893
1894 bad:
1895 VG_(bad_option)(orig_opt);
1896}
1897
1898Bool SK_(process_cmd_line_option)(Char* arg)
1899{
1900 /* 5 is length of "--I1=" */
1901 if (0 == VG_(strncmp)(arg, "--I1=", 5))
1902 parse_cache_opt(&clo_I1_cache, arg, 5);
1903 else if (0 == VG_(strncmp)(arg, "--D1=", 5))
1904 parse_cache_opt(&clo_D1_cache, arg, 5);
1905 else if (0 == VG_(strncmp)(arg, "--L2=", 5))
1906 parse_cache_opt(&clo_L2_cache, arg, 5);
1907 else
1908 return False;
1909
1910 return True;
1911}
1912
1913Char* SK_(usage)(void)
1914{
1915 return
1916" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1917" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1918" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n";
1919}
1920
1921/*--------------------------------------------------------------------*/
1922/*--- Setup ---*/
1923/*--------------------------------------------------------------------*/
1924
njn810086f2002-11-14 12:42:47 +00001925void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001926{
njn810086f2002-11-14 12:42:47 +00001927 VG_(details_name) ("Cachegrind");
1928 VG_(details_version) (NULL);
1929 VG_(details_description) ("an I1/D1/L2 cache profiler");
1930 VG_(details_copyright_author)(
1931 "Copyright (C) 2002, and GNU GPL'd, by Nicholas Nethercote.");
1932 VG_(details_bug_reports_to) ("njn25@cam.ac.uk");
sewardj78210aa2002-12-01 02:55:46 +00001933 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001934
njn810086f2002-11-14 12:42:47 +00001935 VG_(needs_basic_block_discards)();
1936 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001937
1938 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
1939 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
1940 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
1941 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
1942 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
1943 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
1944}
1945
1946void SK_(post_clo_init)(void)
1947{
1948 cache_t I1c, D1c, L2c;
1949 Int fd;
1950
1951 /* Set output file name: cachegrind.<pid>.out */
1952 VG_(sprintf)(cachegrind_out_file, "cachegrind.out.%d", VG_(getpid)());
1953
1954 /* Make sure the output file can be written. */
1955 fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1956 if (-1 == fd) {
1957 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY,
1958 VKI_S_IRUSR|VKI_S_IWUSR);
1959 if (-1 == fd) {
1960 file_err();
1961 }
1962 }
sewardj0744b6c2002-12-11 00:45:42 +00001963 if (-1 != fd)
1964 VG_(close)(fd);
njn25e49d8e72002-09-23 09:36:25 +00001965
1966 initCC(&Ir_total);
1967 initCC(&Dr_total);
1968 initCC(&Dw_total);
1969
1970 initCC(&Ir_discards);
1971 initCC(&Dr_discards);
1972 initCC(&Dw_discards);
1973
1974 get_caches(&I1c, &D1c, &L2c);
1975
1976 cachesim_I1_initcache(I1c);
1977 cachesim_D1_initcache(D1c);
1978 cachesim_L2_initcache(L2c);
1979
1980 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
1981 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
1982 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
1983
1984 init_BBCC_table();
1985}
1986
#if 0
/* Disabled sanity-check hooks; compiled out. */

/* Cheap check: nothing to verify. */
Bool SK_(cheap_sanity_check)(void)
{
   return True;
}

extern TTEntry* vg_tt;

/* Expensive check: looks up (with remove=True) the BBCC of every entry in
 * vg_tt whose orig_addr is neither 1 nor 3, printing a '.' for each.
 * NOTE(review): 1 and 3 are presumably markers for unused/deleted entries
 * and 200191 the size of vg_tt -- confirm against the core before enabling. */
Bool SK_(expensive_sanity_check)(void)
{
   Int  idx;
   Bool seen;

   for (idx = 0; idx < 200191; idx++) {
      if (vg_tt[idx].orig_addr == (Addr)1 ||
          vg_tt[idx].orig_addr == (Addr)3) {
         continue;
      }
      VG_(printf)(".");
      get_BBCC(vg_tt[idx].orig_addr, NULL, /*remove=*/True, &seen);
   }
   return True;
}
#endif
2006
2007/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002008/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002009/*--------------------------------------------------------------------*/