blob: 95929dcfa59cc38eb5f2d0211b72e06cfbebe031 [file] [log] [blame]
sewardj07133bf2002-06-13 10:25:56 +00001
njn4f9c9342002-04-29 16:03:24 +00002/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00003/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
4/*--- results printing. ---*/
njn25cac76cb2002-09-23 11:21:57 +00005/*--- cg_main.c ---*/
njn4f9c9342002-04-29 16:03:24 +00006/*--------------------------------------------------------------------*/
7
8/*
njnc9539842002-10-02 13:26:35 +00009 This file is part of Cachegrind, a Valgrind skin for cache
10 profiling programs.
njn4f9c9342002-04-29 16:03:24 +000011
njn0e1b5142003-04-15 14:58:06 +000012 Copyright (C) 2002-2003 Nicholas Nethercote
sewardj3c23d432002-06-01 23:43:49 +000013 njn25@cam.ac.uk
njn4f9c9342002-04-29 16:03:24 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
njn25e49d8e72002-09-23 09:36:25 +000030 The GNU General Public License is contained in the file COPYING.
njn4f9c9342002-04-29 16:03:24 +000031*/
32
njn25e49d8e72002-09-23 09:36:25 +000033#include "vg_skin.h"
34//#include "vg_profile.c"
35
njn27f1a382002-11-08 15:48:16 +000036VG_DETERMINE_INTERFACE_VERSION
37
/* Parameters describing one simulated cache. */
typedef struct {
   int size;        /* total capacity, in bytes */
   int assoc;       /* associativity */
   int line_size;   /* length of one cache line, in bytes */
} cache_t;
njn4f9c9342002-04-29 16:03:24 +000044
njn25cac76cb2002-09-23 11:21:57 +000045#include "cg_sim_L2.c"
46#include "cg_sim_I1.c"
47#include "cg_sim_D1.c"
njn4f9c9342002-04-29 16:03:24 +000048
njn25e49d8e72002-09-23 09:36:25 +000049/*------------------------------------------------------------*/
50/*--- Constants ---*/
51/*------------------------------------------------------------*/
njn4f9c9342002-04-29 16:03:24 +000052
/* Upper bound on the length of one x86 instruction, according to the IA-32
 * Intel Architecture Software Developer's Manual: Vol 2. */
#define MAX_x86_INSTR_SIZE    16

/* Data sizes passed to the cache simulator are capped at this many bytes. */
#define MIN_LINE_SIZE         16

/* Sizes of the various buffers used for storing strings. */
#define FILENAME_LEN          256
#define FN_NAME_LEN           256
#define BUF_LEN               512
#define COMMIFY_BUF_LEN       128
#define RESULTS_BUF_LEN       128
#define LINE_BUF_LEN           64
65
66/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +000067/*--- Profiling events ---*/
njn7cf0bd32002-06-08 13:36:03 +000068/*------------------------------------------------------------*/
69
njn25e49d8e72002-09-23 09:36:25 +000070typedef
71 enum {
72 VgpGetBBCC = VgpFini+1,
73 VgpCacheSimulate,
74 VgpCacheResults
75 }
76 VgpSkinCC;
sewardj07133bf2002-06-13 10:25:56 +000077
njn4f9c9342002-04-29 16:03:24 +000078/*------------------------------------------------------------*/
79/*--- Output file related stuff ---*/
80/*------------------------------------------------------------*/
81
njn13f02932003-04-30 20:23:58 +000082static Char* cachegrind_out_file;
njn4f9c9342002-04-29 16:03:24 +000083
sewardj0744b6c2002-12-11 00:45:42 +000084static void file_err ( void )
njn4f9c9342002-04-29 16:03:24 +000085{
86 VG_(message)(Vg_UserMsg,
sewardj0744b6c2002-12-11 00:45:42 +000087 "error: can't open cache simulation output file `%s'",
88 cachegrind_out_file );
89 VG_(message)(Vg_UserMsg,
90 " ... so simulation results will be missing.");
njn4f9c9342002-04-29 16:03:24 +000091}
92
93/*------------------------------------------------------------*/
94/*--- Cost center types, operations ---*/
95/*------------------------------------------------------------*/
96
97typedef struct _CC CC;
98struct _CC {
99 ULong a;
100 ULong m1;
101 ULong m2;
102};
103
104static __inline__ void initCC(CC* cc) {
105 cc->a = 0;
106 cc->m1 = 0;
107 cc->m2 = 0;
108}
109
/* Kind of instruction-level cost centre; stored in the `tag' field of each
 * iCC / idCC / iddCC. */
typedef enum {
   InstrCC,       /* no data access,  eg. mov %eax,   %ebx  */
   ReadCC,        /* one read,        eg. mov (%ecx), %esi  */
   WriteCC,       /* one write,       eg. mov %eax,   (%edx) */
   ModCC,         /* read+write of one address, eg. incl (%eax) */
   ReadWriteCC,   /* read+write of two different addresses,
                     eg. call*l (%esi), pushl 0x4(%ebx), movsw */
} CC_type;
njn4f9c9342002-04-29 16:03:24 +0000119
/* Instruction-level cost-centres.  The three struct types (iCC, idCC and
 * iddCC) are defined immediately below.
 *
 * WARNING: the 'tag' field *must* be the first byte of all three CC types.
 *
 * This is because we use it to work out what kind of CC we're dealing with.
 */
njn25e49d8e72002-09-23 09:36:25 +0000127typedef
128 struct {
129 /* word 1 */
130 UChar tag;
131 UChar instr_size;
132 /* 2 bytes padding */
njn4f9c9342002-04-29 16:03:24 +0000133
njn25e49d8e72002-09-23 09:36:25 +0000134 /* words 2+ */
135 Addr instr_addr;
136 CC I;
137 }
138 iCC;
njn4f9c9342002-04-29 16:03:24 +0000139
njn25e49d8e72002-09-23 09:36:25 +0000140typedef
141 struct _idCC {
142 /* word 1 */
143 UChar tag;
144 UChar instr_size;
145 UChar data_size;
146 /* 1 byte padding */
njn4f9c9342002-04-29 16:03:24 +0000147
njn25e49d8e72002-09-23 09:36:25 +0000148 /* words 2+ */
149 Addr instr_addr;
150 CC I;
151 CC D;
152 }
153 idCC;
154
155typedef
156 struct _iddCC {
157 /* word 1 */
158 UChar tag;
159 UChar instr_size;
160 UChar data_size;
161 /* 1 byte padding */
162
163 /* words 2+ */
164 Addr instr_addr;
165 CC I;
166 CC Da;
167 CC Db;
168 }
169 iddCC;
njn4f9c9342002-04-29 16:03:24 +0000170
171static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
172{
njn25e49d8e72002-09-23 09:36:25 +0000173 cc->tag = InstrCC;
njn4f9c9342002-04-29 16:03:24 +0000174 cc->instr_size = instr_size;
175 cc->instr_addr = instr_addr;
176 initCC(&cc->I);
177}
178
179static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
180 UInt instr_size, UInt data_size)
181{
182 cc->tag = X_CC;
183 cc->instr_size = instr_size;
184 cc->data_size = data_size;
185 cc->instr_addr = instr_addr;
186 initCC(&cc->I);
187 initCC(&cc->D);
188}
189
njn25e49d8e72002-09-23 09:36:25 +0000190static void init_iddCC(iddCC* cc, Addr instr_addr,
191 UInt instr_size, UInt data_size)
192{
193 cc->tag = ReadWriteCC;
194 cc->instr_size = instr_size;
195 cc->data_size = data_size;
196 cc->instr_addr = instr_addr;
197 initCC(&cc->I);
198 initCC(&cc->Da);
199 initCC(&cc->Db);
200}
201
/* Add the a/m1/m2 counters of one cost centre into `total'.
 *
 *   cc_struct  struct type to view the current CC as (iCC, idCC or iddCC)
 *   field      name of the CC member inside that struct (eg. I, D, Da, Db)
 *   total      CC lvalue that receives the sums
 *
 * Relies on a variable named BBCC_ptr being in scope where the macro is used.
 * It expands to three plain statements, not a single do/while block, so it
 * must not be used as the unbraced body of an if/else or a loop. */
#define ADD_CC_TO(cc_struct, field, total)                  \
   (total).a  += ((cc_struct*)BBCC_ptr)->field.a;            \
   (total).m1 += ((cc_struct*)BBCC_ptr)->field.m1;           \
   (total).m2 += ((cc_struct*)BBCC_ptr)->field.m2;
206
/* Set to 1 to have the address of each instruction printed as a trailing
 * comment after its counts in cachegrind.out. */
#define PRINT_INSTR_ADDRS     0
210
njne0ee0712002-05-03 16:41:05 +0000211static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000212{
njn95114da2002-06-05 09:39:31 +0000213#if PRINT_INSTR_ADDRS
214 VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
215 cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
216#else
njne0ee0712002-05-03 16:41:05 +0000217 VG_(sprintf)(buf, "%llu %llu %llu\n",
218 cc->I.a, cc->I.m1, cc->I.m2);
njn95114da2002-06-05 09:39:31 +0000219#endif
njn4f9c9342002-04-29 16:03:24 +0000220}
221
njne0ee0712002-05-03 16:41:05 +0000222static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000223{
njn95114da2002-06-05 09:39:31 +0000224#if PRINT_INSTR_ADDRS
225 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
226 cc->I.a, cc->I.m1, cc->I.m2,
227 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
228#else
njne0ee0712002-05-03 16:41:05 +0000229 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
230 cc->I.a, cc->I.m1, cc->I.m2,
231 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000232#endif
njn4f9c9342002-04-29 16:03:24 +0000233}
234
njne0ee0712002-05-03 16:41:05 +0000235static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
njn4f9c9342002-04-29 16:03:24 +0000236{
njn95114da2002-06-05 09:39:31 +0000237#if PRINT_INSTR_ADDRS
238 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
239 cc->I.a, cc->I.m1, cc->I.m2,
240 cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
241#else
njne0ee0712002-05-03 16:41:05 +0000242 VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
243 cc->I.a, cc->I.m1, cc->I.m2,
244 cc->D.a, cc->D.m1, cc->D.m2);
njn95114da2002-06-05 09:39:31 +0000245#endif
njn4f9c9342002-04-29 16:03:24 +0000246}
247
njn25e49d8e72002-09-23 09:36:25 +0000248static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
249{
250#if PRINT_INSTR_ADDRS
251 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
252 cc->I.a, cc->I.m1, cc->I.m2,
253 cc->Da.a, cc->Da.m1, cc->Da.m2,
254 cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
255#else
256 VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
257 cc->I.a, cc->I.m1, cc->I.m2,
258 cc->Da.a, cc->Da.m1, cc->Da.m2,
259 cc->Db.a, cc->Db.m1, cc->Db.m2);
260#endif
261}
262
263
njn4f9c9342002-04-29 16:03:24 +0000264/*------------------------------------------------------------*/
265/*--- BBCC hash table stuff ---*/
266/*------------------------------------------------------------*/
267
/* The table of BBCCs is a three-level hash: filename -> function name ->
 * BBCC.  Every level is separately chained.  The bucket counts below work
 * fairly well for Konqueror. */

#define N_FILE_ENTRIES        251
#define N_FN_ENTRIES           53
#define N_BBCC_ENTRIES         37
275
276/* The cost centres for a basic block are stored in a contiguous array.
277 * They are distinguishable by their tag field. */
278typedef struct _BBCC BBCC;
279struct _BBCC {
280 Addr orig_addr;
281 UInt array_size; /* byte-size of variable length array */
282 BBCC* next;
283 Addr array[0]; /* variable length array */
284};
285
286typedef struct _fn_node fn_node;
287struct _fn_node {
288 Char* fn_name;
289 BBCC* BBCCs[N_BBCC_ENTRIES];
290 fn_node* next;
291};
292
293typedef struct _file_node file_node;
294struct _file_node {
295 Char* filename;
296 fn_node* fns[N_FN_ENTRIES];
297 file_node* next;
298};
299
300/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
sewardj4f29ddf2002-05-03 22:29:04 +0000301static file_node *BBCC_table[N_FILE_ENTRIES];
njn4f9c9342002-04-29 16:03:24 +0000302
sewardj4f29ddf2002-05-03 22:29:04 +0000303static Int distinct_files = 0;
304static Int distinct_fns = 0;
njn4f9c9342002-04-29 16:03:24 +0000305
sewardj4f29ddf2002-05-03 22:29:04 +0000306static Int distinct_instrs = 0;
307static Int full_debug_BBs = 0;
308static Int file_line_debug_BBs = 0;
309static Int fn_name_debug_BBs = 0;
310static Int no_debug_BBs = 0;
njn4f9c9342002-04-29 16:03:24 +0000311
sewardj4f29ddf2002-05-03 22:29:04 +0000312static Int BB_retranslations = 0;
njn4f9c9342002-04-29 16:03:24 +0000313
njn4294fd42002-06-05 14:41:10 +0000314static CC Ir_discards;
315static CC Dr_discards;
316static CC Dw_discards;
317
njn4f9c9342002-04-29 16:03:24 +0000318static void init_BBCC_table()
319{
320 Int i;
321 for (i = 0; i < N_FILE_ENTRIES; i++)
322 BBCC_table[i] = NULL;
323}
324
njne0ee0712002-05-03 16:41:05 +0000325static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
326 Char fn_name[FN_NAME_LEN], Int* line_num)
njn4f9c9342002-04-29 16:03:24 +0000327{
njn25e49d8e72002-09-23 09:36:25 +0000328 Bool found1, found2;
njn4f9c9342002-04-29 16:03:24 +0000329
njn25e49d8e72002-09-23 09:36:25 +0000330 found1 = VG_(get_filename_linenum)(instr_addr, filename,
331 FILENAME_LEN, line_num);
332 found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
njn4f9c9342002-04-29 16:03:24 +0000333
334 if (!found1 && !found2) {
335 no_debug_BBs++;
336 VG_(strcpy)(filename, "???");
337 VG_(strcpy)(fn_name, "???");
njn9b3366a2002-06-10 15:31:16 +0000338 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000339
340 } else if ( found1 && found2) {
341 full_debug_BBs++;
342
343 } else if ( found1 && !found2) {
344 file_line_debug_BBs++;
345 VG_(strcpy)(fn_name, "???");
346
347 } else /*(!found1 && found2)*/ {
348 fn_name_debug_BBs++;
349 VG_(strcpy)(filename, "???");
njn9b3366a2002-06-10 15:31:16 +0000350 *line_num = 0;
njn4f9c9342002-04-29 16:03:24 +0000351 }
352}
353
354/* Forward declaration. */
355static Int compute_BBCC_array_size(UCodeBlock* cb);
356
357static __inline__
358file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
359{
360 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000361 file_node* new = VG_(malloc)(sizeof(file_node));
362 new->filename = VG_(strdup)(filename);
njn4f9c9342002-04-29 16:03:24 +0000363 for (i = 0; i < N_FN_ENTRIES; i++) {
364 new->fns[i] = NULL;
365 }
366 new->next = next;
367 return new;
368}
369
370static __inline__
371fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
372{
373 Int i;
njn25e49d8e72002-09-23 09:36:25 +0000374 fn_node* new = VG_(malloc)(sizeof(fn_node));
375 new->fn_name = VG_(strdup)(fn_name);
njn4f9c9342002-04-29 16:03:24 +0000376 for (i = 0; i < N_BBCC_ENTRIES; i++) {
377 new->BBCCs[i] = NULL;
378 }
379 new->next = next;
380 return new;
381}
382
383static __inline__
384BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
385{
386 Int BBCC_array_size = compute_BBCC_array_size(cb);
387 BBCC* new;
388
njn25e49d8e72002-09-23 09:36:25 +0000389 new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
njn4f9c9342002-04-29 16:03:24 +0000390 new->orig_addr = bb_orig_addr;
391 new->array_size = BBCC_array_size;
392 new->next = next;
393
394 return new;
395}
396
397#define HASH_CONSTANT 256
398
399static UInt hash(Char *s, UInt table_size)
400{
401 int hash_value = 0;
402 for ( ; *s; s++)
403 hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
404 return hash_value;
405}
406
407/* Do a three step traversal: by filename, then fn_name, then instr_addr.
408 * In all cases prepends new nodes to their chain. Returns a pointer to the
409 * cost centre. Also sets BB_seen_before by reference.
410 */
411static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
njn4294fd42002-06-05 14:41:10 +0000412 Bool remove, Bool *BB_seen_before)
njn4f9c9342002-04-29 16:03:24 +0000413{
414 file_node *curr_file_node;
415 fn_node *curr_fn_node;
njn4294fd42002-06-05 14:41:10 +0000416 BBCC **prev_BBCC_next_ptr, *curr_BBCC;
njn4f9c9342002-04-29 16:03:24 +0000417 Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
418 UInt filename_hash, fnname_hash, BBCC_hash;
njne0ee0712002-05-03 16:41:05 +0000419 Int dummy_line_num;
njn4f9c9342002-04-29 16:03:24 +0000420
njne0ee0712002-05-03 16:41:05 +0000421 get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
njn4f9c9342002-04-29 16:03:24 +0000422
njn25e49d8e72002-09-23 09:36:25 +0000423 VGP_PUSHCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000424 filename_hash = hash(filename, N_FILE_ENTRIES);
425 curr_file_node = BBCC_table[filename_hash];
426 while (NULL != curr_file_node &&
sewardj18d75132002-05-16 11:06:21 +0000427 VG_(strcmp)(filename, curr_file_node->filename) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000428 curr_file_node = curr_file_node->next;
429 }
430 if (NULL == curr_file_node) {
431 BBCC_table[filename_hash] = curr_file_node =
432 new_file_node(filename, BBCC_table[filename_hash]);
433 distinct_files++;
434 }
435
436 fnname_hash = hash(fn_name, N_FN_ENTRIES);
437 curr_fn_node = curr_file_node->fns[fnname_hash];
438 while (NULL != curr_fn_node &&
sewardj18d75132002-05-16 11:06:21 +0000439 VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
njn4f9c9342002-04-29 16:03:24 +0000440 curr_fn_node = curr_fn_node->next;
441 }
442 if (NULL == curr_fn_node) {
443 curr_file_node->fns[fnname_hash] = curr_fn_node =
444 new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
445 distinct_fns++;
446 }
447
448 BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
njn4294fd42002-06-05 14:41:10 +0000449 prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
njn4f9c9342002-04-29 16:03:24 +0000450 curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
451 while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
njn4294fd42002-06-05 14:41:10 +0000452 prev_BBCC_next_ptr = &(curr_BBCC->next);
njn4f9c9342002-04-29 16:03:24 +0000453 curr_BBCC = curr_BBCC->next;
454 }
455 if (curr_BBCC == NULL) {
njn4294fd42002-06-05 14:41:10 +0000456
njne427a662002-10-02 11:08:25 +0000457 sk_assert(False == remove);
njn4294fd42002-06-05 14:41:10 +0000458
njn4f9c9342002-04-29 16:03:24 +0000459 curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
460 new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
461 *BB_seen_before = False;
462
463 } else {
njne427a662002-10-02 11:08:25 +0000464 sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
465 sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
sewardj98e91bc2002-05-01 02:32:10 +0000466 if (VG_(clo_verbosity) > 2) {
467 VG_(message)(Vg_DebugMsg,
468 "BB retranslation, retrieving from BBCC table");
njn4f9c9342002-04-29 16:03:24 +0000469 }
470 *BB_seen_before = True;
njn4294fd42002-06-05 14:41:10 +0000471
472 if (True == remove) {
473 // Remove curr_BBCC from chain; it will be used and free'd by the
474 // caller.
475 *prev_BBCC_next_ptr = curr_BBCC->next;
476
477 } else {
478 BB_retranslations++;
479 }
njn4f9c9342002-04-29 16:03:24 +0000480 }
njn25e49d8e72002-09-23 09:36:25 +0000481 VGP_POPCC(VgpGetBBCC);
njn4f9c9342002-04-29 16:03:24 +0000482 return curr_BBCC;
483}
484
485/*------------------------------------------------------------*/
486/*--- Cache simulation instrumentation phase ---*/
487/*------------------------------------------------------------*/
488
njn4f9c9342002-04-29 16:03:24 +0000489static Int compute_BBCC_array_size(UCodeBlock* cb)
490{
491 UInstr* u_in;
492 Int i, CC_size, BBCC_size = 0;
493 Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
njn25e49d8e72002-09-23 09:36:25 +0000494 Int t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000495
496 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
njn25e49d8e72002-09-23 09:36:25 +0000497 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000498
njn810086f2002-11-14 12:42:47 +0000499 for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
500 u_in = VG_(get_instr)(cb, i);
njn4f9c9342002-04-29 16:03:24 +0000501 switch(u_in->opcode) {
502
503 case INCEIP:
504 goto case_for_end_of_instr;
505
506 case JMP:
507 if (u_in->cond != CondAlways) break;
508
509 goto case_for_end_of_instr;
510
511 case_for_end_of_instr:
512
njn25e49d8e72002-09-23 09:36:25 +0000513 if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
514 t_read != t_write)
515 CC_size = sizeof(iddCC);
516 else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
517 CC_size = sizeof(idCC);
518 else
519 CC_size = sizeof(iCC);
njn4f9c9342002-04-29 16:03:24 +0000520
521 BBCC_size += CC_size;
522 is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
523 break;
524
525 case LOAD:
526 /* Two LDBs are possible for a single instruction */
njn9aae6742002-04-30 13:44:01 +0000527 /* Also, a STORE can come after a LOAD for bts/btr/btc */
njne427a662002-10-02 11:08:25 +0000528 sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
sewardjfc3e5d32002-04-30 10:18:48 +0000529 !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000530 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000531 is_LOAD = True;
532 break;
533
534 case STORE:
535 /* Multiple STOREs are possible for 'pushal' */
njne427a662002-10-02 11:08:25 +0000536 sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000537 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000538 is_STORE = True;
539 break;
540
sewardj3949d102003-03-28 17:21:29 +0000541 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000542 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000543 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000544 case FPU_R:
njne427a662002-10-02 11:08:25 +0000545 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000546 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000547 is_FPU_R = True;
548 break;
549
sewardj3949d102003-03-28 17:21:29 +0000550 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000551 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000552 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000553 case FPU_W:
njne427a662002-10-02 11:08:25 +0000554 sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
njn25e49d8e72002-09-23 09:36:25 +0000555 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000556 is_FPU_W = True;
557 break;
558
559 default:
560 break;
561 }
562 }
563
564 return BBCC_size;
565}
566
njn25e49d8e72002-09-23 09:36:25 +0000567static __attribute__ ((regparm (1)))
568void log_1I_0D_cache_access(iCC* cc)
569{
570 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
571 // cc, cc->instr_addr, cc->instr_size)
572 VGP_PUSHCC(VgpCacheSimulate);
573 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
574 cc->I.a++;
575 VGP_POPCC(VgpCacheSimulate);
576}
577
578/* Difference between this function and log_1I_0D_cache_access() is that
579 this one can be passed any kind of CC, not just an iCC. So we have to
580 be careful to make sure we don't make any assumptions about CC layout.
581 (As it stands, they would be safe, but this will avoid potential heartache
582 if anyone else changes CC layout.)
583 Note that we only do the switch for the JIFZ version because if we always
584 called this switching version, things would run about 5% slower. */
585static __attribute__ ((regparm (1)))
586void log_1I_0D_cache_access_JIFZ(iCC* cc)
587{
588 UChar instr_size;
589 Addr instr_addr;
590 CC* I;
591
592 //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
593 // cc, cc->instr_addr, cc->instr_size)
594 VGP_PUSHCC(VgpCacheSimulate);
595
596 switch(cc->tag) {
597 case InstrCC:
598 instr_size = cc->instr_size;
599 instr_addr = cc->instr_addr;
600 I = &(cc->I);
601 break;
602 case ReadCC:
603 case WriteCC:
604 case ModCC:
605 instr_size = ((idCC*)cc)->instr_size;
606 instr_addr = ((idCC*)cc)->instr_addr;
607 I = &( ((idCC*)cc)->I );
608 break;
609 case ReadWriteCC:
610 instr_size = ((iddCC*)cc)->instr_size;
611 instr_addr = ((iddCC*)cc)->instr_addr;
612 I = &( ((iddCC*)cc)->I );
613 break;
614 default:
njne427a662002-10-02 11:08:25 +0000615 VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
njn25e49d8e72002-09-23 09:36:25 +0000616 break;
617 }
618 cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
619 I->a++;
620 VGP_POPCC(VgpCacheSimulate);
621}
622
623__attribute__ ((regparm (2))) static
624void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
625{
626 //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
627 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
628 VGP_PUSHCC(VgpCacheSimulate);
629 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
630 cc->D.a++;
631 VGP_POPCC(VgpCacheSimulate);
632}
633
634__attribute__ ((regparm (2))) static
635void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
636{
637 //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
638 // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
639 VGP_PUSHCC(VgpCacheSimulate);
640 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
641 cc->I.a++;
642
643 cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
644 cc->D.a++;
645 VGP_POPCC(VgpCacheSimulate);
646}
647
648__attribute__ ((regparm (3))) static
649void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
650{
651 //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
652 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
653 VGP_PUSHCC(VgpCacheSimulate);
654 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
655 cc->Da.a++;
656 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
657 cc->Db.a++;
658 VGP_POPCC(VgpCacheSimulate);
659}
660
661__attribute__ ((regparm (3))) static
662void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
663{
664 //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
665 // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
666 VGP_PUSHCC(VgpCacheSimulate);
667 cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
668 cc->I.a++;
669
670 cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
671 cc->Da.a++;
672 cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
673 cc->Db.a++;
674 VGP_POPCC(VgpCacheSimulate);
675}
676
677UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
678{
679/* Use this rather than eg. -1 because it's a UInt. */
njn4f9c9342002-04-29 16:03:24 +0000680#define INVALID_DATA_SIZE 999999
681
njn4f9c9342002-04-29 16:03:24 +0000682 UCodeBlock* cb;
683 Int i;
684 UInstr* u_in;
685 BBCC* BBCC_node;
njn25e49d8e72002-09-23 09:36:25 +0000686 Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
687 t_data_addr2, t_read, t_write;
njn4f9c9342002-04-29 16:03:24 +0000688 Int CC_size = -1; /* Shut gcc warnings up */
njn25e49d8e72002-09-23 09:36:25 +0000689 Addr x86_instr_addr = orig_addr;
690 UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
691 Addr helper;
692 Int argc;
njn4f9c9342002-04-29 16:03:24 +0000693 UInt stack_used;
njn25e49d8e72002-09-23 09:36:25 +0000694 Bool BB_seen_before = False;
695 Bool instrumented_Jcond = False;
696 Bool has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000697 Addr BBCC_ptr0, BBCC_ptr;
698
699 /* Get BBCC (creating if necessary -- requires a counting pass over the BB
700 * if it's the first time it's been seen), and point to start of the
701 * BBCC array. */
njn25e49d8e72002-09-23 09:36:25 +0000702 BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
njn4f9c9342002-04-29 16:03:24 +0000703 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
704
njn810086f2002-11-14 12:42:47 +0000705 cb = VG_(setup_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000706
njn25e49d8e72002-09-23 09:36:25 +0000707 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
708 t_read = t_write = INVALID_TEMPREG;
njn4f9c9342002-04-29 16:03:24 +0000709
njn810086f2002-11-14 12:42:47 +0000710 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
711 u_in = VG_(get_instr)(cb_in, i);
njn4f9c9342002-04-29 16:03:24 +0000712
njn4f9c9342002-04-29 16:03:24 +0000713 /* What this is all about: we want to instrument each x86 instruction
714 * translation. The end of these are marked in three ways. The three
715 * ways, and the way we instrument them, are as follows:
716 *
717 * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
718 * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
719 * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
720 *
njn25e49d8e72002-09-23 09:36:25 +0000721 * The last UInstr in a basic block is always a Juncond. Jconds,
722 * when they appear, are always second last. We check this with
723 * various assertions.
724 *
725 * We must put the instrumentation before any jumps so that it is always
njn4f9c9342002-04-29 16:03:24 +0000726 * executed. We don't have to put the instrumentation before the INCEIP
727 * (it could go after) but we do so for consistency.
728 *
njn25e49d8e72002-09-23 09:36:25 +0000729 * x86 instruction sizes are obtained from INCEIPs (for case 1) or
730 * from .extra4b field of the final JMP (for case 2 & 3).
njn4f9c9342002-04-29 16:03:24 +0000731 *
njn25e49d8e72002-09-23 09:36:25 +0000732 * Note that JIFZ is treated differently.
njn4f9c9342002-04-29 16:03:24 +0000733 *
734 * The instrumentation is just a call to the appropriate helper function,
735 * passing it the address of the instruction's CC.
736 */
njne427a662002-10-02 11:08:25 +0000737 if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
njn4f9c9342002-04-29 16:03:24 +0000738
739 switch (u_in->opcode) {
sewardj7a5ebcf2002-11-13 22:42:13 +0000740 case NOP: case LOCK: case CALLM_E: case CALLM_S:
njn4f9c9342002-04-29 16:03:24 +0000741 break;
742
njn4f9c9342002-04-29 16:03:24 +0000743 /* For memory-ref instrs, copy the data_addr into a temporary to be
njn25e49d8e72002-09-23 09:36:25 +0000744 * passed to the cachesim_* helper at the end of the instruction.
njn4f9c9342002-04-29 16:03:24 +0000745 */
746 case LOAD:
njn25e49d8e72002-09-23 09:36:25 +0000747 t_read = u_in->val1;
njn4f9c9342002-04-29 16:03:24 +0000748 t_read_addr = newTemp(cb);
749 uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
750 data_size = u_in->size;
njn4ba5a792002-09-30 10:23:54 +0000751 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000752 break;
753
sewardj3949d102003-03-28 17:21:29 +0000754 case MMX2_MemRd:
sewardjd7971012003-04-04 00:21:58 +0000755 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000756 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000757 case FPU_R:
njn25e49d8e72002-09-23 09:36:25 +0000758 t_read = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000759 t_read_addr = newTemp(cb);
760 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
njn25e49d8e72002-09-23 09:36:25 +0000761 data_size = ( u_in->size <= MIN_LINE_SIZE
762 ? u_in->size
763 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000764 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000765 break;
766
767 /* Note that we must set t_write_addr even for mod instructions;
njn25e49d8e72002-09-23 09:36:25 +0000768 * That's how the code above determines whether it does a write.
769 * Without it, it would think a mod instruction is a read.
njn4f9c9342002-04-29 16:03:24 +0000770 * As for the MOV, if it's a mod instruction it's redundant, but it's
771 * not expensive and mod instructions are rare anyway. */
sewardj3949d102003-03-28 17:21:29 +0000772 case MMX2_MemWr:
sewardjd7971012003-04-04 00:21:58 +0000773 sk_assert(u_in->size == 4 || u_in->size == 8);
sewardj3949d102003-03-28 17:21:29 +0000774 /* fall through */
njn4f9c9342002-04-29 16:03:24 +0000775 case STORE:
776 case FPU_W:
njn25e49d8e72002-09-23 09:36:25 +0000777 t_write = u_in->val2;
njn4f9c9342002-04-29 16:03:24 +0000778 t_write_addr = newTemp(cb);
779 uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
njn25e49d8e72002-09-23 09:36:25 +0000780 /* 28 and 108 B data-sized instructions will be done
781 * inaccurately but they're very rare and this avoids errors
782 * from hitting more than two cache lines in the simulation. */
783 data_size = ( u_in->size <= MIN_LINE_SIZE
784 ? u_in->size
785 : MIN_LINE_SIZE);
njn4ba5a792002-09-30 10:23:54 +0000786 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000787 break;
788
njn25e49d8e72002-09-23 09:36:25 +0000789
790 /* For rep-prefixed instructions, log a single I-cache access
791 * before the UCode loop that implements the repeated part, which
792 * is where the multiple D-cache accesses are logged. */
793 case JIFZ:
794 has_rep_prefix = True;
795
796 /* Setup 1st and only arg: CC addr */
797 t_CC_addr = newTemp(cb);
798 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
799 uLiteral(cb, BBCC_ptr);
800
801 /* Call helper */
802 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
803 uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
njn4ba5a792002-09-30 10:23:54 +0000804 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000805 break;
806
807
808 /* INCEIP: insert instrumentation */
809 case INCEIP:
810 x86_instr_size = u_in->val1;
811 goto instrument_x86_instr;
812
813 /* JMP: insert instrumentation if the first JMP */
814 case JMP:
815 if (instrumented_Jcond) {
njne427a662002-10-02 11:08:25 +0000816 sk_assert(CondAlways == u_in->cond);
njn810086f2002-11-14 12:42:47 +0000817 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn4ba5a792002-09-30 10:23:54 +0000818 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000819 instrumented_Jcond = False; /* reset */
820 break;
821 }
822 /* The first JMP... instrument. */
823 if (CondAlways != u_in->cond) {
njn810086f2002-11-14 12:42:47 +0000824 sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000825 instrumented_Jcond = True;
826 } else {
njn810086f2002-11-14 12:42:47 +0000827 sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
njn25e49d8e72002-09-23 09:36:25 +0000828 }
829
830 /* Get x86 instr size from final JMP. */
njn810086f2002-11-14 12:42:47 +0000831 x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
832
njn25e49d8e72002-09-23 09:36:25 +0000833 goto instrument_x86_instr;
834
835
836 /* Code executed at the end of each x86 instruction. */
837 instrument_x86_instr:
838
839 /* Initialise the CC in the BBCC array appropriately if it
840 * hasn't been initialised before. Then call appropriate sim
841 * function, passing it the CC address. */
842 stack_used = 0;
843
njne427a662002-10-02 11:08:25 +0000844 sk_assert(x86_instr_size >= 1 &&
njn25e49d8e72002-09-23 09:36:25 +0000845 x86_instr_size <= MAX_x86_INSTR_SIZE);
846
847#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
848
849 if (!IS_(read) && !IS_(write)) {
njne427a662002-10-02 11:08:25 +0000850 sk_assert(INVALID_DATA_SIZE == data_size);
851 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000852 INVALID_TEMPREG == t_read &&
853 INVALID_TEMPREG == t_write_addr &&
854 INVALID_TEMPREG == t_write);
855 CC_size = sizeof(iCC);
856 if (!BB_seen_before)
857 init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
858 helper = ( has_rep_prefix
859 ? (Addr)0 /* no extra log needed */
860 : (Addr) & log_1I_0D_cache_access
861 );
862 argc = 1;
863
864 } else {
njne427a662002-10-02 11:08:25 +0000865 sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
njn25e49d8e72002-09-23 09:36:25 +0000866 8 == data_size || 10 == data_size ||
867 MIN_LINE_SIZE == data_size);
868
869 if (IS_(read) && !IS_(write)) {
870 CC_size = sizeof(idCC);
871 /* If it uses 'rep', we've already logged the I-cache
872 * access at the JIFZ UInstr (see JIFZ case above) so
873 * don't do it here */
874 helper = ( has_rep_prefix
875 ? (Addr) & log_0I_1D_cache_access
876 : (Addr) & log_1I_1D_cache_access
877 );
878 argc = 2;
879 if (!BB_seen_before)
880 init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
881 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000882 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000883 INVALID_TEMPREG != t_read &&
884 INVALID_TEMPREG == t_write_addr &&
885 INVALID_TEMPREG == t_write);
886 t_data_addr1 = t_read_addr;
887
888 } else if (!IS_(read) && IS_(write)) {
889 CC_size = sizeof(idCC);
890 helper = ( has_rep_prefix
891 ? (Addr) & log_0I_1D_cache_access
892 : (Addr) & log_1I_1D_cache_access
893 );
894 argc = 2;
895 if (!BB_seen_before)
896 init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
897 x86_instr_size, data_size);
njne427a662002-10-02 11:08:25 +0000898 sk_assert(INVALID_TEMPREG == t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000899 INVALID_TEMPREG == t_read &&
900 INVALID_TEMPREG != t_write_addr &&
901 INVALID_TEMPREG != t_write);
902 t_data_addr1 = t_write_addr;
903
904 } else {
njne427a662002-10-02 11:08:25 +0000905 sk_assert(IS_(read) && IS_(write));
906 sk_assert(INVALID_TEMPREG != t_read_addr &&
njn25e49d8e72002-09-23 09:36:25 +0000907 INVALID_TEMPREG != t_read &&
908 INVALID_TEMPREG != t_write_addr &&
909 INVALID_TEMPREG != t_write);
910 if (t_read == t_write) {
911 CC_size = sizeof(idCC);
912 helper = ( has_rep_prefix
913 ? (Addr) & log_0I_1D_cache_access
914 : (Addr) & log_1I_1D_cache_access
915 );
916 argc = 2;
917 if (!BB_seen_before)
918 init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
919 x86_instr_size, data_size);
920 t_data_addr1 = t_read_addr;
921 } else {
922 CC_size = sizeof(iddCC);
923 helper = ( has_rep_prefix
924 ? (Addr) & log_0I_2D_cache_access
925 : (Addr) & log_1I_2D_cache_access
926 );
927 argc = 3;
928 if (!BB_seen_before)
929 init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
930 x86_instr_size, data_size);
931 t_data_addr1 = t_read_addr;
932 t_data_addr2 = t_write_addr;
933 }
934 }
935#undef IS_
936 }
937
938 /* Call the helper, if necessary */
939 if ((Addr)0 != helper) {
940
941 /* Setup 1st arg: CC addr */
942 t_CC_addr = newTemp(cb);
943 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
944 uLiteral(cb, BBCC_ptr);
945
946 /* Call the helper */
947 if (1 == argc)
948 uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
949 else if (2 == argc)
950 uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
951 TempReg, t_data_addr1);
952 else if (3 == argc)
953 uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
954 TempReg, t_data_addr1,
955 TempReg, t_data_addr2);
956 else
njne427a662002-10-02 11:08:25 +0000957 VG_(skin_panic)("argc... not 1 or 2 or 3?");
njn25e49d8e72002-09-23 09:36:25 +0000958
959 uCCall(cb, helper, argc, argc, False);
960 }
961
962 /* Copy original UInstr (INCEIP or JMP) */
njn4ba5a792002-09-30 10:23:54 +0000963 VG_(copy_UInstr)(cb, u_in);
njn25e49d8e72002-09-23 09:36:25 +0000964
965 /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
966 BBCC_ptr += CC_size;
967 x86_instr_addr += x86_instr_size;
968 t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
969 t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
970 data_size = INVALID_DATA_SIZE;
971 has_rep_prefix = False;
njn4f9c9342002-04-29 16:03:24 +0000972 break;
973
974 default:
njn4ba5a792002-09-30 10:23:54 +0000975 VG_(copy_UInstr)(cb, u_in);
njn4f9c9342002-04-29 16:03:24 +0000976 break;
977 }
978 }
979
980 /* Just check everything looks ok */
njne427a662002-10-02 11:08:25 +0000981 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +0000982
njn4ba5a792002-09-30 10:23:54 +0000983 VG_(free_UCodeBlock)(cb_in);
njn4f9c9342002-04-29 16:03:24 +0000984 return cb;
njn25e49d8e72002-09-23 09:36:25 +0000985
986#undef INVALID_DATA_SIZE
njn4f9c9342002-04-29 16:03:24 +0000987}
988
989/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +0000990/*--- Automagic cache initialisation stuff ---*/
njn4f9c9342002-04-29 16:03:24 +0000991/*------------------------------------------------------------*/
992
993/* Total reads/writes/misses. Calculated during CC traversal at the end. */
994static CC Ir_total;
995static CC Dr_total;
996static CC Dw_total;
997
njn25e49d8e72002-09-23 09:36:25 +0000998#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
999
1000static cache_t clo_I1_cache = UNDEFINED_CACHE;
1001static cache_t clo_D1_cache = UNDEFINED_CACHE;
1002static cache_t clo_L2_cache = UNDEFINED_CACHE;
1003
njn7cf0bd32002-06-08 13:36:03 +00001004/* All CPUID info taken from sandpile.org/a32/cpuid.htm */
1005/* Probably only works for Intel and AMD chips, and probably only for some of
1006 * them.
1007 */
1008
sewardj05bcdcb2003-05-18 10:05:38 +00001009static __inline__ void cpuid(Int n, UInt *a, UInt *b, UInt *c, UInt *d)
njn7cf0bd32002-06-08 13:36:03 +00001010{
1011 __asm__ __volatile__ (
1012 "cpuid"
1013 : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */
1014 : "0" (n) /* input */
1015 );
1016}
1017
sewardj07133bf2002-06-13 10:25:56 +00001018static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
njn7cf0bd32002-06-08 13:36:03 +00001019{
1020 VG_(message)(Vg_DebugMsg,
njn25e49d8e72002-09-23 09:36:25 +00001021 "warning: Pentium with %d K micro-op instruction trace cache",
sewardj07133bf2002-06-13 10:25:56 +00001022 actual_size);
njn7cf0bd32002-06-08 13:36:03 +00001023 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001024 " Simulating a %d KB cache with %d B lines",
1025 used_size, line_size);
njn7cf0bd32002-06-08 13:36:03 +00001026}
1027
1028/* Intel method is truly wretched. We have to do an insane indexing into an
1029 * array of pre-defined configurations for various parts of the memory
1030 * hierarchy.
1031 */
1032static
sewardj07133bf2002-06-13 10:25:56 +00001033Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001034{
sewardj07133bf2002-06-13 10:25:56 +00001035 UChar info[16];
1036 Int i, trials;
njn25e49d8e72002-09-23 09:36:25 +00001037 Bool L2_found = False;
njn7cf0bd32002-06-08 13:36:03 +00001038
1039 if (level < 2) {
1040 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001041 "warning: CPUID level < 2 for Intel processor (%d)",
1042 level);
njn7cf0bd32002-06-08 13:36:03 +00001043 return -1;
1044 }
1045
sewardj07133bf2002-06-13 10:25:56 +00001046 cpuid(2, (Int*)&info[0], (Int*)&info[4],
1047 (Int*)&info[8], (Int*)&info[12]);
njn7cf0bd32002-06-08 13:36:03 +00001048 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
1049 info[0] = 0x0; /* reset AL */
1050
1051 if (0 != trials) {
1052 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001053 "warning: non-zero CPUID trials for Intel processor (%d)",
1054 trials);
njn7cf0bd32002-06-08 13:36:03 +00001055 return -1;
1056 }
1057
1058 for (i = 0; i < 16; i++) {
1059
1060 switch (info[i]) {
1061
1062 case 0x0: /* ignore zeros */
1063 break;
1064
njn25e49d8e72002-09-23 09:36:25 +00001065 /* TLB info, ignore */
1066 case 0x01: case 0x02: case 0x03: case 0x04:
1067 case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
njn7cf0bd32002-06-08 13:36:03 +00001068 break;
1069
1070 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
1071 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
1072
1073 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
1074 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
1075
njn25e49d8e72002-09-23 09:36:25 +00001076 /* IA-64 info -- panic! */
1077 case 0x10: case 0x15: case 0x1a:
1078 case 0x88: case 0x89: case 0x8a: case 0x8d:
1079 case 0x90: case 0x96: case 0x9b:
1080 VG_(message)(Vg_DebugMsg,
1081 "error: IA-64 cache stats! Cachegrind doesn't run on IA-64...");
njne427a662002-10-02 11:08:25 +00001082 VG_(skin_panic)("IA-64 detected");
njn25e49d8e72002-09-23 09:36:25 +00001083
njn7cf0bd32002-06-08 13:36:03 +00001084 case 0x22: case 0x23: case 0x25: case 0x29:
sewardj07133bf2002-06-13 10:25:56 +00001085 VG_(message)(Vg_DebugMsg,
1086 "warning: L3 cache detected but ignored\n");
njn7cf0bd32002-06-08 13:36:03 +00001087 break;
1088
njn25e49d8e72002-09-23 09:36:25 +00001089 /* These are sectored, whatever that means */
1090 case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
1091 case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
1092
1093 /* If a P6 core, this means "no L2 cache".
1094 If a P4 core, this means "no L3 cache".
1095 We don't know what core it is, so don't issue a warning. To detect
1096 a missing L2 cache, we use 'L2_found'. */
1097 case 0x40:
njn7cf0bd32002-06-08 13:36:03 +00001098 break;
1099
njn25e49d8e72002-09-23 09:36:25 +00001100 case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
1101 case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
1102 case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
1103 case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
1104 case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001105
1106 /* These are sectored, whatever that means */
1107 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
1108 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
1109 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
1110
1111 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
1112 * conversion to byte size is a total guess; treat the 12K and 16K
1113 * cases the same since the cache byte size must be a power of two for
1114 * everything to work!. Also guessing 32 bytes for the line size...
1115 */
1116 case 0x70: /* 12K micro-ops, 8-way */
1117 *I1c = (cache_t) { 16, 8, 32 };
1118 micro_ops_warn(12, 16, 32);
1119 break;
1120 case 0x71: /* 16K micro-ops, 8-way */
1121 *I1c = (cache_t) { 16, 8, 32 };
1122 micro_ops_warn(16, 16, 32);
1123 break;
1124 case 0x72: /* 32K micro-ops, 8-way */
1125 *I1c = (cache_t) { 32, 8, 32 };
1126 micro_ops_warn(32, 32, 32);
1127 break;
1128
njn25e49d8e72002-09-23 09:36:25 +00001129 /* These are sectored, whatever that means */
1130 case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
1131 case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
1132 case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
1133 case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
1134 case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001135
njn25e49d8e72002-09-23 09:36:25 +00001136 case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
1137 case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
1138 case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
1139 case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
1140 case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
njn7cf0bd32002-06-08 13:36:03 +00001141
1142 default:
1143 VG_(message)(Vg_DebugMsg,
sewardj07133bf2002-06-13 10:25:56 +00001144 "warning: Unknown Intel cache config value "
njn25e49d8e72002-09-23 09:36:25 +00001145 "(0x%x), ignoring", info[i]);
njn7cf0bd32002-06-08 13:36:03 +00001146 break;
1147 }
1148 }
njn25e49d8e72002-09-23 09:36:25 +00001149
1150 if (!L2_found)
1151 VG_(message)(Vg_DebugMsg,
1152 "warning: L2 cache not installed, ignore L2 results.");
1153
njn7cf0bd32002-06-08 13:36:03 +00001154 return 0;
1155}
1156
1157/* AMD method is straightforward, just extract appropriate bits from the
1158 * result registers.
1159 *
1160 * Bits, for D1 and I1:
1161 * 31..24 data L1 cache size in KBs
1162 * 23..16 data L1 cache associativity (FFh=full)
1163 * 15.. 8 data L1 cache lines per tag
1164 * 7.. 0 data L1 cache line size in bytes
1165 *
1166 * Bits, for L2:
1167 * 31..16 unified L2 cache size in KBs
1168 * 15..12 unified L2 cache associativity (0=off, FFh=full)
1169 * 11.. 8 unified L2 cache lines per tag
1170 * 7.. 0 unified L2 cache line size in bytes
1171 *
1172 * #3 The AMD K7 processor's L2 cache must be configured prior to relying
1173 * upon this information. (Whatever that means -- njn)
1174 *
njn25e49d8e72002-09-23 09:36:25 +00001175 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
1176 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
1177 * so we detect that.
1178 *
njn7cf0bd32002-06-08 13:36:03 +00001179 * Returns 0 on success, non-zero on failure.
1180 */
sewardj07133bf2002-06-13 10:25:56 +00001181static
1182Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001183{
sewardj05bcdcb2003-05-18 10:05:38 +00001184 UInt ext_level;
1185 Int dummy, model;
sewardj07133bf2002-06-13 10:25:56 +00001186 Int I1i, D1i, L2i;
njn7cf0bd32002-06-08 13:36:03 +00001187
1188 cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
1189
1190 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
1191 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001192 "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
1193 ext_level);
njn7cf0bd32002-06-08 13:36:03 +00001194 return -1;
1195 }
1196
1197 cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
1198 cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
1199
njn25e49d8e72002-09-23 09:36:25 +00001200 cpuid(0x1, &model, &dummy, &dummy, &dummy);
1201 /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
1202
1203 /* Check for Duron bug */
1204 if (model == 0x630) {
1205 VG_(message)(Vg_UserMsg,
1206 "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
1207 L2i = (64 << 16) | (L2i & 0xffff);
1208 }
1209
njn7cf0bd32002-06-08 13:36:03 +00001210 D1c->size = (D1i >> 24) & 0xff;
1211 D1c->assoc = (D1i >> 16) & 0xff;
1212 D1c->line_size = (D1i >> 0) & 0xff;
1213
1214 I1c->size = (I1i >> 24) & 0xff;
1215 I1c->assoc = (I1i >> 16) & 0xff;
1216 I1c->line_size = (I1i >> 0) & 0xff;
1217
1218 L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
1219 L2c->assoc = (L2i >> 12) & 0xf;
1220 L2c->line_size = (L2i >> 0) & 0xff;
1221
1222 return 0;
1223}
1224
/* Jump buffer used to get back out of the SIGILL handler below. */
static jmp_buf cpuid_jmpbuf;

/* SIGILL handler: jumps back to wherever cpuid_jmpbuf was last set up, making
 * that __builtin_setjmp() call return non-zero.  The signal number is not
 * used. */
static void cpuid_SIGILL_handler(int signum)
{
   (void)signum;
   __builtin_longjmp(cpuid_jmpbuf, 1);
}
1232
1233static
sewardj07133bf2002-06-13 10:25:56 +00001234Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001235{
sewardj07133bf2002-06-13 10:25:56 +00001236 Int level, res, ret;
1237 Char vendor_id[13];
njn7cf0bd32002-06-08 13:36:03 +00001238 vki_ksigaction sigill_new, sigill_saved;
njn7cf0bd32002-06-08 13:36:03 +00001239
1240 /* Install own SIGILL handler */
1241 sigill_new.ksa_handler = cpuid_SIGILL_handler;
1242 sigill_new.ksa_flags = 0;
1243 sigill_new.ksa_restorer = NULL;
1244 res = VG_(ksigemptyset)( &sigill_new.ksa_mask );
njne427a662002-10-02 11:08:25 +00001245 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001246
1247 res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
njne427a662002-10-02 11:08:25 +00001248 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001249
1250 /* Trap for illegal instruction, in case it's a really old processor that
1251 * doesn't support CPUID. */
1252 if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
1253 cpuid(0, &level, (int*)&vendor_id[0],
1254 (int*)&vendor_id[8], (int*)&vendor_id[4]);
1255 vendor_id[12] = '\0';
1256
1257 /* Restore old SIGILL handler */
1258 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001259 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001260
1261 } else {
1262 VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
1263
1264 /* Restore old SIGILL handler */
1265 res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL );
njne427a662002-10-02 11:08:25 +00001266 sk_assert(res == 0);
njn7cf0bd32002-06-08 13:36:03 +00001267 return -1;
1268 }
1269
1270 if (0 == level) {
1271 VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
1272 return -1;
1273 }
1274
1275 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
1276 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
1277 ret = Intel_cache_info(level, I1c, D1c, L2c);
1278
1279 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
1280 ret = AMD_cache_info(I1c, D1c, L2c);
1281
1282 } else {
1283 VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
1284 vendor_id);
1285 return -1;
1286 }
1287
1288 /* Successful! Convert sizes from KB to bytes */
1289 I1c->size *= 1024;
1290 D1c->size *= 1024;
1291 L2c->size *= 1024;
1292
1293 return ret;
1294}
1295
1296/* Checks cache config is ok; makes it so if not. */
sewardj07133bf2002-06-13 10:25:56 +00001297static
1298void check_cache(cache_t* cache, cache_t* dflt, Char *name)
njn7cf0bd32002-06-08 13:36:03 +00001299{
1300 /* First check they're all powers of two */
sewardj07133bf2002-06-13 10:25:56 +00001301 if (-1 == VG_(log2)(cache->size)) {
njn7cf0bd32002-06-08 13:36:03 +00001302 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001303 "warning: %s size of %dB not a power of two; "
1304 "defaulting to %dB", name, cache->size, dflt->size);
njn7cf0bd32002-06-08 13:36:03 +00001305 cache->size = dflt->size;
1306 }
1307
sewardj07133bf2002-06-13 10:25:56 +00001308 if (-1 == VG_(log2)(cache->assoc)) {
njn7cf0bd32002-06-08 13:36:03 +00001309 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001310 "warning: %s associativity of %d not a power of two; "
1311 "defaulting to %d-way", name, cache->assoc, dflt->assoc);
njn7cf0bd32002-06-08 13:36:03 +00001312 cache->assoc = dflt->assoc;
1313 }
1314
sewardj07133bf2002-06-13 10:25:56 +00001315 if (-1 == VG_(log2)(cache->line_size)) {
njn7cf0bd32002-06-08 13:36:03 +00001316 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001317 "warning: %s line size of %dB not a power of two; "
1318 "defaulting to %dB",
1319 name, cache->line_size, dflt->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001320 cache->line_size = dflt->line_size;
1321 }
1322
1323 /* Then check line size >= 16 -- any smaller and a single instruction could
1324 * straddle three cache lines, which breaks a simulation assertion and is
1325 * stupid anyway. */
1326 if (cache->line_size < MIN_LINE_SIZE) {
1327 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001328 "warning: %s line size of %dB too small; "
1329 "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE);
njn7cf0bd32002-06-08 13:36:03 +00001330 cache->line_size = MIN_LINE_SIZE;
1331 }
1332
1333 /* Then check cache size > line size (causes seg faults if not). */
1334 if (cache->size <= cache->line_size) {
1335 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001336 "warning: %s cache size of %dB <= line size of %dB; "
1337 "increasing to %dB", name, cache->size, cache->line_size,
1338 cache->line_size * 2);
njn7cf0bd32002-06-08 13:36:03 +00001339 cache->size = cache->line_size * 2;
1340 }
1341
1342 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1343 if (cache->assoc > (cache->size / cache->line_size)) {
1344 VG_(message)(Vg_UserMsg,
sewardj07133bf2002-06-13 10:25:56 +00001345 "warning: %s associativity > (size / line size); "
1346 "increasing size to %dB",
1347 name, cache->assoc * cache->line_size);
njn7cf0bd32002-06-08 13:36:03 +00001348 cache->size = cache->assoc * cache->line_size;
1349 }
1350}
1351
1352/* On entry, args are undefined. Fill them with any info from the
1353 * command-line, then fill in any remaining with CPUID instruction if possible,
1354 * otherwise use defaults. Then check them and fix if not ok. */
sewardj07133bf2002-06-13 10:25:56 +00001355static
1356void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
njn7cf0bd32002-06-08 13:36:03 +00001357{
1358 /* Defaults are for a model 3 or 4 Athlon */
1359 cache_t I1_dflt = (cache_t) { 65536, 2, 64 };
1360 cache_t D1_dflt = (cache_t) { 65536, 2, 64 };
1361 cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
1362
njn25e49d8e72002-09-23 09:36:25 +00001363#define CMD_LINE_DEFINED(L) \
1364 (-1 != clo_##L##_cache.size || \
1365 -1 != clo_##L##_cache.assoc || \
1366 -1 != clo_##L##_cache.line_size)
njn7cf0bd32002-06-08 13:36:03 +00001367
njn25e49d8e72002-09-23 09:36:25 +00001368 *I1c = clo_I1_cache;
1369 *D1c = clo_D1_cache;
1370 *L2c = clo_L2_cache;
sewardjb1a77a42002-07-13 13:31:20 +00001371
njn7cf0bd32002-06-08 13:36:03 +00001372 /* If any undefined on command-line, try CPUID */
1373 if (! CMD_LINE_DEFINED(I1) ||
1374 ! CMD_LINE_DEFINED(D1) ||
1375 ! CMD_LINE_DEFINED(L2)) {
1376
1377 /* Overwrite CPUID result for any cache defined on command-line */
1378 if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
1379
njn25e49d8e72002-09-23 09:36:25 +00001380 if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
1381 if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
1382 if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
njn7cf0bd32002-06-08 13:36:03 +00001383
1384 /* CPUID failed, use defaults for each undefined by command-line */
1385 } else {
1386 VG_(message)(Vg_DebugMsg,
1387 "Couldn't detect cache configuration, using one "
1388 "or more defaults ");
1389
njn25e49d8e72002-09-23 09:36:25 +00001390 *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
1391 *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
1392 *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
njn7cf0bd32002-06-08 13:36:03 +00001393 }
1394 }
1395#undef CMD_LINE_DEFINED
1396
1397 check_cache(I1c, &I1_dflt, "I1");
1398 check_cache(D1c, &D1_dflt, "D1");
1399 check_cache(L2c, &L2_dflt, "L2");
1400
1401 if (VG_(clo_verbosity) > 1) {
1402 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1403 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1404 I1c->size, I1c->assoc, I1c->line_size);
1405 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1406 D1c->size, D1c->assoc, D1c->line_size);
1407 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1408 L2c->size, L2c->assoc, L2c->line_size);
1409 }
1410}
1411
njn4f9c9342002-04-29 16:03:24 +00001412/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001413/*--- SK_(fini)() and related function ---*/
njn4f9c9342002-04-29 16:03:24 +00001414/*------------------------------------------------------------*/
1415
njn4f9c9342002-04-29 16:03:24 +00001416static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl,
1417 Char *first_instr_fn)
1418{
1419 Addr BBCC_ptr0, BBCC_ptr;
sewardj07133bf2002-06-13 10:25:56 +00001420 Char buf[BUF_LEN], curr_file[BUF_LEN],
1421 fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001422 UInt line_num;
1423
1424 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1425
njne0ee0712002-05-03 16:41:05 +00001426 /* Mark start of basic block in output, just to ease debugging */
1427 VG_(write)(fd, (void*)"\n", 1);
njn4f9c9342002-04-29 16:03:24 +00001428
1429 VG_(strcpy)(curr_file, first_instr_fl);
1430
1431 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1432
1433 /* We pretend the CC is an iCC for getting the tag. This is ok
1434 * because both CC types have tag as their first byte. Once we know
1435 * the type, we can cast and act appropriately. */
1436
1437 Char fl_buf[FILENAME_LEN];
1438 Char fn_buf[FN_NAME_LEN];
1439
njne0ee0712002-05-03 16:41:05 +00001440 Addr instr_addr;
njn4f9c9342002-04-29 16:03:24 +00001441 switch ( ((iCC*)BBCC_ptr)->tag ) {
1442
njn25e49d8e72002-09-23 09:36:25 +00001443 case InstrCC:
njne0ee0712002-05-03 16:41:05 +00001444 instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
1445 sprint_iCC(buf, (iCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001446 ADD_CC_TO(iCC, I, Ir_total);
1447 BBCC_ptr += sizeof(iCC);
1448 break;
1449
njn25e49d8e72002-09-23 09:36:25 +00001450 case ReadCC:
1451 case ModCC:
njne0ee0712002-05-03 16:41:05 +00001452 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1453 sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001454 ADD_CC_TO(idCC, I, Ir_total);
1455 ADD_CC_TO(idCC, D, Dr_total);
1456 BBCC_ptr += sizeof(idCC);
1457 break;
1458
njn25e49d8e72002-09-23 09:36:25 +00001459 case WriteCC:
njne0ee0712002-05-03 16:41:05 +00001460 instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
1461 sprint_write_CC(buf, (idCC*)BBCC_ptr);
njn4f9c9342002-04-29 16:03:24 +00001462 ADD_CC_TO(idCC, I, Ir_total);
1463 ADD_CC_TO(idCC, D, Dw_total);
1464 BBCC_ptr += sizeof(idCC);
1465 break;
1466
njn25e49d8e72002-09-23 09:36:25 +00001467 case ReadWriteCC:
1468 instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
1469 sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
1470 ADD_CC_TO(iddCC, I, Ir_total);
1471 ADD_CC_TO(iddCC, Da, Dr_total);
1472 ADD_CC_TO(iddCC, Db, Dw_total);
1473 BBCC_ptr += sizeof(iddCC);
1474 break;
1475
njn4f9c9342002-04-29 16:03:24 +00001476 default:
njne427a662002-10-02 11:08:25 +00001477 VG_(skin_panic)("Unknown CC type in fprint_BBCC()\n");
njn4f9c9342002-04-29 16:03:24 +00001478 break;
1479 }
1480 distinct_instrs++;
1481
njne0ee0712002-05-03 16:41:05 +00001482 get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
1483
1484 /* Allow for filename switching in the middle of a BB; if this happens,
1485 * must print the new filename with the function name. */
sewardj18d75132002-05-16 11:06:21 +00001486 if (0 != VG_(strcmp)(fl_buf, curr_file)) {
njne0ee0712002-05-03 16:41:05 +00001487 VG_(strcpy)(curr_file, fl_buf);
1488 VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
1489 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1490 }
1491
njn4f9c9342002-04-29 16:03:24 +00001492 /* If the function name for this instruction doesn't match that of the
njne0ee0712002-05-03 16:41:05 +00001493 * first instruction in the BB, print warning. */
njn25e49d8e72002-09-23 09:36:25 +00001494 if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
njn4f9c9342002-04-29 16:03:24 +00001495 VG_(printf)("Mismatched function names\n");
njne0ee0712002-05-03 16:41:05 +00001496 VG_(printf)(" filenames: BB:%s, instr:%s;"
1497 " fn_names: BB:%s, instr:%s;"
1498 " line: %d\n",
njn4f9c9342002-04-29 16:03:24 +00001499 first_instr_fl, fl_buf,
1500 first_instr_fn, fn_buf,
1501 line_num);
1502 }
1503
njne0ee0712002-05-03 16:41:05 +00001504 VG_(sprintf)(lbuf, "%u ", line_num);
1505 VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */
1506 VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */
njn4f9c9342002-04-29 16:03:24 +00001507 }
1508 /* If we switched filenames in the middle of the BB without switching back,
1509 * switch back now because the subsequent BB may be relying on falling under
1510 * the original file name. */
1511 if (0 != VG_(strcmp)(first_instr_fl, curr_file)) {
1512 VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
1513 VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
1514 }
njne0ee0712002-05-03 16:41:05 +00001515
1516 /* Mark end of basic block */
1517 /* VG_(write)(fd, (void*)"#}\n", 3); */
njn4f9c9342002-04-29 16:03:24 +00001518
njne427a662002-10-02 11:08:25 +00001519 sk_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
njn4f9c9342002-04-29 16:03:24 +00001520}
1521
njn25e49d8e72002-09-23 09:36:25 +00001522static void fprint_BBCC_table_and_calc_totals(void)
njn4f9c9342002-04-29 16:03:24 +00001523{
1524 Int fd;
1525 Char buf[BUF_LEN];
1526 file_node *curr_file_node;
1527 fn_node *curr_fn_node;
1528 BBCC *curr_BBCC;
1529 Int i,j,k;
1530
njn25e49d8e72002-09-23 09:36:25 +00001531 VGP_PUSHCC(VgpCacheResults);
njn13f02932003-04-30 20:23:58 +00001532
1533 fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY,
1534 VKI_S_IRUSR|VKI_S_IWUSR);
1535 if (-1 == fd) {
sewardj0744b6c2002-12-11 00:45:42 +00001536 /* If the file can't be opened for whatever reason (conflict
1537 between multiple cachegrinded processes?), give up now. */
1538 file_err();
1539 return;
1540 }
njn4f9c9342002-04-29 16:03:24 +00001541
1542 /* "desc:" lines (giving I1/D1/L2 cache configuration) */
njn7cf0bd32002-06-08 13:36:03 +00001543 VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line);
1544 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1545 VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line);
1546 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1547 VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line);
1548 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn4f9c9342002-04-29 16:03:24 +00001549
1550 /* "cmd:" line */
1551 VG_(strcpy)(buf, "cmd:");
1552 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
njn25e49d8e72002-09-23 09:36:25 +00001553 for (i = 0; i < VG_(client_argc); i++) {
1554 VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
njn4f9c9342002-04-29 16:03:24 +00001555 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1556 }
1557 /* "events:" line */
1558 VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n");
1559 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1560
1561 /* Six loops here: three for the hash table arrays, and three for the
1562 * chains hanging off the hash table arrays. */
1563 for (i = 0; i < N_FILE_ENTRIES; i++) {
1564 curr_file_node = BBCC_table[i];
1565 while (curr_file_node != NULL) {
1566 VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename);
1567 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1568
1569 for (j = 0; j < N_FN_ENTRIES; j++) {
1570 curr_fn_node = curr_file_node->fns[j];
1571 while (curr_fn_node != NULL) {
1572 VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name);
1573 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1574
1575 for (k = 0; k < N_BBCC_ENTRIES; k++) {
1576 curr_BBCC = curr_fn_node->BBCCs[k];
1577 while (curr_BBCC != NULL) {
1578 fprint_BBCC(fd, curr_BBCC,
1579
1580 curr_file_node->filename,
1581 curr_fn_node->fn_name);
1582
1583 curr_BBCC = curr_BBCC->next;
1584 }
1585 }
1586 curr_fn_node = curr_fn_node->next;
1587 }
1588 }
1589 curr_file_node = curr_file_node->next;
1590 }
1591 }
1592
njn4294fd42002-06-05 14:41:10 +00001593 /* Print stats from any discarded basic blocks */
1594 if (0 != Ir_discards.a) {
1595
1596 VG_(sprintf)(buf, "fl=(discarded)\n");
1597 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1598 VG_(sprintf)(buf, "fn=(discarded)\n");
1599 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1600
1601 /* Use 0 as line number */
1602 VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
1603 Ir_discards.a, Ir_discards.m1, Ir_discards.m2,
1604 Dr_discards.a, Dr_discards.m1, Dr_discards.m2,
1605 Dw_discards.a, Dw_discards.m1, Dw_discards.m2);
1606 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1607
1608 Ir_total.a += Ir_discards.a;
1609 Ir_total.m1 += Ir_discards.m1;
1610 Ir_total.m2 += Ir_discards.m2;
1611 Dr_total.a += Dr_discards.a;
1612 Dr_total.m1 += Dr_discards.m1;
1613 Dr_total.m2 += Dr_discards.m2;
1614 Dw_total.a += Dw_discards.a;
1615 Dw_total.m1 += Dw_discards.m1;
1616 Dw_total.m2 += Dw_discards.m2;
1617 }
1618
njn4f9c9342002-04-29 16:03:24 +00001619 /* Summary stats must come after rest of table, since we calculate them
1620 * during traversal. */
1621 VG_(sprintf)(buf, "summary: "
1622 "%llu %llu %llu "
1623 "%llu %llu %llu "
1624 "%llu %llu %llu\n",
1625 Ir_total.a, Ir_total.m1, Ir_total.m2,
1626 Dr_total.a, Dr_total.m1, Dr_total.m2,
1627 Dw_total.a, Dw_total.m1, Dw_total.m2);
1628 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1629 VG_(close)(fd);
1630}
1631
1632/* Adds commas to ULong, right justifying in a field field_width wide, returns
1633 * the string in buf. */
sewardj4f29ddf2002-05-03 22:29:04 +00001634static
njn4f9c9342002-04-29 16:03:24 +00001635Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN])
1636{
1637 int len, n_commas, i, j, new_len, space;
1638
njne0205ff2003-04-08 00:56:14 +00001639 VG_(sprintf)(buf, "%llu", n);
njn4f9c9342002-04-29 16:03:24 +00001640 len = VG_(strlen)(buf);
1641 n_commas = (len - 1) / 3;
1642 new_len = len + n_commas;
1643 space = field_width - new_len;
1644
1645 /* Allow for printing a number in a field_width smaller than it's size */
1646 if (space < 0) space = 0;
1647
1648 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1649 * of three. */
1650 for (j = -1, i = len ; i >= 0; i--) {
1651 buf[i + n_commas + space] = buf[i];
1652
1653 if (3 == ++j) {
1654 j = 0;
1655 n_commas--;
1656 buf[i + n_commas + space] = ',';
1657 }
1658 }
1659 /* Right justify in field. */
1660 for (i = 0; i < space; i++) buf[i] = ' ';
1661 return new_len;
1662}
1663
sewardj4f29ddf2002-05-03 22:29:04 +00001664static
njn4f9c9342002-04-29 16:03:24 +00001665void percentify(Int n, Int pow, Int field_width, char buf[])
1666{
1667 int i, len, space;
1668
1669 VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
1670 len = VG_(strlen)(buf);
1671 space = field_width - len;
njn25e49d8e72002-09-23 09:36:25 +00001672 if (space < 0) space = 0; /* Allow for v. small field_width */
njn4f9c9342002-04-29 16:03:24 +00001673 i = len;
1674
1675 /* Right justify in field */
1676 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1677 for (i = 0; i < space; i++) buf[i] = ' ';
1678}
1679
njn7d9f94d2003-04-22 21:41:40 +00001680void SK_(fini)(Int exitcode)
njn4f9c9342002-04-29 16:03:24 +00001681{
1682 CC D_total;
njn1d021fa2002-05-02 13:56:34 +00001683 ULong L2_total_m, L2_total_mr, L2_total_mw,
1684 L2_total, L2_total_r, L2_total_w;
njne0ee0712002-05-03 16:41:05 +00001685 char buf1[RESULTS_BUF_LEN],
1686 buf2[RESULTS_BUF_LEN],
1687 buf3[RESULTS_BUF_LEN];
njn4f9c9342002-04-29 16:03:24 +00001688 Int l1, l2, l3;
1689 Int p;
1690
njn25e49d8e72002-09-23 09:36:25 +00001691 fprint_BBCC_table_and_calc_totals();
njn4f9c9342002-04-29 16:03:24 +00001692
njn7cf0bd32002-06-08 13:36:03 +00001693 if (VG_(clo_verbosity) == 0)
1694 return;
1695
njn4f9c9342002-04-29 16:03:24 +00001696 /* I cache results. Use the I_refs value to determine the first column
1697 * width. */
1698 l1 = commify(Ir_total.a, 0, buf1);
1699 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1700
1701 commify(Ir_total.m1, l1, buf1);
1702 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1703
1704 commify(Ir_total.m2, l1, buf1);
njn95114da2002-06-05 09:39:31 +00001705 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
njn4f9c9342002-04-29 16:03:24 +00001706
1707 p = 100;
1708
njn25e49d8e72002-09-23 09:36:25 +00001709 if (0 == Ir_total.a) Ir_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001710 percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
1711 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1712
1713 percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1);
1714 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1715 VG_(message)(Vg_UserMsg, "");
1716
1717 /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the
1718 * width of columns 2 & 3. */
1719 D_total.a = Dr_total.a + Dw_total.a;
1720 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1721 D_total.m2 = Dr_total.m2 + Dw_total.m2;
1722
njn1d021fa2002-05-02 13:56:34 +00001723 commify( D_total.a, l1, buf1);
1724 l2 = commify(Dr_total.a, 0, buf2);
1725 l3 = commify(Dw_total.a, 0, buf3);
njn4f9c9342002-04-29 16:03:24 +00001726 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1727 buf1, buf2, buf3);
1728
1729 commify( D_total.m1, l1, buf1);
1730 commify(Dr_total.m1, l2, buf2);
1731 commify(Dw_total.m1, l3, buf3);
1732 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1733 buf1, buf2, buf3);
1734
1735 commify( D_total.m2, l1, buf1);
1736 commify(Dr_total.m2, l2, buf2);
1737 commify(Dw_total.m2, l3, buf3);
njn95114da2002-06-05 09:39:31 +00001738 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
njn4f9c9342002-04-29 16:03:24 +00001739 buf1, buf2, buf3);
1740
1741 p = 10;
1742
njn25e49d8e72002-09-23 09:36:25 +00001743 if (0 == D_total.a) D_total.a = 1;
1744 if (0 == Dr_total.a) Dr_total.a = 1;
1745 if (0 == Dw_total.a) Dw_total.a = 1;
njn4f9c9342002-04-29 16:03:24 +00001746 percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1);
1747 percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
1748 percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
1749 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1750
1751 percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1);
1752 percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2);
1753 percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3);
1754 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1755 VG_(message)(Vg_UserMsg, "");
1756
1757 /* L2 overall results */
njn1d021fa2002-05-02 13:56:34 +00001758
1759 L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1760 L2_total_r = Dr_total.m1 + Ir_total.m1;
1761 L2_total_w = Dw_total.m1;
1762 commify(L2_total, l1, buf1);
1763 commify(L2_total_r, l2, buf2);
1764 commify(L2_total_w, l3, buf3);
1765 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1766 buf1, buf2, buf3);
1767
njn4f9c9342002-04-29 16:03:24 +00001768 L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2;
1769 L2_total_mr = Dr_total.m2 + Ir_total.m2;
1770 L2_total_mw = Dw_total.m2;
njn4f9c9342002-04-29 16:03:24 +00001771 commify(L2_total_m, l1, buf1);
1772 commify(L2_total_mr, l2, buf2);
1773 commify(L2_total_mw, l3, buf3);
1774 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1775 buf1, buf2, buf3);
1776
1777 percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1);
1778 percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2);
1779 percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3);
1780 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1781
1782
1783 /* Hash table stats */
1784 if (VG_(clo_verbosity) > 1) {
1785 int BB_lookups = full_debug_BBs + fn_name_debug_BBs +
1786 file_line_debug_BBs + no_debug_BBs;
1787
1788 VG_(message)(Vg_DebugMsg, "");
1789 VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files);
1790 VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns);
1791 VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups);
1792 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)",
1793 full_debug_BBs * 100 / BB_lookups,
1794 full_debug_BBs);
1795 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)",
1796 file_line_debug_BBs * 100 / BB_lookups,
1797 file_line_debug_BBs);
1798 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)",
1799 fn_name_debug_BBs * 100 / BB_lookups,
1800 fn_name_debug_BBs);
1801 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)",
1802 no_debug_BBs * 100 / BB_lookups,
1803 no_debug_BBs);
1804 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
1805 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs);
1806 }
njn25e49d8e72002-09-23 09:36:25 +00001807 VGP_POPCC(VgpCacheResults);
njn4f9c9342002-04-29 16:03:24 +00001808}
1809
sewardj18d75132002-05-16 11:06:21 +00001810
njn4294fd42002-06-05 14:41:10 +00001811/* Called when a translation is invalidated due to self-modifying code or
1812 * unloaded of a shared object.
1813 *
1814 * Finds the BBCC in the table, removes it, adds the counts to the discard
1815 * counters, and then frees the BBCC. */
njn25e49d8e72002-09-23 09:36:25 +00001816void SK_(discard_basic_block_info) ( Addr a, UInt size )
sewardj18d75132002-05-16 11:06:21 +00001817{
njn4294fd42002-06-05 14:41:10 +00001818 BBCC *BBCC_node;
1819 Addr BBCC_ptr0, BBCC_ptr;
1820 Bool BB_seen_before;
1821
sewardj83205b32002-06-14 11:08:07 +00001822 if (0)
njn25e49d8e72002-09-23 09:36:25 +00001823 VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
njn4294fd42002-06-05 14:41:10 +00001824
1825 /* 2nd arg won't be used since BB should have been seen before (assertions
1826 * ensure this). */
njn25e49d8e72002-09-23 09:36:25 +00001827 BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001828 BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
1829
njne427a662002-10-02 11:08:25 +00001830 sk_assert(True == BB_seen_before);
njn4294fd42002-06-05 14:41:10 +00001831
1832 while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) {
1833
1834 /* We pretend the CC is an iCC for getting the tag. This is ok
1835 * because both CC types have tag as their first byte. Once we know
1836 * the type, we can cast and act appropriately. */
1837
1838 switch ( ((iCC*)BBCC_ptr)->tag ) {
1839
njn25e49d8e72002-09-23 09:36:25 +00001840 case InstrCC:
njn4294fd42002-06-05 14:41:10 +00001841 ADD_CC_TO(iCC, I, Ir_discards);
1842 BBCC_ptr += sizeof(iCC);
1843 break;
1844
njn25e49d8e72002-09-23 09:36:25 +00001845 case ReadCC:
1846 case ModCC:
njn4294fd42002-06-05 14:41:10 +00001847 ADD_CC_TO(idCC, I, Ir_discards);
1848 ADD_CC_TO(idCC, D, Dr_discards);
1849 BBCC_ptr += sizeof(idCC);
1850 break;
1851
njn25e49d8e72002-09-23 09:36:25 +00001852 case WriteCC:
njn4294fd42002-06-05 14:41:10 +00001853 ADD_CC_TO(idCC, I, Ir_discards);
1854 ADD_CC_TO(idCC, D, Dw_discards);
1855 BBCC_ptr += sizeof(idCC);
1856 break;
1857
njn25e49d8e72002-09-23 09:36:25 +00001858 case ReadWriteCC:
1859 ADD_CC_TO(iddCC, I, Ir_discards);
1860 ADD_CC_TO(iddCC, Da, Dr_discards);
1861 ADD_CC_TO(iddCC, Db, Dw_discards);
1862 BBCC_ptr += sizeof(iddCC);
1863 break;
1864
njn4294fd42002-06-05 14:41:10 +00001865 default:
njne427a662002-10-02 11:08:25 +00001866 VG_(skin_panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
njn4294fd42002-06-05 14:41:10 +00001867 break;
1868 }
1869 }
njn25e49d8e72002-09-23 09:36:25 +00001870 VG_(free)(BBCC_node);
sewardj18d75132002-05-16 11:06:21 +00001871}
1872
1873/*--------------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00001874/*--- Command line processing ---*/
1875/*--------------------------------------------------------------------*/
1876
1877static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
1878{
1879 int i1, i2, i3;
1880 int i;
1881 char *opt = VG_(strdup)(orig_opt);
1882
1883 i = i1 = opt_len;
1884
1885 /* Option looks like "--I1=65536,2,64".
1886 * Find commas, replace with NULs to make three independent
1887 * strings, then extract numbers. Yuck. */
1888 while (VG_(isdigit)(opt[i])) i++;
1889 if (',' == opt[i]) {
1890 opt[i++] = '\0';
1891 i2 = i;
1892 } else goto bad;
1893 while (VG_(isdigit)(opt[i])) i++;
1894 if (',' == opt[i]) {
1895 opt[i++] = '\0';
1896 i3 = i;
1897 } else goto bad;
1898 while (VG_(isdigit)(opt[i])) i++;
1899 if ('\0' != opt[i]) goto bad;
1900
1901 cache->size = (Int)VG_(atoll)(opt + i1);
1902 cache->assoc = (Int)VG_(atoll)(opt + i2);
1903 cache->line_size = (Int)VG_(atoll)(opt + i3);
1904
1905 VG_(free)(opt);
1906
1907 return;
1908
1909 bad:
1910 VG_(bad_option)(orig_opt);
1911}
1912
1913Bool SK_(process_cmd_line_option)(Char* arg)
1914{
1915 /* 5 is length of "--I1=" */
njn39c86652003-05-21 10:13:39 +00001916 if (VG_CLO_STREQN(5, arg, "--I1="))
njn25e49d8e72002-09-23 09:36:25 +00001917 parse_cache_opt(&clo_I1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001918 else if (VG_CLO_STREQN(5, arg, "--D1="))
njn25e49d8e72002-09-23 09:36:25 +00001919 parse_cache_opt(&clo_D1_cache, arg, 5);
njn39c86652003-05-21 10:13:39 +00001920 else if (VG_CLO_STREQN(5, arg, "--L2="))
njn25e49d8e72002-09-23 09:36:25 +00001921 parse_cache_opt(&clo_L2_cache, arg, 5);
1922 else
1923 return False;
1924
1925 return True;
1926}
1927
njn3e884182003-04-15 13:03:23 +00001928void SK_(print_usage)(void)
njn25e49d8e72002-09-23 09:36:25 +00001929{
njn3e884182003-04-15 13:03:23 +00001930 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001931" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1932" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
njn3e884182003-04-15 13:03:23 +00001933" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1934 );
1935}
1936
1937void SK_(print_debug_usage)(void)
1938{
1939 VG_(printf)(
1940" (none)\n"
1941 );
njn25e49d8e72002-09-23 09:36:25 +00001942}
1943
1944/*--------------------------------------------------------------------*/
1945/*--- Setup ---*/
1946/*--------------------------------------------------------------------*/
1947
njn810086f2002-11-14 12:42:47 +00001948void SK_(pre_clo_init)(void)
njn25e49d8e72002-09-23 09:36:25 +00001949{
njn13f02932003-04-30 20:23:58 +00001950 UInt buf_size = 100;
1951 Char* base_dir = NULL;
1952
njn810086f2002-11-14 12:42:47 +00001953 VG_(details_name) ("Cachegrind");
1954 VG_(details_version) (NULL);
1955 VG_(details_description) ("an I1/D1/L2 cache profiler");
1956 VG_(details_copyright_author)(
njn0e1b5142003-04-15 14:58:06 +00001957 "Copyright (C) 2002-2003, and GNU GPL'd, by Nicholas Nethercote.");
njn810086f2002-11-14 12:42:47 +00001958 VG_(details_bug_reports_to) ("njn25@cam.ac.uk");
sewardj78210aa2002-12-01 02:55:46 +00001959 VG_(details_avg_translation_sizeB) ( 155 );
njn25e49d8e72002-09-23 09:36:25 +00001960
njn810086f2002-11-14 12:42:47 +00001961 VG_(needs_basic_block_discards)();
1962 VG_(needs_command_line_options)();
njn25e49d8e72002-09-23 09:36:25 +00001963
1964 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
1965 VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
1966 VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
1967 VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
1968 VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
1969 VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
njn13f02932003-04-30 20:23:58 +00001970
1971 /* getcwd() fails if the buffer isn't big enough -- keep doubling size
1972 until it succeeds. */
1973 while (NULL == base_dir) {
1974 base_dir = VG_(malloc)(buf_size);
1975 if (NULL == VG_(getcwd)(base_dir, buf_size))
1976 buf_size *= 2;
1977 }
1978 /* Block is big enough for dir name + cachegrind.out.<pid> */
1979 cachegrind_out_file = VG_(malloc)((VG_(strlen)(base_dir) + 32)*sizeof(Char));
1980 VG_(sprintf)(cachegrind_out_file, "%s/cachegrind.out.%d",
1981 base_dir, VG_(getpid)());
njn25e49d8e72002-09-23 09:36:25 +00001982}
1983
1984void SK_(post_clo_init)(void)
1985{
1986 cache_t I1c, D1c, L2c;
njn25e49d8e72002-09-23 09:36:25 +00001987
1988 initCC(&Ir_total);
1989 initCC(&Dr_total);
1990 initCC(&Dw_total);
1991
1992 initCC(&Ir_discards);
1993 initCC(&Dr_discards);
1994 initCC(&Dw_discards);
1995
1996 get_caches(&I1c, &D1c, &L2c);
1997
1998 cachesim_I1_initcache(I1c);
1999 cachesim_D1_initcache(D1c);
2000 cachesim_L2_initcache(L2c);
2001
2002 VGP_(register_profile_event)(VgpGetBBCC, "get-BBCC");
2003 VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
2004 VGP_(register_profile_event)(VgpCacheResults, "cache-results");
2005
2006 init_BBCC_table();
2007}
2008
#if 0
/* Disabled sanity-check hooks, kept for reference only; nothing between
 * this #if 0 and its #endif is compiled. */
Bool SK_(cheap_sanity_check)(void) { return True; }

extern TTEntry* vg_tt;

/* Calls get_BBCC() with remove=True for every vg_tt entry whose orig_addr
 * is neither 1 nor 3, printing a '.' per entry.  Always returns True. */
Bool SK_(expensive_sanity_check)(void)
{
   Int  slot;
   Bool seen;

   for (slot = 0; slot < 200191; slot++) {
      /* Entries with orig_addr 1 or 3 are skipped. */
      if (vg_tt[slot].orig_addr == (Addr)1 ||
          vg_tt[slot].orig_addr == (Addr)3)
         continue;

      VG_(printf)(".");
      get_BBCC(vg_tt[slot].orig_addr, NULL, /*remove=*/True, &seen);
   }
   return True;
}
#endif
2028
2029/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002030/*--- end cg_main.c ---*/
sewardj18d75132002-05-16 11:06:21 +00002031/*--------------------------------------------------------------------*/