blob: 9e53f8916916f14ebc6c78d272f05252050d3b22 [file] [log] [blame]
weidendoa17f2a32006-03-20 10:27:30 +00001
2/*--------------------------------------------------------------------*/
3/*--- Cache simulation. ---*/
4/*--- sim.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njn9a0cba42007-04-15 22:15:57 +00008 This file is part of Callgrind, a Valgrind tool for call graph
9 profiling programs.
weidendoa17f2a32006-03-20 10:27:30 +000010
njn9a0cba42007-04-15 22:15:57 +000011 Copyright (C) 2003-2005, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
weidendoa17f2a32006-03-20 10:27:30 +000012
njn9a0cba42007-04-15 22:15:57 +000013 This tool is derived from and contains code from Cachegrind
njn9f207462009-03-10 22:02:09 +000014 Copyright (C) 2002-2009 Nicholas Nethercote (njn@valgrind.org)
weidendoa17f2a32006-03-20 10:27:30 +000015
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
20
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
25
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
30
31 The GNU General Public License is contained in the file COPYING.
32*/
33
34#include "global.h"
35
36
37/* Notes:
38 - simulates a write-allocate cache
39 - (block --> set) hash function uses simple bit selection
40 - handling of references straddling two cache blocks:
41 - counts as only one cache access (not two)
42 - both blocks hit --> one hit
43 - one block hits, the other misses --> one miss
44 - both blocks miss --> one miss (not two)
45*/
46
47/* Cache configuration */
48#include "cg_arch.h"
49
/* Additional structures for cache use info, separated
 * according to usage frequency:
 * - line_loaded : pointer to the cost center of the instruction
 *                 which loaded the line into the cache.
 *                 Needed to increment counters when the line is evicted.
 * - line_use    : updated on every access
 */
57typedef struct {
58 UInt count;
59 UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */
60} line_use;
61
62typedef struct {
63 Addr memline, iaddr;
64 line_use* dep_use; /* point to higher-level cacheblock for this memline */
65 ULong* use_base;
66} line_loaded;
67
68/* Cache state */
69typedef struct {
70 char* name;
71 int size; /* bytes */
72 int assoc;
73 int line_size; /* bytes */
74 Bool sectored; /* prefetch nearside cacheline on read */
75 int sets;
76 int sets_min_1;
weidendoa17f2a32006-03-20 10:27:30 +000077 int line_size_bits;
78 int tag_shift;
79 UWord tag_mask;
80 char desc_line[128];
81 UWord* tags;
82
83 /* for cache use */
84 int line_size_mask;
85 int* line_start_mask;
86 int* line_end_mask;
87 line_loaded* loaded;
88 line_use* use;
89} cache_t2;
90
91/*
92 * States of flat caches in our model.
93 * We use a 2-level hierarchy,
94 */
95static cache_t2 I1, D1, L2;
96
97/* Lower bits of cache tags are used as flags for a cache line */
98#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
99#define CACHELINE_DIRTY 1
100
101
102/* Cache simulator Options */
103static Bool clo_simulate_writeback = False;
104static Bool clo_simulate_hwpref = False;
105static Bool clo_simulate_sectors = False;
106static Bool clo_collect_cacheuse = False;
107
/* The following global vars are set up beforehand by
 * setup_bbcc()/cachesim_after_bbsetup():
 *
 * - Addr   bb_base     (instruction start address of original BB)
 * - ULong* cost_base   (start of cost array for BB)
 * - BBCC*  nonskipped  (only != 0 when in a function not skipped)
 */
115
weidendo0a1951d2009-06-15 00:16:36 +0000116/* Offset to events in event set, used in log_* functions
117 * <off_EventSet_BasicEventSet>: offset where basic set is found
118 */
119static Int off_UIr_Ir;
120static Int off_UIrDr_Ir, off_UIrDr_Dr;
121static Int off_UIrDrDw_Ir, off_UIrDrDw_Dr, off_UIrDrDw_Dw;
122static Int off_UIrDw_Ir, off_UIrDw_Dw;
123static Int off_UIrDwDr_Ir, off_UIrDwDr_Dr, off_UIrDwDr_Dw;
weidendoa17f2a32006-03-20 10:27:30 +0000124
125static Addr bb_base;
126static ULong* cost_base;
127static InstrInfo* current_ii;
128
129/* Cache use offsets */
weidendo0a1951d2009-06-15 00:16:36 +0000130/* The offsets are only correct because all per-instruction event sets get
weidendoa17f2a32006-03-20 10:27:30 +0000131 * the "Use" set added first !
132 */
133static Int off_I1_AcCost = 0;
134static Int off_I1_SpLoss = 1;
135static Int off_D1_AcCost = 0;
136static Int off_D1_SpLoss = 1;
137static Int off_L2_AcCost = 2;
138static Int off_L2_SpLoss = 3;
139
140/* Cache access types */
141typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;
142
143/* Result of a reference into a flat cache */
144typedef enum { Hit = 0, Miss, MissDirty } CacheResult;
145
146/* Result of a reference into a hierarchical cache model */
147typedef enum {
148 L1_Hit,
149 L2_Hit,
150 MemAccess,
151 WriteBackMemAccess } CacheModelResult;
152
153typedef CacheModelResult (*simcall_type)(Addr, UChar);
154
155static struct {
156 simcall_type I1_Read;
157 simcall_type D1_Read;
158 simcall_type D1_Write;
159} simulator;
160
161/*------------------------------------------------------------*/
162/*--- Cache Simulator Initialization ---*/
163/*------------------------------------------------------------*/
164
165static void cachesim_clearcache(cache_t2* c)
166{
167 Int i;
168
169 for (i = 0; i < c->sets * c->assoc; i++)
170 c->tags[i] = 0;
171 if (c->use) {
172 for (i = 0; i < c->sets * c->assoc; i++) {
173 c->loaded[i].memline = 0;
174 c->loaded[i].use_base = 0;
175 c->loaded[i].dep_use = 0;
176 c->loaded[i].iaddr = 0;
177 c->use[i].mask = 0;
178 c->use[i].count = 0;
179 c->tags[i] = i % c->assoc; /* init lower bits as pointer */
180 }
181 }
182}
183
184static void cacheuse_initcache(cache_t2* c);
185
186/* By this point, the size/assoc/line_size has been checked. */
187static void cachesim_initcache(cache_t config, cache_t2* c)
188{
189 c->size = config.size;
190 c->assoc = config.assoc;
191 c->line_size = config.line_size;
192 c->sectored = False; // FIXME
193
194 c->sets = (c->size / c->line_size) / c->assoc;
195 c->sets_min_1 = c->sets - 1;
weidendoa17f2a32006-03-20 10:27:30 +0000196 c->line_size_bits = VG_(log2)(c->line_size);
197 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
198 c->tag_mask = ~((1<<c->tag_shift)-1);
199
200 /* Can bits in tag entries be used for flags?
201 * Should be always true as MIN_LINE_SIZE >= 16 */
202 CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);
203
204 if (c->assoc == 1) {
205 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
206 c->size, c->line_size,
207 c->sectored ? ", sectored":"");
208 } else {
209 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
210 c->size, c->line_size, c->assoc,
211 c->sectored ? ", sectored":"");
212 }
213
sewardj9c606bd2008-09-18 18:12:50 +0000214 c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
215 sizeof(UWord) * c->sets * c->assoc);
weidendoa17f2a32006-03-20 10:27:30 +0000216 if (clo_collect_cacheuse)
217 cacheuse_initcache(c);
218 else
219 c->use = 0;
220 cachesim_clearcache(c);
221}
222
223
#if 0
/* Debugging aid (compiled out): dump all tags of cache <c>,
 * one output line per set, ways in MRU-first order.
 */
static void print_cache(cache_t2* c)
{
    /* Signed counters: c->sets and c->assoc are ints */
    Int set, way, i;

    /* Note initialisation and update of 'i'. */
    for (i = 0, set = 0; set < c->sets; set++) {
        for (way = 0; way < c->assoc; way++, i++) {
            /* Tags are UWord, so they need the 'l' length modifier */
            VG_(printf)("%8lx ", c->tags[i]);
        }
        VG_(printf)("\n");
    }
}
#endif
238
239
240/*------------------------------------------------------------*/
241/*--- Write Through Cache Simulation ---*/
242/*------------------------------------------------------------*/
243
244/*
245 * Simple model: L1 & L2 Write Through
246 * Does not distinguish among read and write references
247 *
248 * Simulator functions:
249 * CacheModelResult cachesim_I1_ref(Addr a, UChar size)
250 * CacheModelResult cachesim_D1_ref(Addr a, UChar size)
251 */
252
253static __inline__
254CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
255{
256 int i, j;
257 UWord *set;
258
weidendo144b76c2009-01-26 22:56:14 +0000259 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000260
261 /* This loop is unrolled for just the first case, which is the most */
262 /* common. We can't unroll any further because it would screw up */
263 /* if we have a direct-mapped (1-way) cache. */
264 if (tag == set[0])
265 return Hit;
266
267 /* If the tag is one other than the MRU, move it into the MRU spot */
268 /* and shuffle the rest down. */
269 for (i = 1; i < c->assoc; i++) {
270 if (tag == set[i]) {
271 for (j = i; j > 0; j--) {
272 set[j] = set[j - 1];
273 }
274 set[0] = tag;
275 return Hit;
276 }
277 }
278
279 /* A miss; install this tag as MRU, shuffle rest down. */
280 for (j = c->assoc - 1; j > 0; j--) {
281 set[j] = set[j - 1];
282 }
283 set[0] = tag;
284
285 return Miss;
286}
287
288static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
289{
290 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
291 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
292 UWord tag = a >> c->tag_shift;
293
294 /* Access entirely within line. */
295 if (set1 == set2)
296 return cachesim_setref(c, set1, tag);
297
298 /* Access straddles two lines. */
299 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
300 else if (((set1 + 1) & (c->sets-1)) == set2) {
weidendo28e2a142006-11-22 21:00:53 +0000301 UWord tag2 = (a+size-1) >> c->tag_shift;
weidendoa17f2a32006-03-20 10:27:30 +0000302
303 /* the call updates cache structures as side effect */
304 CacheResult res1 = cachesim_setref(c, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000305 CacheResult res2 = cachesim_setref(c, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000306 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
307
308 } else {
njn8a7b41b2007-09-23 00:51:24 +0000309 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000310 VG_(tool_panic)("item straddles more than two cache sets");
311 }
312 return Hit;
313}
314
315static
316CacheModelResult cachesim_I1_ref(Addr a, UChar size)
317{
318 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
319 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
320 return MemAccess;
321}
322
323static
324CacheModelResult cachesim_D1_ref(Addr a, UChar size)
325{
326 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
327 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
328 return MemAccess;
329}
330
331
332/*------------------------------------------------------------*/
333/*--- Write Back Cache Simulation ---*/
334/*------------------------------------------------------------*/
335
336/*
337 * More complex model: L1 Write-through, L2 Write-back
338 * This needs to distinguish among read and write references.
339 *
340 * Simulator functions:
341 * CacheModelResult cachesim_I1_Read(Addr a, UChar size)
342 * CacheModelResult cachesim_D1_Read(Addr a, UChar size)
343 * CacheModelResult cachesim_D1_Write(Addr a, UChar size)
344 */
345
346/*
347 * With write-back, result can be a miss evicting a dirty line
348 * The dirty state of a cache line is stored in Bit0 of the tag for
349 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
350 * type (Read/Write), the line gets dirty on a write.
351 */
352static __inline__
353CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
354{
355 int i, j;
356 UWord *set, tmp_tag;
357
weidendo144b76c2009-01-26 22:56:14 +0000358 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000359
360 /* This loop is unrolled for just the first case, which is the most */
361 /* common. We can't unroll any further because it would screw up */
362 /* if we have a direct-mapped (1-way) cache. */
363 if (tag == (set[0] & ~CACHELINE_DIRTY)) {
364 set[0] |= ref;
365 return Hit;
366 }
367 /* If the tag is one other than the MRU, move it into the MRU spot */
368 /* and shuffle the rest down. */
369 for (i = 1; i < c->assoc; i++) {
370 if (tag == (set[i] & ~CACHELINE_DIRTY)) {
371 tmp_tag = set[i] | ref; // update dirty flag
372 for (j = i; j > 0; j--) {
373 set[j] = set[j - 1];
374 }
375 set[0] = tmp_tag;
376 return Hit;
377 }
378 }
379
380 /* A miss; install this tag as MRU, shuffle rest down. */
381 tmp_tag = set[c->assoc - 1];
382 for (j = c->assoc - 1; j > 0; j--) {
383 set[j] = set[j - 1];
384 }
385 set[0] = tag | ref;
386
387 return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
388}
389
390
391static __inline__
392CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
393{
394 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
395 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
396 UWord tag = a & c->tag_mask;
397
398 /* Access entirely within line. */
399 if (set1 == set2)
400 return cachesim_setref_wb(c, ref, set1, tag);
401
402 /* Access straddles two lines. */
403 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
404 else if (((set1 + 1) & (c->sets-1)) == set2) {
weidendo144b76c2009-01-26 22:56:14 +0000405 UWord tag2 = (a+size-1) & c->tag_mask;
weidendoa17f2a32006-03-20 10:27:30 +0000406
407 /* the call updates cache structures as side effect */
408 CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000409 CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000410
411 if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
412 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
413
414 } else {
njn8a7b41b2007-09-23 00:51:24 +0000415 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000416 VG_(tool_panic)("item straddles more than two cache sets");
417 }
418 return Hit;
419}
420
421
422static
423CacheModelResult cachesim_I1_Read(Addr a, UChar size)
424{
425 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
426 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
427 case Hit: return L2_Hit;
428 case Miss: return MemAccess;
429 default: break;
430 }
431 return WriteBackMemAccess;
432}
433
434static
435CacheModelResult cachesim_D1_Read(Addr a, UChar size)
436{
437 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
438 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
439 case Hit: return L2_Hit;
440 case Miss: return MemAccess;
441 default: break;
442 }
443 return WriteBackMemAccess;
444}
445
446static
447CacheModelResult cachesim_D1_Write(Addr a, UChar size)
448{
449 if ( cachesim_ref( &D1, a, size) == Hit ) {
450 /* Even for a L1 hit, the write-trough L1 passes
451 * the write to the L2 to make the L2 line dirty.
452 * But this causes no latency, so return the hit.
453 */
454 cachesim_ref_wb( &L2, Write, a, size);
455 return L1_Hit;
456 }
457 switch( cachesim_ref_wb( &L2, Write, a, size) ) {
458 case Hit: return L2_Hit;
459 case Miss: return MemAccess;
460 default: break;
461 }
462 return WriteBackMemAccess;
463}
464
465
466/*------------------------------------------------------------*/
467/*--- Hardware Prefetch Simulation ---*/
468/*------------------------------------------------------------*/
469
470static ULong prefetch_up = 0;
471static ULong prefetch_down = 0;
472
473#define PF_STREAMS 8
474#define PF_PAGEBITS 12
475
476static UInt pf_lastblock[PF_STREAMS];
477static Int pf_seqblocks[PF_STREAMS];
478
479static
480void prefetch_clear(void)
481{
482 int i;
483 for(i=0;i<PF_STREAMS;i++)
484 pf_lastblock[i] = pf_seqblocks[i] = 0;
485}
486
487/*
488 * HW Prefetch emulation
489 * Start prefetching when detecting sequential access to 3 memory blocks.
490 * One stream can be detected per 4k page.
491 */
492static __inline__
weidendo09ee78e2009-02-24 12:26:53 +0000493void prefetch_L2_doref(Addr a)
weidendoa17f2a32006-03-20 10:27:30 +0000494{
495 UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
496 UInt block = ( a >> L2.line_size_bits);
497
498 if (block != pf_lastblock[stream]) {
499 if (pf_seqblocks[stream] == 0) {
500 if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
501 else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
502 }
503 else if (pf_seqblocks[stream] >0) {
504 if (pf_lastblock[stream] +1 == block) {
505 pf_seqblocks[stream]++;
506 if (pf_seqblocks[stream] >= 2) {
507 prefetch_up++;
508 cachesim_ref(&L2, a + 5 * L2.line_size,1);
509 }
510 }
511 else pf_seqblocks[stream] = 0;
512 }
513 else if (pf_seqblocks[stream] <0) {
514 if (pf_lastblock[stream] -1 == block) {
515 pf_seqblocks[stream]--;
516 if (pf_seqblocks[stream] <= -2) {
517 prefetch_down++;
518 cachesim_ref(&L2, a - 5 * L2.line_size,1);
519 }
520 }
521 else pf_seqblocks[stream] = 0;
522 }
523 pf_lastblock[stream] = block;
524 }
525}
526
527/* simple model with hardware prefetch */
528
529static
530CacheModelResult prefetch_I1_ref(Addr a, UChar size)
531{
532 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000533 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000534 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
535 return MemAccess;
536}
537
538static
539CacheModelResult prefetch_D1_ref(Addr a, UChar size)
540{
541 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000542 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000543 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
544 return MemAccess;
545}
546
547
548/* complex model with hardware prefetch */
549
550static
551CacheModelResult prefetch_I1_Read(Addr a, UChar size)
552{
553 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000554 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000555 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
556 case Hit: return L2_Hit;
557 case Miss: return MemAccess;
558 default: break;
559 }
560 return WriteBackMemAccess;
561}
562
563static
564CacheModelResult prefetch_D1_Read(Addr a, UChar size)
565{
566 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000567 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000568 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
569 case Hit: return L2_Hit;
570 case Miss: return MemAccess;
571 default: break;
572 }
573 return WriteBackMemAccess;
574}
575
576static
577CacheModelResult prefetch_D1_Write(Addr a, UChar size)
578{
weidendo09ee78e2009-02-24 12:26:53 +0000579 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000580 if ( cachesim_ref( &D1, a, size) == Hit ) {
581 /* Even for a L1 hit, the write-trough L1 passes
582 * the write to the L2 to make the L2 line dirty.
583 * But this causes no latency, so return the hit.
584 */
585 cachesim_ref_wb( &L2, Write, a, size);
586 return L1_Hit;
587 }
588 switch( cachesim_ref_wb( &L2, Write, a, size) ) {
589 case Hit: return L2_Hit;
590 case Miss: return MemAccess;
591 default: break;
592 }
593 return WriteBackMemAccess;
594}
595
596
597/*------------------------------------------------------------*/
598/*--- Cache Simulation with use metric collection ---*/
599/*------------------------------------------------------------*/
600
601/* can not be combined with write-back or prefetch */
602
603static
604void cacheuse_initcache(cache_t2* c)
605{
606 int i;
607 unsigned int start_mask, start_val;
608 unsigned int end_mask, end_val;
609
sewardj9c606bd2008-09-18 18:12:50 +0000610 c->use = CLG_MALLOC("cl.sim.cu_ic.1",
611 sizeof(line_use) * c->sets * c->assoc);
612 c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
613 sizeof(line_loaded) * c->sets * c->assoc);
614 c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
615 sizeof(int) * c->line_size);
616 c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
617 sizeof(int) * c->line_size);
weidendoa17f2a32006-03-20 10:27:30 +0000618
weidendoa17f2a32006-03-20 10:27:30 +0000619 c->line_size_mask = c->line_size-1;
620
621 /* Meaning of line_start_mask/line_end_mask
622 * Example: for a given cache line, you get an access starting at
623 * byte offset 5, length 4, byte 5 - 8 was touched. For a cache
624 * line size of 32, you have 1 bit per byte in the mask:
625 *
626 * bit31 bit8 bit5 bit 0
627 * | | | |
628 * 11..111111100000 line_start_mask[5]
629 * 00..000111111111 line_end_mask[(5+4)-1]
630 *
631 * use_mask |= line_start_mask[5] && line_end_mask[8]
632 *
633 */
634 start_val = end_val = ~0;
635 if (c->line_size < 32) {
636 int bits_per_byte = 32/c->line_size;
637 start_mask = (1<<bits_per_byte)-1;
638 end_mask = start_mask << (32-bits_per_byte);
639 for(i=0;i<c->line_size;i++) {
640 c->line_start_mask[i] = start_val;
641 start_val = start_val & ~start_mask;
642 start_mask = start_mask << bits_per_byte;
643
644 c->line_end_mask[c->line_size-i-1] = end_val;
645 end_val = end_val & ~end_mask;
646 end_mask = end_mask >> bits_per_byte;
647 }
648 }
649 else {
650 int bytes_per_bit = c->line_size/32;
651 start_mask = 1;
652 end_mask = 1 << 31;
653 for(i=0;i<c->line_size;i++) {
654 c->line_start_mask[i] = start_val;
655 c->line_end_mask[c->line_size-i-1] = end_val;
656 if ( ((i+1)%bytes_per_bit) == 0) {
657 start_val &= ~start_mask;
658 end_val &= ~end_mask;
659 start_mask <<= 1;
660 end_mask >>= 1;
661 }
662 }
663 }
664
665 CLG_DEBUG(6, "Config %s:\n", c->desc_line);
666 for(i=0;i<c->line_size;i++) {
667 CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
668 i, c->line_start_mask[i], c->line_end_mask[i]);
669 }
670
671 /* We use lower tag bits as offset pointers to cache use info.
672 * I.e. some cache parameters don't work.
673 */
weidendo144b76c2009-01-26 22:56:14 +0000674 if ( (1<<c->tag_shift) < c->assoc) {
weidendoa17f2a32006-03-20 10:27:30 +0000675 VG_(message)(Vg_DebugMsg,
676 "error: Use associativity < %d for cache use statistics!",
677 (1<<c->tag_shift) );
678 VG_(tool_panic)("Unsupported cache configuration");
679 }
680}
681
weidendoa17f2a32006-03-20 10:27:30 +0000682
683/* for I1/D1 caches */
684#define CACHEUSE(L) \
685 \
686static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \
687{ \
weidendo28e2a142006-11-22 21:00:53 +0000688 UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \
689 UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
690 UWord tag = a & L.tag_mask; \
691 UWord tag2; \
weidendoa17f2a32006-03-20 10:27:30 +0000692 int i, j, idx; \
693 UWord *set, tmp_tag; \
694 UInt use_mask; \
695 \
barta0b6b2c2008-07-07 06:49:24 +0000696 CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%d/%d]\n", \
weidendoa17f2a32006-03-20 10:27:30 +0000697 L.name, a, size, set1, set2); \
698 \
699 /* First case: word entirely within line. */ \
700 if (set1 == set2) { \
701 \
weidendo144b76c2009-01-26 22:56:14 +0000702 set = &(L.tags[set1 * L.assoc]); \
weidendoa17f2a32006-03-20 10:27:30 +0000703 use_mask = L.line_start_mask[a & L.line_size_mask] & \
704 L.line_end_mask[(a+size-1) & L.line_size_mask]; \
705 \
706 /* This loop is unrolled for just the first case, which is the most */\
707 /* common. We can't unroll any further because it would screw up */\
708 /* if we have a direct-mapped (1-way) cache. */\
709 if (tag == (set[0] & L.tag_mask)) { \
weidendo144b76c2009-01-26 22:56:14 +0000710 idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000711 L.use[idx].count ++; \
712 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000713 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000714 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
715 use_mask, L.use[idx].mask, L.use[idx].count); \
716 return L1_Hit; \
717 } \
718 /* If the tag is one other than the MRU, move it into the MRU spot */\
719 /* and shuffle the rest down. */\
720 for (i = 1; i < L.assoc; i++) { \
721 if (tag == (set[i] & L.tag_mask)) { \
722 tmp_tag = set[i]; \
723 for (j = i; j > 0; j--) { \
724 set[j] = set[j - 1]; \
725 } \
726 set[0] = tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000727 idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000728 L.use[idx].count ++; \
729 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000730 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000731 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
732 use_mask, L.use[idx].mask, L.use[idx].count); \
733 return L1_Hit; \
734 } \
735 } \
736 \
737 /* A miss; install this tag as MRU, shuffle rest down. */ \
738 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
739 for (j = L.assoc - 1; j > 0; j--) { \
740 set[j] = set[j - 1]; \
741 } \
742 set[0] = tag | tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000743 idx = (set1 * L.assoc) + tmp_tag; \
weidendoa17f2a32006-03-20 10:27:30 +0000744 return update_##L##_use(&L, idx, \
745 use_mask, a &~ L.line_size_mask); \
746 \
747 /* Second case: word straddles two lines. */ \
748 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
749 } else if (((set1 + 1) & (L.sets-1)) == set2) { \
750 Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */ \
weidendo144b76c2009-01-26 22:56:14 +0000751 set = &(L.tags[set1 * L.assoc]); \
weidendoa17f2a32006-03-20 10:27:30 +0000752 use_mask = L.line_start_mask[a & L.line_size_mask]; \
753 if (tag == (set[0] & L.tag_mask)) { \
weidendo144b76c2009-01-26 22:56:14 +0000754 idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000755 L.use[idx].count ++; \
756 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000757 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000758 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
759 use_mask, L.use[idx].mask, L.use[idx].count); \
760 goto block2; \
761 } \
762 for (i = 1; i < L.assoc; i++) { \
763 if (tag == (set[i] & L.tag_mask)) { \
764 tmp_tag = set[i]; \
765 for (j = i; j > 0; j--) { \
766 set[j] = set[j - 1]; \
767 } \
768 set[0] = tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000769 idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000770 L.use[idx].count ++; \
771 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000772 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000773 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
774 use_mask, L.use[idx].mask, L.use[idx].count); \
775 goto block2; \
776 } \
777 } \
778 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
779 for (j = L.assoc - 1; j > 0; j--) { \
780 set[j] = set[j - 1]; \
781 } \
782 set[0] = tag | tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000783 idx = (set1 * L.assoc) + tmp_tag; \
weidendoa17f2a32006-03-20 10:27:30 +0000784 miss1 = update_##L##_use(&L, idx, \
785 use_mask, a &~ L.line_size_mask); \
786block2: \
weidendo144b76c2009-01-26 22:56:14 +0000787 set = &(L.tags[set2 * L.assoc]); \
weidendoa17f2a32006-03-20 10:27:30 +0000788 use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
weidendo28e2a142006-11-22 21:00:53 +0000789 tag2 = (a+size-1) & L.tag_mask; \
790 if (tag2 == (set[0] & L.tag_mask)) { \
weidendo144b76c2009-01-26 22:56:14 +0000791 idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000792 L.use[idx].count ++; \
793 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000794 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000795 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
796 use_mask, L.use[idx].mask, L.use[idx].count); \
797 return miss1; \
798 } \
799 for (i = 1; i < L.assoc; i++) { \
weidendo28e2a142006-11-22 21:00:53 +0000800 if (tag2 == (set[i] & L.tag_mask)) { \
weidendoa17f2a32006-03-20 10:27:30 +0000801 tmp_tag = set[i]; \
802 for (j = i; j > 0; j--) { \
803 set[j] = set[j - 1]; \
804 } \
805 set[0] = tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000806 idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000807 L.use[idx].count ++; \
808 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000809 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000810 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
811 use_mask, L.use[idx].mask, L.use[idx].count); \
812 return miss1; \
813 } \
814 } \
815 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
816 for (j = L.assoc - 1; j > 0; j--) { \
817 set[j] = set[j - 1]; \
818 } \
weidendo28e2a142006-11-22 21:00:53 +0000819 set[0] = tag2 | tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000820 idx = (set2 * L.assoc) + tmp_tag; \
weidendoa17f2a32006-03-20 10:27:30 +0000821 miss2 = update_##L##_use(&L, idx, \
822 use_mask, (a+size-1) &~ L.line_size_mask); \
823 return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit; \
824 \
825 } else { \
barta0b6b2c2008-07-07 06:49:24 +0000826 VG_(printf)("addr: %#lx size: %u sets: %d %d", a, size, set1, set2); \
weidendoa17f2a32006-03-20 10:27:30 +0000827 VG_(tool_panic)("item straddles more than two cache sets"); \
828 } \
829 return 0; \
830}
831
832
/* Count the bits set in a 32-bit value.
 *
 * Logarithmic bitcounting algorithm, see
 * http://graphics.stanford.edu/~seander/bithacks.html
 * Each step adds up neighbouring bit fields, doubling the field width:
 * 1-bit fields -> 2-bit sums -> 4-bit sums -> ... -> one 32-bit sum.
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
    unsigned int sum = bits;

    sum = ((sum >>  1) & 0x55555555) + (sum & 0x55555555);
    sum = ((sum >>  2) & 0x33333333) + (sum & 0x33333333);
    sum = ((sum >>  4) & 0x0F0F0F0F) + (sum & 0x0F0F0F0F);
    sum = ((sum >>  8) & 0x00FF00FF) + (sum & 0x00FF00FF);
    sum = ((sum >> 16) & 0x0000FFFF) + (sum & 0x0000FFFF);

    return sum;
}
850
851static void update_L2_use(int idx, Addr memline)
852{
853 line_loaded* loaded = &(L2.loaded[idx]);
854 line_use* use = &(L2.use[idx]);
855 int i = ((32 - countBits(use->mask)) * L2.line_size)>>5;
856
barta0b6b2c2008-07-07 06:49:24 +0000857 CLG_DEBUG(2, " L2.miss [%d]: at %#lx accessing memline %#lx\n",
weidendoa17f2a32006-03-20 10:27:30 +0000858 idx, bb_base + current_ii->instr_offset, memline);
859 if (use->count>0) {
barta0b6b2c2008-07-07 06:49:24 +0000860 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",
weidendoa17f2a32006-03-20 10:27:30 +0000861 use->count, i, use->mask, loaded->memline, loaded->iaddr);
862 CLG_DEBUG(2, " collect: %d, use_base %p\n",
863 CLG_(current_state).collect, loaded->use_base);
864
865 if (CLG_(current_state).collect && loaded->use_base) {
866 (loaded->use_base)[off_L2_AcCost] += 1000 / use->count;
867 (loaded->use_base)[off_L2_SpLoss] += i;
868 }
869 }
870
871 use->count = 0;
872 use->mask = 0;
873
874 loaded->memline = memline;
875 loaded->iaddr = bb_base + current_ii->instr_offset;
876 loaded->use_base = (CLG_(current_state).nonskipped) ?
877 CLG_(current_state).nonskipped->skipped :
878 cost_base + current_ii->cost_offset;
879}
880
881static
882CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded)
883{
884 UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1);
weidendo144b76c2009-01-26 22:56:14 +0000885 UWord* set = &(L2.tags[setNo * L2.assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000886 UWord tag = memline & L2.tag_mask;
887
888 int i, j, idx;
889 UWord tmp_tag;
890
barta0b6b2c2008-07-07 06:49:24 +0000891 CLG_DEBUG(6,"L2.Acc(Memline %#lx): Set %d\n", memline, setNo);
weidendoa17f2a32006-03-20 10:27:30 +0000892
893 if (tag == (set[0] & L2.tag_mask)) {
weidendo144b76c2009-01-26 22:56:14 +0000894 idx = (setNo * L2.assoc) + (set[0] & ~L2.tag_mask);
weidendoa17f2a32006-03-20 10:27:30 +0000895 l1_loaded->dep_use = &(L2.use[idx]);
896
barta0b6b2c2008-07-07 06:49:24 +0000897 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
weidendoa17f2a32006-03-20 10:27:30 +0000898 idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr,
899 L2.use[idx].mask, L2.use[idx].count);
900 return L2_Hit;
901 }
902 for (i = 1; i < L2.assoc; i++) {
903 if (tag == (set[i] & L2.tag_mask)) {
904 tmp_tag = set[i];
905 for (j = i; j > 0; j--) {
906 set[j] = set[j - 1];
907 }
908 set[0] = tmp_tag;
weidendo144b76c2009-01-26 22:56:14 +0000909 idx = (setNo * L2.assoc) + (tmp_tag & ~L2.tag_mask);
weidendoa17f2a32006-03-20 10:27:30 +0000910 l1_loaded->dep_use = &(L2.use[idx]);
911
barta0b6b2c2008-07-07 06:49:24 +0000912 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
weidendoa17f2a32006-03-20 10:27:30 +0000913 i, idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr,
914 L2.use[idx].mask, L2.use[idx].count);
915 return L2_Hit;
916 }
917 }
918
919 /* A miss; install this tag as MRU, shuffle rest down. */
920 tmp_tag = set[L2.assoc - 1] & ~L2.tag_mask;
921 for (j = L2.assoc - 1; j > 0; j--) {
922 set[j] = set[j - 1];
923 }
924 set[0] = tag | tmp_tag;
weidendo144b76c2009-01-26 22:56:14 +0000925 idx = (setNo * L2.assoc) + tmp_tag;
weidendoa17f2a32006-03-20 10:27:30 +0000926 l1_loaded->dep_use = &(L2.use[idx]);
927
928 update_L2_use(idx, memline);
929
930 return MemAccess;
931}
932
933
934
935
936#define UPDATE_USE(L) \
937 \
938static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
939 UInt mask, Addr memline) \
940{ \
941 line_loaded* loaded = &(cache->loaded[idx]); \
942 line_use* use = &(cache->use[idx]); \
943 int c = ((32 - countBits(use->mask)) * cache->line_size)>>5; \
944 \
barta0b6b2c2008-07-07 06:49:24 +0000945 CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \
weidendoa17f2a32006-03-20 10:27:30 +0000946 cache->name, idx, bb_base + current_ii->instr_offset, memline, mask); \
947 if (use->count>0) { \
barta0b6b2c2008-07-07 06:49:24 +0000948 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000949 use->count, c, use->mask, loaded->memline, loaded->iaddr); \
950 CLG_DEBUG(2, " collect: %d, use_base %p\n", \
951 CLG_(current_state).collect, loaded->use_base); \
952 \
953 if (CLG_(current_state).collect && loaded->use_base) { \
954 (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count; \
955 (loaded->use_base)[off_##L##_SpLoss] += c; \
956 \
957 /* FIXME (?): L1/L2 line sizes must be equal ! */ \
958 loaded->dep_use->mask |= use->mask; \
959 loaded->dep_use->count += use->count; \
960 } \
961 } \
962 \
963 use->count = 1; \
964 use->mask = mask; \
965 loaded->memline = memline; \
966 loaded->iaddr = bb_base + current_ii->instr_offset; \
967 loaded->use_base = (CLG_(current_state).nonskipped) ? \
968 CLG_(current_state).nonskipped->skipped : \
969 cost_base + current_ii->cost_offset; \
970 \
971 if (memline == 0) return L2_Hit; \
972 return cacheuse_L2_access(memline, loaded); \
973}
974
975UPDATE_USE(I1);
976UPDATE_USE(D1);
977
978CACHEUSE(I1);
979CACHEUSE(D1);
980
981
982static
983void cacheuse_finish(void)
984{
985 int i;
weidendo0a1951d2009-06-15 00:16:36 +0000986 InstrInfo ii = { 0,0,0,0 };
weidendoa17f2a32006-03-20 10:27:30 +0000987
988 if (!CLG_(current_state).collect) return;
989
990 bb_base = 0;
991 current_ii = &ii;
weidendo0a1951d2009-06-15 00:16:36 +0000992 cost_base = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000993
994 /* update usage counters */
995 if (I1.use)
996 for (i = 0; i < I1.sets * I1.assoc; i++)
997 if (I1.loaded[i].use_base)
998 update_I1_use( &I1, i, 0,0);
999
1000 if (D1.use)
1001 for (i = 0; i < D1.sets * D1.assoc; i++)
1002 if (D1.loaded[i].use_base)
1003 update_D1_use( &D1, i, 0,0);
1004
1005 if (L2.use)
1006 for (i = 0; i < L2.sets * L2.assoc; i++)
1007 if (L2.loaded[i].use_base)
1008 update_L2_use(i, 0);
1009}
1010
1011
1012
1013/*------------------------------------------------------------*/
1014/*--- Helper functions called by instrumented code ---*/
1015/*------------------------------------------------------------*/
1016
1017
1018static __inline__
1019void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
1020{
1021 switch(r) {
1022 case WriteBackMemAccess:
1023 if (clo_simulate_writeback) {
1024 c1[3]++;
1025 c2[3]++;
1026 }
1027 // fall through
1028
1029 case MemAccess:
1030 c1[2]++;
1031 c2[2]++;
1032 // fall through
1033
1034 case L2_Hit:
1035 c1[1]++;
1036 c2[1]++;
1037 // fall through
1038
1039 default:
1040 c1[0]++;
1041 c2[0]++;
1042 }
1043}
1044
weidendo0a1951d2009-06-15 00:16:36 +00001045static
1046Char* cacheRes(CacheModelResult r)
1047{
1048 switch(r) {
1049 case L1_Hit: return "L1 Hit ";
1050 case L2_Hit: return "L2 Hit ";
1051 case MemAccess: return "L2 Miss";
1052 case WriteBackMemAccess: return "L2 Miss (dirty)";
1053 default:
1054 tl_assert(0);
1055 }
1056 return "??";
1057}
weidendoa17f2a32006-03-20 10:27:30 +00001058
1059VG_REGPARM(1)
1060static void log_1I0D(InstrInfo* ii)
1061{
1062 CacheModelResult IrRes;
1063
1064 current_ii = ii;
1065 IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
1066
weidendo0a1951d2009-06-15 00:16:36 +00001067 CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
1068 bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001069
1070 if (CLG_(current_state).collect) {
1071 ULong* cost_Ir;
weidendo0a1951d2009-06-15 00:16:36 +00001072
weidendoa17f2a32006-03-20 10:27:30 +00001073 if (CLG_(current_state).nonskipped)
1074 cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
1075 else
weidendo0a1951d2009-06-15 00:16:36 +00001076 cost_Ir = cost_base + ii->cost_offset + off_UIr_Ir;
weidendoa17f2a32006-03-20 10:27:30 +00001077
1078 inc_costs(IrRes, cost_Ir,
1079 CLG_(current_state).cost + CLG_(sets).off_full_Ir );
1080 }
1081}
1082
weidendo0a1951d2009-06-15 00:16:36 +00001083VG_REGPARM(2)
1084static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
1085{
1086 CacheModelResult Ir1Res, Ir2Res;
1087 ULong *global_cost_Ir;
1088
1089 current_ii = ii1;
1090 Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size);
1091 current_ii = ii2;
1092 Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size);
1093
1094 CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
1095 bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1096 bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );
1097
1098 if (!CLG_(current_state).collect) return;
1099
1100 global_cost_Ir = CLG_(current_state).cost + CLG_(sets).off_full_Ir;
1101 if (CLG_(current_state).nonskipped) {
1102 ULong* skipped_cost_Ir = CLG_(current_state).nonskipped->skipped +
1103 CLG_(sets).off_full_Ir;
1104 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1105 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1106 return;
1107 }
1108
1109 inc_costs(Ir1Res, global_cost_Ir, cost_base + ii1->cost_offset + off_UIr_Ir);
1110 inc_costs(Ir2Res, global_cost_Ir, cost_base + ii2->cost_offset + off_UIr_Ir);
1111}
1112
1113VG_REGPARM(3)
1114static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
1115{
1116 CacheModelResult Ir1Res, Ir2Res, Ir3Res;
1117 ULong *global_cost_Ir;
1118
1119 current_ii = ii1;
1120 Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size);
1121 current_ii = ii2;
1122 Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size);
1123 current_ii = ii3;
1124 Ir3Res = (*simulator.I1_Read)(bb_base + ii3->instr_offset, ii3->instr_size);
1125
1126 CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
1127 bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1128 bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
1129 bb_base + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );
1130
1131 if (!CLG_(current_state).collect) return;
1132
1133 global_cost_Ir = CLG_(current_state).cost + CLG_(sets).off_full_Ir;
1134 if (CLG_(current_state).nonskipped) {
1135 ULong* skipped_cost_Ir = CLG_(current_state).nonskipped->skipped +
1136 CLG_(sets).off_full_Ir;
1137 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1138 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1139 inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
1140 return;
1141 }
1142
1143 inc_costs(Ir1Res, global_cost_Ir, cost_base + ii1->cost_offset + off_UIr_Ir);
1144 inc_costs(Ir2Res, global_cost_Ir, cost_base + ii2->cost_offset + off_UIr_Ir);
1145 inc_costs(Ir3Res, global_cost_Ir, cost_base + ii3->cost_offset + off_UIr_Ir);
1146}
weidendoa17f2a32006-03-20 10:27:30 +00001147
1148/* Instruction doing a read access */
1149
weidendo0a1951d2009-06-15 00:16:36 +00001150VG_REGPARM(3)
1151static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001152{
1153 CacheModelResult IrRes, DrRes;
1154
1155 current_ii = ii;
1156 IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001157 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001158
weidendo0a1951d2009-06-15 00:16:36 +00001159 CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n",
1160 bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
1161 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001162
1163 if (CLG_(current_state).collect) {
1164 ULong *cost_Ir, *cost_Dr;
1165
1166 if (CLG_(current_state).nonskipped) {
1167 cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
1168 cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
1169 }
1170 else {
weidendo0a1951d2009-06-15 00:16:36 +00001171 // event set must be UIrDr or extension
1172 CLG_ASSERT((ii->eventset == CLG_(sets).UIrDr) ||
1173 (ii->eventset == CLG_(sets).UIrDrDw));
1174 cost_Ir = cost_base + ii->cost_offset + off_UIrDr_Ir;
1175 cost_Dr = cost_base + ii->cost_offset + off_UIrDr_Dr;
weidendoa17f2a32006-03-20 10:27:30 +00001176 }
1177
1178 inc_costs(IrRes, cost_Ir,
1179 CLG_(current_state).cost + CLG_(sets).off_full_Ir );
1180 inc_costs(DrRes, cost_Dr,
1181 CLG_(current_state).cost + CLG_(sets).off_full_Dr );
1182 }
1183}
1184
1185
weidendo0a1951d2009-06-15 00:16:36 +00001186VG_REGPARM(3)
1187static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001188{
1189 CacheModelResult DrRes;
1190
1191 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001192 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001193
weidendo0a1951d2009-06-15 00:16:36 +00001194 CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n",
1195 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001196
1197 if (CLG_(current_state).collect) {
1198 ULong *cost_Dr;
1199
1200 if (CLG_(current_state).nonskipped) {
1201 cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
1202 }
1203 else {
weidendo0a1951d2009-06-15 00:16:36 +00001204 Int off_Dr;
1205 if (ii->eventset == CLG_(sets).UIrDr) off_Dr = off_UIrDr_Dr;
1206 else if (ii->eventset == CLG_(sets).UIrDrDw) off_Dr = off_UIrDrDw_Dr;
1207 else if (ii->eventset == CLG_(sets).UIrDwDr) off_Dr = off_UIrDwDr_Dr;
1208 else CLG_ASSERT(0);
1209
1210 cost_Dr = cost_base + ii->cost_offset + off_Dr;
weidendoa17f2a32006-03-20 10:27:30 +00001211 }
weidendo0a1951d2009-06-15 00:16:36 +00001212
weidendoa17f2a32006-03-20 10:27:30 +00001213 inc_costs(DrRes, cost_Dr,
1214 CLG_(current_state).cost + CLG_(sets).off_full_Dr );
1215 }
1216}
1217
1218
1219/* Instruction doing a write access */
1220
weidendo0a1951d2009-06-15 00:16:36 +00001221VG_REGPARM(3)
1222static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001223{
1224 CacheModelResult IrRes, DwRes;
1225
1226 current_ii = ii;
1227 IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001228 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001229
weidendo0a1951d2009-06-15 00:16:36 +00001230 CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n",
1231 bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
1232 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001233
1234 if (CLG_(current_state).collect) {
1235 ULong *cost_Ir, *cost_Dw;
1236
1237 if (CLG_(current_state).nonskipped) {
weidendo0a1951d2009-06-15 00:16:36 +00001238 cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
1239 cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001240 }
1241 else {
weidendo0a1951d2009-06-15 00:16:36 +00001242 // This helper is called when a Dr event follows Ir;
1243 // Event set must be UIrDw or extension
1244 CLG_ASSERT((ii->eventset == CLG_(sets).UIrDw) ||
1245 (ii->eventset == CLG_(sets).UIrDwDr));
1246 cost_Ir = cost_base + ii->cost_offset + off_UIrDw_Ir;
1247 cost_Dw = cost_base + ii->cost_offset + off_UIrDw_Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001248 }
1249
1250 inc_costs(IrRes, cost_Ir,
1251 CLG_(current_state).cost + CLG_(sets).off_full_Ir );
1252 inc_costs(DwRes, cost_Dw,
1253 CLG_(current_state).cost + CLG_(sets).off_full_Dw );
1254 }
1255}
1256
weidendo0a1951d2009-06-15 00:16:36 +00001257VG_REGPARM(3)
1258static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001259{
1260 CacheModelResult DwRes;
1261
1262 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001263 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001264
weidendo0a1951d2009-06-15 00:16:36 +00001265 CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n",
1266 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001267
1268 if (CLG_(current_state).collect) {
1269 ULong *cost_Dw;
1270
1271 if (CLG_(current_state).nonskipped) {
1272 cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
1273 }
1274 else {
weidendo0a1951d2009-06-15 00:16:36 +00001275 Int off_Dw;
1276 if (ii->eventset == CLG_(sets).UIrDw) off_Dw = off_UIrDw_Dw;
1277 else if (ii->eventset == CLG_(sets).UIrDwDr) off_Dw = off_UIrDwDr_Dw;
1278 else if (ii->eventset == CLG_(sets).UIrDrDw) off_Dw = off_UIrDrDw_Dw;
1279 else CLG_ASSERT(0);
1280
1281 cost_Dw = cost_base + ii->cost_offset + off_Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001282 }
1283
1284 inc_costs(DwRes, cost_Dw,
1285 CLG_(current_state).cost + CLG_(sets).off_full_Dw );
1286 }
1287}
1288
weidendoa17f2a32006-03-20 10:27:30 +00001289
1290
1291/*------------------------------------------------------------*/
1292/*--- Cache configuration ---*/
1293/*------------------------------------------------------------*/
1294
1295#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
1296
1297static cache_t clo_I1_cache = UNDEFINED_CACHE;
1298static cache_t clo_D1_cache = UNDEFINED_CACHE;
1299static cache_t clo_L2_cache = UNDEFINED_CACHE;
1300
1301
1302/* Checks cache config is ok; makes it so if not. */
1303static
1304void check_cache(cache_t* cache, Char *name)
1305{
weidendo144b76c2009-01-26 22:56:14 +00001306 /* Simulator requires line size and set count to be powers of two */
1307 if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
1308 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
weidendoa17f2a32006-03-20 10:27:30 +00001309 VG_(message)(Vg_UserMsg,
weidendo144b76c2009-01-26 22:56:14 +00001310 "error: %s set count not a power of two; aborting.",
1311 name);
weidendoa17f2a32006-03-20 10:27:30 +00001312 }
1313
weidendo144b76c2009-01-26 22:56:14 +00001314 if (-1 == VG_(log2)(cache->line_size)) {
weidendoa17f2a32006-03-20 10:27:30 +00001315 VG_(message)(Vg_UserMsg,
1316 "error: %s line size of %dB not a power of two; aborting.",
1317 name, cache->line_size);
1318 VG_(exit)(1);
1319 }
1320
1321 // Then check line size >= 16 -- any smaller and a single instruction could
1322 // straddle three cache lines, which breaks a simulation assertion and is
1323 // stupid anyway.
1324 if (cache->line_size < MIN_LINE_SIZE) {
1325 VG_(message)(Vg_UserMsg,
1326 "error: %s line size of %dB too small; aborting.",
1327 name, cache->line_size);
1328 VG_(exit)(1);
1329 }
1330
1331 /* Then check cache size > line size (causes seg faults if not). */
1332 if (cache->size <= cache->line_size) {
1333 VG_(message)(Vg_UserMsg,
1334 "error: %s cache size of %dB <= line size of %dB; aborting.",
1335 name, cache->size, cache->line_size);
1336 VG_(exit)(1);
1337 }
1338
1339 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1340 if (cache->assoc > (cache->size / cache->line_size)) {
1341 VG_(message)(Vg_UserMsg,
1342 "warning: %s associativity > (size / line size); aborting.", name);
1343 VG_(exit)(1);
1344 }
1345}
1346
1347static
1348void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
1349{
1350#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1351
1352 Int n_clos = 0;
1353
1354 // Count how many were defined on the command line.
1355 if (DEFINED(clo_I1_cache)) { n_clos++; }
1356 if (DEFINED(clo_D1_cache)) { n_clos++; }
1357 if (DEFINED(clo_L2_cache)) { n_clos++; }
1358
1359 // Set the cache config (using auto-detection, if supported by the
1360 // architecture)
1361 VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
1362
1363 // Then replace with any defined on the command line.
1364 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
1365 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
1366 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
1367
1368 // Then check values and fix if not acceptable.
1369 check_cache(I1c, "I1");
1370 check_cache(D1c, "D1");
1371 check_cache(L2c, "L2");
1372
1373 if (VG_(clo_verbosity) > 1) {
1374 VG_(message)(Vg_UserMsg, "Cache configuration used:");
1375 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",
1376 I1c->size, I1c->assoc, I1c->line_size);
1377 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",
1378 D1c->size, D1c->assoc, D1c->line_size);
1379 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",
1380 L2c->size, L2c->assoc, L2c->line_size);
1381 }
1382#undef CMD_LINE_DEFINED
1383}
1384
1385
1386/* Initialize and clear simulator state */
1387static void cachesim_post_clo_init(void)
1388{
1389 /* Cache configurations. */
1390 cache_t I1c, D1c, L2c;
1391
1392 /* Initialize access handlers */
1393 if (!CLG_(clo).simulate_cache) {
1394 CLG_(cachesim).log_1I0D = 0;
1395 CLG_(cachesim).log_1I0D_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001396 CLG_(cachesim).log_2I0D = 0;
1397 CLG_(cachesim).log_2I0D_name = "(no function)";
1398 CLG_(cachesim).log_3I0D = 0;
1399 CLG_(cachesim).log_3I0D_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001400
1401 CLG_(cachesim).log_1I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001402 CLG_(cachesim).log_1I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001403 CLG_(cachesim).log_1I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001404 CLG_(cachesim).log_1I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001405
1406 CLG_(cachesim).log_0I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001407 CLG_(cachesim).log_0I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001408 CLG_(cachesim).log_0I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001409 CLG_(cachesim).log_0I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001410 return;
1411 }
1412
1413 /* Configuration of caches only needed with real cache simulation */
1414 configure_caches(&I1c, &D1c, &L2c);
1415
1416 I1.name = "I1";
1417 D1.name = "D1";
1418 L2.name = "L2";
1419
1420 cachesim_initcache(I1c, &I1);
1421 cachesim_initcache(D1c, &D1);
1422 cachesim_initcache(L2c, &L2);
1423
1424 /* the other cache simulators use the standard helpers
1425 * with dispatching via simulator struct */
1426
1427 CLG_(cachesim).log_1I0D = log_1I0D;
1428 CLG_(cachesim).log_1I0D_name = "log_1I0D";
weidendo0a1951d2009-06-15 00:16:36 +00001429 CLG_(cachesim).log_2I0D = log_2I0D;
1430 CLG_(cachesim).log_2I0D_name = "log_2I0D";
1431 CLG_(cachesim).log_3I0D = log_3I0D;
1432 CLG_(cachesim).log_3I0D_name = "log_3I0D";
weidendoa17f2a32006-03-20 10:27:30 +00001433
1434 CLG_(cachesim).log_1I1Dr = log_1I1Dr;
1435 CLG_(cachesim).log_1I1Dw = log_1I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001436 CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
1437 CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001438
1439 CLG_(cachesim).log_0I1Dr = log_0I1Dr;
1440 CLG_(cachesim).log_0I1Dw = log_0I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001441 CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
1442 CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001443
1444 if (clo_collect_cacheuse) {
1445
1446 /* Output warning for not supported option combinations */
1447 if (clo_simulate_hwpref) {
1448 VG_(message)(Vg_DebugMsg,
1449 "warning: prefetch simulation can not be used with cache usage");
1450 clo_simulate_hwpref = False;
1451 }
1452
1453 if (clo_simulate_writeback) {
1454 VG_(message)(Vg_DebugMsg,
1455 "warning: write-back simulation can not be used with cache usage");
1456 clo_simulate_writeback = False;
1457 }
1458
1459 simulator.I1_Read = cacheuse_I1_doRead;
1460 simulator.D1_Read = cacheuse_D1_doRead;
1461 simulator.D1_Write = cacheuse_D1_doRead;
1462 return;
1463 }
1464
1465 if (clo_simulate_hwpref) {
1466 prefetch_clear();
1467
1468 if (clo_simulate_writeback) {
1469 simulator.I1_Read = prefetch_I1_Read;
1470 simulator.D1_Read = prefetch_D1_Read;
1471 simulator.D1_Write = prefetch_D1_Write;
1472 }
1473 else {
1474 simulator.I1_Read = prefetch_I1_ref;
1475 simulator.D1_Read = prefetch_D1_ref;
1476 simulator.D1_Write = prefetch_D1_ref;
1477 }
1478
1479 return;
1480 }
1481
1482 if (clo_simulate_writeback) {
1483 simulator.I1_Read = cachesim_I1_Read;
1484 simulator.D1_Read = cachesim_D1_Read;
1485 simulator.D1_Write = cachesim_D1_Write;
1486 }
1487 else {
1488 simulator.I1_Read = cachesim_I1_ref;
1489 simulator.D1_Read = cachesim_D1_ref;
1490 simulator.D1_Write = cachesim_D1_ref;
1491 }
1492}
1493
1494
1495/* Clear simulator state. Has to be initialized before */
1496static
1497void cachesim_clear(void)
1498{
1499 cachesim_clearcache(&I1);
1500 cachesim_clearcache(&D1);
1501 cachesim_clearcache(&L2);
1502
1503 prefetch_clear();
1504}
1505
1506
1507static void cachesim_getdesc(Char* buf)
1508{
1509 Int p;
1510 p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line);
1511 p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line);
1512 VG_(sprintf)(buf+p, "desc: L2 cache: %s\n", L2.desc_line);
1513}
1514
1515static
1516void cachesim_print_opts(void)
1517{
1518 VG_(printf)(
1519"\n cache simulator options:\n"
1520" --simulate-cache=no|yes Do cache simulation [no]\n"
1521" --simulate-wb=no|yes Count write-back events [no]\n"
1522" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"
1523#if CLG_EXPERIMENTAL
1524" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
1525#endif
1526" --cacheuse=no|yes Collect cache block use [no]\n"
1527" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1528" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1529" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1530 );
1531}
1532
njn83df0b62009-02-25 01:01:05 +00001533static void parse_opt ( cache_t* cache, char* opt )
weidendoa17f2a32006-03-20 10:27:30 +00001534{
njn83df0b62009-02-25 01:01:05 +00001535 Long i1, i2, i3;
1536 Char* endptr;
weidendoa17f2a32006-03-20 10:27:30 +00001537
njn83df0b62009-02-25 01:01:05 +00001538 // Option argument looks like "65536,2,64". Extract them.
1539 i1 = VG_(strtoll10)(opt, &endptr); if (*endptr != ',') goto bad;
1540 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
1541 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
weidendoa17f2a32006-03-20 10:27:30 +00001542
njn83df0b62009-02-25 01:01:05 +00001543 // Check for overflow.
1544 cache->size = (Int)i1;
1545 cache->assoc = (Int)i2;
1546 cache->line_size = (Int)i3;
1547 if (cache->size != i1) goto overflow;
1548 if (cache->assoc != i2) goto overflow;
1549 if (cache->line_size != i3) goto overflow;
weidendoa17f2a32006-03-20 10:27:30 +00001550
1551 return;
1552
njn83df0b62009-02-25 01:01:05 +00001553 overflow:
1554 VG_(message)(Vg_UserMsg,
1555 "one of the cache parameters was too large and overflowed\n");
weidendoa17f2a32006-03-20 10:27:30 +00001556 bad:
njn83df0b62009-02-25 01:01:05 +00001557 // XXX: this omits the "--I1/D1/L2=" part from the message, but that's
1558 // not a big deal.
1559 VG_(err_bad_option)(opt);
weidendoa17f2a32006-03-20 10:27:30 +00001560}
1561
1562/* Check for command line option for cache configuration.
1563 * Return False if unknown and not handled.
1564 *
1565 * Called from CLG_(process_cmd_line_option)() in clo.c
1566 */
1567static Bool cachesim_parse_opt(Char* arg)
1568{
njn83df0b62009-02-25 01:01:05 +00001569 Char* tmp_str;
weidendoa17f2a32006-03-20 10:27:30 +00001570
njn83df0b62009-02-25 01:01:05 +00001571 if VG_BOOL_CLO(arg, "--simulate-wb", clo_simulate_writeback) {}
1572 else if VG_BOOL_CLO(arg, "--simulate-hwpref", clo_simulate_hwpref) {}
1573 else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors) {}
weidendoa17f2a32006-03-20 10:27:30 +00001574
njn83df0b62009-02-25 01:01:05 +00001575 else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) {
1576 if (clo_collect_cacheuse) {
1577 /* Use counters only make sense with fine dumping */
1578 CLG_(clo).dump_instr = True;
1579 }
1580 }
weidendoa17f2a32006-03-20 10:27:30 +00001581
njn83df0b62009-02-25 01:01:05 +00001582 else if VG_STR_CLO(arg, "--I1", tmp_str)
1583 parse_opt(&clo_I1_cache, tmp_str);
1584 else if VG_STR_CLO(arg, "--D1", tmp_str)
1585 parse_opt(&clo_D1_cache, tmp_str);
1586 else if VG_STR_CLO(arg, "--L2", tmp_str)
1587 parse_opt(&clo_L2_cache, tmp_str);
weidendoa17f2a32006-03-20 10:27:30 +00001588 else
1589 return False;
1590
1591 return True;
1592}
1593
1594/* Adds commas to ULong, right justifying in a field field_width wide, returns
1595 * the string in buf. */
1596static
1597Int commify(ULong n, int field_width, char* buf)
1598{
1599 int len, n_commas, i, j, new_len, space;
1600
1601 VG_(sprintf)(buf, "%llu", n);
1602 len = VG_(strlen)(buf);
1603 n_commas = (len - 1) / 3;
1604 new_len = len + n_commas;
1605 space = field_width - new_len;
1606
1607 /* Allow for printing a number in a field_width smaller than it's size */
1608 if (space < 0) space = 0;
1609
1610 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1611 * of three. */
1612 for (j = -1, i = len ; i >= 0; i--) {
1613 buf[i + n_commas + space] = buf[i];
1614
1615 if ((i>0) && (3 == ++j)) {
1616 j = 0;
1617 n_commas--;
1618 buf[i + n_commas + space] = ',';
1619 }
1620 }
1621 /* Right justify in field. */
1622 for (i = 0; i < space; i++) buf[i] = ' ';
1623 return new_len;
1624}
1625
1626static
1627void percentify(Int n, Int ex, Int field_width, char buf[])
1628{
1629 int i, len, space;
1630
1631 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
1632 len = VG_(strlen)(buf);
1633 space = field_width - len;
1634 if (space < 0) space = 0; /* Allow for v. small field_width */
1635 i = len;
1636
1637 /* Right justify in field */
1638 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1639 for (i = 0; i < space; i++) buf[i] = ' ';
1640}
1641
1642static
1643void cachesim_printstat(void)
1644{
1645 FullCost total = CLG_(total_cost), D_total = 0;
1646 ULong L2_total_m, L2_total_mr, L2_total_mw,
1647 L2_total, L2_total_r, L2_total_w;
1648 char buf1[RESULTS_BUF_LEN],
1649 buf2[RESULTS_BUF_LEN],
1650 buf3[RESULTS_BUF_LEN];
1651 Int l1, l2, l3;
1652 Int p;
1653
1654 if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {
1655 VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu",
1656 prefetch_up);
1657 VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu",
1658 prefetch_down);
1659 VG_(message)(Vg_DebugMsg, "");
1660 }
1661
1662 /* I cache results. Use the I_refs value to determine the first column
1663 * width. */
1664 l1 = commify(total[CLG_(sets).off_full_Ir], 0, buf1);
1665 VG_(message)(Vg_UserMsg, "I refs: %s", buf1);
1666
1667 if (!CLG_(clo).simulate_cache) return;
1668
1669 commify(total[CLG_(sets).off_full_Ir +1], l1, buf1);
1670 VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);
1671
1672 commify(total[CLG_(sets).off_full_Ir +2], l1, buf1);
1673 VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);
1674
1675 p = 100;
1676
1677 if (0 == total[CLG_(sets).off_full_Ir])
1678 total[CLG_(sets).off_full_Ir] = 1;
1679
1680 percentify(total[CLG_(sets).off_full_Ir+1] * 100 * p /
1681 total[CLG_(sets).off_full_Ir], p, l1+1, buf1);
1682 VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);
1683
1684 percentify(total[CLG_(sets).off_full_Ir+2] * 100 * p /
1685 total[CLG_(sets).off_full_Ir], p, l1+1, buf1);
1686 VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);
1687 VG_(message)(Vg_UserMsg, "");
1688
1689 /* D cache results.
1690 Use the D_refs.rd and D_refs.wr values to determine the
1691 * width of columns 2 & 3. */
1692
1693 D_total = CLG_(get_eventset_cost)( CLG_(sets).full );
1694 CLG_(init_cost)( CLG_(sets).full, D_total);
1695 CLG_(copy_cost)( CLG_(sets).Dr, D_total, total + CLG_(sets).off_full_Dr );
1696 CLG_(add_cost) ( CLG_(sets).Dw, D_total, total + CLG_(sets).off_full_Dw );
1697
1698 commify( D_total[0], l1, buf1);
1699 l2 = commify(total[CLG_(sets).off_full_Dr], 0, buf2);
1700 l3 = commify(total[CLG_(sets).off_full_Dw], 0, buf3);
1701 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",
1702 buf1, buf2, buf3);
1703
1704 commify( D_total[1], l1, buf1);
1705 commify(total[CLG_(sets).off_full_Dr+1], l2, buf2);
1706 commify(total[CLG_(sets).off_full_Dw+1], l3, buf3);
1707 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",
1708 buf1, buf2, buf3);
1709
1710 commify( D_total[2], l1, buf1);
1711 commify(total[CLG_(sets).off_full_Dr+2], l2, buf2);
1712 commify(total[CLG_(sets).off_full_Dw+2], l3, buf3);
1713 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",
1714 buf1, buf2, buf3);
1715
1716 p = 10;
1717
1718 if (0 == D_total[0]) D_total[0] = 1;
1719 if (0 == total[CLG_(sets).off_full_Dr]) total[CLG_(sets).off_full_Dr] = 1;
1720 if (0 == total[CLG_(sets).off_full_Dw]) total[CLG_(sets).off_full_Dw] = 1;
1721
1722 percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1);
1723 percentify(total[CLG_(sets).off_full_Dr+1] * 100 * p /
1724 total[CLG_(sets).off_full_Dr], p, l2+1, buf2);
1725 percentify(total[CLG_(sets).off_full_Dw+1] * 100 * p /
1726 total[CLG_(sets).off_full_Dw], p, l3+1, buf3);
1727 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);
1728
1729 percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1);
1730 percentify(total[CLG_(sets).off_full_Dr+2] * 100 * p /
1731 total[CLG_(sets).off_full_Dr], p, l2+1, buf2);
1732 percentify(total[CLG_(sets).off_full_Dw+2] * 100 * p /
1733 total[CLG_(sets).off_full_Dw], p, l3+1, buf3);
1734 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);
1735 VG_(message)(Vg_UserMsg, "");
1736
1737
1738
1739 /* L2 overall results */
1740
1741 L2_total =
1742 total[CLG_(sets).off_full_Dr +1] +
1743 total[CLG_(sets).off_full_Dw +1] +
1744 total[CLG_(sets).off_full_Ir +1];
1745 L2_total_r =
1746 total[CLG_(sets).off_full_Dr +1] +
1747 total[CLG_(sets).off_full_Ir +1];
1748 L2_total_w = total[CLG_(sets).off_full_Dw +1];
1749 commify(L2_total, l1, buf1);
1750 commify(L2_total_r, l2, buf2);
1751 commify(L2_total_w, l3, buf3);
1752 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",
1753 buf1, buf2, buf3);
1754
1755 L2_total_m =
1756 total[CLG_(sets).off_full_Dr +2] +
1757 total[CLG_(sets).off_full_Dw +2] +
1758 total[CLG_(sets).off_full_Ir +2];
1759 L2_total_mr =
1760 total[CLG_(sets).off_full_Dr +2] +
1761 total[CLG_(sets).off_full_Ir +2];
1762 L2_total_mw = total[CLG_(sets).off_full_Dw +2];
1763 commify(L2_total_m, l1, buf1);
1764 commify(L2_total_mr, l2, buf2);
1765 commify(L2_total_mw, l3, buf3);
1766 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",
1767 buf1, buf2, buf3);
1768
1769 percentify(L2_total_m * 100 * p /
1770 (total[CLG_(sets).off_full_Ir] + D_total[0]), p, l1+1, buf1);
1771 percentify(L2_total_mr * 100 * p /
1772 (total[CLG_(sets).off_full_Ir] + total[CLG_(sets).off_full_Dr]),
1773 p, l2+1, buf2);
1774 percentify(L2_total_mw * 100 * p /
1775 total[CLG_(sets).off_full_Dw], p, l3+1, buf3);
1776 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )",
1777 buf1, buf2,buf3);
1778}
1779
1780
1781/*------------------------------------------------------------*/
1782/*--- Setup for Event set. ---*/
1783/*------------------------------------------------------------*/
1784
/* Global holder for the event sets and the offsets of the basic sets
 * inside the "full" set; its fields are assigned in CLG_(init_eventsets). */
1785 struct event_sets CLG_(sets);
1786
1787void CLG_(init_eventsets)(Int max_user)
1788{
1789 EventType * e1, *e2, *e3, *e4;
weidendo0a1951d2009-06-15 00:16:36 +00001790 // Basic event sets from which others are composed
1791 EventSet *Use, *Ir, *Dr, *Dw;
1792 // Compositions of basic sets used for per-instruction counters
1793 EventSet *UIr, *UIrDr, *UIrDrDw, *UIrDw, *UIrDwDr;
1794 // Composition used for global counters and aggregation
1795 EventSet *full;
weidendoa17f2a32006-03-20 10:27:30 +00001796 int sizeOfUseIr;
1797
weidendo0a1951d2009-06-15 00:16:36 +00001798 // the "Use" events types only are used with "cacheuse" simulation
1799 Use = CLG_(get_eventset)("Use", 4);
weidendoa17f2a32006-03-20 10:27:30 +00001800 if (clo_collect_cacheuse) {
1801 /* if TUse is 0, there was never a load, and no loss, too */
1802 e1 = CLG_(register_eventtype)("AcCost1");
weidendo0a1951d2009-06-15 00:16:36 +00001803 CLG_(add_eventtype)(Use, e1);
weidendoa17f2a32006-03-20 10:27:30 +00001804 e1 = CLG_(register_eventtype)("SpLoss1");
weidendo0a1951d2009-06-15 00:16:36 +00001805 CLG_(add_eventtype)(Use, e1);
weidendoa17f2a32006-03-20 10:27:30 +00001806 e1 = CLG_(register_eventtype)("AcCost2");
weidendo0a1951d2009-06-15 00:16:36 +00001807 CLG_(add_eventtype)(Use, e1);
weidendoa17f2a32006-03-20 10:27:30 +00001808 e1 = CLG_(register_eventtype)("SpLoss2");
weidendo0a1951d2009-06-15 00:16:36 +00001809 CLG_(add_eventtype)(Use, e1);
weidendoa17f2a32006-03-20 10:27:30 +00001810 }
1811
weidendo0a1951d2009-06-15 00:16:36 +00001812 Ir = CLG_(get_eventset)("Ir", 4);
weidendoa17f2a32006-03-20 10:27:30 +00001813 Dr = CLG_(get_eventset)("Dr", 4);
1814 Dw = CLG_(get_eventset)("Dw", 4);
1815 if (CLG_(clo).simulate_cache) {
1816 e1 = CLG_(register_eventtype)("Ir");
1817 e2 = CLG_(register_eventtype)("I1mr");
1818 e3 = CLG_(register_eventtype)("I2mr");
1819 if (clo_simulate_writeback) {
1820 e4 = CLG_(register_eventtype)("I2dmr");
1821 CLG_(add_dep_event4)(Ir, e1,e2,e3,e4);
1822 }
1823 else
1824 CLG_(add_dep_event3)(Ir, e1,e2,e3);
1825
1826 e1 = CLG_(register_eventtype)("Dr");
1827 e2 = CLG_(register_eventtype)("D1mr");
1828 e3 = CLG_(register_eventtype)("D2mr");
1829 if (clo_simulate_writeback) {
1830 e4 = CLG_(register_eventtype)("D2dmr");
1831 CLG_(add_dep_event4)(Dr, e1,e2,e3,e4);
1832 }
1833 else
1834 CLG_(add_dep_event3)(Dr, e1,e2,e3);
1835
1836 e1 = CLG_(register_eventtype)("Dw");
1837 e2 = CLG_(register_eventtype)("D1mw");
1838 e3 = CLG_(register_eventtype)("D2mw");
1839 if (clo_simulate_writeback) {
1840 e4 = CLG_(register_eventtype)("D2dmw");
1841 CLG_(add_dep_event4)(Dw, e1,e2,e3,e4);
1842 }
1843 else
1844 CLG_(add_dep_event3)(Dw, e1,e2,e3);
1845
1846 }
1847 else {
1848 e1 = CLG_(register_eventtype)("Ir");
1849 CLG_(add_eventtype)(Ir, e1);
1850 }
1851
weidendo0a1951d2009-06-15 00:16:36 +00001852 // Self cost event sets per guest instruction (U used only for cacheUse).
1853 // Each basic event set only appears once, as eg. multiple different Dr's
1854 // in one guest instruction are counted in the same counter.
weidendoa17f2a32006-03-20 10:27:30 +00001855
weidendo0a1951d2009-06-15 00:16:36 +00001856 sizeOfUseIr = Use->size + Ir->size;
1857 UIr = CLG_(get_eventset)("UIr", sizeOfUseIr);
1858 CLG_(add_eventset)(UIr, Use);
1859 off_UIr_Ir = CLG_(add_eventset)(UIr, Ir);
weidendoa17f2a32006-03-20 10:27:30 +00001860
weidendo0a1951d2009-06-15 00:16:36 +00001861 UIrDr = CLG_(get_eventset)("UIrDr", sizeOfUseIr + Dr->size);
1862 CLG_(add_eventset)(UIrDr, Use);
1863 off_UIrDr_Ir = CLG_(add_eventset)(UIrDr, Ir);
1864 off_UIrDr_Dr = CLG_(add_eventset)(UIrDr, Dr);
weidendoa17f2a32006-03-20 10:27:30 +00001865
weidendo0a1951d2009-06-15 00:16:36 +00001866 UIrDrDw = CLG_(get_eventset)("IrDrDw", sizeOfUseIr + Dr->size + Dw->size);
1867 CLG_(add_eventset)(UIrDrDw, Use);
1868 off_UIrDrDw_Ir = CLG_(add_eventset)(UIrDrDw, Ir);
1869 off_UIrDrDw_Dr = CLG_(add_eventset)(UIrDrDw, Dr);
1870 off_UIrDrDw_Dw = CLG_(add_eventset)(UIrDrDw, Dw);
weidendoa17f2a32006-03-20 10:27:30 +00001871
weidendo0a1951d2009-06-15 00:16:36 +00001872 UIrDw = CLG_(get_eventset)("UIrDw", sizeOfUseIr + Dw->size);
1873 CLG_(add_eventset)(UIrDw, Use);
1874 off_UIrDw_Ir = CLG_(add_eventset)(UIrDw, Ir);
1875 off_UIrDw_Dw = CLG_(add_eventset)(UIrDw, Dw);
weidendoa17f2a32006-03-20 10:27:30 +00001876
weidendo0a1951d2009-06-15 00:16:36 +00001877 UIrDwDr = CLG_(get_eventset)("IrDwDr", sizeOfUseIr + Dw->size + Dr->size);
1878 CLG_(add_eventset)(UIrDwDr, Use);
1879 off_UIrDwDr_Ir = CLG_(add_eventset)(UIrDrDw, Ir);
1880 off_UIrDwDr_Dw = CLG_(add_eventset)(UIrDrDw, Dw);
1881 off_UIrDwDr_Dr = CLG_(add_eventset)(UIrDrDw, Dr);
1882
1883
1884 // the "full" event set is used as global counter and for aggregation
weidendoa17f2a32006-03-20 10:27:30 +00001885 if (CLG_(clo).collect_alloc) max_user += 2;
1886 if (CLG_(clo).collect_systime) max_user += 2;
weidendo0a1951d2009-06-15 00:16:36 +00001887 full = CLG_(get_eventset)("full",
1888 sizeOfUseIr + Dr->size + Dw->size + max_user);
1889 CLG_(add_eventset)(full, Use);
1890 CLG_(sets).off_full_Ir = CLG_(add_eventset)(full, Ir);
1891 CLG_(sets).off_full_Dr = CLG_(add_eventset)(full, Dr);
1892 CLG_(sets).off_full_Dw = CLG_(add_eventset)(full, Dw);
1893 if (CLG_(clo).collect_alloc) {
1894 e1 = CLG_(register_eventtype)("allocCount");
1895 e2 = CLG_(register_eventtype)("allocSize");
1896 CLG_(sets).off_full_alloc = CLG_(add_dep_event2)(full, e1,e2);
1897 }
1898 if (CLG_(clo).collect_systime) {
1899 e1 = CLG_(register_eventtype)("sysCount");
1900 e2 = CLG_(register_eventtype)("sysTime");
1901 CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2);
1902 }
weidendoa17f2a32006-03-20 10:27:30 +00001903
weidendo0a1951d2009-06-15 00:16:36 +00001904 CLG_(sets).Use = Use;
weidendoa17f2a32006-03-20 10:27:30 +00001905 CLG_(sets).Ir = Ir;
1906 CLG_(sets).Dr = Dr;
1907 CLG_(sets).Dw = Dw;
weidendo0a1951d2009-06-15 00:16:36 +00001908 CLG_(sets).UIr = UIr;
1909 CLG_(sets).UIrDr = UIrDr;
1910 CLG_(sets).UIrDrDw = UIrDrDw;
1911 CLG_(sets).UIrDw = UIrDw;
1912 CLG_(sets).UIrDwDr = UIrDwDr;
weidendoa17f2a32006-03-20 10:27:30 +00001913 CLG_(sets).full = full;
1914
weidendoa17f2a32006-03-20 10:27:30 +00001915
1916 CLG_DEBUGIF(1) {
1917 CLG_DEBUG(1, "EventSets:\n");
weidendo0a1951d2009-06-15 00:16:36 +00001918 CLG_(print_eventset)(-2, Use);
weidendoa17f2a32006-03-20 10:27:30 +00001919 CLG_(print_eventset)(-2, Ir);
1920 CLG_(print_eventset)(-2, Dr);
1921 CLG_(print_eventset)(-2, Dw);
weidendoa17f2a32006-03-20 10:27:30 +00001922 CLG_(print_eventset)(-2, full);
1923 }
1924
1925 /* Not-existing events are silently ignored */
1926 CLG_(dumpmap) = CLG_(get_eventmapping)(full);
1927 CLG_(append_event)(CLG_(dumpmap), "Ir");
1928 CLG_(append_event)(CLG_(dumpmap), "Dr");
1929 CLG_(append_event)(CLG_(dumpmap), "Dw");
1930 CLG_(append_event)(CLG_(dumpmap), "I1mr");
1931 CLG_(append_event)(CLG_(dumpmap), "D1mr");
1932 CLG_(append_event)(CLG_(dumpmap), "D1mw");
1933 CLG_(append_event)(CLG_(dumpmap), "I2mr");
1934 CLG_(append_event)(CLG_(dumpmap), "D2mr");
1935 CLG_(append_event)(CLG_(dumpmap), "D2mw");
1936 CLG_(append_event)(CLG_(dumpmap), "I2dmr");
1937 CLG_(append_event)(CLG_(dumpmap), "D2dmr");
1938 CLG_(append_event)(CLG_(dumpmap), "D2dmw");
1939 CLG_(append_event)(CLG_(dumpmap), "AcCost1");
1940 CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
1941 CLG_(append_event)(CLG_(dumpmap), "AcCost2");
1942 CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
1943 CLG_(append_event)(CLG_(dumpmap), "allocCount");
1944 CLG_(append_event)(CLG_(dumpmap), "allocSize");
1945 CLG_(append_event)(CLG_(dumpmap), "sysCount");
1946 CLG_(append_event)(CLG_(dumpmap), "sysTime");
1947
1948}
1949
1950
1951
1952static
1953void add_and_zero_Dx(EventSet* es, SimCost dst, ULong* cost)
1954{
1955 /* if eventset use is defined, it is always first (hardcoded!) */
weidendo0a1951d2009-06-15 00:16:36 +00001956 CLG_(add_and_zero_cost)( CLG_(sets).Use, dst, cost);
weidendoa17f2a32006-03-20 10:27:30 +00001957
weidendo0a1951d2009-06-15 00:16:36 +00001958 if (es == CLG_(sets).UIr) {
1959 CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
1960 cost + off_UIr_Ir);
weidendoa17f2a32006-03-20 10:27:30 +00001961 }
weidendo0a1951d2009-06-15 00:16:36 +00001962 else if (es == CLG_(sets).UIrDr) {
1963 CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
1964 cost + off_UIrDr_Ir);
1965 CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr,
1966 cost + off_UIrDr_Dr);
weidendoa17f2a32006-03-20 10:27:30 +00001967 }
weidendo0a1951d2009-06-15 00:16:36 +00001968 else if (es == CLG_(sets).UIrDrDw) {
1969 CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
1970 cost + off_UIrDrDw_Ir);
1971 CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr,
1972 cost + off_UIrDrDw_Dr);
1973 CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw,
1974 cost + off_UIrDrDw_Dw);
weidendoa17f2a32006-03-20 10:27:30 +00001975 }
weidendo0a1951d2009-06-15 00:16:36 +00001976 else if (es == CLG_(sets).UIrDw) {
1977 CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
1978 cost + off_UIrDw_Ir);
1979 CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw,
1980 cost + off_UIrDw_Dw);
weidendoa17f2a32006-03-20 10:27:30 +00001981 }
weidendo0a1951d2009-06-15 00:16:36 +00001982 else if (es == CLG_(sets).UIrDwDr) {
1983 CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
1984 cost + off_UIrDwDr_Ir);
1985 CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw,
1986 cost + off_UIrDwDr_Dw);
1987 CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr,
1988 cost + off_UIrDwDr_Dr);
1989 }
1990 else CLG_ASSERT(0);
weidendoa17f2a32006-03-20 10:27:30 +00001991}
1992
1993/* this is called at dump time for every instruction executed */
1994static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
1995 InstrInfo* ii, ULong exe_count)
1996{
1997 if (!CLG_(clo).simulate_cache)
weidendo0a1951d2009-06-15 00:16:36 +00001998 cost[CLG_(sets).off_full_Ir] += exe_count;
weidendoa17f2a32006-03-20 10:27:30 +00001999 else {
2000
2001#if 0
2002/* There is always a trivial case where exe_count and Ir can be
2003 * slightly different because ecounter is updated when executing
2004 * the next BB. E.g. for last BB executed, or when toggling collection
2005 */
2006 /* FIXME: Hardcoded that each eventset has Ir as first */
2007 if ((bbcc->cost + ii->cost_offset)[0] != exe_count) {
2008 VG_(printf)("==> Ir %llu, exe %llu\n",
2009 (bbcc->cost + ii->cost_offset)[0], exe_count);
2010 CLG_(print_bbcc_cost)(-2, bbcc);
2011 //CLG_ASSERT((bbcc->cost + ii->cost_offset)[0] == exe_count);
2012 }
2013#endif
2014
2015 add_and_zero_Dx(ii->eventset, cost,
2016 bbcc->cost + ii->cost_offset);
2017 }
2018}
2019
2020static
2021void cachesim_after_bbsetup(void)
2022{
2023 BBCC* bbcc = CLG_(current_state).bbcc;
2024
2025 if (CLG_(clo).simulate_cache) {
2026 BB* bb = bbcc->bb;
2027
2028 /* only needed if log_* functions are called */
2029 bb_base = bb->obj->offset + bb->offset;
2030 cost_base = bbcc->cost;
2031 }
2032}
2033
2034static
2035void cachesim_finish(void)
2036{
2037 if (clo_collect_cacheuse)
2038 cacheuse_finish();
2039}
2040
2041/*------------------------------------------------------------*/
2042/*--- The simulator defined in this file ---*/
2043/*------------------------------------------------------------*/
2044
2045struct cachesim_if CLG_(cachesim) = {
2046 .print_opts = cachesim_print_opts,
2047 .parse_opt = cachesim_parse_opt,
2048 .post_clo_init = cachesim_post_clo_init,
2049 .clear = cachesim_clear,
2050 .getdesc = cachesim_getdesc,
2051 .printstat = cachesim_printstat,
2052 .add_icost = cachesim_add_icost,
2053 .after_bbsetup = cachesim_after_bbsetup,
2054 .finish = cachesim_finish,
2055
2056 /* these will be set by cachesim_post_clo_init */
2057 .log_1I0D = 0,
weidendo0a1951d2009-06-15 00:16:36 +00002058 .log_2I0D = 0,
2059 .log_3I0D = 0,
weidendoa17f2a32006-03-20 10:27:30 +00002060
2061 .log_1I1Dr = 0,
2062 .log_1I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00002063
2064 .log_0I1Dr = 0,
2065 .log_0I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00002066
2067 .log_1I0D_name = "(no function)",
weidendo0a1951d2009-06-15 00:16:36 +00002068 .log_2I0D_name = "(no function)",
2069 .log_3I0D_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00002070
2071 .log_1I1Dr_name = "(no function)",
2072 .log_1I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00002073
2074 .log_0I1Dr_name = "(no function)",
2075 .log_0I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00002076};
2077
2078
2079/*--------------------------------------------------------------------*/
2080 /*--- end sim.c ---*/
2081/*--------------------------------------------------------------------*/
2082