blob: 2348cbec1af1a9c34d365d9b8d0547ada2ad07aa [file] [log] [blame]
weidendoa17f2a32006-03-20 10:27:30 +00001/*--------------------------------------------------------------------*/
2/*--- Cache simulation. ---*/
3/*--- sim.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
njn9a0cba42007-04-15 22:15:57 +00007 This file is part of Callgrind, a Valgrind tool for call graph
8 profiling programs.
weidendoa17f2a32006-03-20 10:27:30 +00009
sewardj03f8d3f2012-08-05 15:46:46 +000010 Copyright (C) 2003-2012, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
weidendoa17f2a32006-03-20 10:27:30 +000011
njn9a0cba42007-04-15 22:15:57 +000012 This tool is derived from and contains code from Cachegrind
sewardj03f8d3f2012-08-05 15:46:46 +000013 Copyright (C) 2002-2012 Nicholas Nethercote (njn@valgrind.org)
weidendoa17f2a32006-03-20 10:27:30 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "global.h"
34
35
36/* Notes:
37 - simulates a write-allocate cache
38 - (block --> set) hash function uses simple bit selection
39 - handling of references straddling two cache blocks:
40 - counts as only one cache access (not two)
41 - both blocks hit --> one hit
42 - one block hits, the other misses --> one miss
43 - both blocks miss --> one miss (not two)
44*/
45
46/* Cache configuration */
47#include "cg_arch.h"
48
/* Additional structures for cache use info, separated
 * according to usage frequency:
 * - line_loaded : pointer to cost center of instruction
 *                 which loaded the line into cache.
 *                 Needed to increment counters when line is evicted.
 * - line_use    : updated on every access
 */
56typedef struct {
57 UInt count;
58 UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */
59} line_use;
60
61typedef struct {
62 Addr memline, iaddr;
63 line_use* dep_use; /* point to higher-level cacheblock for this memline */
64 ULong* use_base;
65} line_loaded;
66
67/* Cache state */
68typedef struct {
69 char* name;
70 int size; /* bytes */
71 int assoc;
72 int line_size; /* bytes */
73 Bool sectored; /* prefetch nearside cacheline on read */
74 int sets;
75 int sets_min_1;
weidendoa17f2a32006-03-20 10:27:30 +000076 int line_size_bits;
77 int tag_shift;
78 UWord tag_mask;
79 char desc_line[128];
80 UWord* tags;
81
82 /* for cache use */
83 int line_size_mask;
84 int* line_start_mask;
85 int* line_end_mask;
86 line_loaded* loaded;
87 line_use* use;
88} cache_t2;
89
90/*
91 * States of flat caches in our model.
92 * We use a 2-level hierarchy,
93 */
njn2d853a12010-10-06 22:46:31 +000094static cache_t2 I1, D1, LL;
weidendoa17f2a32006-03-20 10:27:30 +000095
96/* Lower bits of cache tags are used as flags for a cache line */
97#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
98#define CACHELINE_DIRTY 1
99
100
101/* Cache simulator Options */
102static Bool clo_simulate_writeback = False;
103static Bool clo_simulate_hwpref = False;
104static Bool clo_simulate_sectors = False;
105static Bool clo_collect_cacheuse = False;
106
weidendo75a5c2d2010-06-09 22:32:58 +0000107/* Following global vars are setup before by setup_bbcc():
weidendoa17f2a32006-03-20 10:27:30 +0000108 *
weidendo75a5c2d2010-06-09 22:32:58 +0000109 * - Addr CLG_(bb_base) (instruction start address of original BB)
110 * - ULong* CLG_(cost_base) (start of cost array for BB)
weidendoa17f2a32006-03-20 10:27:30 +0000111 */
112
weidendo75a5c2d2010-06-09 22:32:58 +0000113Addr CLG_(bb_base);
114ULong* CLG_(cost_base);
115
weidendoa17f2a32006-03-20 10:27:30 +0000116static InstrInfo* current_ii;
117
118/* Cache use offsets */
weidendo0a1951d2009-06-15 00:16:36 +0000119/* The offsets are only correct because all per-instruction event sets get
weidendoa17f2a32006-03-20 10:27:30 +0000120 * the "Use" set added first !
121 */
122static Int off_I1_AcCost = 0;
123static Int off_I1_SpLoss = 1;
124static Int off_D1_AcCost = 0;
125static Int off_D1_SpLoss = 1;
njn2d853a12010-10-06 22:46:31 +0000126static Int off_LL_AcCost = 2;
127static Int off_LL_SpLoss = 3;
weidendoa17f2a32006-03-20 10:27:30 +0000128
129/* Cache access types */
130typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;
131
132/* Result of a reference into a flat cache */
133typedef enum { Hit = 0, Miss, MissDirty } CacheResult;
134
135/* Result of a reference into a hierarchical cache model */
136typedef enum {
137 L1_Hit,
njn2d853a12010-10-06 22:46:31 +0000138 LL_Hit,
weidendoa17f2a32006-03-20 10:27:30 +0000139 MemAccess,
140 WriteBackMemAccess } CacheModelResult;
141
142typedef CacheModelResult (*simcall_type)(Addr, UChar);
143
144static struct {
145 simcall_type I1_Read;
146 simcall_type D1_Read;
147 simcall_type D1_Write;
148} simulator;
149
150/*------------------------------------------------------------*/
151/*--- Cache Simulator Initialization ---*/
152/*------------------------------------------------------------*/
153
154static void cachesim_clearcache(cache_t2* c)
155{
156 Int i;
157
158 for (i = 0; i < c->sets * c->assoc; i++)
159 c->tags[i] = 0;
160 if (c->use) {
161 for (i = 0; i < c->sets * c->assoc; i++) {
162 c->loaded[i].memline = 0;
163 c->loaded[i].use_base = 0;
164 c->loaded[i].dep_use = 0;
165 c->loaded[i].iaddr = 0;
166 c->use[i].mask = 0;
167 c->use[i].count = 0;
168 c->tags[i] = i % c->assoc; /* init lower bits as pointer */
169 }
170 }
171}
172
173static void cacheuse_initcache(cache_t2* c);
174
175/* By this point, the size/assoc/line_size has been checked. */
176static void cachesim_initcache(cache_t config, cache_t2* c)
177{
178 c->size = config.size;
179 c->assoc = config.assoc;
180 c->line_size = config.line_size;
181 c->sectored = False; // FIXME
182
183 c->sets = (c->size / c->line_size) / c->assoc;
184 c->sets_min_1 = c->sets - 1;
weidendoa17f2a32006-03-20 10:27:30 +0000185 c->line_size_bits = VG_(log2)(c->line_size);
186 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
187 c->tag_mask = ~((1<<c->tag_shift)-1);
188
189 /* Can bits in tag entries be used for flags?
190 * Should be always true as MIN_LINE_SIZE >= 16 */
191 CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);
192
193 if (c->assoc == 1) {
194 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
195 c->size, c->line_size,
196 c->sectored ? ", sectored":"");
197 } else {
198 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
199 c->size, c->line_size, c->assoc,
200 c->sectored ? ", sectored":"");
201 }
202
sewardj9c606bd2008-09-18 18:12:50 +0000203 c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
204 sizeof(UWord) * c->sets * c->assoc);
weidendoa17f2a32006-03-20 10:27:30 +0000205 if (clo_collect_cacheuse)
206 cacheuse_initcache(c);
207 else
208 c->use = 0;
209 cachesim_clearcache(c);
210}
211
212
#if 0
/* Debugging aid (compiled out): print all tags, one set per line. */
static void print_cache(cache_t2* c)
{
    UInt set, way;

    for (set = 0; set < c->sets; set++) {
        /* the ways of a set are stored contiguously */
        UWord* row = &c->tags[set * c->assoc];

        for (way = 0; way < c->assoc; way++) {
            VG_(printf)("%8x ", row[way]);
        }
        VG_(printf)("\n");
    }
}
#endif
227
228
229/*------------------------------------------------------------*/
230/*--- Write Through Cache Simulation ---*/
231/*------------------------------------------------------------*/
232
/*
 * Simple model: L1 & LL Write Through
 * Does not distinguish between read and write references
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_ref(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_ref(Addr a, UChar size)
 */
241
242static __inline__
243CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
244{
245 int i, j;
246 UWord *set;
247
weidendo144b76c2009-01-26 22:56:14 +0000248 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000249
250 /* This loop is unrolled for just the first case, which is the most */
251 /* common. We can't unroll any further because it would screw up */
252 /* if we have a direct-mapped (1-way) cache. */
253 if (tag == set[0])
254 return Hit;
255
256 /* If the tag is one other than the MRU, move it into the MRU spot */
257 /* and shuffle the rest down. */
258 for (i = 1; i < c->assoc; i++) {
259 if (tag == set[i]) {
260 for (j = i; j > 0; j--) {
261 set[j] = set[j - 1];
262 }
263 set[0] = tag;
264 return Hit;
265 }
266 }
267
268 /* A miss; install this tag as MRU, shuffle rest down. */
269 for (j = c->assoc - 1; j > 0; j--) {
270 set[j] = set[j - 1];
271 }
272 set[0] = tag;
273
274 return Miss;
275}
276
277static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
278{
279 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
280 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
281 UWord tag = a >> c->tag_shift;
282
283 /* Access entirely within line. */
284 if (set1 == set2)
285 return cachesim_setref(c, set1, tag);
286
287 /* Access straddles two lines. */
288 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
tom7c0f6392011-06-09 12:26:42 +0000289 else if (((set1 + 1) & (c->sets_min_1)) == set2) {
weidendo28e2a142006-11-22 21:00:53 +0000290 UWord tag2 = (a+size-1) >> c->tag_shift;
weidendoa17f2a32006-03-20 10:27:30 +0000291
292 /* the call updates cache structures as side effect */
293 CacheResult res1 = cachesim_setref(c, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000294 CacheResult res2 = cachesim_setref(c, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000295 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
296
297 } else {
njn8a7b41b2007-09-23 00:51:24 +0000298 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000299 VG_(tool_panic)("item straddles more than two cache sets");
300 }
301 return Hit;
302}
303
304static
305CacheModelResult cachesim_I1_ref(Addr a, UChar size)
306{
307 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000308 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000309 return MemAccess;
310}
311
312static
313CacheModelResult cachesim_D1_ref(Addr a, UChar size)
314{
315 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000316 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000317 return MemAccess;
318}
319
320
321/*------------------------------------------------------------*/
322/*--- Write Back Cache Simulation ---*/
323/*------------------------------------------------------------*/
324
/*
 * More complex model: L1 Write-through, LL Write-back
 * This needs to distinguish between read and write references.
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Write(Addr a, UChar size)
 */
334
335/*
336 * With write-back, result can be a miss evicting a dirty line
337 * The dirty state of a cache line is stored in Bit0 of the tag for
338 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
339 * type (Read/Write), the line gets dirty on a write.
340 */
341static __inline__
342CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
343{
344 int i, j;
345 UWord *set, tmp_tag;
346
weidendo144b76c2009-01-26 22:56:14 +0000347 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000348
349 /* This loop is unrolled for just the first case, which is the most */
350 /* common. We can't unroll any further because it would screw up */
351 /* if we have a direct-mapped (1-way) cache. */
352 if (tag == (set[0] & ~CACHELINE_DIRTY)) {
353 set[0] |= ref;
354 return Hit;
355 }
356 /* If the tag is one other than the MRU, move it into the MRU spot */
357 /* and shuffle the rest down. */
358 for (i = 1; i < c->assoc; i++) {
359 if (tag == (set[i] & ~CACHELINE_DIRTY)) {
360 tmp_tag = set[i] | ref; // update dirty flag
361 for (j = i; j > 0; j--) {
362 set[j] = set[j - 1];
363 }
364 set[0] = tmp_tag;
365 return Hit;
366 }
367 }
368
369 /* A miss; install this tag as MRU, shuffle rest down. */
370 tmp_tag = set[c->assoc - 1];
371 for (j = c->assoc - 1; j > 0; j--) {
372 set[j] = set[j - 1];
373 }
374 set[0] = tag | ref;
375
376 return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
377}
378
379
380static __inline__
381CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
382{
383 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
384 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
385 UWord tag = a & c->tag_mask;
386
387 /* Access entirely within line. */
388 if (set1 == set2)
389 return cachesim_setref_wb(c, ref, set1, tag);
390
391 /* Access straddles two lines. */
392 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
tom7c0f6392011-06-09 12:26:42 +0000393 else if (((set1 + 1) & (c->sets_min_1)) == set2) {
weidendo144b76c2009-01-26 22:56:14 +0000394 UWord tag2 = (a+size-1) & c->tag_mask;
weidendoa17f2a32006-03-20 10:27:30 +0000395
396 /* the call updates cache structures as side effect */
397 CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000398 CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000399
400 if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
401 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
402
403 } else {
njn8a7b41b2007-09-23 00:51:24 +0000404 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000405 VG_(tool_panic)("item straddles more than two cache sets");
406 }
407 return Hit;
408}
409
410
411static
412CacheModelResult cachesim_I1_Read(Addr a, UChar size)
413{
414 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000415 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
416 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000417 case Miss: return MemAccess;
418 default: break;
419 }
420 return WriteBackMemAccess;
421}
422
423static
424CacheModelResult cachesim_D1_Read(Addr a, UChar size)
425{
426 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000427 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
428 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000429 case Miss: return MemAccess;
430 default: break;
431 }
432 return WriteBackMemAccess;
433}
434
435static
436CacheModelResult cachesim_D1_Write(Addr a, UChar size)
437{
438 if ( cachesim_ref( &D1, a, size) == Hit ) {
439 /* Even for a L1 hit, the write-trough L1 passes
njn2d853a12010-10-06 22:46:31 +0000440 * the write to the LL to make the LL line dirty.
weidendoa17f2a32006-03-20 10:27:30 +0000441 * But this causes no latency, so return the hit.
442 */
njn2d853a12010-10-06 22:46:31 +0000443 cachesim_ref_wb( &LL, Write, a, size);
weidendoa17f2a32006-03-20 10:27:30 +0000444 return L1_Hit;
445 }
njn2d853a12010-10-06 22:46:31 +0000446 switch( cachesim_ref_wb( &LL, Write, a, size) ) {
447 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000448 case Miss: return MemAccess;
449 default: break;
450 }
451 return WriteBackMemAccess;
452}
453
454
455/*------------------------------------------------------------*/
456/*--- Hardware Prefetch Simulation ---*/
457/*------------------------------------------------------------*/
458
459static ULong prefetch_up = 0;
460static ULong prefetch_down = 0;
461
462#define PF_STREAMS 8
463#define PF_PAGEBITS 12
464
465static UInt pf_lastblock[PF_STREAMS];
466static Int pf_seqblocks[PF_STREAMS];
467
468static
469void prefetch_clear(void)
470{
471 int i;
472 for(i=0;i<PF_STREAMS;i++)
473 pf_lastblock[i] = pf_seqblocks[i] = 0;
474}
475
476/*
477 * HW Prefetch emulation
478 * Start prefetching when detecting sequential access to 3 memory blocks.
479 * One stream can be detected per 4k page.
480 */
481static __inline__
njn2d853a12010-10-06 22:46:31 +0000482void prefetch_LL_doref(Addr a)
weidendoa17f2a32006-03-20 10:27:30 +0000483{
484 UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
njn2d853a12010-10-06 22:46:31 +0000485 UInt block = ( a >> LL.line_size_bits);
weidendoa17f2a32006-03-20 10:27:30 +0000486
487 if (block != pf_lastblock[stream]) {
488 if (pf_seqblocks[stream] == 0) {
489 if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
490 else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
491 }
492 else if (pf_seqblocks[stream] >0) {
493 if (pf_lastblock[stream] +1 == block) {
494 pf_seqblocks[stream]++;
495 if (pf_seqblocks[stream] >= 2) {
496 prefetch_up++;
njn2d853a12010-10-06 22:46:31 +0000497 cachesim_ref(&LL, a + 5 * LL.line_size,1);
weidendoa17f2a32006-03-20 10:27:30 +0000498 }
499 }
500 else pf_seqblocks[stream] = 0;
501 }
502 else if (pf_seqblocks[stream] <0) {
503 if (pf_lastblock[stream] -1 == block) {
504 pf_seqblocks[stream]--;
505 if (pf_seqblocks[stream] <= -2) {
506 prefetch_down++;
njn2d853a12010-10-06 22:46:31 +0000507 cachesim_ref(&LL, a - 5 * LL.line_size,1);
weidendoa17f2a32006-03-20 10:27:30 +0000508 }
509 }
510 else pf_seqblocks[stream] = 0;
511 }
512 pf_lastblock[stream] = block;
513 }
514}
515
516/* simple model with hardware prefetch */
517
518static
519CacheModelResult prefetch_I1_ref(Addr a, UChar size)
520{
521 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000522 prefetch_LL_doref(a);
523 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000524 return MemAccess;
525}
526
527static
528CacheModelResult prefetch_D1_ref(Addr a, UChar size)
529{
530 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000531 prefetch_LL_doref(a);
532 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000533 return MemAccess;
534}
535
536
537/* complex model with hardware prefetch */
538
539static
540CacheModelResult prefetch_I1_Read(Addr a, UChar size)
541{
542 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000543 prefetch_LL_doref(a);
544 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
545 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000546 case Miss: return MemAccess;
547 default: break;
548 }
549 return WriteBackMemAccess;
550}
551
552static
553CacheModelResult prefetch_D1_Read(Addr a, UChar size)
554{
555 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000556 prefetch_LL_doref(a);
557 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
558 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000559 case Miss: return MemAccess;
560 default: break;
561 }
562 return WriteBackMemAccess;
563}
564
565static
566CacheModelResult prefetch_D1_Write(Addr a, UChar size)
567{
njn2d853a12010-10-06 22:46:31 +0000568 prefetch_LL_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000569 if ( cachesim_ref( &D1, a, size) == Hit ) {
570 /* Even for a L1 hit, the write-trough L1 passes
njn2d853a12010-10-06 22:46:31 +0000571 * the write to the LL to make the LL line dirty.
weidendoa17f2a32006-03-20 10:27:30 +0000572 * But this causes no latency, so return the hit.
573 */
njn2d853a12010-10-06 22:46:31 +0000574 cachesim_ref_wb( &LL, Write, a, size);
weidendoa17f2a32006-03-20 10:27:30 +0000575 return L1_Hit;
576 }
njn2d853a12010-10-06 22:46:31 +0000577 switch( cachesim_ref_wb( &LL, Write, a, size) ) {
578 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000579 case Miss: return MemAccess;
580 default: break;
581 }
582 return WriteBackMemAccess;
583}
584
585
586/*------------------------------------------------------------*/
587/*--- Cache Simulation with use metric collection ---*/
588/*------------------------------------------------------------*/
589
590/* can not be combined with write-back or prefetch */
591
592static
593void cacheuse_initcache(cache_t2* c)
594{
595 int i;
596 unsigned int start_mask, start_val;
597 unsigned int end_mask, end_val;
598
sewardj9c606bd2008-09-18 18:12:50 +0000599 c->use = CLG_MALLOC("cl.sim.cu_ic.1",
600 sizeof(line_use) * c->sets * c->assoc);
601 c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
602 sizeof(line_loaded) * c->sets * c->assoc);
603 c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
604 sizeof(int) * c->line_size);
605 c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
606 sizeof(int) * c->line_size);
weidendoa17f2a32006-03-20 10:27:30 +0000607
weidendoa17f2a32006-03-20 10:27:30 +0000608 c->line_size_mask = c->line_size-1;
609
610 /* Meaning of line_start_mask/line_end_mask
611 * Example: for a given cache line, you get an access starting at
612 * byte offset 5, length 4, byte 5 - 8 was touched. For a cache
613 * line size of 32, you have 1 bit per byte in the mask:
614 *
615 * bit31 bit8 bit5 bit 0
616 * | | | |
617 * 11..111111100000 line_start_mask[5]
618 * 00..000111111111 line_end_mask[(5+4)-1]
619 *
620 * use_mask |= line_start_mask[5] && line_end_mask[8]
621 *
622 */
623 start_val = end_val = ~0;
624 if (c->line_size < 32) {
625 int bits_per_byte = 32/c->line_size;
626 start_mask = (1<<bits_per_byte)-1;
627 end_mask = start_mask << (32-bits_per_byte);
628 for(i=0;i<c->line_size;i++) {
629 c->line_start_mask[i] = start_val;
630 start_val = start_val & ~start_mask;
631 start_mask = start_mask << bits_per_byte;
632
633 c->line_end_mask[c->line_size-i-1] = end_val;
634 end_val = end_val & ~end_mask;
635 end_mask = end_mask >> bits_per_byte;
636 }
637 }
638 else {
639 int bytes_per_bit = c->line_size/32;
640 start_mask = 1;
641 end_mask = 1 << 31;
642 for(i=0;i<c->line_size;i++) {
643 c->line_start_mask[i] = start_val;
644 c->line_end_mask[c->line_size-i-1] = end_val;
645 if ( ((i+1)%bytes_per_bit) == 0) {
646 start_val &= ~start_mask;
647 end_val &= ~end_mask;
648 start_mask <<= 1;
649 end_mask >>= 1;
650 }
651 }
652 }
653
654 CLG_DEBUG(6, "Config %s:\n", c->desc_line);
655 for(i=0;i<c->line_size;i++) {
656 CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
657 i, c->line_start_mask[i], c->line_end_mask[i]);
658 }
659
660 /* We use lower tag bits as offset pointers to cache use info.
661 * I.e. some cache parameters don't work.
662 */
weidendo144b76c2009-01-26 22:56:14 +0000663 if ( (1<<c->tag_shift) < c->assoc) {
weidendoa17f2a32006-03-20 10:27:30 +0000664 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +0000665 "error: Use associativity < %d for cache use statistics!\n",
weidendoa17f2a32006-03-20 10:27:30 +0000666 (1<<c->tag_shift) );
667 VG_(tool_panic)("Unsupported cache configuration");
668 }
669}
670
weidendoa17f2a32006-03-20 10:27:30 +0000671
/* for I1/D1 caches */
/* CACHEUSE(L) defines the function
 *   static CacheModelResult cacheuse_<L>_doRead(Addr a, UChar size)
 * which simulates an access of 'size' bytes at address 'a' to cache L
 * while collecting use information.
 *
 * Tag entries keep the line tag in the bits of L.tag_mask; the
 * remaining low bits hold the index, relative to the start of the set,
 * of the L.use[] / L.loaded[] slot belonging to the line. The slot
 * index stays with a way when the LRU order is shuffled, and a newly
 * installed line inherits the slot of the line it replaces.
 *
 * On a hit, the slot's access count is incremented and the touched part
 * of the line is OR'ed into its use mask. On a miss, update_<L>_use()
 * is called with the slot index, the use mask and the line address, and
 * its return value is used as result.
 * NOTE(review): update_<L>_use() is not defined in this part of the
 * file; presumably it accounts the evicted line and accesses the next
 * cache level -- confirm.
 *
 * Accesses straddling two lines handle both lines in turn (label
 * block2 starts the second one).
 */
#define CACHEUSE(L) \
 \
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \
{ \
   UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \
   UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
   UWord tag = a & L.tag_mask; \
   UWord tag2; \
   int i, j, idx; \
   UWord *set, tmp_tag; \
   UInt use_mask; \
 \
   CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%d/%d]\n", \
             L.name, a, size, set1, set2); \
 \
   /* First case: word entirely within line. */ \
   if (set1 == set2) { \
 \
      set = &(L.tags[set1 * L.assoc]); \
      /* mask bits covering first to last touched byte of the line */ \
      use_mask = L.line_start_mask[a & L.line_size_mask] & \
                 L.line_end_mask[(a+size-1) & L.line_size_mask]; \
 \
      /* This loop is unrolled for just the first case, which is the most */ \
      /* common. We can't unroll any further because it would screw up */ \
      /* if we have a direct-mapped (1-way) cache. */ \
      if (tag == (set[0] & L.tag_mask)) { \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         return L1_Hit; \
      } \
      /* If the tag is one other than the MRU, move it into the MRU spot */ \
      /* and shuffle the rest down. */ \
      for (i = 1; i < L.assoc; i++) { \
         if (tag == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            return L1_Hit; \
         } \
      } \
 \
      /* A miss; install this tag as MRU, shuffle rest down. */ \
      /* The new entry reuses the slot index of the evicted LRU entry. */ \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag | tmp_tag; \
      idx = (set1 * L.assoc) + tmp_tag; \
      return update_##L##_use(&L, idx, \
                              use_mask, a &~ L.line_size_mask); \
 \
   /* Second case: word straddles two lines. */ \
   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
   } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \
      Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:LL miss */ \
      /* First line: touched from the start offset to the line end. */ \
      set = &(L.tags[set1 * L.assoc]); \
      use_mask = L.line_start_mask[a & L.line_size_mask]; \
      if (tag == (set[0] & L.tag_mask)) { \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         goto block2; \
      } \
      for (i = 1; i < L.assoc; i++) { \
         if (tag == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            goto block2; \
         } \
      } \
      /* First line missed: replace the LRU entry, keep its slot index. */ \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag | tmp_tag; \
      idx = (set1 * L.assoc) + tmp_tag; \
      miss1 = update_##L##_use(&L, idx, \
                               use_mask, a &~ L.line_size_mask); \
block2: \
      /* Second line: touched from the line start to the last byte. */ \
      set = &(L.tags[set2 * L.assoc]); \
      use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
      tag2 = (a+size-1) & L.tag_mask; \
      if (tag2 == (set[0] & L.tag_mask)) { \
         idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         /* second line hit: the first line alone decides the result */ \
         return miss1; \
      } \
      for (i = 1; i < L.assoc; i++) { \
         if (tag2 == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            return miss1; \
         } \
      } \
      /* Second line missed: replace the LRU entry, keep its slot index. */ \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag2 | tmp_tag; \
      idx = (set2 * L.assoc) + tmp_tag; \
      miss2 = update_##L##_use(&L, idx, \
                               use_mask, (a+size-1) &~ L.line_size_mask); \
      /* The second line missed in L: report MemAccess if either line */ \
      /* did so, LL_Hit otherwise. */ \
      return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:LL_Hit; \
 \
   } else { \
      VG_(printf)("addr: %#lx size: %u sets: %d %d", a, size, set1, set2); \
      VG_(tool_panic)("item straddles more than two cache sets"); \
   } \
   /* only reached after the panic call above; 0 is L1_Hit */ \
   return 0; \
}
820
821
822/* logarithmic bitcounting algorithm, see
823 * http://graphics.stanford.edu/~seander/bithacks.html
824 */
/* Return the number of bits set in the low 32 bits of 'bits'.
 *
 * Logarithmic bit counting: neighbouring fields of 1, 2, 4, 8 and 16
 * bits are summed up in parallel, doubling the field width each step.
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
    unsigned int v = bits;

    v = (v & 0x55555555u) + ((v >>  1) & 0x55555555u);
    v = (v & 0x33333333u) + ((v >>  2) & 0x33333333u);
    v = (v & 0x0F0F0F0Fu) + ((v >>  4) & 0x0F0F0F0Fu);
    v = (v & 0x00FF00FFu) + ((v >>  8) & 0x00FF00FFu);
    v = (v & 0x0000FFFFu) + ((v >> 16) & 0x0000FFFFu);
    return v;
}
839
njn2d853a12010-10-06 22:46:31 +0000840static void update_LL_use(int idx, Addr memline)
weidendoa17f2a32006-03-20 10:27:30 +0000841{
njn2d853a12010-10-06 22:46:31 +0000842 line_loaded* loaded = &(LL.loaded[idx]);
843 line_use* use = &(LL.use[idx]);
844 int i = ((32 - countBits(use->mask)) * LL.line_size)>>5;
weidendoa17f2a32006-03-20 10:27:30 +0000845
njn2d853a12010-10-06 22:46:31 +0000846 CLG_DEBUG(2, " LL.miss [%d]: at %#lx accessing memline %#lx\n",
weidendo75a5c2d2010-06-09 22:32:58 +0000847 idx, CLG_(bb_base) + current_ii->instr_offset, memline);
weidendoa17f2a32006-03-20 10:27:30 +0000848 if (use->count>0) {
barta0b6b2c2008-07-07 06:49:24 +0000849 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",
weidendoa17f2a32006-03-20 10:27:30 +0000850 use->count, i, use->mask, loaded->memline, loaded->iaddr);
851 CLG_DEBUG(2, " collect: %d, use_base %p\n",
852 CLG_(current_state).collect, loaded->use_base);
853
854 if (CLG_(current_state).collect && loaded->use_base) {
njn2d853a12010-10-06 22:46:31 +0000855 (loaded->use_base)[off_LL_AcCost] += 1000 / use->count;
856 (loaded->use_base)[off_LL_SpLoss] += i;
weidendoa17f2a32006-03-20 10:27:30 +0000857 }
858 }
859
860 use->count = 0;
861 use->mask = 0;
862
863 loaded->memline = memline;
weidendo75a5c2d2010-06-09 22:32:58 +0000864 loaded->iaddr = CLG_(bb_base) + current_ii->instr_offset;
weidendoa17f2a32006-03-20 10:27:30 +0000865 loaded->use_base = (CLG_(current_state).nonskipped) ?
866 CLG_(current_state).nonskipped->skipped :
weidendo75a5c2d2010-06-09 22:32:58 +0000867 CLG_(cost_base) + current_ii->cost_offset;
weidendoa17f2a32006-03-20 10:27:30 +0000868}
869
870static
njn2d853a12010-10-06 22:46:31 +0000871CacheModelResult cacheuse_LL_access(Addr memline, line_loaded* l1_loaded)
weidendoa17f2a32006-03-20 10:27:30 +0000872{
njn2d853a12010-10-06 22:46:31 +0000873 UInt setNo = (memline >> LL.line_size_bits) & (LL.sets_min_1);
874 UWord* set = &(LL.tags[setNo * LL.assoc]);
875 UWord tag = memline & LL.tag_mask;
weidendoa17f2a32006-03-20 10:27:30 +0000876
877 int i, j, idx;
878 UWord tmp_tag;
879
njn2d853a12010-10-06 22:46:31 +0000880 CLG_DEBUG(6,"LL.Acc(Memline %#lx): Set %d\n", memline, setNo);
weidendoa17f2a32006-03-20 10:27:30 +0000881
njn2d853a12010-10-06 22:46:31 +0000882 if (tag == (set[0] & LL.tag_mask)) {
883 idx = (setNo * LL.assoc) + (set[0] & ~LL.tag_mask);
884 l1_loaded->dep_use = &(LL.use[idx]);
weidendoa17f2a32006-03-20 10:27:30 +0000885
barta0b6b2c2008-07-07 06:49:24 +0000886 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
njn2d853a12010-10-06 22:46:31 +0000887 idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
888 LL.use[idx].mask, LL.use[idx].count);
889 return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000890 }
njn2d853a12010-10-06 22:46:31 +0000891 for (i = 1; i < LL.assoc; i++) {
892 if (tag == (set[i] & LL.tag_mask)) {
weidendoa17f2a32006-03-20 10:27:30 +0000893 tmp_tag = set[i];
894 for (j = i; j > 0; j--) {
895 set[j] = set[j - 1];
896 }
897 set[0] = tmp_tag;
njn2d853a12010-10-06 22:46:31 +0000898 idx = (setNo * LL.assoc) + (tmp_tag & ~LL.tag_mask);
899 l1_loaded->dep_use = &(LL.use[idx]);
weidendoa17f2a32006-03-20 10:27:30 +0000900
barta0b6b2c2008-07-07 06:49:24 +0000901 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
njn2d853a12010-10-06 22:46:31 +0000902 i, idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
903 LL.use[idx].mask, LL.use[idx].count);
904 return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000905 }
906 }
907
908 /* A miss; install this tag as MRU, shuffle rest down. */
njn2d853a12010-10-06 22:46:31 +0000909 tmp_tag = set[LL.assoc - 1] & ~LL.tag_mask;
910 for (j = LL.assoc - 1; j > 0; j--) {
weidendoa17f2a32006-03-20 10:27:30 +0000911 set[j] = set[j - 1];
912 }
913 set[0] = tag | tmp_tag;
njn2d853a12010-10-06 22:46:31 +0000914 idx = (setNo * LL.assoc) + tmp_tag;
915 l1_loaded->dep_use = &(LL.use[idx]);
weidendoa17f2a32006-03-20 10:27:30 +0000916
njn2d853a12010-10-06 22:46:31 +0000917 update_LL_use(idx, memline);
weidendoa17f2a32006-03-20 10:27:30 +0000918
919 return MemAccess;
920}
921
922
923
924
/* UPDATE_USE(L) defines update_<L>_use(cache, idx, mask, memline) for a
 * first-level cache L.
 *
 * The generated function accounts for the line currently held in slot <idx>:
 * if it was used and collection is active, the cost array it was loaded
 * under is charged with 1000/count access cost and with the spatial loss
 * (unset bits of the 32-bit use mask scaled by line_size/32), and its use
 * mask and count are merged into the dependent LL use entry.  The slot is
 * then re-labelled for <memline> with count 1 and the given mask.  Finally
 * the access is forwarded to cacheuse_LL_access(), except for memline 0,
 * which reports LL_Hit without touching LL.
 */
#define UPDATE_USE(L)                                                      \
                                                                           \
static CacheModelResult update_##L##_use(cache_t2* cache, int idx,         \
                                         UInt mask, Addr memline)          \
{                                                                          \
   line_use*    usage = &(cache->use[idx]);                                \
   line_loaded* entry = &(cache->loaded[idx]);                             \
   int sp_loss = ((32 - countBits(usage->mask)) * cache->line_size)>>5;    \
                                                                           \
   CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \
             cache->name, idx,                                             \
             CLG_(bb_base) + current_ii->instr_offset, memline, mask);     \
                                                                           \
   if (usage->count > 0) {                                                 \
      CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",\
                usage->count, sp_loss, usage->mask,                        \
                entry->memline, entry->iaddr);                             \
      CLG_DEBUG(2, " collect: %d, use_base %p\n",                          \
                CLG_(current_state).collect, entry->use_base);             \
                                                                           \
      if (CLG_(current_state).collect && entry->use_base) {                \
         entry->use_base[off_##L##_AcCost] += 1000 / usage->count;         \
         entry->use_base[off_##L##_SpLoss] += sp_loss;                     \
                                                                           \
         /* FIXME (?): L1/LL line sizes must be equal ! */                 \
         entry->dep_use->mask  |= usage->mask;                             \
         entry->dep_use->count += usage->count;                            \
      }                                                                    \
   }                                                                       \
                                                                           \
   /* the slot now holds the new line, touched once so far */              \
   usage->count = 1;                                                       \
   usage->mask  = mask;                                                    \
                                                                           \
   entry->memline = memline;                                               \
   entry->iaddr   = CLG_(bb_base) + current_ii->instr_offset;              \
   if (CLG_(current_state).nonskipped)                                     \
      entry->use_base = CLG_(current_state).nonskipped->skipped;           \
   else                                                                    \
      entry->use_base = CLG_(cost_base) + current_ii->cost_offset;         \
                                                                           \
   return (memline == 0) ? LL_Hit : cacheuse_LL_access(memline, entry);    \
}
963
964UPDATE_USE(I1);
965UPDATE_USE(D1);
966
967CACHEUSE(I1);
968CACHEUSE(D1);
969
970
971static
972void cacheuse_finish(void)
973{
974 int i;
weidendo0a1951d2009-06-15 00:16:36 +0000975 InstrInfo ii = { 0,0,0,0 };
weidendoa17f2a32006-03-20 10:27:30 +0000976
977 if (!CLG_(current_state).collect) return;
978
weidendo75a5c2d2010-06-09 22:32:58 +0000979 CLG_(bb_base) = 0;
weidendo061f0792011-05-11 12:28:01 +0000980 current_ii = &ii; /* needs to be set for update_XX_use */
weidendo75a5c2d2010-06-09 22:32:58 +0000981 CLG_(cost_base) = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000982
983 /* update usage counters */
984 if (I1.use)
985 for (i = 0; i < I1.sets * I1.assoc; i++)
986 if (I1.loaded[i].use_base)
987 update_I1_use( &I1, i, 0,0);
988
989 if (D1.use)
990 for (i = 0; i < D1.sets * D1.assoc; i++)
991 if (D1.loaded[i].use_base)
992 update_D1_use( &D1, i, 0,0);
993
njn2d853a12010-10-06 22:46:31 +0000994 if (LL.use)
995 for (i = 0; i < LL.sets * LL.assoc; i++)
996 if (LL.loaded[i].use_base)
997 update_LL_use(i, 0);
weidendo061f0792011-05-11 12:28:01 +0000998
999 current_ii = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001000}
1001
1002
1003
1004/*------------------------------------------------------------*/
1005/*--- Helper functions called by instrumented code ---*/
1006/*------------------------------------------------------------*/
1007
1008
1009static __inline__
1010void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
1011{
1012 switch(r) {
1013 case WriteBackMemAccess:
1014 if (clo_simulate_writeback) {
1015 c1[3]++;
1016 c2[3]++;
1017 }
1018 // fall through
1019
1020 case MemAccess:
1021 c1[2]++;
1022 c2[2]++;
1023 // fall through
1024
njn2d853a12010-10-06 22:46:31 +00001025 case LL_Hit:
weidendoa17f2a32006-03-20 10:27:30 +00001026 c1[1]++;
1027 c2[1]++;
1028 // fall through
1029
1030 default:
1031 c1[0]++;
1032 c2[0]++;
1033 }
1034}
1035
weidendo0a1951d2009-06-15 00:16:36 +00001036static
1037Char* cacheRes(CacheModelResult r)
1038{
1039 switch(r) {
1040 case L1_Hit: return "L1 Hit ";
njn2d853a12010-10-06 22:46:31 +00001041 case LL_Hit: return "LL Hit ";
1042 case MemAccess: return "LL Miss";
1043 case WriteBackMemAccess: return "LL Miss (dirty)";
weidendo0a1951d2009-06-15 00:16:36 +00001044 default:
1045 tl_assert(0);
1046 }
1047 return "??";
1048}
weidendoa17f2a32006-03-20 10:27:30 +00001049
1050VG_REGPARM(1)
1051static void log_1I0D(InstrInfo* ii)
1052{
1053 CacheModelResult IrRes;
1054
1055 current_ii = ii;
weidendo75a5c2d2010-06-09 22:32:58 +00001056 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
weidendoa17f2a32006-03-20 10:27:30 +00001057
weidendo0a1951d2009-06-15 00:16:36 +00001058 CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001059 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001060
1061 if (CLG_(current_state).collect) {
1062 ULong* cost_Ir;
weidendo0a1951d2009-06-15 00:16:36 +00001063
weidendoa17f2a32006-03-20 10:27:30 +00001064 if (CLG_(current_state).nonskipped)
weidendo5bba5252010-06-09 22:32:53 +00001065 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
weidendoa17f2a32006-03-20 10:27:30 +00001066 else
weidendo75a5c2d2010-06-09 22:32:58 +00001067 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
weidendoa17f2a32006-03-20 10:27:30 +00001068
1069 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001070 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001071 }
1072}
1073
weidendo0a1951d2009-06-15 00:16:36 +00001074VG_REGPARM(2)
1075static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
1076{
1077 CacheModelResult Ir1Res, Ir2Res;
1078 ULong *global_cost_Ir;
1079
1080 current_ii = ii1;
weidendo75a5c2d2010-06-09 22:32:58 +00001081 Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001082 current_ii = ii2;
weidendo75a5c2d2010-06-09 22:32:58 +00001083 Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001084
1085 CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001086 CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1087 CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );
weidendo0a1951d2009-06-15 00:16:36 +00001088
1089 if (!CLG_(current_state).collect) return;
1090
weidendo5bba5252010-06-09 22:32:53 +00001091 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001092 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001093 ULong* skipped_cost_Ir =
1094 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1095
weidendo0a1951d2009-06-15 00:16:36 +00001096 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1097 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1098 return;
1099 }
1100
weidendo5bba5252010-06-09 22:32:53 +00001101 inc_costs(Ir1Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001102 CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
weidendo5bba5252010-06-09 22:32:53 +00001103 inc_costs(Ir2Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001104 CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
weidendo0a1951d2009-06-15 00:16:36 +00001105}
1106
1107VG_REGPARM(3)
1108static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
1109{
1110 CacheModelResult Ir1Res, Ir2Res, Ir3Res;
1111 ULong *global_cost_Ir;
1112
1113 current_ii = ii1;
weidendo75a5c2d2010-06-09 22:32:58 +00001114 Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001115 current_ii = ii2;
weidendo75a5c2d2010-06-09 22:32:58 +00001116 Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001117 current_ii = ii3;
weidendo75a5c2d2010-06-09 22:32:58 +00001118 Ir3Res = (*simulator.I1_Read)(CLG_(bb_base) + ii3->instr_offset, ii3->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001119
1120 CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001121 CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1122 CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
1123 CLG_(bb_base) + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );
weidendo0a1951d2009-06-15 00:16:36 +00001124
1125 if (!CLG_(current_state).collect) return;
1126
weidendo5bba5252010-06-09 22:32:53 +00001127 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001128 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001129 ULong* skipped_cost_Ir =
1130 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001131 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1132 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1133 inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
1134 return;
1135 }
1136
weidendo5bba5252010-06-09 22:32:53 +00001137 inc_costs(Ir1Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001138 CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
weidendo5bba5252010-06-09 22:32:53 +00001139 inc_costs(Ir2Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001140 CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
weidendo5bba5252010-06-09 22:32:53 +00001141 inc_costs(Ir3Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001142 CLG_(cost_base) + ii3->cost_offset + ii3->eventset->offset[EG_IR]);
weidendo0a1951d2009-06-15 00:16:36 +00001143}
weidendoa17f2a32006-03-20 10:27:30 +00001144
1145/* Instruction doing a read access */
1146
weidendo0a1951d2009-06-15 00:16:36 +00001147VG_REGPARM(3)
1148static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001149{
1150 CacheModelResult IrRes, DrRes;
1151
1152 current_ii = ii;
weidendo75a5c2d2010-06-09 22:32:58 +00001153 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001154 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001155
weidendo0a1951d2009-06-15 00:16:36 +00001156 CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001157 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
weidendo0a1951d2009-06-15 00:16:36 +00001158 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001159
1160 if (CLG_(current_state).collect) {
1161 ULong *cost_Ir, *cost_Dr;
1162
1163 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001164 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1165 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
weidendoa17f2a32006-03-20 10:27:30 +00001166 }
1167 else {
weidendo75a5c2d2010-06-09 22:32:58 +00001168 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
1169 cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];
weidendoa17f2a32006-03-20 10:27:30 +00001170 }
1171
1172 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001173 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001174 inc_costs(DrRes, cost_Dr,
weidendo5bba5252010-06-09 22:32:53 +00001175 CLG_(current_state).cost + fullOffset(EG_DR) );
weidendoa17f2a32006-03-20 10:27:30 +00001176 }
1177}
1178
1179
weidendo0a1951d2009-06-15 00:16:36 +00001180VG_REGPARM(3)
1181static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001182{
1183 CacheModelResult DrRes;
1184
1185 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001186 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001187
weidendo0a1951d2009-06-15 00:16:36 +00001188 CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n",
1189 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001190
1191 if (CLG_(current_state).collect) {
1192 ULong *cost_Dr;
1193
weidendo5bba5252010-06-09 22:32:53 +00001194 if (CLG_(current_state).nonskipped)
1195 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
1196 else
weidendo75a5c2d2010-06-09 22:32:58 +00001197 cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];
weidendo0a1951d2009-06-15 00:16:36 +00001198
weidendoa17f2a32006-03-20 10:27:30 +00001199 inc_costs(DrRes, cost_Dr,
weidendo5bba5252010-06-09 22:32:53 +00001200 CLG_(current_state).cost + fullOffset(EG_DR) );
weidendoa17f2a32006-03-20 10:27:30 +00001201 }
1202}
1203
1204
1205/* Instruction doing a write access */
1206
weidendo0a1951d2009-06-15 00:16:36 +00001207VG_REGPARM(3)
1208static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001209{
1210 CacheModelResult IrRes, DwRes;
1211
1212 current_ii = ii;
weidendo75a5c2d2010-06-09 22:32:58 +00001213 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001214 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001215
weidendo0a1951d2009-06-15 00:16:36 +00001216 CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001217 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
weidendo0a1951d2009-06-15 00:16:36 +00001218 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001219
1220 if (CLG_(current_state).collect) {
1221 ULong *cost_Ir, *cost_Dw;
1222
1223 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001224 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1225 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
weidendoa17f2a32006-03-20 10:27:30 +00001226 }
1227 else {
weidendo75a5c2d2010-06-09 22:32:58 +00001228 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
1229 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW];
weidendoa17f2a32006-03-20 10:27:30 +00001230 }
1231
1232 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001233 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001234 inc_costs(DwRes, cost_Dw,
weidendo5bba5252010-06-09 22:32:53 +00001235 CLG_(current_state).cost + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001236 }
1237}
1238
weidendo0a1951d2009-06-15 00:16:36 +00001239VG_REGPARM(3)
1240static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001241{
1242 CacheModelResult DwRes;
1243
1244 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001245 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001246
weidendo0a1951d2009-06-15 00:16:36 +00001247 CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n",
1248 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001249
1250 if (CLG_(current_state).collect) {
1251 ULong *cost_Dw;
1252
weidendo5bba5252010-06-09 22:32:53 +00001253 if (CLG_(current_state).nonskipped)
1254 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
1255 else
weidendo75a5c2d2010-06-09 22:32:58 +00001256 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW];
weidendoa17f2a32006-03-20 10:27:30 +00001257
1258 inc_costs(DwRes, cost_Dw,
weidendo5bba5252010-06-09 22:32:53 +00001259 CLG_(current_state).cost + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001260 }
1261}
1262
weidendoa17f2a32006-03-20 10:27:30 +00001263
1264
1265/*------------------------------------------------------------*/
1266/*--- Cache configuration ---*/
1267/*------------------------------------------------------------*/
1268
weidendoa17f2a32006-03-20 10:27:30 +00001269static cache_t clo_I1_cache = UNDEFINED_CACHE;
1270static cache_t clo_D1_cache = UNDEFINED_CACHE;
njn2d853a12010-10-06 22:46:31 +00001271static cache_t clo_LL_cache = UNDEFINED_CACHE;
weidendoa17f2a32006-03-20 10:27:30 +00001272
weidendoa17f2a32006-03-20 10:27:30 +00001273/* Initialize and clear simulator state */
1274static void cachesim_post_clo_init(void)
1275{
1276 /* Cache configurations. */
njn2d853a12010-10-06 22:46:31 +00001277 cache_t I1c, D1c, LLc;
weidendoa17f2a32006-03-20 10:27:30 +00001278
1279 /* Initialize access handlers */
1280 if (!CLG_(clo).simulate_cache) {
1281 CLG_(cachesim).log_1I0D = 0;
1282 CLG_(cachesim).log_1I0D_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001283 CLG_(cachesim).log_2I0D = 0;
1284 CLG_(cachesim).log_2I0D_name = "(no function)";
1285 CLG_(cachesim).log_3I0D = 0;
1286 CLG_(cachesim).log_3I0D_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001287
1288 CLG_(cachesim).log_1I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001289 CLG_(cachesim).log_1I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001290 CLG_(cachesim).log_1I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001291 CLG_(cachesim).log_1I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001292
1293 CLG_(cachesim).log_0I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001294 CLG_(cachesim).log_0I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001295 CLG_(cachesim).log_0I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001296 CLG_(cachesim).log_0I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001297 return;
1298 }
1299
1300 /* Configuration of caches only needed with real cache simulation */
weidendo23642272011-09-06 19:08:31 +00001301 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1302 &clo_I1_cache,
1303 &clo_D1_cache,
1304 &clo_LL_cache);
1305
weidendoa17f2a32006-03-20 10:27:30 +00001306 I1.name = "I1";
1307 D1.name = "D1";
njn2d853a12010-10-06 22:46:31 +00001308 LL.name = "LL";
weidendoa17f2a32006-03-20 10:27:30 +00001309
sewardjc4ebde32012-06-03 23:10:55 +00001310 // min_line_size is used to make sure that we never feed
1311 // accesses to the simulator straddling more than two
1312 // cache lines at any cache level
1313 CLG_(min_line_size) = (I1c.line_size < D1c.line_size)
1314 ? I1c.line_size : D1c.line_size;
1315 CLG_(min_line_size) = (LLc.line_size < CLG_(min_line_size))
1316 ? LLc.line_size : CLG_(min_line_size);
1317
1318 Int largest_load_or_store_size
1319 = VG_(machine_get_size_of_largest_guest_register)();
1320 if (CLG_(min_line_size) < largest_load_or_store_size) {
1321 /* We can't continue, because the cache simulation might
1322 straddle more than 2 lines, and it will assert. So let's
1323 just stop before we start. */
1324 VG_(umsg)("Callgrind: cannot continue: the minimum line size (%d)\n",
1325 (Int)CLG_(min_line_size));
1326 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
1327 largest_load_or_store_size );
1328 VG_(umsg)(" but it is not. Exiting now.\n");
1329 VG_(exit)(1);
1330 }
1331
weidendoa17f2a32006-03-20 10:27:30 +00001332 cachesim_initcache(I1c, &I1);
1333 cachesim_initcache(D1c, &D1);
njn2d853a12010-10-06 22:46:31 +00001334 cachesim_initcache(LLc, &LL);
weidendoa17f2a32006-03-20 10:27:30 +00001335
1336 /* the other cache simulators use the standard helpers
1337 * with dispatching via simulator struct */
1338
1339 CLG_(cachesim).log_1I0D = log_1I0D;
1340 CLG_(cachesim).log_1I0D_name = "log_1I0D";
weidendo0a1951d2009-06-15 00:16:36 +00001341 CLG_(cachesim).log_2I0D = log_2I0D;
1342 CLG_(cachesim).log_2I0D_name = "log_2I0D";
1343 CLG_(cachesim).log_3I0D = log_3I0D;
1344 CLG_(cachesim).log_3I0D_name = "log_3I0D";
weidendoa17f2a32006-03-20 10:27:30 +00001345
1346 CLG_(cachesim).log_1I1Dr = log_1I1Dr;
1347 CLG_(cachesim).log_1I1Dw = log_1I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001348 CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
1349 CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001350
1351 CLG_(cachesim).log_0I1Dr = log_0I1Dr;
1352 CLG_(cachesim).log_0I1Dw = log_0I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001353 CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
1354 CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001355
1356 if (clo_collect_cacheuse) {
1357
1358 /* Output warning for not supported option combinations */
1359 if (clo_simulate_hwpref) {
1360 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001361 "warning: prefetch simulation can not be "
1362 "used with cache usage\n");
weidendoa17f2a32006-03-20 10:27:30 +00001363 clo_simulate_hwpref = False;
1364 }
1365
1366 if (clo_simulate_writeback) {
1367 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001368 "warning: write-back simulation can not be "
1369 "used with cache usage\n");
weidendoa17f2a32006-03-20 10:27:30 +00001370 clo_simulate_writeback = False;
1371 }
1372
1373 simulator.I1_Read = cacheuse_I1_doRead;
1374 simulator.D1_Read = cacheuse_D1_doRead;
1375 simulator.D1_Write = cacheuse_D1_doRead;
1376 return;
1377 }
1378
1379 if (clo_simulate_hwpref) {
1380 prefetch_clear();
1381
1382 if (clo_simulate_writeback) {
1383 simulator.I1_Read = prefetch_I1_Read;
1384 simulator.D1_Read = prefetch_D1_Read;
1385 simulator.D1_Write = prefetch_D1_Write;
1386 }
1387 else {
1388 simulator.I1_Read = prefetch_I1_ref;
1389 simulator.D1_Read = prefetch_D1_ref;
1390 simulator.D1_Write = prefetch_D1_ref;
1391 }
1392
1393 return;
1394 }
1395
1396 if (clo_simulate_writeback) {
1397 simulator.I1_Read = cachesim_I1_Read;
1398 simulator.D1_Read = cachesim_D1_Read;
1399 simulator.D1_Write = cachesim_D1_Write;
1400 }
1401 else {
1402 simulator.I1_Read = cachesim_I1_ref;
1403 simulator.D1_Read = cachesim_D1_ref;
1404 simulator.D1_Write = cachesim_D1_ref;
1405 }
1406}
1407
1408
1409/* Clear simulator state. Has to be initialized before */
1410static
1411void cachesim_clear(void)
1412{
1413 cachesim_clearcache(&I1);
1414 cachesim_clearcache(&D1);
njn2d853a12010-10-06 22:46:31 +00001415 cachesim_clearcache(&LL);
weidendoa17f2a32006-03-20 10:27:30 +00001416
1417 prefetch_clear();
1418}
1419
1420
1421static void cachesim_getdesc(Char* buf)
1422{
1423 Int p;
1424 p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line);
1425 p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line);
njn2d853a12010-10-06 22:46:31 +00001426 VG_(sprintf)(buf+p, "desc: LL cache: %s\n", LL.desc_line);
weidendoa17f2a32006-03-20 10:27:30 +00001427}
1428
1429static
1430void cachesim_print_opts(void)
1431{
1432 VG_(printf)(
weidendo320705f2010-07-02 19:56:23 +00001433"\n cache simulator options (does cache simulation if used):\n"
weidendoa17f2a32006-03-20 10:27:30 +00001434" --simulate-wb=no|yes Count write-back events [no]\n"
1435" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"
1436#if CLG_EXPERIMENTAL
1437" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
1438#endif
weidendo23642272011-09-06 19:08:31 +00001439" --cacheuse=no|yes Collect cache block use [no]\n");
1440 VG_(print_cache_clo_opts)();
weidendoa17f2a32006-03-20 10:27:30 +00001441}
1442
1443/* Check for command line option for cache configuration.
1444 * Return False if unknown and not handled.
1445 *
1446 * Called from CLG_(process_cmd_line_option)() in clo.c
1447 */
1448static Bool cachesim_parse_opt(Char* arg)
1449{
njn83df0b62009-02-25 01:01:05 +00001450 if VG_BOOL_CLO(arg, "--simulate-wb", clo_simulate_writeback) {}
1451 else if VG_BOOL_CLO(arg, "--simulate-hwpref", clo_simulate_hwpref) {}
1452 else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors) {}
weidendoa17f2a32006-03-20 10:27:30 +00001453
njn83df0b62009-02-25 01:01:05 +00001454 else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) {
1455 if (clo_collect_cacheuse) {
1456 /* Use counters only make sense with fine dumping */
1457 CLG_(clo).dump_instr = True;
1458 }
1459 }
weidendoa17f2a32006-03-20 10:27:30 +00001460
weidendo23642272011-09-06 19:08:31 +00001461 else if (VG_(str_clo_cache_opt)(arg,
1462 &clo_I1_cache,
1463 &clo_D1_cache,
1464 &clo_LL_cache)) {}
1465
1466 else
1467 return False;
weidendoa17f2a32006-03-20 10:27:30 +00001468
1469 return True;
1470}
1471
1472/* Adds commas to ULong, right justifying in a field field_width wide, returns
1473 * the string in buf. */
1474static
1475Int commify(ULong n, int field_width, char* buf)
1476{
1477 int len, n_commas, i, j, new_len, space;
1478
1479 VG_(sprintf)(buf, "%llu", n);
1480 len = VG_(strlen)(buf);
1481 n_commas = (len - 1) / 3;
1482 new_len = len + n_commas;
1483 space = field_width - new_len;
1484
1485 /* Allow for printing a number in a field_width smaller than it's size */
1486 if (space < 0) space = 0;
1487
1488 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1489 * of three. */
1490 for (j = -1, i = len ; i >= 0; i--) {
1491 buf[i + n_commas + space] = buf[i];
1492
1493 if ((i>0) && (3 == ++j)) {
1494 j = 0;
1495 n_commas--;
1496 buf[i + n_commas + space] = ',';
1497 }
1498 }
1499 /* Right justify in field. */
1500 for (i = 0; i < space; i++) buf[i] = ' ';
1501 return new_len;
1502}
1503
1504static
1505void percentify(Int n, Int ex, Int field_width, char buf[])
1506{
1507 int i, len, space;
1508
1509 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
1510 len = VG_(strlen)(buf);
1511 space = field_width - len;
1512 if (space < 0) space = 0; /* Allow for v. small field_width */
1513 i = len;
1514
1515 /* Right justify in field */
1516 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1517 for (i = 0; i < space; i++) buf[i] = ' ';
1518}
1519
1520static
weidendo320705f2010-07-02 19:56:23 +00001521void cachesim_printstat(Int l1, Int l2, Int l3)
weidendoa17f2a32006-03-20 10:27:30 +00001522{
1523 FullCost total = CLG_(total_cost), D_total = 0;
njn2d853a12010-10-06 22:46:31 +00001524 ULong LL_total_m, LL_total_mr, LL_total_mw,
1525 LL_total, LL_total_r, LL_total_w;
weidendoa17f2a32006-03-20 10:27:30 +00001526 char buf1[RESULTS_BUF_LEN],
1527 buf2[RESULTS_BUF_LEN],
1528 buf3[RESULTS_BUF_LEN];
weidendoa17f2a32006-03-20 10:27:30 +00001529 Int p;
1530
1531 if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {
sewardj0f33adf2009-07-15 14:51:03 +00001532 VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001533 prefetch_up);
sewardj0f33adf2009-07-15 14:51:03 +00001534 VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001535 prefetch_down);
sewardj0f33adf2009-07-15 14:51:03 +00001536 VG_(message)(Vg_DebugMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001537 }
1538
weidendo5bba5252010-06-09 22:32:53 +00001539 commify(total[fullOffset(EG_IR) +1], l1, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001540 VG_(message)(Vg_UserMsg, "I1 misses: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001541
weidendo5bba5252010-06-09 22:32:53 +00001542 commify(total[fullOffset(EG_IR) +2], l1, buf1);
njn2d853a12010-10-06 22:46:31 +00001543 VG_(message)(Vg_UserMsg, "LLi misses: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001544
1545 p = 100;
1546
weidendo5bba5252010-06-09 22:32:53 +00001547 if (0 == total[fullOffset(EG_IR)])
1548 total[fullOffset(EG_IR)] = 1;
weidendoa17f2a32006-03-20 10:27:30 +00001549
weidendo5bba5252010-06-09 22:32:53 +00001550 percentify(total[fullOffset(EG_IR)+1] * 100 * p /
1551 total[fullOffset(EG_IR)], p, l1+1, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001552 VG_(message)(Vg_UserMsg, "I1 miss rate: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001553
weidendo5bba5252010-06-09 22:32:53 +00001554 percentify(total[fullOffset(EG_IR)+2] * 100 * p /
1555 total[fullOffset(EG_IR)], p, l1+1, buf1);
njn2d853a12010-10-06 22:46:31 +00001556 VG_(message)(Vg_UserMsg, "LLi miss rate: %s\n", buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001557 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001558
1559 /* D cache results.
1560 Use the D_refs.rd and D_refs.wr values to determine the
1561 * width of columns 2 & 3. */
1562
1563 D_total = CLG_(get_eventset_cost)( CLG_(sets).full );
1564 CLG_(init_cost)( CLG_(sets).full, D_total);
weidendo5bba5252010-06-09 22:32:53 +00001565 // we only use the first 3 values of D_total, adding up Dr and Dw costs
1566 CLG_(copy_cost)( CLG_(get_event_set)(EG_DR), D_total, total + fullOffset(EG_DR) );
1567 CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001568
1569 commify( D_total[0], l1, buf1);
weidendo320705f2010-07-02 19:56:23 +00001570 commify(total[fullOffset(EG_DR)], l2, buf2);
1571 commify(total[fullOffset(EG_DW)], l3, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001572 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001573 buf1, buf2, buf3);
1574
1575 commify( D_total[1], l1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001576 commify(total[fullOffset(EG_DR)+1], l2, buf2);
1577 commify(total[fullOffset(EG_DW)+1], l3, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001578 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001579 buf1, buf2, buf3);
1580
1581 commify( D_total[2], l1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001582 commify(total[fullOffset(EG_DR)+2], l2, buf2);
1583 commify(total[fullOffset(EG_DW)+2], l3, buf3);
njn2d853a12010-10-06 22:46:31 +00001584 VG_(message)(Vg_UserMsg, "LLd misses: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001585 buf1, buf2, buf3);
1586
1587 p = 10;
1588
1589 if (0 == D_total[0]) D_total[0] = 1;
weidendo5bba5252010-06-09 22:32:53 +00001590 if (0 == total[fullOffset(EG_DR)]) total[fullOffset(EG_DR)] = 1;
1591 if (0 == total[fullOffset(EG_DW)]) total[fullOffset(EG_DW)] = 1;
weidendoa17f2a32006-03-20 10:27:30 +00001592
1593 percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001594 percentify(total[fullOffset(EG_DR)+1] * 100 * p /
1595 total[fullOffset(EG_DR)], p, l2+1, buf2);
1596 percentify(total[fullOffset(EG_DW)+1] * 100 * p /
1597 total[fullOffset(EG_DW)], p, l3+1, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001598 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )\n",
1599 buf1, buf2,buf3);
weidendoa17f2a32006-03-20 10:27:30 +00001600
1601 percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001602 percentify(total[fullOffset(EG_DR)+2] * 100 * p /
1603 total[fullOffset(EG_DR)], p, l2+1, buf2);
1604 percentify(total[fullOffset(EG_DW)+2] * 100 * p /
1605 total[fullOffset(EG_DW)], p, l3+1, buf3);
njn2d853a12010-10-06 22:46:31 +00001606 VG_(message)(Vg_UserMsg, "LLd miss rate: %s (%s + %s )\n",
sewardj0f33adf2009-07-15 14:51:03 +00001607 buf1, buf2,buf3);
1608 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001609
1610
1611
njn2d853a12010-10-06 22:46:31 +00001612 /* LL overall results */
weidendoa17f2a32006-03-20 10:27:30 +00001613
njn2d853a12010-10-06 22:46:31 +00001614 LL_total =
weidendo5bba5252010-06-09 22:32:53 +00001615 total[fullOffset(EG_DR) +1] +
1616 total[fullOffset(EG_DW) +1] +
1617 total[fullOffset(EG_IR) +1];
njn2d853a12010-10-06 22:46:31 +00001618 LL_total_r =
weidendo5bba5252010-06-09 22:32:53 +00001619 total[fullOffset(EG_DR) +1] +
1620 total[fullOffset(EG_IR) +1];
njn2d853a12010-10-06 22:46:31 +00001621 LL_total_w = total[fullOffset(EG_DW) +1];
1622 commify(LL_total, l1, buf1);
1623 commify(LL_total_r, l2, buf2);
1624 commify(LL_total_w, l3, buf3);
1625 VG_(message)(Vg_UserMsg, "LL refs: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001626 buf1, buf2, buf3);
1627
njn2d853a12010-10-06 22:46:31 +00001628 LL_total_m =
weidendo5bba5252010-06-09 22:32:53 +00001629 total[fullOffset(EG_DR) +2] +
1630 total[fullOffset(EG_DW) +2] +
1631 total[fullOffset(EG_IR) +2];
njn2d853a12010-10-06 22:46:31 +00001632 LL_total_mr =
weidendo5bba5252010-06-09 22:32:53 +00001633 total[fullOffset(EG_DR) +2] +
1634 total[fullOffset(EG_IR) +2];
njn2d853a12010-10-06 22:46:31 +00001635 LL_total_mw = total[fullOffset(EG_DW) +2];
1636 commify(LL_total_m, l1, buf1);
1637 commify(LL_total_mr, l2, buf2);
1638 commify(LL_total_mw, l3, buf3);
1639 VG_(message)(Vg_UserMsg, "LL misses: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001640 buf1, buf2, buf3);
1641
njn2d853a12010-10-06 22:46:31 +00001642 percentify(LL_total_m * 100 * p /
weidendo5bba5252010-06-09 22:32:53 +00001643 (total[fullOffset(EG_IR)] + D_total[0]), p, l1+1, buf1);
njn2d853a12010-10-06 22:46:31 +00001644 percentify(LL_total_mr * 100 * p /
weidendo5bba5252010-06-09 22:32:53 +00001645 (total[fullOffset(EG_IR)] + total[fullOffset(EG_DR)]),
weidendoa17f2a32006-03-20 10:27:30 +00001646 p, l2+1, buf2);
njn2d853a12010-10-06 22:46:31 +00001647 percentify(LL_total_mw * 100 * p /
weidendo5bba5252010-06-09 22:32:53 +00001648 total[fullOffset(EG_DW)], p, l3+1, buf3);
njn2d853a12010-10-06 22:46:31 +00001649 VG_(message)(Vg_UserMsg, "LL miss rate: %s (%s + %s )\n",
weidendoa17f2a32006-03-20 10:27:30 +00001650 buf1, buf2,buf3);
1651}
1652
1653
1654/*------------------------------------------------------------*/
1655/*--- Setup for Event set. ---*/
1656/*------------------------------------------------------------*/
1657
1658struct event_sets CLG_(sets);
1659
weidendo5bba5252010-06-09 22:32:53 +00001660void CLG_(init_eventsets)()
weidendoa17f2a32006-03-20 10:27:30 +00001661{
weidendo5bba5252010-06-09 22:32:53 +00001662 // Event groups from which the event sets are composed
1663 // the "Use" group only is used with "cacheuse" simulation
1664 if (clo_collect_cacheuse)
1665 CLG_(register_event_group4)(EG_USE,
1666 "AcCost1", "SpLoss1", "AcCost2", "SpLoss2");
weidendoa17f2a32006-03-20 10:27:30 +00001667
weidendo5bba5252010-06-09 22:32:53 +00001668 if (!CLG_(clo).simulate_cache)
1669 CLG_(register_event_group)(EG_IR, "Ir");
1670 else if (!clo_simulate_writeback) {
njn2d853a12010-10-06 22:46:31 +00001671 CLG_(register_event_group3)(EG_IR, "Ir", "I1mr", "ILmr");
1672 CLG_(register_event_group3)(EG_DR, "Dr", "D1mr", "DLmr");
1673 CLG_(register_event_group3)(EG_DW, "Dw", "D1mw", "DLmw");
weidendoa17f2a32006-03-20 10:27:30 +00001674 }
weidendo5bba5252010-06-09 22:32:53 +00001675 else { // clo_simulate_writeback
njn2d853a12010-10-06 22:46:31 +00001676 CLG_(register_event_group4)(EG_IR, "Ir", "I1mr", "ILmr", "ILdmr");
1677 CLG_(register_event_group4)(EG_DR, "Dr", "D1mr", "DLmr", "DLdmr");
1678 CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "DLmw", "DLdmw");
weidendoa17f2a32006-03-20 10:27:30 +00001679 }
weidendo5bba5252010-06-09 22:32:53 +00001680
weidendo320705f2010-07-02 19:56:23 +00001681 if (CLG_(clo).simulate_branch) {
1682 CLG_(register_event_group2)(EG_BC, "Bc", "Bcm");
1683 CLG_(register_event_group2)(EG_BI, "Bi", "Bim");
1684 }
1685
weidendoaeb86222010-06-09 22:33:02 +00001686 if (CLG_(clo).collect_bus)
1687 CLG_(register_event_group)(EG_BUS, "Ge");
1688
weidendo5bba5252010-06-09 22:32:53 +00001689 if (CLG_(clo).collect_alloc)
1690 CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
1691
1692 if (CLG_(clo).collect_systime)
1693 CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
1694
1695 // event set used as base for instruction self cost
1696 CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);
1697
1698 // event set comprising all event groups, used for inclusive cost
1699 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
weidendo320705f2010-07-02 19:56:23 +00001700 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_BC, EG_BI);
weidendoaeb86222010-06-09 22:33:02 +00001701 CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
weidendo5bba5252010-06-09 22:32:53 +00001702 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
1703
1704 CLG_DEBUGIF(1) {
1705 CLG_DEBUG(1, "EventSets:\n");
1706 CLG_(print_eventset)(-2, CLG_(sets).base);
1707 CLG_(print_eventset)(-2, CLG_(sets).full);
weidendoa17f2a32006-03-20 10:27:30 +00001708 }
weidendoa17f2a32006-03-20 10:27:30 +00001709
weidendo5bba5252010-06-09 22:32:53 +00001710 /* Not-existing events are silently ignored */
1711 CLG_(dumpmap) = CLG_(get_eventmapping)(CLG_(sets).full);
1712 CLG_(append_event)(CLG_(dumpmap), "Ir");
1713 CLG_(append_event)(CLG_(dumpmap), "Dr");
1714 CLG_(append_event)(CLG_(dumpmap), "Dw");
1715 CLG_(append_event)(CLG_(dumpmap), "I1mr");
1716 CLG_(append_event)(CLG_(dumpmap), "D1mr");
1717 CLG_(append_event)(CLG_(dumpmap), "D1mw");
njn2d853a12010-10-06 22:46:31 +00001718 CLG_(append_event)(CLG_(dumpmap), "ILmr");
1719 CLG_(append_event)(CLG_(dumpmap), "DLmr");
1720 CLG_(append_event)(CLG_(dumpmap), "DLmw");
1721 CLG_(append_event)(CLG_(dumpmap), "ILdmr");
1722 CLG_(append_event)(CLG_(dumpmap), "DLdmr");
1723 CLG_(append_event)(CLG_(dumpmap), "DLdmw");
weidendo320705f2010-07-02 19:56:23 +00001724 CLG_(append_event)(CLG_(dumpmap), "Bc");
1725 CLG_(append_event)(CLG_(dumpmap), "Bcm");
1726 CLG_(append_event)(CLG_(dumpmap), "Bi");
1727 CLG_(append_event)(CLG_(dumpmap), "Bim");
weidendo5bba5252010-06-09 22:32:53 +00001728 CLG_(append_event)(CLG_(dumpmap), "AcCost1");
1729 CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
1730 CLG_(append_event)(CLG_(dumpmap), "AcCost2");
1731 CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
weidendoaeb86222010-06-09 22:33:02 +00001732 CLG_(append_event)(CLG_(dumpmap), "Ge");
weidendo5bba5252010-06-09 22:32:53 +00001733 CLG_(append_event)(CLG_(dumpmap), "allocCount");
1734 CLG_(append_event)(CLG_(dumpmap), "allocSize");
1735 CLG_(append_event)(CLG_(dumpmap), "sysCount");
1736 CLG_(append_event)(CLG_(dumpmap), "sysTime");
weidendoa17f2a32006-03-20 10:27:30 +00001737}
1738
1739
weidendoa17f2a32006-03-20 10:27:30 +00001740/* this is called at dump time for every instruction executed */
1741static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
1742 InstrInfo* ii, ULong exe_count)
1743{
weidendo5bba5252010-06-09 22:32:53 +00001744 if (!CLG_(clo).simulate_cache)
1745 cost[ fullOffset(EG_IR) ] += exe_count;
weidendoaeb86222010-06-09 22:33:02 +00001746
1747 if (ii->eventset)
weidendo5bba5252010-06-09 22:32:53 +00001748 CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
1749 ii->eventset, bbcc->cost + ii->cost_offset);
weidendoa17f2a32006-03-20 10:27:30 +00001750}
1751
1752static
weidendoa17f2a32006-03-20 10:27:30 +00001753void cachesim_finish(void)
1754{
1755 if (clo_collect_cacheuse)
1756 cacheuse_finish();
1757}
1758
1759/*------------------------------------------------------------*/
1760/*--- The simulator defined in this file ---*/
1761/*------------------------------------------------------------*/
1762
1763struct cachesim_if CLG_(cachesim) = {
1764 .print_opts = cachesim_print_opts,
1765 .parse_opt = cachesim_parse_opt,
1766 .post_clo_init = cachesim_post_clo_init,
1767 .clear = cachesim_clear,
1768 .getdesc = cachesim_getdesc,
1769 .printstat = cachesim_printstat,
1770 .add_icost = cachesim_add_icost,
weidendoa17f2a32006-03-20 10:27:30 +00001771 .finish = cachesim_finish,
1772
1773 /* these will be set by cachesim_post_clo_init */
1774 .log_1I0D = 0,
weidendo0a1951d2009-06-15 00:16:36 +00001775 .log_2I0D = 0,
1776 .log_3I0D = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001777
1778 .log_1I1Dr = 0,
1779 .log_1I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001780
1781 .log_0I1Dr = 0,
1782 .log_0I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001783
1784 .log_1I0D_name = "(no function)",
weidendo0a1951d2009-06-15 00:16:36 +00001785 .log_2I0D_name = "(no function)",
1786 .log_3I0D_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001787
1788 .log_1I1Dr_name = "(no function)",
1789 .log_1I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001790
1791 .log_0I1Dr_name = "(no function)",
1792 .log_0I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001793};
1794
1795
/*--------------------------------------------------------------------*/
/*--- end sim.c ---*/
/*--------------------------------------------------------------------*/
1799