blob: 28012c8bd8bdf2fd8c675d93979100f767401502 [file] [log] [blame]
weidendoa17f2a32006-03-20 10:27:30 +00001/*--------------------------------------------------------------------*/
2/*--- Cache simulation. ---*/
3/*--- sim.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
njn9a0cba42007-04-15 22:15:57 +00007 This file is part of Callgrind, a Valgrind tool for call graph
8 profiling programs.
weidendoa17f2a32006-03-20 10:27:30 +00009
Elliott Hughesed398002017-06-21 14:41:24 -070010 Copyright (C) 2003-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
weidendoa17f2a32006-03-20 10:27:30 +000011
njn9a0cba42007-04-15 22:15:57 +000012 This tool is derived from and contains code from Cachegrind
Elliott Hughesed398002017-06-21 14:41:24 -070013 Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)
weidendoa17f2a32006-03-20 10:27:30 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "global.h"
34
35
36/* Notes:
37 - simulates a write-allocate cache
38 - (block --> set) hash function uses simple bit selection
39 - handling of references straddling two cache blocks:
40 - counts as only one cache access (not two)
41 - both blocks hit --> one hit
42 - one block hits, the other misses --> one miss
43 - both blocks miss --> one miss (not two)
44*/
45
46/* Cache configuration */
mjw0c8cd402013-09-03 15:22:14 +000047#include "cg_arch.c"
weidendoa17f2a32006-03-20 10:27:30 +000048
/* Additional structures for cache use info, separated
 * according to usage frequency:
 * - line_loaded : pointer to cost center of instruction
 *                 which loaded the line into cache.
 *                 Needed to increment counters when line is evicted.
 * - line_use    : updated on every access
 */
56typedef struct {
57 UInt count;
58 UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */
59} line_use;
60
61typedef struct {
62 Addr memline, iaddr;
63 line_use* dep_use; /* point to higher-level cacheblock for this memline */
64 ULong* use_base;
65} line_loaded;
66
67/* Cache state */
68typedef struct {
floriandbb35842012-10-27 18:39:11 +000069 const HChar* name;
weidendoa17f2a32006-03-20 10:27:30 +000070 int size; /* bytes */
71 int assoc;
72 int line_size; /* bytes */
73 Bool sectored; /* prefetch nearside cacheline on read */
74 int sets;
75 int sets_min_1;
weidendoa17f2a32006-03-20 10:27:30 +000076 int line_size_bits;
77 int tag_shift;
78 UWord tag_mask;
florian73382952014-11-13 22:45:58 +000079 HChar desc_line[128]; // large enough
weidendoa17f2a32006-03-20 10:27:30 +000080 UWord* tags;
81
82 /* for cache use */
83 int line_size_mask;
84 int* line_start_mask;
85 int* line_end_mask;
86 line_loaded* loaded;
87 line_use* use;
88} cache_t2;
89
/*
 * States of flat caches in our model.
 * We use a 2-level hierarchy: first-level instruction (I1) and data (D1)
 * caches, both backed by one last-level cache (LL).
 */
njn2d853a12010-10-06 22:46:31 +000094static cache_t2 I1, D1, LL;
weidendoa17f2a32006-03-20 10:27:30 +000095
/* Lower bits of cache tags are used as flags for a cache line.
 * CACHELINE_FLAGMASK covers all bits usable as flags;
 * CACHELINE_DIRTY (bit 0) marks a line as written to.
 */
#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
#define CACHELINE_DIRTY    1
99
100
101/* Cache simulator Options */
102static Bool clo_simulate_writeback = False;
103static Bool clo_simulate_hwpref = False;
104static Bool clo_simulate_sectors = False;
105static Bool clo_collect_cacheuse = False;
106
/* The following global variables are set up beforehand by setup_bbcc():
 *
 * - Addr   CLG_(bb_base)     (instruction start address of original BB)
 * - ULong* CLG_(cost_base)   (start of cost array for BB)
 */
112
weidendo75a5c2d2010-06-09 22:32:58 +0000113Addr CLG_(bb_base);
114ULong* CLG_(cost_base);
115
weidendoa17f2a32006-03-20 10:27:30 +0000116static InstrInfo* current_ii;
117
118/* Cache use offsets */
weidendo0a1951d2009-06-15 00:16:36 +0000119/* The offsets are only correct because all per-instruction event sets get
weidendoa17f2a32006-03-20 10:27:30 +0000120 * the "Use" set added first !
121 */
122static Int off_I1_AcCost = 0;
123static Int off_I1_SpLoss = 1;
124static Int off_D1_AcCost = 0;
125static Int off_D1_SpLoss = 1;
njn2d853a12010-10-06 22:46:31 +0000126static Int off_LL_AcCost = 2;
127static Int off_LL_SpLoss = 3;
weidendoa17f2a32006-03-20 10:27:30 +0000128
129/* Cache access types */
130typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;
131
/* Result of a reference into a flat cache */
typedef enum {
   Hit = 0,     /* line was present */
   Miss,        /* line was not present */
   MissDirty    /* line was not present, and a dirty line got evicted */
} CacheResult;
134
/* Result of a reference into a hierarchical cache model */
typedef enum {
   L1_Hit,              /* served by I1/D1 */
   LL_Hit,              /* L1 miss, served by LL */
   MemAccess,           /* LL miss */
   WriteBackMemAccess   /* LL miss which evicted a dirty line */
} CacheModelResult;
141
142typedef CacheModelResult (*simcall_type)(Addr, UChar);
143
144static struct {
145 simcall_type I1_Read;
146 simcall_type D1_Read;
147 simcall_type D1_Write;
148} simulator;
149
150/*------------------------------------------------------------*/
151/*--- Cache Simulator Initialization ---*/
152/*------------------------------------------------------------*/
153
154static void cachesim_clearcache(cache_t2* c)
155{
156 Int i;
157
158 for (i = 0; i < c->sets * c->assoc; i++)
159 c->tags[i] = 0;
160 if (c->use) {
161 for (i = 0; i < c->sets * c->assoc; i++) {
162 c->loaded[i].memline = 0;
163 c->loaded[i].use_base = 0;
164 c->loaded[i].dep_use = 0;
165 c->loaded[i].iaddr = 0;
166 c->use[i].mask = 0;
167 c->use[i].count = 0;
168 c->tags[i] = i % c->assoc; /* init lower bits as pointer */
169 }
170 }
171}
172
173static void cacheuse_initcache(cache_t2* c);
174
175/* By this point, the size/assoc/line_size has been checked. */
176static void cachesim_initcache(cache_t config, cache_t2* c)
177{
178 c->size = config.size;
179 c->assoc = config.assoc;
180 c->line_size = config.line_size;
181 c->sectored = False; // FIXME
182
183 c->sets = (c->size / c->line_size) / c->assoc;
184 c->sets_min_1 = c->sets - 1;
weidendoa17f2a32006-03-20 10:27:30 +0000185 c->line_size_bits = VG_(log2)(c->line_size);
florian45ee0362015-02-06 20:32:15 +0000186 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
187 c->tag_mask = ~((1u<<c->tag_shift)-1);
weidendoa17f2a32006-03-20 10:27:30 +0000188
189 /* Can bits in tag entries be used for flags?
190 * Should be always true as MIN_LINE_SIZE >= 16 */
191 CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);
192
193 if (c->assoc == 1) {
194 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
195 c->size, c->line_size,
196 c->sectored ? ", sectored":"");
197 } else {
198 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
199 c->size, c->line_size, c->assoc,
200 c->sectored ? ", sectored":"");
201 }
202
sewardj9c606bd2008-09-18 18:12:50 +0000203 c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
204 sizeof(UWord) * c->sets * c->assoc);
weidendoa17f2a32006-03-20 10:27:30 +0000205 if (clo_collect_cacheuse)
206 cacheuse_initcache(c);
207 else
208 c->use = 0;
209 cachesim_clearcache(c);
210}
211
212
#if 0
/* Debugging aid (compiled out): print all tags of 'c', one row per set. */
static void print_cache(cache_t2* c)
{
   UInt set, way, i;

   /* Note initialisation and update of 'i'. */
   for (i = 0, set = 0; set < c->sets; set++) {
      for (way = 0; way < c->assoc; way++, i++) {
         /* tags are UWord, so use a long conversion */
         VG_(printf)("%8lx ", c->tags[i]);
      }
      VG_(printf)("\n");
   }
}
#endif
227
228
229/*------------------------------------------------------------*/
weidendo8ff46582012-10-29 21:28:05 +0000230/*--- Simple Cache Simulation ---*/
weidendoa17f2a32006-03-20 10:27:30 +0000231/*------------------------------------------------------------*/
232
233/*
weidendo8ff46582012-10-29 21:28:05 +0000234 * Model: single inclusive, 2-level cache hierarchy (L1/LL)
235 * with write-allocate
236 *
237 * For simple cache hit/miss counts, we do not have to
238 * maintain the dirty state of lines (no need to distinguish
239 * read/write references), and the resulting counts are the
240 * same for write-through and write-back caches.
weidendoa17f2a32006-03-20 10:27:30 +0000241 *
242 * Simulator functions:
243 * CacheModelResult cachesim_I1_ref(Addr a, UChar size)
244 * CacheModelResult cachesim_D1_ref(Addr a, UChar size)
245 */
weidendo8ff46582012-10-29 21:28:05 +0000246__attribute__((always_inline))
weidendoa17f2a32006-03-20 10:27:30 +0000247static __inline__
248CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
249{
250 int i, j;
251 UWord *set;
252
weidendo144b76c2009-01-26 22:56:14 +0000253 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000254
255 /* This loop is unrolled for just the first case, which is the most */
256 /* common. We can't unroll any further because it would screw up */
257 /* if we have a direct-mapped (1-way) cache. */
258 if (tag == set[0])
259 return Hit;
260
261 /* If the tag is one other than the MRU, move it into the MRU spot */
262 /* and shuffle the rest down. */
263 for (i = 1; i < c->assoc; i++) {
264 if (tag == set[i]) {
265 for (j = i; j > 0; j--) {
266 set[j] = set[j - 1];
267 }
268 set[0] = tag;
269 return Hit;
270 }
271 }
272
273 /* A miss; install this tag as MRU, shuffle rest down. */
274 for (j = c->assoc - 1; j > 0; j--) {
275 set[j] = set[j - 1];
276 }
277 set[0] = tag;
278
279 return Miss;
280}
281
weidendo8ff46582012-10-29 21:28:05 +0000282__attribute__((always_inline))
283static __inline__
284CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
weidendoa17f2a32006-03-20 10:27:30 +0000285{
weidendo157c7fb2012-10-29 21:47:45 +0000286 UWord block1 = a >> c->line_size_bits;
287 UWord block2 = (a+size-1) >> c->line_size_bits;
288 UInt set1 = block1 & c->sets_min_1;
289 /* the tag does not need to include bits specifying the set,
290 * but it can, and this saves instructions */
291 UWord tag1 = block1;
weidendoa17f2a32006-03-20 10:27:30 +0000292
293 /* Access entirely within line. */
weidendo157c7fb2012-10-29 21:47:45 +0000294 if (block1 == block2)
295 return cachesim_setref(c, set1, tag1);
weidendoa17f2a32006-03-20 10:27:30 +0000296
297 /* Access straddles two lines. */
weidendo157c7fb2012-10-29 21:47:45 +0000298 else if (block1 + 1 == block2) {
299 UInt set2 = block2 & c->sets_min_1;
300 UWord tag2 = block2;
weidendoa17f2a32006-03-20 10:27:30 +0000301
302 /* the call updates cache structures as side effect */
weidendo157c7fb2012-10-29 21:47:45 +0000303 CacheResult res1 = cachesim_setref(c, set1, tag1);
weidendo28e2a142006-11-22 21:00:53 +0000304 CacheResult res2 = cachesim_setref(c, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000305 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
306
307 } else {
florianb7876db2015-08-05 19:04:51 +0000308 VG_(printf)("addr: %lx size: %u blocks: %lu %lu",
weidendo157c7fb2012-10-29 21:47:45 +0000309 a, size, block1, block2);
weidendoa17f2a32006-03-20 10:27:30 +0000310 VG_(tool_panic)("item straddles more than two cache sets");
311 }
312 return Hit;
313}
314
315static
316CacheModelResult cachesim_I1_ref(Addr a, UChar size)
317{
318 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000319 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000320 return MemAccess;
321}
322
323static
324CacheModelResult cachesim_D1_ref(Addr a, UChar size)
325{
326 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000327 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000328 return MemAccess;
329}
330
331
332/*------------------------------------------------------------*/
333/*--- Write Back Cache Simulation ---*/
334/*------------------------------------------------------------*/
335
336/*
njn2d853a12010-10-06 22:46:31 +0000337 * More complex model: L1 Write-through, LL Write-back
weidendoa17f2a32006-03-20 10:27:30 +0000338 * This needs to distinguish among read and write references.
339 *
340 * Simulator functions:
341 * CacheModelResult cachesim_I1_Read(Addr a, UChar size)
342 * CacheModelResult cachesim_D1_Read(Addr a, UChar size)
343 * CacheModelResult cachesim_D1_Write(Addr a, UChar size)
344 */
345
346/*
347 * With write-back, result can be a miss evicting a dirty line
348 * The dirty state of a cache line is stored in Bit0 of the tag for
349 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
350 * type (Read/Write), the line gets dirty on a write.
351 */
weidendo8ff46582012-10-29 21:28:05 +0000352__attribute__((always_inline))
weidendoa17f2a32006-03-20 10:27:30 +0000353static __inline__
354CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
355{
356 int i, j;
357 UWord *set, tmp_tag;
358
weidendo144b76c2009-01-26 22:56:14 +0000359 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000360
361 /* This loop is unrolled for just the first case, which is the most */
362 /* common. We can't unroll any further because it would screw up */
363 /* if we have a direct-mapped (1-way) cache. */
364 if (tag == (set[0] & ~CACHELINE_DIRTY)) {
365 set[0] |= ref;
366 return Hit;
367 }
368 /* If the tag is one other than the MRU, move it into the MRU spot */
369 /* and shuffle the rest down. */
370 for (i = 1; i < c->assoc; i++) {
371 if (tag == (set[i] & ~CACHELINE_DIRTY)) {
372 tmp_tag = set[i] | ref; // update dirty flag
373 for (j = i; j > 0; j--) {
374 set[j] = set[j - 1];
375 }
376 set[0] = tmp_tag;
377 return Hit;
378 }
379 }
380
381 /* A miss; install this tag as MRU, shuffle rest down. */
382 tmp_tag = set[c->assoc - 1];
383 for (j = c->assoc - 1; j > 0; j--) {
384 set[j] = set[j - 1];
385 }
386 set[0] = tag | ref;
387
388 return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
389}
390
weidendo8ff46582012-10-29 21:28:05 +0000391__attribute__((always_inline))
weidendoa17f2a32006-03-20 10:27:30 +0000392static __inline__
393CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
394{
395 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
396 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
397 UWord tag = a & c->tag_mask;
398
399 /* Access entirely within line. */
400 if (set1 == set2)
401 return cachesim_setref_wb(c, ref, set1, tag);
402
403 /* Access straddles two lines. */
404 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
tom7c0f6392011-06-09 12:26:42 +0000405 else if (((set1 + 1) & (c->sets_min_1)) == set2) {
weidendo144b76c2009-01-26 22:56:14 +0000406 UWord tag2 = (a+size-1) & c->tag_mask;
weidendoa17f2a32006-03-20 10:27:30 +0000407
408 /* the call updates cache structures as side effect */
409 CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000410 CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000411
412 if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
413 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
414
415 } else {
florianb7876db2015-08-05 19:04:51 +0000416 VG_(printf)("addr: %lx size: %u sets: %u %u", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000417 VG_(tool_panic)("item straddles more than two cache sets");
418 }
419 return Hit;
420}
421
422
423static
424CacheModelResult cachesim_I1_Read(Addr a, UChar size)
425{
426 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000427 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
428 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000429 case Miss: return MemAccess;
430 default: break;
431 }
432 return WriteBackMemAccess;
433}
434
435static
436CacheModelResult cachesim_D1_Read(Addr a, UChar size)
437{
438 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000439 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
440 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000441 case Miss: return MemAccess;
442 default: break;
443 }
444 return WriteBackMemAccess;
445}
446
447static
448CacheModelResult cachesim_D1_Write(Addr a, UChar size)
449{
450 if ( cachesim_ref( &D1, a, size) == Hit ) {
451 /* Even for a L1 hit, the write-trough L1 passes
njn2d853a12010-10-06 22:46:31 +0000452 * the write to the LL to make the LL line dirty.
weidendoa17f2a32006-03-20 10:27:30 +0000453 * But this causes no latency, so return the hit.
454 */
njn2d853a12010-10-06 22:46:31 +0000455 cachesim_ref_wb( &LL, Write, a, size);
weidendoa17f2a32006-03-20 10:27:30 +0000456 return L1_Hit;
457 }
njn2d853a12010-10-06 22:46:31 +0000458 switch( cachesim_ref_wb( &LL, Write, a, size) ) {
459 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000460 case Miss: return MemAccess;
461 default: break;
462 }
463 return WriteBackMemAccess;
464}
465
466
467/*------------------------------------------------------------*/
468/*--- Hardware Prefetch Simulation ---*/
469/*------------------------------------------------------------*/
470
471static ULong prefetch_up = 0;
472static ULong prefetch_down = 0;
473
474#define PF_STREAMS 8
475#define PF_PAGEBITS 12
476
477static UInt pf_lastblock[PF_STREAMS];
478static Int pf_seqblocks[PF_STREAMS];
479
480static
481void prefetch_clear(void)
482{
483 int i;
484 for(i=0;i<PF_STREAMS;i++)
485 pf_lastblock[i] = pf_seqblocks[i] = 0;
486}
487
488/*
489 * HW Prefetch emulation
490 * Start prefetching when detecting sequential access to 3 memory blocks.
491 * One stream can be detected per 4k page.
492 */
493static __inline__
njn2d853a12010-10-06 22:46:31 +0000494void prefetch_LL_doref(Addr a)
weidendoa17f2a32006-03-20 10:27:30 +0000495{
496 UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
njn2d853a12010-10-06 22:46:31 +0000497 UInt block = ( a >> LL.line_size_bits);
weidendoa17f2a32006-03-20 10:27:30 +0000498
499 if (block != pf_lastblock[stream]) {
500 if (pf_seqblocks[stream] == 0) {
501 if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
502 else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
503 }
504 else if (pf_seqblocks[stream] >0) {
505 if (pf_lastblock[stream] +1 == block) {
506 pf_seqblocks[stream]++;
507 if (pf_seqblocks[stream] >= 2) {
508 prefetch_up++;
njn2d853a12010-10-06 22:46:31 +0000509 cachesim_ref(&LL, a + 5 * LL.line_size,1);
weidendoa17f2a32006-03-20 10:27:30 +0000510 }
511 }
512 else pf_seqblocks[stream] = 0;
513 }
514 else if (pf_seqblocks[stream] <0) {
515 if (pf_lastblock[stream] -1 == block) {
516 pf_seqblocks[stream]--;
517 if (pf_seqblocks[stream] <= -2) {
518 prefetch_down++;
njn2d853a12010-10-06 22:46:31 +0000519 cachesim_ref(&LL, a - 5 * LL.line_size,1);
weidendoa17f2a32006-03-20 10:27:30 +0000520 }
521 }
522 else pf_seqblocks[stream] = 0;
523 }
524 pf_lastblock[stream] = block;
525 }
526}
527
528/* simple model with hardware prefetch */
529
530static
531CacheModelResult prefetch_I1_ref(Addr a, UChar size)
532{
533 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000534 prefetch_LL_doref(a);
535 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000536 return MemAccess;
537}
538
539static
540CacheModelResult prefetch_D1_ref(Addr a, UChar size)
541{
542 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000543 prefetch_LL_doref(a);
544 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000545 return MemAccess;
546}
547
548
549/* complex model with hardware prefetch */
550
551static
552CacheModelResult prefetch_I1_Read(Addr a, UChar size)
553{
554 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000555 prefetch_LL_doref(a);
556 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
557 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000558 case Miss: return MemAccess;
559 default: break;
560 }
561 return WriteBackMemAccess;
562}
563
564static
565CacheModelResult prefetch_D1_Read(Addr a, UChar size)
566{
567 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
njn2d853a12010-10-06 22:46:31 +0000568 prefetch_LL_doref(a);
569 switch( cachesim_ref_wb( &LL, Read, a, size) ) {
570 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000571 case Miss: return MemAccess;
572 default: break;
573 }
574 return WriteBackMemAccess;
575}
576
577static
578CacheModelResult prefetch_D1_Write(Addr a, UChar size)
579{
njn2d853a12010-10-06 22:46:31 +0000580 prefetch_LL_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000581 if ( cachesim_ref( &D1, a, size) == Hit ) {
582 /* Even for a L1 hit, the write-trough L1 passes
njn2d853a12010-10-06 22:46:31 +0000583 * the write to the LL to make the LL line dirty.
weidendoa17f2a32006-03-20 10:27:30 +0000584 * But this causes no latency, so return the hit.
585 */
njn2d853a12010-10-06 22:46:31 +0000586 cachesim_ref_wb( &LL, Write, a, size);
weidendoa17f2a32006-03-20 10:27:30 +0000587 return L1_Hit;
588 }
njn2d853a12010-10-06 22:46:31 +0000589 switch( cachesim_ref_wb( &LL, Write, a, size) ) {
590 case Hit: return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000591 case Miss: return MemAccess;
592 default: break;
593 }
594 return WriteBackMemAccess;
595}
596
597
598/*------------------------------------------------------------*/
599/*--- Cache Simulation with use metric collection ---*/
600/*------------------------------------------------------------*/
601
602/* can not be combined with write-back or prefetch */
603
604static
605void cacheuse_initcache(cache_t2* c)
606{
607 int i;
608 unsigned int start_mask, start_val;
609 unsigned int end_mask, end_val;
610
sewardj9c606bd2008-09-18 18:12:50 +0000611 c->use = CLG_MALLOC("cl.sim.cu_ic.1",
612 sizeof(line_use) * c->sets * c->assoc);
613 c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
614 sizeof(line_loaded) * c->sets * c->assoc);
615 c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
616 sizeof(int) * c->line_size);
617 c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
618 sizeof(int) * c->line_size);
weidendoa17f2a32006-03-20 10:27:30 +0000619
weidendoa17f2a32006-03-20 10:27:30 +0000620 c->line_size_mask = c->line_size-1;
621
622 /* Meaning of line_start_mask/line_end_mask
623 * Example: for a given cache line, you get an access starting at
624 * byte offset 5, length 4, byte 5 - 8 was touched. For a cache
625 * line size of 32, you have 1 bit per byte in the mask:
626 *
627 * bit31 bit8 bit5 bit 0
628 * | | | |
629 * 11..111111100000 line_start_mask[5]
630 * 00..000111111111 line_end_mask[(5+4)-1]
631 *
632 * use_mask |= line_start_mask[5] && line_end_mask[8]
633 *
634 */
635 start_val = end_val = ~0;
636 if (c->line_size < 32) {
637 int bits_per_byte = 32/c->line_size;
638 start_mask = (1<<bits_per_byte)-1;
639 end_mask = start_mask << (32-bits_per_byte);
640 for(i=0;i<c->line_size;i++) {
641 c->line_start_mask[i] = start_val;
642 start_val = start_val & ~start_mask;
643 start_mask = start_mask << bits_per_byte;
644
645 c->line_end_mask[c->line_size-i-1] = end_val;
646 end_val = end_val & ~end_mask;
647 end_mask = end_mask >> bits_per_byte;
648 }
649 }
650 else {
651 int bytes_per_bit = c->line_size/32;
652 start_mask = 1;
florian45ee0362015-02-06 20:32:15 +0000653 end_mask = 1u << 31;
weidendoa17f2a32006-03-20 10:27:30 +0000654 for(i=0;i<c->line_size;i++) {
655 c->line_start_mask[i] = start_val;
656 c->line_end_mask[c->line_size-i-1] = end_val;
657 if ( ((i+1)%bytes_per_bit) == 0) {
658 start_val &= ~start_mask;
659 end_val &= ~end_mask;
660 start_mask <<= 1;
661 end_mask >>= 1;
662 }
663 }
664 }
665
666 CLG_DEBUG(6, "Config %s:\n", c->desc_line);
667 for(i=0;i<c->line_size;i++) {
668 CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
florianb7876db2015-08-05 19:04:51 +0000669 i, (UInt)c->line_start_mask[i], (UInt)c->line_end_mask[i]);
weidendoa17f2a32006-03-20 10:27:30 +0000670 }
671
672 /* We use lower tag bits as offset pointers to cache use info.
673 * I.e. some cache parameters don't work.
674 */
weidendo144b76c2009-01-26 22:56:14 +0000675 if ( (1<<c->tag_shift) < c->assoc) {
weidendoa17f2a32006-03-20 10:27:30 +0000676 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +0000677 "error: Use associativity < %d for cache use statistics!\n",
weidendoa17f2a32006-03-20 10:27:30 +0000678 (1<<c->tag_shift) );
679 VG_(tool_panic)("Unsupported cache configuration");
680 }
681}
682
weidendoa17f2a32006-03-20 10:27:30 +0000683
/* for I1/D1 caches
 *
 * CACHEUSE(L) defines cacheuse_<L>_doRead(a, size): a reference of 'size'
 * bytes at address 'a' into cache L which also maintains use statistics.
 *
 * The lower bits of each tag (those outside L.tag_mask) hold the index,
 * within the set, of the loaded[]/use[] record belonging to that tag;
 * they travel with the tag when the set is reordered (MRU first).
 * On a hit, the use count is incremented and the touched parts are
 * OR'ed into the use mask. On a miss, the record of the LRU entry is
 * taken over and update_<L>_use() is called, whose result is returned.
 *
 * Straddling is decided on block numbers, as in cachesim_ref().
 * Comparing set numbers is wrong for a cache with a single set (fully
 * associative): there set1 == set2 always holds and the second block of
 * a straddling access would never be referenced.
 */
#define CACHEUSE(L) \
 \
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \
{ \
   UWord block1 = a >> L.line_size_bits; \
   UWord block2 = (a+size-1) >> L.line_size_bits; \
   UInt set1 = block1 & (L.sets_min_1); \
   UInt set2 = block2 & (L.sets_min_1); \
   UWord tag = a & L.tag_mask; \
   UWord tag2; \
   int i, j, idx; \
   UWord *set, tmp_tag; \
   UInt use_mask; \
 \
   CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%u/%u]\n", \
             L.name, a, size, set1, set2); \
 \
   /* First case: word entirely within line. */ \
   if (block1 == block2) { \
 \
      set = &(L.tags[set1 * L.assoc]); \
      use_mask = L.line_start_mask[a & L.line_size_mask] & \
                 L.line_end_mask[(a+size-1) & L.line_size_mask]; \
 \
      /* This loop is unrolled for just the first case, which is the most */ \
      /* common. We can't unroll any further because it would screw up */ \
      /* if we have a direct-mapped (1-way) cache. */ \
      if (tag == (set[0] & L.tag_mask)) { \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %u\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         return L1_Hit; \
      } \
      /* If the tag is one other than the MRU, move it into the MRU spot */ \
      /* and shuffle the rest down. */ \
      for (i = 1; i < L.assoc; i++) { \
         if (tag == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %u\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            return L1_Hit; \
         } \
      } \
 \
      /* A miss; install this tag as MRU, shuffle rest down. */ \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag | tmp_tag; \
      idx = (set1 * L.assoc) + tmp_tag; \
      return update_##L##_use(&L, idx, \
                              use_mask, a &~ L.line_size_mask); \
 \
   /* Second case: word straddles two lines. */ \
   } else if (block1 + 1 == block2) { \
      Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:LL miss */ \
      set = &(L.tags[set1 * L.assoc]); \
      use_mask = L.line_start_mask[a & L.line_size_mask]; \
      if (tag == (set[0] & L.tag_mask)) { \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %u\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         goto block2; \
      } \
      for (i = 1; i < L.assoc; i++) { \
         if (tag == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %u\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            goto block2; \
         } \
      } \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag | tmp_tag; \
      idx = (set1 * L.assoc) + tmp_tag; \
      miss1 = update_##L##_use(&L, idx, \
                               use_mask, a &~ L.line_size_mask); \
block2: \
      set = &(L.tags[set2 * L.assoc]); \
      use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
      tag2 = (a+size-1) & L.tag_mask; \
      if (tag2 == (set[0] & L.tag_mask)) { \
         idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %u\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         return miss1; \
      } \
      for (i = 1; i < L.assoc; i++) { \
         if (tag2 == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %u\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            return miss1; \
         } \
      } \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag2 | tmp_tag; \
      idx = (set2 * L.assoc) + tmp_tag; \
      miss2 = update_##L##_use(&L, idx, \
                               use_mask, (a+size-1) &~ L.line_size_mask); \
      return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:LL_Hit; \
 \
   } else { \
      VG_(printf)("addr: %#lx size: %u sets: %u %u", a, size, set1, set2); \
      VG_(tool_panic)("item straddles more than two cache sets"); \
   } \
   return L1_Hit; \
}
832
833
/* Population count of <bits>, using the parallel ("logarithmic")
 * bit-counting scheme described at
 * http://graphics.stanford.edu/~seander/bithacks.html
 *
 * Each step adds neighbouring bit fields pairwise, doubling the field
 * width (1, 2, 4, 8, 16 bits) until one field holds the total.
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
    unsigned int sum = bits;

    sum = (sum & 0x55555555u) + ((sum >> 1) & 0x55555555u);
    sum = (sum & 0x33333333u) + ((sum >> 2) & 0x33333333u);
    sum = (sum & 0x0F0F0F0Fu) + ((sum >> 4) & 0x0F0F0F0Fu);
    sum = (sum & 0x00FF00FFu) + ((sum >> 8) & 0x00FF00FFu);
    sum = (sum & 0x0000FFFFu) + ((sum >> 16) & 0x0000FFFFu);

    return sum;
}
851
njn2d853a12010-10-06 22:46:31 +0000852static void update_LL_use(int idx, Addr memline)
weidendoa17f2a32006-03-20 10:27:30 +0000853{
njn2d853a12010-10-06 22:46:31 +0000854 line_loaded* loaded = &(LL.loaded[idx]);
855 line_use* use = &(LL.use[idx]);
856 int i = ((32 - countBits(use->mask)) * LL.line_size)>>5;
weidendoa17f2a32006-03-20 10:27:30 +0000857
njn2d853a12010-10-06 22:46:31 +0000858 CLG_DEBUG(2, " LL.miss [%d]: at %#lx accessing memline %#lx\n",
weidendo75a5c2d2010-06-09 22:32:58 +0000859 idx, CLG_(bb_base) + current_ii->instr_offset, memline);
weidendoa17f2a32006-03-20 10:27:30 +0000860 if (use->count>0) {
florianb7876db2015-08-05 19:04:51 +0000861 CLG_DEBUG(2, " old: used %u, loss bits %d (%08x) [line %#lx from %#lx]\n",
weidendoa17f2a32006-03-20 10:27:30 +0000862 use->count, i, use->mask, loaded->memline, loaded->iaddr);
863 CLG_DEBUG(2, " collect: %d, use_base %p\n",
864 CLG_(current_state).collect, loaded->use_base);
865
866 if (CLG_(current_state).collect && loaded->use_base) {
njn2d853a12010-10-06 22:46:31 +0000867 (loaded->use_base)[off_LL_AcCost] += 1000 / use->count;
868 (loaded->use_base)[off_LL_SpLoss] += i;
weidendoa17f2a32006-03-20 10:27:30 +0000869 }
870 }
871
872 use->count = 0;
873 use->mask = 0;
874
875 loaded->memline = memline;
weidendo75a5c2d2010-06-09 22:32:58 +0000876 loaded->iaddr = CLG_(bb_base) + current_ii->instr_offset;
weidendoa17f2a32006-03-20 10:27:30 +0000877 loaded->use_base = (CLG_(current_state).nonskipped) ?
878 CLG_(current_state).nonskipped->skipped :
weidendo75a5c2d2010-06-09 22:32:58 +0000879 CLG_(cost_base) + current_ii->cost_offset;
weidendoa17f2a32006-03-20 10:27:30 +0000880}
881
882static
njn2d853a12010-10-06 22:46:31 +0000883CacheModelResult cacheuse_LL_access(Addr memline, line_loaded* l1_loaded)
weidendoa17f2a32006-03-20 10:27:30 +0000884{
njn2d853a12010-10-06 22:46:31 +0000885 UInt setNo = (memline >> LL.line_size_bits) & (LL.sets_min_1);
886 UWord* set = &(LL.tags[setNo * LL.assoc]);
887 UWord tag = memline & LL.tag_mask;
weidendoa17f2a32006-03-20 10:27:30 +0000888
889 int i, j, idx;
890 UWord tmp_tag;
891
florianb7876db2015-08-05 19:04:51 +0000892 CLG_DEBUG(6,"LL.Acc(Memline %#lx): Set %u\n", memline, setNo);
weidendoa17f2a32006-03-20 10:27:30 +0000893
njn2d853a12010-10-06 22:46:31 +0000894 if (tag == (set[0] & LL.tag_mask)) {
895 idx = (setNo * LL.assoc) + (set[0] & ~LL.tag_mask);
896 l1_loaded->dep_use = &(LL.use[idx]);
weidendoa17f2a32006-03-20 10:27:30 +0000897
florianb7876db2015-08-05 19:04:51 +0000898 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %u\n",
njn2d853a12010-10-06 22:46:31 +0000899 idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
900 LL.use[idx].mask, LL.use[idx].count);
901 return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000902 }
njn2d853a12010-10-06 22:46:31 +0000903 for (i = 1; i < LL.assoc; i++) {
904 if (tag == (set[i] & LL.tag_mask)) {
weidendoa17f2a32006-03-20 10:27:30 +0000905 tmp_tag = set[i];
906 for (j = i; j > 0; j--) {
907 set[j] = set[j - 1];
908 }
909 set[0] = tmp_tag;
njn2d853a12010-10-06 22:46:31 +0000910 idx = (setNo * LL.assoc) + (tmp_tag & ~LL.tag_mask);
911 l1_loaded->dep_use = &(LL.use[idx]);
weidendoa17f2a32006-03-20 10:27:30 +0000912
florianb7876db2015-08-05 19:04:51 +0000913 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %u\n",
njn2d853a12010-10-06 22:46:31 +0000914 i, idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
915 LL.use[idx].mask, LL.use[idx].count);
916 return LL_Hit;
weidendoa17f2a32006-03-20 10:27:30 +0000917 }
918 }
919
920 /* A miss; install this tag as MRU, shuffle rest down. */
njn2d853a12010-10-06 22:46:31 +0000921 tmp_tag = set[LL.assoc - 1] & ~LL.tag_mask;
922 for (j = LL.assoc - 1; j > 0; j--) {
weidendoa17f2a32006-03-20 10:27:30 +0000923 set[j] = set[j - 1];
924 }
925 set[0] = tag | tmp_tag;
njn2d853a12010-10-06 22:46:31 +0000926 idx = (setNo * LL.assoc) + tmp_tag;
927 l1_loaded->dep_use = &(LL.use[idx]);
weidendoa17f2a32006-03-20 10:27:30 +0000928
njn2d853a12010-10-06 22:46:31 +0000929 update_LL_use(idx, memline);
weidendoa17f2a32006-03-20 10:27:30 +0000930
931 return MemAccess;
932}
933
934
935
936
/* UPDATE_USE(L) generates update_<L>_use(cache, idx, mask, memline),
 * which handles the (re)load of line <idx> of <cache>:
 *  - if the line being replaced was used at least once, and cost
 *    collection is active and the line has a cost array attached
 *    (use_base), its access cost (1000 / use count) and spatial loss
 *    are added to that cost array, and its use mask/count are merged
 *    into the use record referenced by dep_use;
 *  - the use record restarts with count 1 and the given mask, and the
 *    line is recorded as loaded by the instruction in current_ii;
 *  - for memline 0 the result is LL_Hit; otherwise the access is passed
 *    on to cacheuse_LL_access() and its result is returned.
 */
#define UPDATE_USE(L) \
 \
static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
                                           UInt mask, Addr memline) \
{ \
    line_loaded* slot = &(cache->loaded[idx]); \
    line_use* stats = &(cache->use[idx]); \
    int loss = ((32 - countBits(stats->mask)) * cache->line_size)>>5; \
 \
    CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \
              cache->name, idx, CLG_(bb_base) + current_ii->instr_offset, \
              memline, mask); \
 \
    if (stats->count > 0) { \
        CLG_DEBUG(2, " old: used %u, loss bits %d (%08x) [line %#lx from %#lx]\n", \
                  stats->count, loss, stats->mask, \
                  slot->memline, slot->iaddr); \
        CLG_DEBUG(2, " collect: %d, use_base %p\n", \
                  CLG_(current_state).collect, slot->use_base); \
 \
        if (CLG_(current_state).collect && slot->use_base) { \
            (slot->use_base)[off_##L##_AcCost] += 1000 / stats->count; \
            (slot->use_base)[off_##L##_SpLoss] += loss; \
 \
            /* FIXME (?): L1/LL line sizes must be equal ! */ \
            slot->dep_use->mask |= stats->mask; \
            slot->dep_use->count += stats->count; \
        } \
    } \
 \
    stats->count = 1; \
    stats->mask = mask; \
 \
    slot->memline = memline; \
    slot->iaddr = CLG_(bb_base) + current_ii->instr_offset; \
    if (CLG_(current_state).nonskipped) { \
        slot->use_base = CLG_(current_state).nonskipped->skipped; \
    } \
    else { \
        slot->use_base = CLG_(cost_base) + current_ii->cost_offset; \
    } \
 \
    if (memline == 0) { \
        return LL_Hit; \
    } \
    return cacheuse_LL_access(memline, slot); \
}
975
976UPDATE_USE(I1);
977UPDATE_USE(D1);
978
979CACHEUSE(I1);
980CACHEUSE(D1);
981
982
983static
984void cacheuse_finish(void)
985{
986 int i;
weidendo0a1951d2009-06-15 00:16:36 +0000987 InstrInfo ii = { 0,0,0,0 };
weidendoa17f2a32006-03-20 10:27:30 +0000988
989 if (!CLG_(current_state).collect) return;
990
weidendo75a5c2d2010-06-09 22:32:58 +0000991 CLG_(bb_base) = 0;
weidendo061f0792011-05-11 12:28:01 +0000992 current_ii = &ii; /* needs to be set for update_XX_use */
weidendo75a5c2d2010-06-09 22:32:58 +0000993 CLG_(cost_base) = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000994
995 /* update usage counters */
996 if (I1.use)
997 for (i = 0; i < I1.sets * I1.assoc; i++)
998 if (I1.loaded[i].use_base)
999 update_I1_use( &I1, i, 0,0);
1000
1001 if (D1.use)
1002 for (i = 0; i < D1.sets * D1.assoc; i++)
1003 if (D1.loaded[i].use_base)
1004 update_D1_use( &D1, i, 0,0);
1005
njn2d853a12010-10-06 22:46:31 +00001006 if (LL.use)
1007 for (i = 0; i < LL.sets * LL.assoc; i++)
1008 if (LL.loaded[i].use_base)
1009 update_LL_use(i, 0);
weidendo061f0792011-05-11 12:28:01 +00001010
1011 current_ii = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001012}
1013
1014
1015
1016/*------------------------------------------------------------*/
1017/*--- Helper functions called by instrumented code ---*/
1018/*------------------------------------------------------------*/
1019
1020
1021static __inline__
1022void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
1023{
1024 switch(r) {
1025 case WriteBackMemAccess:
1026 if (clo_simulate_writeback) {
1027 c1[3]++;
1028 c2[3]++;
1029 }
1030 // fall through
1031
1032 case MemAccess:
1033 c1[2]++;
1034 c2[2]++;
1035 // fall through
1036
njn2d853a12010-10-06 22:46:31 +00001037 case LL_Hit:
weidendoa17f2a32006-03-20 10:27:30 +00001038 c1[1]++;
1039 c2[1]++;
1040 // fall through
1041
1042 default:
1043 c1[0]++;
1044 c2[0]++;
1045 }
1046}
1047
weidendo0a1951d2009-06-15 00:16:36 +00001048static
florian25f6c572012-10-21 02:55:56 +00001049const HChar* cacheRes(CacheModelResult r)
weidendo0a1951d2009-06-15 00:16:36 +00001050{
1051 switch(r) {
1052 case L1_Hit: return "L1 Hit ";
njn2d853a12010-10-06 22:46:31 +00001053 case LL_Hit: return "LL Hit ";
1054 case MemAccess: return "LL Miss";
1055 case WriteBackMemAccess: return "LL Miss (dirty)";
weidendo0a1951d2009-06-15 00:16:36 +00001056 default:
1057 tl_assert(0);
1058 }
1059 return "??";
1060}
weidendoa17f2a32006-03-20 10:27:30 +00001061
1062VG_REGPARM(1)
1063static void log_1I0D(InstrInfo* ii)
1064{
1065 CacheModelResult IrRes;
1066
1067 current_ii = ii;
weidendo75a5c2d2010-06-09 22:32:58 +00001068 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
weidendoa17f2a32006-03-20 10:27:30 +00001069
weidendo0a1951d2009-06-15 00:16:36 +00001070 CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001071 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001072
1073 if (CLG_(current_state).collect) {
1074 ULong* cost_Ir;
weidendo0a1951d2009-06-15 00:16:36 +00001075
weidendoa17f2a32006-03-20 10:27:30 +00001076 if (CLG_(current_state).nonskipped)
weidendo5bba5252010-06-09 22:32:53 +00001077 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
weidendoa17f2a32006-03-20 10:27:30 +00001078 else
weidendo75a5c2d2010-06-09 22:32:58 +00001079 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
weidendoa17f2a32006-03-20 10:27:30 +00001080
1081 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001082 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001083 }
1084}
1085
weidendo0a1951d2009-06-15 00:16:36 +00001086VG_REGPARM(2)
1087static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
1088{
1089 CacheModelResult Ir1Res, Ir2Res;
1090 ULong *global_cost_Ir;
1091
1092 current_ii = ii1;
weidendo75a5c2d2010-06-09 22:32:58 +00001093 Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001094 current_ii = ii2;
weidendo75a5c2d2010-06-09 22:32:58 +00001095 Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001096
1097 CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001098 CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1099 CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );
weidendo0a1951d2009-06-15 00:16:36 +00001100
1101 if (!CLG_(current_state).collect) return;
1102
weidendo5bba5252010-06-09 22:32:53 +00001103 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001104 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001105 ULong* skipped_cost_Ir =
1106 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1107
weidendo0a1951d2009-06-15 00:16:36 +00001108 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1109 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1110 return;
1111 }
1112
weidendo5bba5252010-06-09 22:32:53 +00001113 inc_costs(Ir1Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001114 CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
weidendo5bba5252010-06-09 22:32:53 +00001115 inc_costs(Ir2Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001116 CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
weidendo0a1951d2009-06-15 00:16:36 +00001117}
1118
1119VG_REGPARM(3)
1120static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
1121{
1122 CacheModelResult Ir1Res, Ir2Res, Ir3Res;
1123 ULong *global_cost_Ir;
1124
1125 current_ii = ii1;
weidendo75a5c2d2010-06-09 22:32:58 +00001126 Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001127 current_ii = ii2;
weidendo75a5c2d2010-06-09 22:32:58 +00001128 Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001129 current_ii = ii3;
weidendo75a5c2d2010-06-09 22:32:58 +00001130 Ir3Res = (*simulator.I1_Read)(CLG_(bb_base) + ii3->instr_offset, ii3->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001131
1132 CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001133 CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1134 CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
1135 CLG_(bb_base) + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );
weidendo0a1951d2009-06-15 00:16:36 +00001136
1137 if (!CLG_(current_state).collect) return;
1138
weidendo5bba5252010-06-09 22:32:53 +00001139 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001140 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001141 ULong* skipped_cost_Ir =
1142 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001143 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1144 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1145 inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
1146 return;
1147 }
1148
weidendo5bba5252010-06-09 22:32:53 +00001149 inc_costs(Ir1Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001150 CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
weidendo5bba5252010-06-09 22:32:53 +00001151 inc_costs(Ir2Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001152 CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
weidendo5bba5252010-06-09 22:32:53 +00001153 inc_costs(Ir3Res, global_cost_Ir,
weidendo75a5c2d2010-06-09 22:32:58 +00001154 CLG_(cost_base) + ii3->cost_offset + ii3->eventset->offset[EG_IR]);
weidendo0a1951d2009-06-15 00:16:36 +00001155}
weidendoa17f2a32006-03-20 10:27:30 +00001156
1157/* Instruction doing a read access */
1158
weidendo0a1951d2009-06-15 00:16:36 +00001159VG_REGPARM(3)
1160static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001161{
1162 CacheModelResult IrRes, DrRes;
1163
1164 current_ii = ii;
weidendo75a5c2d2010-06-09 22:32:58 +00001165 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001166 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001167
florianb7876db2015-08-05 19:04:51 +00001168 CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%ld => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001169 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
weidendo0a1951d2009-06-15 00:16:36 +00001170 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001171
1172 if (CLG_(current_state).collect) {
1173 ULong *cost_Ir, *cost_Dr;
1174
1175 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001176 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1177 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
weidendoa17f2a32006-03-20 10:27:30 +00001178 }
1179 else {
weidendo75a5c2d2010-06-09 22:32:58 +00001180 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
1181 cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];
weidendoa17f2a32006-03-20 10:27:30 +00001182 }
1183
1184 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001185 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001186 inc_costs(DrRes, cost_Dr,
weidendo5bba5252010-06-09 22:32:53 +00001187 CLG_(current_state).cost + fullOffset(EG_DR) );
weidendoa17f2a32006-03-20 10:27:30 +00001188 }
1189}
1190
1191
sewardjcafe5052013-01-17 14:24:35 +00001192/* Note that addEvent_D_guarded assumes that log_0I1Dr and log_0I1Dw
1193 have exactly the same prototype. If you change them, you must
1194 change addEvent_D_guarded too. */
weidendo0a1951d2009-06-15 00:16:36 +00001195VG_REGPARM(3)
1196static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001197{
1198 CacheModelResult DrRes;
1199
1200 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001201 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001202
florianb7876db2015-08-05 19:04:51 +00001203 CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%ld => %s\n",
weidendo0a1951d2009-06-15 00:16:36 +00001204 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001205
1206 if (CLG_(current_state).collect) {
1207 ULong *cost_Dr;
1208
weidendo5bba5252010-06-09 22:32:53 +00001209 if (CLG_(current_state).nonskipped)
1210 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
1211 else
weidendo75a5c2d2010-06-09 22:32:58 +00001212 cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];
weidendo0a1951d2009-06-15 00:16:36 +00001213
weidendoa17f2a32006-03-20 10:27:30 +00001214 inc_costs(DrRes, cost_Dr,
weidendo5bba5252010-06-09 22:32:53 +00001215 CLG_(current_state).cost + fullOffset(EG_DR) );
weidendoa17f2a32006-03-20 10:27:30 +00001216 }
1217}
1218
1219
1220/* Instruction doing a write access */
1221
weidendo0a1951d2009-06-15 00:16:36 +00001222VG_REGPARM(3)
1223static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001224{
1225 CacheModelResult IrRes, DwRes;
1226
1227 current_ii = ii;
weidendo75a5c2d2010-06-09 22:32:58 +00001228 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001229 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001230
florianb7876db2015-08-05 19:04:51 +00001231 CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%ld => %s\n",
weidendo75a5c2d2010-06-09 22:32:58 +00001232 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
weidendo0a1951d2009-06-15 00:16:36 +00001233 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001234
1235 if (CLG_(current_state).collect) {
1236 ULong *cost_Ir, *cost_Dw;
1237
1238 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001239 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1240 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
weidendoa17f2a32006-03-20 10:27:30 +00001241 }
1242 else {
weidendo75a5c2d2010-06-09 22:32:58 +00001243 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
1244 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW];
weidendoa17f2a32006-03-20 10:27:30 +00001245 }
1246
1247 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001248 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001249 inc_costs(DwRes, cost_Dw,
weidendo5bba5252010-06-09 22:32:53 +00001250 CLG_(current_state).cost + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001251 }
1252}
1253
sewardjcafe5052013-01-17 14:24:35 +00001254/* See comment on log_0I1Dr. */
weidendo0a1951d2009-06-15 00:16:36 +00001255VG_REGPARM(3)
1256static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001257{
1258 CacheModelResult DwRes;
1259
1260 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001261 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001262
florianb7876db2015-08-05 19:04:51 +00001263 CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%ld => %s\n",
weidendo0a1951d2009-06-15 00:16:36 +00001264 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001265
1266 if (CLG_(current_state).collect) {
1267 ULong *cost_Dw;
1268
weidendo5bba5252010-06-09 22:32:53 +00001269 if (CLG_(current_state).nonskipped)
1270 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
1271 else
weidendo75a5c2d2010-06-09 22:32:58 +00001272 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW];
weidendoa17f2a32006-03-20 10:27:30 +00001273
1274 inc_costs(DwRes, cost_Dw,
weidendo5bba5252010-06-09 22:32:53 +00001275 CLG_(current_state).cost + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001276 }
1277}
1278
weidendoa17f2a32006-03-20 10:27:30 +00001279
1280
1281/*------------------------------------------------------------*/
1282/*--- Cache configuration ---*/
1283/*------------------------------------------------------------*/
1284
weidendoa17f2a32006-03-20 10:27:30 +00001285static cache_t clo_I1_cache = UNDEFINED_CACHE;
1286static cache_t clo_D1_cache = UNDEFINED_CACHE;
njn2d853a12010-10-06 22:46:31 +00001287static cache_t clo_LL_cache = UNDEFINED_CACHE;
weidendoa17f2a32006-03-20 10:27:30 +00001288
weidendoa17f2a32006-03-20 10:27:30 +00001289/* Initialize and clear simulator state */
1290static void cachesim_post_clo_init(void)
1291{
1292 /* Cache configurations. */
njn2d853a12010-10-06 22:46:31 +00001293 cache_t I1c, D1c, LLc;
weidendoa17f2a32006-03-20 10:27:30 +00001294
1295 /* Initialize access handlers */
1296 if (!CLG_(clo).simulate_cache) {
1297 CLG_(cachesim).log_1I0D = 0;
1298 CLG_(cachesim).log_1I0D_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001299 CLG_(cachesim).log_2I0D = 0;
1300 CLG_(cachesim).log_2I0D_name = "(no function)";
1301 CLG_(cachesim).log_3I0D = 0;
1302 CLG_(cachesim).log_3I0D_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001303
1304 CLG_(cachesim).log_1I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001305 CLG_(cachesim).log_1I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001306 CLG_(cachesim).log_1I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001307 CLG_(cachesim).log_1I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001308
1309 CLG_(cachesim).log_0I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001310 CLG_(cachesim).log_0I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001311 CLG_(cachesim).log_0I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001312 CLG_(cachesim).log_0I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001313 return;
1314 }
1315
1316 /* Configuration of caches only needed with real cache simulation */
weidendo23642272011-09-06 19:08:31 +00001317 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1318 &clo_I1_cache,
1319 &clo_D1_cache,
1320 &clo_LL_cache);
1321
weidendoa17f2a32006-03-20 10:27:30 +00001322 I1.name = "I1";
1323 D1.name = "D1";
njn2d853a12010-10-06 22:46:31 +00001324 LL.name = "LL";
weidendoa17f2a32006-03-20 10:27:30 +00001325
sewardjc4ebde32012-06-03 23:10:55 +00001326 // min_line_size is used to make sure that we never feed
1327 // accesses to the simulator straddling more than two
1328 // cache lines at any cache level
1329 CLG_(min_line_size) = (I1c.line_size < D1c.line_size)
1330 ? I1c.line_size : D1c.line_size;
1331 CLG_(min_line_size) = (LLc.line_size < CLG_(min_line_size))
1332 ? LLc.line_size : CLG_(min_line_size);
1333
1334 Int largest_load_or_store_size
1335 = VG_(machine_get_size_of_largest_guest_register)();
1336 if (CLG_(min_line_size) < largest_load_or_store_size) {
1337 /* We can't continue, because the cache simulation might
1338 straddle more than 2 lines, and it will assert. So let's
1339 just stop before we start. */
1340 VG_(umsg)("Callgrind: cannot continue: the minimum line size (%d)\n",
1341 (Int)CLG_(min_line_size));
1342 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
1343 largest_load_or_store_size );
1344 VG_(umsg)(" but it is not. Exiting now.\n");
1345 VG_(exit)(1);
1346 }
1347
weidendoa17f2a32006-03-20 10:27:30 +00001348 cachesim_initcache(I1c, &I1);
1349 cachesim_initcache(D1c, &D1);
njn2d853a12010-10-06 22:46:31 +00001350 cachesim_initcache(LLc, &LL);
weidendoa17f2a32006-03-20 10:27:30 +00001351
1352 /* the other cache simulators use the standard helpers
1353 * with dispatching via simulator struct */
1354
1355 CLG_(cachesim).log_1I0D = log_1I0D;
1356 CLG_(cachesim).log_1I0D_name = "log_1I0D";
weidendo0a1951d2009-06-15 00:16:36 +00001357 CLG_(cachesim).log_2I0D = log_2I0D;
1358 CLG_(cachesim).log_2I0D_name = "log_2I0D";
1359 CLG_(cachesim).log_3I0D = log_3I0D;
1360 CLG_(cachesim).log_3I0D_name = "log_3I0D";
weidendoa17f2a32006-03-20 10:27:30 +00001361
1362 CLG_(cachesim).log_1I1Dr = log_1I1Dr;
1363 CLG_(cachesim).log_1I1Dw = log_1I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001364 CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
1365 CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001366
1367 CLG_(cachesim).log_0I1Dr = log_0I1Dr;
1368 CLG_(cachesim).log_0I1Dw = log_0I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001369 CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
1370 CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001371
1372 if (clo_collect_cacheuse) {
1373
1374 /* Output warning for not supported option combinations */
1375 if (clo_simulate_hwpref) {
1376 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001377 "warning: prefetch simulation can not be "
1378 "used with cache usage\n");
weidendoa17f2a32006-03-20 10:27:30 +00001379 clo_simulate_hwpref = False;
1380 }
1381
1382 if (clo_simulate_writeback) {
1383 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001384 "warning: write-back simulation can not be "
1385 "used with cache usage\n");
weidendoa17f2a32006-03-20 10:27:30 +00001386 clo_simulate_writeback = False;
1387 }
1388
1389 simulator.I1_Read = cacheuse_I1_doRead;
1390 simulator.D1_Read = cacheuse_D1_doRead;
1391 simulator.D1_Write = cacheuse_D1_doRead;
1392 return;
1393 }
1394
1395 if (clo_simulate_hwpref) {
1396 prefetch_clear();
1397
1398 if (clo_simulate_writeback) {
1399 simulator.I1_Read = prefetch_I1_Read;
1400 simulator.D1_Read = prefetch_D1_Read;
1401 simulator.D1_Write = prefetch_D1_Write;
1402 }
1403 else {
1404 simulator.I1_Read = prefetch_I1_ref;
1405 simulator.D1_Read = prefetch_D1_ref;
1406 simulator.D1_Write = prefetch_D1_ref;
1407 }
1408
1409 return;
1410 }
1411
1412 if (clo_simulate_writeback) {
1413 simulator.I1_Read = cachesim_I1_Read;
1414 simulator.D1_Read = cachesim_D1_Read;
1415 simulator.D1_Write = cachesim_D1_Write;
1416 }
1417 else {
1418 simulator.I1_Read = cachesim_I1_ref;
1419 simulator.D1_Read = cachesim_D1_ref;
1420 simulator.D1_Write = cachesim_D1_ref;
1421 }
1422}
1423
1424
1425/* Clear simulator state. Has to be initialized before */
1426static
1427void cachesim_clear(void)
1428{
1429 cachesim_clearcache(&I1);
1430 cachesim_clearcache(&D1);
njn2d853a12010-10-06 22:46:31 +00001431 cachesim_clearcache(&LL);
weidendoa17f2a32006-03-20 10:27:30 +00001432
1433 prefetch_clear();
1434}
1435
1436
floriancc9480f2014-11-11 20:46:34 +00001437static void cachesim_dump_desc(VgFile *fp)
weidendoa17f2a32006-03-20 10:27:30 +00001438{
floriancc9480f2014-11-11 20:46:34 +00001439 VG_(fprintf)(fp, "\ndesc: I1 cache: %s\n", I1.desc_line);
1440 VG_(fprintf)(fp, "desc: D1 cache: %s\n", D1.desc_line);
1441 VG_(fprintf)(fp, "desc: LL cache: %s\n", LL.desc_line);
weidendoa17f2a32006-03-20 10:27:30 +00001442}
1443
1444static
1445void cachesim_print_opts(void)
1446{
1447 VG_(printf)(
weidendo320705f2010-07-02 19:56:23 +00001448"\n cache simulator options (does cache simulation if used):\n"
weidendoa17f2a32006-03-20 10:27:30 +00001449" --simulate-wb=no|yes Count write-back events [no]\n"
1450" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"
1451#if CLG_EXPERIMENTAL
1452" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
1453#endif
weidendo23642272011-09-06 19:08:31 +00001454" --cacheuse=no|yes Collect cache block use [no]\n");
1455 VG_(print_cache_clo_opts)();
weidendoa17f2a32006-03-20 10:27:30 +00001456}
1457
1458/* Check for command line option for cache configuration.
1459 * Return False if unknown and not handled.
1460 *
1461 * Called from CLG_(process_cmd_line_option)() in clo.c
1462 */
florian19f91bb2012-11-10 22:29:54 +00001463static Bool cachesim_parse_opt(const HChar* arg)
weidendoa17f2a32006-03-20 10:27:30 +00001464{
njn83df0b62009-02-25 01:01:05 +00001465 if VG_BOOL_CLO(arg, "--simulate-wb", clo_simulate_writeback) {}
1466 else if VG_BOOL_CLO(arg, "--simulate-hwpref", clo_simulate_hwpref) {}
1467 else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors) {}
weidendoa17f2a32006-03-20 10:27:30 +00001468
njn83df0b62009-02-25 01:01:05 +00001469 else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) {
1470 if (clo_collect_cacheuse) {
1471 /* Use counters only make sense with fine dumping */
1472 CLG_(clo).dump_instr = True;
1473 }
1474 }
weidendoa17f2a32006-03-20 10:27:30 +00001475
weidendo23642272011-09-06 19:08:31 +00001476 else if (VG_(str_clo_cache_opt)(arg,
1477 &clo_I1_cache,
1478 &clo_D1_cache,
1479 &clo_LL_cache)) {}
1480
1481 else
1482 return False;
weidendoa17f2a32006-03-20 10:27:30 +00001483
1484 return True;
1485}
1486
weidendoa17f2a32006-03-20 10:27:30 +00001487static
weidendo320705f2010-07-02 19:56:23 +00001488void cachesim_printstat(Int l1, Int l2, Int l3)
weidendoa17f2a32006-03-20 10:27:30 +00001489{
1490 FullCost total = CLG_(total_cost), D_total = 0;
njn2d853a12010-10-06 22:46:31 +00001491 ULong LL_total_m, LL_total_mr, LL_total_mw,
1492 LL_total, LL_total_r, LL_total_w;
weidendoa17f2a32006-03-20 10:27:30 +00001493
1494 if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {
sewardj0f33adf2009-07-15 14:51:03 +00001495 VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001496 prefetch_up);
sewardj0f33adf2009-07-15 14:51:03 +00001497 VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001498 prefetch_down);
sewardj0f33adf2009-07-15 14:51:03 +00001499 VG_(message)(Vg_DebugMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001500 }
1501
florian29582f82014-12-14 15:39:57 +00001502 VG_(message)(Vg_UserMsg, "I1 misses: %'*llu\n", l1,
1503 total[fullOffset(EG_IR) +1]);
weidendoa17f2a32006-03-20 10:27:30 +00001504
florian29582f82014-12-14 15:39:57 +00001505 VG_(message)(Vg_UserMsg, "LLi misses: %'*llu\n", l1,
1506 total[fullOffset(EG_IR) +2]);
weidendoa17f2a32006-03-20 10:27:30 +00001507
weidendo5bba5252010-06-09 22:32:53 +00001508 if (0 == total[fullOffset(EG_IR)])
1509 total[fullOffset(EG_IR)] = 1;
weidendoa17f2a32006-03-20 10:27:30 +00001510
florian29582f82014-12-14 15:39:57 +00001511 VG_(message)(Vg_UserMsg, "I1 miss rate: %*.2f%%\n", l1,
1512 total[fullOffset(EG_IR)+1] * 100.0 / total[fullOffset(EG_IR)]);
weidendoa17f2a32006-03-20 10:27:30 +00001513
florian29582f82014-12-14 15:39:57 +00001514 VG_(message)(Vg_UserMsg, "LLi miss rate: %*.2f%%\n", l1,
1515 total[fullOffset(EG_IR)+2] * 100.0 / total[fullOffset(EG_IR)]);
1516
sewardj0f33adf2009-07-15 14:51:03 +00001517 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001518
1519 /* D cache results.
1520 Use the D_refs.rd and D_refs.wr values to determine the
1521 * width of columns 2 & 3. */
1522
1523 D_total = CLG_(get_eventset_cost)( CLG_(sets).full );
1524 CLG_(init_cost)( CLG_(sets).full, D_total);
weidendo5bba5252010-06-09 22:32:53 +00001525 // we only use the first 3 values of D_total, adding up Dr and Dw costs
1526 CLG_(copy_cost)( CLG_(get_event_set)(EG_DR), D_total, total + fullOffset(EG_DR) );
1527 CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001528
florian29582f82014-12-14 15:39:57 +00001529 VG_(message)(Vg_UserMsg, "D refs: %'*llu (%'*llu rd + %'*llu wr)\n",
1530 l1, D_total[0],
1531 l2, total[fullOffset(EG_DR)],
1532 l3, total[fullOffset(EG_DW)]);
weidendoa17f2a32006-03-20 10:27:30 +00001533
florian29582f82014-12-14 15:39:57 +00001534 VG_(message)(Vg_UserMsg, "D1 misses: %'*llu (%'*llu rd + %'*llu wr)\n",
1535 l1, D_total[1],
1536 l2, total[fullOffset(EG_DR)+1],
1537 l3, total[fullOffset(EG_DW)+1]);
weidendoa17f2a32006-03-20 10:27:30 +00001538
florian29582f82014-12-14 15:39:57 +00001539 VG_(message)(Vg_UserMsg, "LLd misses: %'*llu (%'*llu rd + %'*llu wr)\n",
1540 l1, D_total[2],
1541 l2, total[fullOffset(EG_DR)+2],
1542 l3, total[fullOffset(EG_DW)+2]);
weidendoa17f2a32006-03-20 10:27:30 +00001543
weidendoa17f2a32006-03-20 10:27:30 +00001544 if (0 == D_total[0]) D_total[0] = 1;
weidendo5bba5252010-06-09 22:32:53 +00001545 if (0 == total[fullOffset(EG_DR)]) total[fullOffset(EG_DR)] = 1;
1546 if (0 == total[fullOffset(EG_DW)]) total[fullOffset(EG_DW)] = 1;
weidendoa17f2a32006-03-20 10:27:30 +00001547
florian29582f82014-12-14 15:39:57 +00001548 VG_(message)(Vg_UserMsg, "D1 miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1549 l1, D_total[1] * 100.0 / D_total[0],
1550 l2, total[fullOffset(EG_DR)+1] * 100.0 / total[fullOffset(EG_DR)],
1551 l3, total[fullOffset(EG_DW)+1] * 100.0 / total[fullOffset(EG_DW)]);
weidendoa17f2a32006-03-20 10:27:30 +00001552
florian29582f82014-12-14 15:39:57 +00001553 VG_(message)(Vg_UserMsg, "LLd miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1554 l1, D_total[2] * 100.0 / D_total[0],
1555 l2, total[fullOffset(EG_DR)+2] * 100.0 / total[fullOffset(EG_DR)],
1556 l3, total[fullOffset(EG_DW)+2] * 100.0 / total[fullOffset(EG_DW)]);
sewardj0f33adf2009-07-15 14:51:03 +00001557 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001558
1559
1560
njn2d853a12010-10-06 22:46:31 +00001561 /* LL overall results */
weidendoa17f2a32006-03-20 10:27:30 +00001562
njn2d853a12010-10-06 22:46:31 +00001563 LL_total =
weidendo5bba5252010-06-09 22:32:53 +00001564 total[fullOffset(EG_DR) +1] +
1565 total[fullOffset(EG_DW) +1] +
1566 total[fullOffset(EG_IR) +1];
njn2d853a12010-10-06 22:46:31 +00001567 LL_total_r =
weidendo5bba5252010-06-09 22:32:53 +00001568 total[fullOffset(EG_DR) +1] +
1569 total[fullOffset(EG_IR) +1];
njn2d853a12010-10-06 22:46:31 +00001570 LL_total_w = total[fullOffset(EG_DW) +1];
florian29582f82014-12-14 15:39:57 +00001571 VG_(message)(Vg_UserMsg, "LL refs: %'*llu (%'*llu rd + %'*llu wr)\n",
1572 l1, LL_total, l2, LL_total_r, l3, LL_total_w);
weidendoa17f2a32006-03-20 10:27:30 +00001573
njn2d853a12010-10-06 22:46:31 +00001574 LL_total_m =
weidendo5bba5252010-06-09 22:32:53 +00001575 total[fullOffset(EG_DR) +2] +
1576 total[fullOffset(EG_DW) +2] +
1577 total[fullOffset(EG_IR) +2];
njn2d853a12010-10-06 22:46:31 +00001578 LL_total_mr =
weidendo5bba5252010-06-09 22:32:53 +00001579 total[fullOffset(EG_DR) +2] +
1580 total[fullOffset(EG_IR) +2];
njn2d853a12010-10-06 22:46:31 +00001581 LL_total_mw = total[fullOffset(EG_DW) +2];
florian29582f82014-12-14 15:39:57 +00001582 VG_(message)(Vg_UserMsg, "LL misses: %'*llu (%'*llu rd + %'*llu wr)\n",
1583 l1, LL_total_m, l2, LL_total_mr, l3, LL_total_mw);
weidendoa17f2a32006-03-20 10:27:30 +00001584
florian29582f82014-12-14 15:39:57 +00001585 VG_(message)(Vg_UserMsg, "LL miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1586 l1, LL_total_m * 100.0 / (total[fullOffset(EG_IR)] + D_total[0]),
1587 l2, LL_total_mr * 100.0 / (total[fullOffset(EG_IR)] + total[fullOffset(EG_DR)]),
1588 l3, LL_total_mw * 100.0 / total[fullOffset(EG_DW)]);
weidendoa17f2a32006-03-20 10:27:30 +00001589}
1590
1591
1592/*------------------------------------------------------------*/
1593/*--- Setup for Event set. ---*/
1594/*------------------------------------------------------------*/
1595
1596struct event_sets CLG_(sets);
1597
weidendo5bba5252010-06-09 22:32:53 +00001598void CLG_(init_eventsets)()
weidendoa17f2a32006-03-20 10:27:30 +00001599{
weidendo5bba5252010-06-09 22:32:53 +00001600 // Event groups from which the event sets are composed
1601 // the "Use" group only is used with "cacheuse" simulation
1602 if (clo_collect_cacheuse)
1603 CLG_(register_event_group4)(EG_USE,
1604 "AcCost1", "SpLoss1", "AcCost2", "SpLoss2");
weidendoa17f2a32006-03-20 10:27:30 +00001605
weidendo5bba5252010-06-09 22:32:53 +00001606 if (!CLG_(clo).simulate_cache)
1607 CLG_(register_event_group)(EG_IR, "Ir");
1608 else if (!clo_simulate_writeback) {
njn2d853a12010-10-06 22:46:31 +00001609 CLG_(register_event_group3)(EG_IR, "Ir", "I1mr", "ILmr");
1610 CLG_(register_event_group3)(EG_DR, "Dr", "D1mr", "DLmr");
1611 CLG_(register_event_group3)(EG_DW, "Dw", "D1mw", "DLmw");
weidendoa17f2a32006-03-20 10:27:30 +00001612 }
weidendo5bba5252010-06-09 22:32:53 +00001613 else { // clo_simulate_writeback
njn2d853a12010-10-06 22:46:31 +00001614 CLG_(register_event_group4)(EG_IR, "Ir", "I1mr", "ILmr", "ILdmr");
1615 CLG_(register_event_group4)(EG_DR, "Dr", "D1mr", "DLmr", "DLdmr");
1616 CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "DLmw", "DLdmw");
weidendoa17f2a32006-03-20 10:27:30 +00001617 }
weidendo5bba5252010-06-09 22:32:53 +00001618
weidendo320705f2010-07-02 19:56:23 +00001619 if (CLG_(clo).simulate_branch) {
1620 CLG_(register_event_group2)(EG_BC, "Bc", "Bcm");
1621 CLG_(register_event_group2)(EG_BI, "Bi", "Bim");
1622 }
1623
weidendoaeb86222010-06-09 22:33:02 +00001624 if (CLG_(clo).collect_bus)
1625 CLG_(register_event_group)(EG_BUS, "Ge");
1626
weidendo5bba5252010-06-09 22:32:53 +00001627 if (CLG_(clo).collect_alloc)
1628 CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
1629
1630 if (CLG_(clo).collect_systime)
1631 CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
1632
1633 // event set used as base for instruction self cost
1634 CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);
1635
1636 // event set comprising all event groups, used for inclusive cost
1637 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
weidendo320705f2010-07-02 19:56:23 +00001638 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_BC, EG_BI);
weidendoaeb86222010-06-09 22:33:02 +00001639 CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
weidendo5bba5252010-06-09 22:32:53 +00001640 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
1641
1642 CLG_DEBUGIF(1) {
1643 CLG_DEBUG(1, "EventSets:\n");
1644 CLG_(print_eventset)(-2, CLG_(sets).base);
1645 CLG_(print_eventset)(-2, CLG_(sets).full);
weidendoa17f2a32006-03-20 10:27:30 +00001646 }
weidendoa17f2a32006-03-20 10:27:30 +00001647
weidendo5bba5252010-06-09 22:32:53 +00001648 /* Not-existing events are silently ignored */
1649 CLG_(dumpmap) = CLG_(get_eventmapping)(CLG_(sets).full);
1650 CLG_(append_event)(CLG_(dumpmap), "Ir");
1651 CLG_(append_event)(CLG_(dumpmap), "Dr");
1652 CLG_(append_event)(CLG_(dumpmap), "Dw");
1653 CLG_(append_event)(CLG_(dumpmap), "I1mr");
1654 CLG_(append_event)(CLG_(dumpmap), "D1mr");
1655 CLG_(append_event)(CLG_(dumpmap), "D1mw");
njn2d853a12010-10-06 22:46:31 +00001656 CLG_(append_event)(CLG_(dumpmap), "ILmr");
1657 CLG_(append_event)(CLG_(dumpmap), "DLmr");
1658 CLG_(append_event)(CLG_(dumpmap), "DLmw");
1659 CLG_(append_event)(CLG_(dumpmap), "ILdmr");
1660 CLG_(append_event)(CLG_(dumpmap), "DLdmr");
1661 CLG_(append_event)(CLG_(dumpmap), "DLdmw");
weidendo320705f2010-07-02 19:56:23 +00001662 CLG_(append_event)(CLG_(dumpmap), "Bc");
1663 CLG_(append_event)(CLG_(dumpmap), "Bcm");
1664 CLG_(append_event)(CLG_(dumpmap), "Bi");
1665 CLG_(append_event)(CLG_(dumpmap), "Bim");
weidendo5bba5252010-06-09 22:32:53 +00001666 CLG_(append_event)(CLG_(dumpmap), "AcCost1");
1667 CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
1668 CLG_(append_event)(CLG_(dumpmap), "AcCost2");
1669 CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
weidendoaeb86222010-06-09 22:33:02 +00001670 CLG_(append_event)(CLG_(dumpmap), "Ge");
weidendo5bba5252010-06-09 22:32:53 +00001671 CLG_(append_event)(CLG_(dumpmap), "allocCount");
1672 CLG_(append_event)(CLG_(dumpmap), "allocSize");
1673 CLG_(append_event)(CLG_(dumpmap), "sysCount");
1674 CLG_(append_event)(CLG_(dumpmap), "sysTime");
weidendoa17f2a32006-03-20 10:27:30 +00001675}
1676
1677
weidendoa17f2a32006-03-20 10:27:30 +00001678/* this is called at dump time for every instruction executed */
1679static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
1680 InstrInfo* ii, ULong exe_count)
1681{
weidendo5bba5252010-06-09 22:32:53 +00001682 if (!CLG_(clo).simulate_cache)
1683 cost[ fullOffset(EG_IR) ] += exe_count;
weidendoaeb86222010-06-09 22:33:02 +00001684
1685 if (ii->eventset)
weidendo5bba5252010-06-09 22:32:53 +00001686 CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
1687 ii->eventset, bbcc->cost + ii->cost_offset);
weidendoa17f2a32006-03-20 10:27:30 +00001688}
1689
1690static
weidendoa17f2a32006-03-20 10:27:30 +00001691void cachesim_finish(void)
1692{
1693 if (clo_collect_cacheuse)
1694 cacheuse_finish();
1695}
1696
1697/*------------------------------------------------------------*/
1698/*--- The simulator defined in this file ---*/
1699/*------------------------------------------------------------*/
1700
1701struct cachesim_if CLG_(cachesim) = {
1702 .print_opts = cachesim_print_opts,
1703 .parse_opt = cachesim_parse_opt,
1704 .post_clo_init = cachesim_post_clo_init,
1705 .clear = cachesim_clear,
floriancc9480f2014-11-11 20:46:34 +00001706 .dump_desc = cachesim_dump_desc,
weidendoa17f2a32006-03-20 10:27:30 +00001707 .printstat = cachesim_printstat,
1708 .add_icost = cachesim_add_icost,
weidendoa17f2a32006-03-20 10:27:30 +00001709 .finish = cachesim_finish,
1710
1711 /* these will be set by cachesim_post_clo_init */
1712 .log_1I0D = 0,
weidendo0a1951d2009-06-15 00:16:36 +00001713 .log_2I0D = 0,
1714 .log_3I0D = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001715
1716 .log_1I1Dr = 0,
1717 .log_1I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001718
1719 .log_0I1Dr = 0,
1720 .log_0I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001721
1722 .log_1I0D_name = "(no function)",
weidendo0a1951d2009-06-15 00:16:36 +00001723 .log_2I0D_name = "(no function)",
1724 .log_3I0D_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001725
1726 .log_1I1Dr_name = "(no function)",
1727 .log_1I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001728
1729 .log_0I1Dr_name = "(no function)",
1730 .log_0I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001731};
1732
1733
1734/*--------------------------------------------------------------------*/
/*--- end                                                    sim.c ---*/
1736/*--------------------------------------------------------------------*/