blob: a69f7603a91bb973addda412d9820da31311050d [file] [log] [blame]
weidendoa17f2a32006-03-20 10:27:30 +00001/*--------------------------------------------------------------------*/
2/*--- Cache simulation. ---*/
3/*--- sim.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
njn9a0cba42007-04-15 22:15:57 +00007 This file is part of Callgrind, a Valgrind tool for call graph
8 profiling programs.
weidendoa17f2a32006-03-20 10:27:30 +00009
weidendo5bba5252010-06-09 22:32:53 +000010 Copyright (C) 2003-2010, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
weidendoa17f2a32006-03-20 10:27:30 +000011
njn9a0cba42007-04-15 22:15:57 +000012 This tool is derived from and contains code from Cachegrind
sewardj9eecbbb2010-05-03 21:37:12 +000013 Copyright (C) 2002-2010 Nicholas Nethercote (njn@valgrind.org)
weidendoa17f2a32006-03-20 10:27:30 +000014
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "global.h"
34
35
36/* Notes:
37 - simulates a write-allocate cache
38 - (block --> set) hash function uses simple bit selection
39 - handling of references straddling two cache blocks:
40 - counts as only one cache access (not two)
41 - both blocks hit --> one hit
42 - one block hits, the other misses --> one miss
43 - both blocks miss --> one miss (not two)
44*/
45
46/* Cache configuration */
47#include "cg_arch.h"
48
49/* additional structures for cache use info, separated
50 * according usage frequency:
51 * - line_loaded : pointer to cost center of instruction
52 * which loaded the line into cache.
53 * Needed to increment counters when line is evicted.
54 * - line_use : updated on every access
55 */
56typedef struct {
57 UInt count;
58 UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */
59} line_use;
60
61typedef struct {
62 Addr memline, iaddr;
63 line_use* dep_use; /* point to higher-level cacheblock for this memline */
64 ULong* use_base;
65} line_loaded;
66
67/* Cache state */
68typedef struct {
69 char* name;
70 int size; /* bytes */
71 int assoc;
72 int line_size; /* bytes */
73 Bool sectored; /* prefetch nearside cacheline on read */
74 int sets;
75 int sets_min_1;
weidendoa17f2a32006-03-20 10:27:30 +000076 int line_size_bits;
77 int tag_shift;
78 UWord tag_mask;
79 char desc_line[128];
80 UWord* tags;
81
82 /* for cache use */
83 int line_size_mask;
84 int* line_start_mask;
85 int* line_end_mask;
86 line_loaded* loaded;
87 line_use* use;
88} cache_t2;
89
90/*
91 * States of flat caches in our model.
92 * We use a 2-level hierarchy,
93 */
94static cache_t2 I1, D1, L2;
95
96/* Lower bits of cache tags are used as flags for a cache line */
97#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
98#define CACHELINE_DIRTY 1
99
100
101/* Cache simulator Options */
102static Bool clo_simulate_writeback = False;
103static Bool clo_simulate_hwpref = False;
104static Bool clo_simulate_sectors = False;
105static Bool clo_collect_cacheuse = False;
106
107/* Following global vars are setup before by
108 * setup_bbcc()/cachesim_after_bbsetup():
109 *
110 * - Addr bb_base (instruction start address of original BB)
111 * - ULong* cost_base (start of cost array for BB)
112 * - BBCC* nonskipped (only != 0 when in a function not skipped)
113 */
114
weidendoa17f2a32006-03-20 10:27:30 +0000115static Addr bb_base;
116static ULong* cost_base;
117static InstrInfo* current_ii;
118
119/* Cache use offsets */
weidendo0a1951d2009-06-15 00:16:36 +0000120/* The offsets are only correct because all per-instruction event sets get
weidendoa17f2a32006-03-20 10:27:30 +0000121 * the "Use" set added first !
122 */
123static Int off_I1_AcCost = 0;
124static Int off_I1_SpLoss = 1;
125static Int off_D1_AcCost = 0;
126static Int off_D1_SpLoss = 1;
127static Int off_L2_AcCost = 2;
128static Int off_L2_SpLoss = 3;
129
130/* Cache access types */
131typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;
132
133/* Result of a reference into a flat cache */
134typedef enum { Hit = 0, Miss, MissDirty } CacheResult;
135
136/* Result of a reference into a hierarchical cache model */
137typedef enum {
138 L1_Hit,
139 L2_Hit,
140 MemAccess,
141 WriteBackMemAccess } CacheModelResult;
142
143typedef CacheModelResult (*simcall_type)(Addr, UChar);
144
145static struct {
146 simcall_type I1_Read;
147 simcall_type D1_Read;
148 simcall_type D1_Write;
149} simulator;
150
151/*------------------------------------------------------------*/
152/*--- Cache Simulator Initialization ---*/
153/*------------------------------------------------------------*/
154
155static void cachesim_clearcache(cache_t2* c)
156{
157 Int i;
158
159 for (i = 0; i < c->sets * c->assoc; i++)
160 c->tags[i] = 0;
161 if (c->use) {
162 for (i = 0; i < c->sets * c->assoc; i++) {
163 c->loaded[i].memline = 0;
164 c->loaded[i].use_base = 0;
165 c->loaded[i].dep_use = 0;
166 c->loaded[i].iaddr = 0;
167 c->use[i].mask = 0;
168 c->use[i].count = 0;
169 c->tags[i] = i % c->assoc; /* init lower bits as pointer */
170 }
171 }
172}
173
174static void cacheuse_initcache(cache_t2* c);
175
176/* By this point, the size/assoc/line_size has been checked. */
177static void cachesim_initcache(cache_t config, cache_t2* c)
178{
179 c->size = config.size;
180 c->assoc = config.assoc;
181 c->line_size = config.line_size;
182 c->sectored = False; // FIXME
183
184 c->sets = (c->size / c->line_size) / c->assoc;
185 c->sets_min_1 = c->sets - 1;
weidendoa17f2a32006-03-20 10:27:30 +0000186 c->line_size_bits = VG_(log2)(c->line_size);
187 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
188 c->tag_mask = ~((1<<c->tag_shift)-1);
189
190 /* Can bits in tag entries be used for flags?
191 * Should be always true as MIN_LINE_SIZE >= 16 */
192 CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);
193
194 if (c->assoc == 1) {
195 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
196 c->size, c->line_size,
197 c->sectored ? ", sectored":"");
198 } else {
199 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
200 c->size, c->line_size, c->assoc,
201 c->sectored ? ", sectored":"");
202 }
203
sewardj9c606bd2008-09-18 18:12:50 +0000204 c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
205 sizeof(UWord) * c->sets * c->assoc);
weidendoa17f2a32006-03-20 10:27:30 +0000206 if (clo_collect_cacheuse)
207 cacheuse_initcache(c);
208 else
209 c->use = 0;
210 cachesim_clearcache(c);
211}
212
213
#if 0
/* Debugging aid (compiled out): print all tags of 'c', one set per line. */
static void print_cache(cache_t2* c)
{
  Int set, way, i;

  /* Note initialisation and update of 'i': it walks the flat tag array
   * while set/way track the position inside the cache. */
  for (i = 0, set = 0; set < c->sets; set++) {
    for (way = 0; way < c->assoc; way++, i++) {
      /* tags are UWord, so the 'l' length modifier is needed */
      VG_(printf)("%8lx ", c->tags[i]);
    }
    VG_(printf)("\n");
  }
}
#endif
228
229
230/*------------------------------------------------------------*/
231/*--- Write Through Cache Simulation ---*/
232/*------------------------------------------------------------*/
233
234/*
235 * Simple model: L1 & L2 Write Through
236 * Does not distinguish among read and write references
237 *
238 * Simulator functions:
239 * CacheModelResult cachesim_I1_ref(Addr a, UChar size)
240 * CacheModelResult cachesim_D1_ref(Addr a, UChar size)
241 */
242
243static __inline__
244CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
245{
246 int i, j;
247 UWord *set;
248
weidendo144b76c2009-01-26 22:56:14 +0000249 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000250
251 /* This loop is unrolled for just the first case, which is the most */
252 /* common. We can't unroll any further because it would screw up */
253 /* if we have a direct-mapped (1-way) cache. */
254 if (tag == set[0])
255 return Hit;
256
257 /* If the tag is one other than the MRU, move it into the MRU spot */
258 /* and shuffle the rest down. */
259 for (i = 1; i < c->assoc; i++) {
260 if (tag == set[i]) {
261 for (j = i; j > 0; j--) {
262 set[j] = set[j - 1];
263 }
264 set[0] = tag;
265 return Hit;
266 }
267 }
268
269 /* A miss; install this tag as MRU, shuffle rest down. */
270 for (j = c->assoc - 1; j > 0; j--) {
271 set[j] = set[j - 1];
272 }
273 set[0] = tag;
274
275 return Miss;
276}
277
278static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
279{
280 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
281 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
282 UWord tag = a >> c->tag_shift;
283
284 /* Access entirely within line. */
285 if (set1 == set2)
286 return cachesim_setref(c, set1, tag);
287
288 /* Access straddles two lines. */
289 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
290 else if (((set1 + 1) & (c->sets-1)) == set2) {
weidendo28e2a142006-11-22 21:00:53 +0000291 UWord tag2 = (a+size-1) >> c->tag_shift;
weidendoa17f2a32006-03-20 10:27:30 +0000292
293 /* the call updates cache structures as side effect */
294 CacheResult res1 = cachesim_setref(c, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000295 CacheResult res2 = cachesim_setref(c, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000296 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
297
298 } else {
njn8a7b41b2007-09-23 00:51:24 +0000299 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000300 VG_(tool_panic)("item straddles more than two cache sets");
301 }
302 return Hit;
303}
304
305static
306CacheModelResult cachesim_I1_ref(Addr a, UChar size)
307{
308 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
309 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
310 return MemAccess;
311}
312
313static
314CacheModelResult cachesim_D1_ref(Addr a, UChar size)
315{
316 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
317 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
318 return MemAccess;
319}
320
321
322/*------------------------------------------------------------*/
323/*--- Write Back Cache Simulation ---*/
324/*------------------------------------------------------------*/
325
326/*
327 * More complex model: L1 Write-through, L2 Write-back
328 * This needs to distinguish among read and write references.
329 *
330 * Simulator functions:
331 * CacheModelResult cachesim_I1_Read(Addr a, UChar size)
332 * CacheModelResult cachesim_D1_Read(Addr a, UChar size)
333 * CacheModelResult cachesim_D1_Write(Addr a, UChar size)
334 */
335
336/*
337 * With write-back, result can be a miss evicting a dirty line
338 * The dirty state of a cache line is stored in Bit0 of the tag for
339 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
340 * type (Read/Write), the line gets dirty on a write.
341 */
342static __inline__
343CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
344{
345 int i, j;
346 UWord *set, tmp_tag;
347
weidendo144b76c2009-01-26 22:56:14 +0000348 set = &(c->tags[set_no * c->assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000349
350 /* This loop is unrolled for just the first case, which is the most */
351 /* common. We can't unroll any further because it would screw up */
352 /* if we have a direct-mapped (1-way) cache. */
353 if (tag == (set[0] & ~CACHELINE_DIRTY)) {
354 set[0] |= ref;
355 return Hit;
356 }
357 /* If the tag is one other than the MRU, move it into the MRU spot */
358 /* and shuffle the rest down. */
359 for (i = 1; i < c->assoc; i++) {
360 if (tag == (set[i] & ~CACHELINE_DIRTY)) {
361 tmp_tag = set[i] | ref; // update dirty flag
362 for (j = i; j > 0; j--) {
363 set[j] = set[j - 1];
364 }
365 set[0] = tmp_tag;
366 return Hit;
367 }
368 }
369
370 /* A miss; install this tag as MRU, shuffle rest down. */
371 tmp_tag = set[c->assoc - 1];
372 for (j = c->assoc - 1; j > 0; j--) {
373 set[j] = set[j - 1];
374 }
375 set[0] = tag | ref;
376
377 return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
378}
379
380
381static __inline__
382CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
383{
384 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
385 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
386 UWord tag = a & c->tag_mask;
387
388 /* Access entirely within line. */
389 if (set1 == set2)
390 return cachesim_setref_wb(c, ref, set1, tag);
391
392 /* Access straddles two lines. */
393 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
394 else if (((set1 + 1) & (c->sets-1)) == set2) {
weidendo144b76c2009-01-26 22:56:14 +0000395 UWord tag2 = (a+size-1) & c->tag_mask;
weidendoa17f2a32006-03-20 10:27:30 +0000396
397 /* the call updates cache structures as side effect */
398 CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
weidendo28e2a142006-11-22 21:00:53 +0000399 CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2);
weidendoa17f2a32006-03-20 10:27:30 +0000400
401 if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
402 return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
403
404 } else {
njn8a7b41b2007-09-23 00:51:24 +0000405 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);
weidendoa17f2a32006-03-20 10:27:30 +0000406 VG_(tool_panic)("item straddles more than two cache sets");
407 }
408 return Hit;
409}
410
411
412static
413CacheModelResult cachesim_I1_Read(Addr a, UChar size)
414{
415 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
416 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
417 case Hit: return L2_Hit;
418 case Miss: return MemAccess;
419 default: break;
420 }
421 return WriteBackMemAccess;
422}
423
424static
425CacheModelResult cachesim_D1_Read(Addr a, UChar size)
426{
427 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
428 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
429 case Hit: return L2_Hit;
430 case Miss: return MemAccess;
431 default: break;
432 }
433 return WriteBackMemAccess;
434}
435
436static
437CacheModelResult cachesim_D1_Write(Addr a, UChar size)
438{
439 if ( cachesim_ref( &D1, a, size) == Hit ) {
440 /* Even for a L1 hit, the write-trough L1 passes
441 * the write to the L2 to make the L2 line dirty.
442 * But this causes no latency, so return the hit.
443 */
444 cachesim_ref_wb( &L2, Write, a, size);
445 return L1_Hit;
446 }
447 switch( cachesim_ref_wb( &L2, Write, a, size) ) {
448 case Hit: return L2_Hit;
449 case Miss: return MemAccess;
450 default: break;
451 }
452 return WriteBackMemAccess;
453}
454
455
456/*------------------------------------------------------------*/
457/*--- Hardware Prefetch Simulation ---*/
458/*------------------------------------------------------------*/
459
460static ULong prefetch_up = 0;
461static ULong prefetch_down = 0;
462
463#define PF_STREAMS 8
464#define PF_PAGEBITS 12
465
466static UInt pf_lastblock[PF_STREAMS];
467static Int pf_seqblocks[PF_STREAMS];
468
469static
470void prefetch_clear(void)
471{
472 int i;
473 for(i=0;i<PF_STREAMS;i++)
474 pf_lastblock[i] = pf_seqblocks[i] = 0;
475}
476
477/*
478 * HW Prefetch emulation
479 * Start prefetching when detecting sequential access to 3 memory blocks.
480 * One stream can be detected per 4k page.
481 */
482static __inline__
weidendo09ee78e2009-02-24 12:26:53 +0000483void prefetch_L2_doref(Addr a)
weidendoa17f2a32006-03-20 10:27:30 +0000484{
485 UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
486 UInt block = ( a >> L2.line_size_bits);
487
488 if (block != pf_lastblock[stream]) {
489 if (pf_seqblocks[stream] == 0) {
490 if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
491 else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
492 }
493 else if (pf_seqblocks[stream] >0) {
494 if (pf_lastblock[stream] +1 == block) {
495 pf_seqblocks[stream]++;
496 if (pf_seqblocks[stream] >= 2) {
497 prefetch_up++;
498 cachesim_ref(&L2, a + 5 * L2.line_size,1);
499 }
500 }
501 else pf_seqblocks[stream] = 0;
502 }
503 else if (pf_seqblocks[stream] <0) {
504 if (pf_lastblock[stream] -1 == block) {
505 pf_seqblocks[stream]--;
506 if (pf_seqblocks[stream] <= -2) {
507 prefetch_down++;
508 cachesim_ref(&L2, a - 5 * L2.line_size,1);
509 }
510 }
511 else pf_seqblocks[stream] = 0;
512 }
513 pf_lastblock[stream] = block;
514 }
515}
516
517/* simple model with hardware prefetch */
518
519static
520CacheModelResult prefetch_I1_ref(Addr a, UChar size)
521{
522 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000523 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000524 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
525 return MemAccess;
526}
527
528static
529CacheModelResult prefetch_D1_ref(Addr a, UChar size)
530{
531 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000532 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000533 if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;
534 return MemAccess;
535}
536
537
538/* complex model with hardware prefetch */
539
540static
541CacheModelResult prefetch_I1_Read(Addr a, UChar size)
542{
543 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000544 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000545 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
546 case Hit: return L2_Hit;
547 case Miss: return MemAccess;
548 default: break;
549 }
550 return WriteBackMemAccess;
551}
552
553static
554CacheModelResult prefetch_D1_Read(Addr a, UChar size)
555{
556 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
weidendo09ee78e2009-02-24 12:26:53 +0000557 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000558 switch( cachesim_ref_wb( &L2, Read, a, size) ) {
559 case Hit: return L2_Hit;
560 case Miss: return MemAccess;
561 default: break;
562 }
563 return WriteBackMemAccess;
564}
565
566static
567CacheModelResult prefetch_D1_Write(Addr a, UChar size)
568{
weidendo09ee78e2009-02-24 12:26:53 +0000569 prefetch_L2_doref(a);
weidendoa17f2a32006-03-20 10:27:30 +0000570 if ( cachesim_ref( &D1, a, size) == Hit ) {
571 /* Even for a L1 hit, the write-trough L1 passes
572 * the write to the L2 to make the L2 line dirty.
573 * But this causes no latency, so return the hit.
574 */
575 cachesim_ref_wb( &L2, Write, a, size);
576 return L1_Hit;
577 }
578 switch( cachesim_ref_wb( &L2, Write, a, size) ) {
579 case Hit: return L2_Hit;
580 case Miss: return MemAccess;
581 default: break;
582 }
583 return WriteBackMemAccess;
584}
585
586
587/*------------------------------------------------------------*/
588/*--- Cache Simulation with use metric collection ---*/
589/*------------------------------------------------------------*/
590
591/* can not be combined with write-back or prefetch */
592
593static
594void cacheuse_initcache(cache_t2* c)
595{
596 int i;
597 unsigned int start_mask, start_val;
598 unsigned int end_mask, end_val;
599
sewardj9c606bd2008-09-18 18:12:50 +0000600 c->use = CLG_MALLOC("cl.sim.cu_ic.1",
601 sizeof(line_use) * c->sets * c->assoc);
602 c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
603 sizeof(line_loaded) * c->sets * c->assoc);
604 c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
605 sizeof(int) * c->line_size);
606 c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
607 sizeof(int) * c->line_size);
weidendoa17f2a32006-03-20 10:27:30 +0000608
weidendoa17f2a32006-03-20 10:27:30 +0000609 c->line_size_mask = c->line_size-1;
610
611 /* Meaning of line_start_mask/line_end_mask
612 * Example: for a given cache line, you get an access starting at
613 * byte offset 5, length 4, byte 5 - 8 was touched. For a cache
614 * line size of 32, you have 1 bit per byte in the mask:
615 *
616 * bit31 bit8 bit5 bit 0
617 * | | | |
618 * 11..111111100000 line_start_mask[5]
619 * 00..000111111111 line_end_mask[(5+4)-1]
620 *
621 * use_mask |= line_start_mask[5] && line_end_mask[8]
622 *
623 */
624 start_val = end_val = ~0;
625 if (c->line_size < 32) {
626 int bits_per_byte = 32/c->line_size;
627 start_mask = (1<<bits_per_byte)-1;
628 end_mask = start_mask << (32-bits_per_byte);
629 for(i=0;i<c->line_size;i++) {
630 c->line_start_mask[i] = start_val;
631 start_val = start_val & ~start_mask;
632 start_mask = start_mask << bits_per_byte;
633
634 c->line_end_mask[c->line_size-i-1] = end_val;
635 end_val = end_val & ~end_mask;
636 end_mask = end_mask >> bits_per_byte;
637 }
638 }
639 else {
640 int bytes_per_bit = c->line_size/32;
641 start_mask = 1;
642 end_mask = 1 << 31;
643 for(i=0;i<c->line_size;i++) {
644 c->line_start_mask[i] = start_val;
645 c->line_end_mask[c->line_size-i-1] = end_val;
646 if ( ((i+1)%bytes_per_bit) == 0) {
647 start_val &= ~start_mask;
648 end_val &= ~end_mask;
649 start_mask <<= 1;
650 end_mask >>= 1;
651 }
652 }
653 }
654
655 CLG_DEBUG(6, "Config %s:\n", c->desc_line);
656 for(i=0;i<c->line_size;i++) {
657 CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
658 i, c->line_start_mask[i], c->line_end_mask[i]);
659 }
660
661 /* We use lower tag bits as offset pointers to cache use info.
662 * I.e. some cache parameters don't work.
663 */
weidendo144b76c2009-01-26 22:56:14 +0000664 if ( (1<<c->tag_shift) < c->assoc) {
weidendoa17f2a32006-03-20 10:27:30 +0000665 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +0000666 "error: Use associativity < %d for cache use statistics!\n",
weidendoa17f2a32006-03-20 10:27:30 +0000667 (1<<c->tag_shift) );
668 VG_(tool_panic)("Unsupported cache configuration");
669 }
670}
671
weidendoa17f2a32006-03-20 10:27:30 +0000672
673/* for I1/D1 caches
 *
 * CACHEUSE(L) expands to the definition of
 *   static CacheModelResult cacheuse_<L>_doRead(Addr a, UChar size)
 * which references 'size' bytes at address 'a' in the cache L while
 * maintaining the cache use info of L.
 *
 * The lower bits of a tag (tag & ~L.tag_mask) are the offset of the
 * line's use info inside its set; they stay attached to the slot's
 * use info when a line is replaced.
 *
 * - Hit: the tag is moved to the MRU slot 0, the use count of the line
 *   is incremented and the use mask of the bytes accessed is OR'ed in.
 * - Miss: the entry in the last slot is replaced, and the result of
 *   update_<L>_use() for that line is used.
 *
 * Access within one line: returns L1_Hit on a hit, otherwise the result
 * of update_<L>_use(). Access straddling two lines: both lines are
 * handled; if the second line hits, the result for the first line is
 * returned, otherwise MemAccess if one of the update_<L>_use() calls
 * returned MemAccess, else L2_Hit. Panics if more than two sets are
 * touched.
 *
 * As the expansion calls update_<L>_use(), that function has to be
 * declared before the macro is used.
 */
674#define CACHEUSE(L) \
675 \
676static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \
677{ \
weidendo28e2a142006-11-22 21:00:53 +0000678 UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \
679 UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
680 UWord tag = a & L.tag_mask; \
681 UWord tag2; \
weidendoa17f2a32006-03-20 10:27:30 +0000682 int i, j, idx; \
683 UWord *set, tmp_tag; \
684 UInt use_mask; \
685 \
barta0b6b2c2008-07-07 06:49:24 +0000686 CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%d/%d]\n", \
weidendoa17f2a32006-03-20 10:27:30 +0000687 L.name, a, size, set1, set2); \
688 \
689 /* First case: word entirely within line. */ \
690 if (set1 == set2) { \
691 \
weidendo144b76c2009-01-26 22:56:14 +0000692 set = &(L.tags[set1 * L.assoc]); \
weidendoa17f2a32006-03-20 10:27:30 +0000693 use_mask = L.line_start_mask[a & L.line_size_mask] & \
694 L.line_end_mask[(a+size-1) & L.line_size_mask]; \
695 \
696 /* This loop is unrolled for just the first case, which is the most */\
697 /* common. We can't unroll any further because it would screw up */\
698 /* if we have a direct-mapped (1-way) cache. */\
699 if (tag == (set[0] & L.tag_mask)) { \
weidendo144b76c2009-01-26 22:56:14 +0000700 idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000701 L.use[idx].count ++; \
702 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000703 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000704 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
705 use_mask, L.use[idx].mask, L.use[idx].count); \
706 return L1_Hit; \
707 } \
708 /* If the tag is one other than the MRU, move it into the MRU spot */\
709 /* and shuffle the rest down. */\
710 for (i = 1; i < L.assoc; i++) { \
711 if (tag == (set[i] & L.tag_mask)) { \
712 tmp_tag = set[i]; \
713 for (j = i; j > 0; j--) { \
714 set[j] = set[j - 1]; \
715 } \
716 set[0] = tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000717 idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000718 L.use[idx].count ++; \
719 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000720 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000721 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
722 use_mask, L.use[idx].mask, L.use[idx].count); \
723 return L1_Hit; \
724 } \
725 } \
726 \
727 /* A miss; install this tag as MRU, shuffle rest down. */ \
728 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
729 for (j = L.assoc - 1; j > 0; j--) { \
730 set[j] = set[j - 1]; \
731 } \
732 set[0] = tag | tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000733 idx = (set1 * L.assoc) + tmp_tag; \
weidendoa17f2a32006-03-20 10:27:30 +0000734 return update_##L##_use(&L, idx, \
735 use_mask, a &~ L.line_size_mask); \
736 \
737 /* Second case: word straddles two lines. */ \
738 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
739 } else if (((set1 + 1) & (L.sets-1)) == set2) { \
740 Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */ \
weidendo144b76c2009-01-26 22:56:14 +0000741 set = &(L.tags[set1 * L.assoc]); \
weidendoa17f2a32006-03-20 10:27:30 +0000742 use_mask = L.line_start_mask[a & L.line_size_mask]; \
743 if (tag == (set[0] & L.tag_mask)) { \
weidendo144b76c2009-01-26 22:56:14 +0000744 idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000745 L.use[idx].count ++; \
746 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000747 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000748 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
749 use_mask, L.use[idx].mask, L.use[idx].count); \
750 goto block2; \
751 } \
752 for (i = 1; i < L.assoc; i++) { \
753 if (tag == (set[i] & L.tag_mask)) { \
754 tmp_tag = set[i]; \
755 for (j = i; j > 0; j--) { \
756 set[j] = set[j - 1]; \
757 } \
758 set[0] = tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000759 idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000760 L.use[idx].count ++; \
761 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000762 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000763 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
764 use_mask, L.use[idx].mask, L.use[idx].count); \
765 goto block2; \
766 } \
767 } \
768 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
769 for (j = L.assoc - 1; j > 0; j--) { \
770 set[j] = set[j - 1]; \
771 } \
772 set[0] = tag | tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000773 idx = (set1 * L.assoc) + tmp_tag; \
weidendoa17f2a32006-03-20 10:27:30 +0000774 miss1 = update_##L##_use(&L, idx, \
775 use_mask, a &~ L.line_size_mask); \
776block2: \
weidendo144b76c2009-01-26 22:56:14 +0000777 set = &(L.tags[set2 * L.assoc]); \
weidendoa17f2a32006-03-20 10:27:30 +0000778 use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
weidendo28e2a142006-11-22 21:00:53 +0000779 tag2 = (a+size-1) & L.tag_mask; \
780 if (tag2 == (set[0] & L.tag_mask)) { \
weidendo144b76c2009-01-26 22:56:14 +0000781 idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000782 L.use[idx].count ++; \
783 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000784 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000785 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
786 use_mask, L.use[idx].mask, L.use[idx].count); \
787 return miss1; \
788 } \
789 for (i = 1; i < L.assoc; i++) { \
weidendo28e2a142006-11-22 21:00:53 +0000790 if (tag2 == (set[i] & L.tag_mask)) { \
weidendoa17f2a32006-03-20 10:27:30 +0000791 tmp_tag = set[i]; \
792 for (j = i; j > 0; j--) { \
793 set[j] = set[j - 1]; \
794 } \
795 set[0] = tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000796 idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \
weidendoa17f2a32006-03-20 10:27:30 +0000797 L.use[idx].count ++; \
798 L.use[idx].mask |= use_mask; \
barta0b6b2c2008-07-07 06:49:24 +0000799 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000800 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
801 use_mask, L.use[idx].mask, L.use[idx].count); \
802 return miss1; \
803 } \
804 } \
805 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
806 for (j = L.assoc - 1; j > 0; j--) { \
807 set[j] = set[j - 1]; \
808 } \
weidendo28e2a142006-11-22 21:00:53 +0000809 set[0] = tag2 | tmp_tag; \
weidendo144b76c2009-01-26 22:56:14 +0000810 idx = (set2 * L.assoc) + tmp_tag; \
weidendoa17f2a32006-03-20 10:27:30 +0000811 miss2 = update_##L##_use(&L, idx, \
812 use_mask, (a+size-1) &~ L.line_size_mask); \
813 return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit; \
814 \
815 } else { \
barta0b6b2c2008-07-07 06:49:24 +0000816 VG_(printf)("addr: %#lx size: %u sets: %d %d", a, size, set1, set2); \
weidendoa17f2a32006-03-20 10:27:30 +0000817 VG_(tool_panic)("item straddles more than two cache sets"); \
818 } \
819 return 0; \
820}
821
822
/*
 * Count the bits set in the lower 32 bits of 'bits'.
 *
 * Logarithmic bitcounting algorithm, see
 * http://graphics.stanford.edu/~seander/bithacks.html
 * Every step adds up neighbouring bit fields, doubling the field width
 * (1, 2, 4, 8, 16 bits) until one 32-bit sum is left.
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
  /* masks selecting every other field of width 1, 2, 4, 8 and 16 */
  static const unsigned int field_mask[] = {
    0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
  };
  unsigned int sum = bits;
  unsigned int width = 1;
  unsigned int step;

  for (step = 0; step < 5; step++) {
    sum = ((sum >> width) & field_mask[step]) + (sum & field_mask[step]);
    width <<= 1;
  }
  return sum;
}
840
841static void update_L2_use(int idx, Addr memline)
842{
843 line_loaded* loaded = &(L2.loaded[idx]);
844 line_use* use = &(L2.use[idx]);
845 int i = ((32 - countBits(use->mask)) * L2.line_size)>>5;
846
barta0b6b2c2008-07-07 06:49:24 +0000847 CLG_DEBUG(2, " L2.miss [%d]: at %#lx accessing memline %#lx\n",
weidendoa17f2a32006-03-20 10:27:30 +0000848 idx, bb_base + current_ii->instr_offset, memline);
849 if (use->count>0) {
barta0b6b2c2008-07-07 06:49:24 +0000850 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",
weidendoa17f2a32006-03-20 10:27:30 +0000851 use->count, i, use->mask, loaded->memline, loaded->iaddr);
852 CLG_DEBUG(2, " collect: %d, use_base %p\n",
853 CLG_(current_state).collect, loaded->use_base);
854
855 if (CLG_(current_state).collect && loaded->use_base) {
856 (loaded->use_base)[off_L2_AcCost] += 1000 / use->count;
857 (loaded->use_base)[off_L2_SpLoss] += i;
858 }
859 }
860
861 use->count = 0;
862 use->mask = 0;
863
864 loaded->memline = memline;
865 loaded->iaddr = bb_base + current_ii->instr_offset;
866 loaded->use_base = (CLG_(current_state).nonskipped) ?
867 CLG_(current_state).nonskipped->skipped :
868 cost_base + current_ii->cost_offset;
869}
870
871static
872CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded)
873{
874 UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1);
weidendo144b76c2009-01-26 22:56:14 +0000875 UWord* set = &(L2.tags[setNo * L2.assoc]);
weidendoa17f2a32006-03-20 10:27:30 +0000876 UWord tag = memline & L2.tag_mask;
877
878 int i, j, idx;
879 UWord tmp_tag;
880
barta0b6b2c2008-07-07 06:49:24 +0000881 CLG_DEBUG(6,"L2.Acc(Memline %#lx): Set %d\n", memline, setNo);
weidendoa17f2a32006-03-20 10:27:30 +0000882
883 if (tag == (set[0] & L2.tag_mask)) {
weidendo144b76c2009-01-26 22:56:14 +0000884 idx = (setNo * L2.assoc) + (set[0] & ~L2.tag_mask);
weidendoa17f2a32006-03-20 10:27:30 +0000885 l1_loaded->dep_use = &(L2.use[idx]);
886
barta0b6b2c2008-07-07 06:49:24 +0000887 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
weidendoa17f2a32006-03-20 10:27:30 +0000888 idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr,
889 L2.use[idx].mask, L2.use[idx].count);
890 return L2_Hit;
891 }
892 for (i = 1; i < L2.assoc; i++) {
893 if (tag == (set[i] & L2.tag_mask)) {
894 tmp_tag = set[i];
895 for (j = i; j > 0; j--) {
896 set[j] = set[j - 1];
897 }
898 set[0] = tmp_tag;
weidendo144b76c2009-01-26 22:56:14 +0000899 idx = (setNo * L2.assoc) + (tmp_tag & ~L2.tag_mask);
weidendoa17f2a32006-03-20 10:27:30 +0000900 l1_loaded->dep_use = &(L2.use[idx]);
901
barta0b6b2c2008-07-07 06:49:24 +0000902 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
weidendoa17f2a32006-03-20 10:27:30 +0000903 i, idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr,
904 L2.use[idx].mask, L2.use[idx].count);
905 return L2_Hit;
906 }
907 }
908
909 /* A miss; install this tag as MRU, shuffle rest down. */
910 tmp_tag = set[L2.assoc - 1] & ~L2.tag_mask;
911 for (j = L2.assoc - 1; j > 0; j--) {
912 set[j] = set[j - 1];
913 }
914 set[0] = tag | tmp_tag;
weidendo144b76c2009-01-26 22:56:14 +0000915 idx = (setNo * L2.assoc) + tmp_tag;
weidendoa17f2a32006-03-20 10:27:30 +0000916 l1_loaded->dep_use = &(L2.use[idx]);
917
918 update_L2_use(idx, memline);
919
920 return MemAccess;
921}
922
923
924
925
926#define UPDATE_USE(L) \
927 \
928static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
929 UInt mask, Addr memline) \
930{ \
931 line_loaded* loaded = &(cache->loaded[idx]); \
932 line_use* use = &(cache->use[idx]); \
933 int c = ((32 - countBits(use->mask)) * cache->line_size)>>5; \
934 \
barta0b6b2c2008-07-07 06:49:24 +0000935 CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \
weidendoa17f2a32006-03-20 10:27:30 +0000936 cache->name, idx, bb_base + current_ii->instr_offset, memline, mask); \
937 if (use->count>0) { \
barta0b6b2c2008-07-07 06:49:24 +0000938 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",\
weidendoa17f2a32006-03-20 10:27:30 +0000939 use->count, c, use->mask, loaded->memline, loaded->iaddr); \
940 CLG_DEBUG(2, " collect: %d, use_base %p\n", \
941 CLG_(current_state).collect, loaded->use_base); \
942 \
943 if (CLG_(current_state).collect && loaded->use_base) { \
944 (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count; \
945 (loaded->use_base)[off_##L##_SpLoss] += c; \
946 \
947 /* FIXME (?): L1/L2 line sizes must be equal ! */ \
948 loaded->dep_use->mask |= use->mask; \
949 loaded->dep_use->count += use->count; \
950 } \
951 } \
952 \
953 use->count = 1; \
954 use->mask = mask; \
955 loaded->memline = memline; \
956 loaded->iaddr = bb_base + current_ii->instr_offset; \
957 loaded->use_base = (CLG_(current_state).nonskipped) ? \
958 CLG_(current_state).nonskipped->skipped : \
959 cost_base + current_ii->cost_offset; \
960 \
961 if (memline == 0) return L2_Hit; \
962 return cacheuse_L2_access(memline, loaded); \
963}
964
965UPDATE_USE(I1);
966UPDATE_USE(D1);
967
968CACHEUSE(I1);
969CACHEUSE(D1);
970
971
972static
973void cacheuse_finish(void)
974{
975 int i;
weidendo0a1951d2009-06-15 00:16:36 +0000976 InstrInfo ii = { 0,0,0,0 };
weidendoa17f2a32006-03-20 10:27:30 +0000977
978 if (!CLG_(current_state).collect) return;
979
980 bb_base = 0;
981 current_ii = &ii;
weidendo0a1951d2009-06-15 00:16:36 +0000982 cost_base = 0;
weidendoa17f2a32006-03-20 10:27:30 +0000983
984 /* update usage counters */
985 if (I1.use)
986 for (i = 0; i < I1.sets * I1.assoc; i++)
987 if (I1.loaded[i].use_base)
988 update_I1_use( &I1, i, 0,0);
989
990 if (D1.use)
991 for (i = 0; i < D1.sets * D1.assoc; i++)
992 if (D1.loaded[i].use_base)
993 update_D1_use( &D1, i, 0,0);
994
995 if (L2.use)
996 for (i = 0; i < L2.sets * L2.assoc; i++)
997 if (L2.loaded[i].use_base)
998 update_L2_use(i, 0);
999}
1000
1001
1002
1003/*------------------------------------------------------------*/
1004/*--- Helper functions called by instrumented code ---*/
1005/*------------------------------------------------------------*/
1006
1007
1008static __inline__
1009void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
1010{
1011 switch(r) {
1012 case WriteBackMemAccess:
1013 if (clo_simulate_writeback) {
1014 c1[3]++;
1015 c2[3]++;
1016 }
1017 // fall through
1018
1019 case MemAccess:
1020 c1[2]++;
1021 c2[2]++;
1022 // fall through
1023
1024 case L2_Hit:
1025 c1[1]++;
1026 c2[1]++;
1027 // fall through
1028
1029 default:
1030 c1[0]++;
1031 c2[0]++;
1032 }
1033}
1034
weidendo0a1951d2009-06-15 00:16:36 +00001035static
1036Char* cacheRes(CacheModelResult r)
1037{
1038 switch(r) {
1039 case L1_Hit: return "L1 Hit ";
1040 case L2_Hit: return "L2 Hit ";
1041 case MemAccess: return "L2 Miss";
1042 case WriteBackMemAccess: return "L2 Miss (dirty)";
1043 default:
1044 tl_assert(0);
1045 }
1046 return "??";
1047}
weidendoa17f2a32006-03-20 10:27:30 +00001048
1049VG_REGPARM(1)
1050static void log_1I0D(InstrInfo* ii)
1051{
1052 CacheModelResult IrRes;
1053
1054 current_ii = ii;
1055 IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
1056
weidendo0a1951d2009-06-15 00:16:36 +00001057 CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
1058 bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001059
1060 if (CLG_(current_state).collect) {
1061 ULong* cost_Ir;
weidendo0a1951d2009-06-15 00:16:36 +00001062
weidendoa17f2a32006-03-20 10:27:30 +00001063 if (CLG_(current_state).nonskipped)
weidendo5bba5252010-06-09 22:32:53 +00001064 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
weidendoa17f2a32006-03-20 10:27:30 +00001065 else
weidendo5bba5252010-06-09 22:32:53 +00001066 cost_Ir = cost_base + ii->cost_offset + ii->eventset->offset[EG_IR];
weidendoa17f2a32006-03-20 10:27:30 +00001067
1068 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001069 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001070 }
1071}
1072
weidendo0a1951d2009-06-15 00:16:36 +00001073VG_REGPARM(2)
1074static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
1075{
1076 CacheModelResult Ir1Res, Ir2Res;
1077 ULong *global_cost_Ir;
1078
1079 current_ii = ii1;
1080 Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size);
1081 current_ii = ii2;
1082 Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size);
1083
1084 CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
1085 bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1086 bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );
1087
1088 if (!CLG_(current_state).collect) return;
1089
weidendo5bba5252010-06-09 22:32:53 +00001090 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001091 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001092 ULong* skipped_cost_Ir =
1093 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1094
weidendo0a1951d2009-06-15 00:16:36 +00001095 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1096 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1097 return;
1098 }
1099
weidendo5bba5252010-06-09 22:32:53 +00001100 inc_costs(Ir1Res, global_cost_Ir,
1101 cost_base + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
1102 inc_costs(Ir2Res, global_cost_Ir,
1103 cost_base + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
weidendo0a1951d2009-06-15 00:16:36 +00001104}
1105
1106VG_REGPARM(3)
1107static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
1108{
1109 CacheModelResult Ir1Res, Ir2Res, Ir3Res;
1110 ULong *global_cost_Ir;
1111
1112 current_ii = ii1;
1113 Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size);
1114 current_ii = ii2;
1115 Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size);
1116 current_ii = ii3;
1117 Ir3Res = (*simulator.I1_Read)(bb_base + ii3->instr_offset, ii3->instr_size);
1118
1119 CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
1120 bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
1121 bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
1122 bb_base + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );
1123
1124 if (!CLG_(current_state).collect) return;
1125
weidendo5bba5252010-06-09 22:32:53 +00001126 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001127 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001128 ULong* skipped_cost_Ir =
1129 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
weidendo0a1951d2009-06-15 00:16:36 +00001130 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
1131 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
1132 inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
1133 return;
1134 }
1135
weidendo5bba5252010-06-09 22:32:53 +00001136 inc_costs(Ir1Res, global_cost_Ir,
1137 cost_base + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
1138 inc_costs(Ir2Res, global_cost_Ir,
1139 cost_base + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
1140 inc_costs(Ir3Res, global_cost_Ir,
1141 cost_base + ii3->cost_offset + ii3->eventset->offset[EG_IR]);
weidendo0a1951d2009-06-15 00:16:36 +00001142}
weidendoa17f2a32006-03-20 10:27:30 +00001143
1144/* Instruction doing a read access */
1145
weidendo0a1951d2009-06-15 00:16:36 +00001146VG_REGPARM(3)
1147static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001148{
1149 CacheModelResult IrRes, DrRes;
1150
1151 current_ii = ii;
1152 IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001153 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001154
weidendo0a1951d2009-06-15 00:16:36 +00001155 CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n",
1156 bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
1157 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001158
1159 if (CLG_(current_state).collect) {
1160 ULong *cost_Ir, *cost_Dr;
1161
1162 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001163 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1164 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
weidendoa17f2a32006-03-20 10:27:30 +00001165 }
1166 else {
weidendo5bba5252010-06-09 22:32:53 +00001167 cost_Ir = cost_base + ii->cost_offset + ii->eventset->offset[EG_IR];
1168 cost_Dr = cost_base + ii->cost_offset + ii->eventset->offset[EG_DR];
weidendoa17f2a32006-03-20 10:27:30 +00001169 }
1170
1171 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001172 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001173 inc_costs(DrRes, cost_Dr,
weidendo5bba5252010-06-09 22:32:53 +00001174 CLG_(current_state).cost + fullOffset(EG_DR) );
weidendoa17f2a32006-03-20 10:27:30 +00001175 }
1176}
1177
1178
weidendo0a1951d2009-06-15 00:16:36 +00001179VG_REGPARM(3)
1180static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001181{
1182 CacheModelResult DrRes;
1183
1184 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001185 DrRes = (*simulator.D1_Read)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001186
weidendo0a1951d2009-06-15 00:16:36 +00001187 CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n",
1188 data_addr, data_size, cacheRes(DrRes));
weidendoa17f2a32006-03-20 10:27:30 +00001189
1190 if (CLG_(current_state).collect) {
1191 ULong *cost_Dr;
1192
weidendo5bba5252010-06-09 22:32:53 +00001193 if (CLG_(current_state).nonskipped)
1194 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
1195 else
1196 cost_Dr = cost_base + ii->cost_offset + ii->eventset->offset[EG_DR];
weidendo0a1951d2009-06-15 00:16:36 +00001197
weidendoa17f2a32006-03-20 10:27:30 +00001198 inc_costs(DrRes, cost_Dr,
weidendo5bba5252010-06-09 22:32:53 +00001199 CLG_(current_state).cost + fullOffset(EG_DR) );
weidendoa17f2a32006-03-20 10:27:30 +00001200 }
1201}
1202
1203
1204/* Instruction doing a write access */
1205
weidendo0a1951d2009-06-15 00:16:36 +00001206VG_REGPARM(3)
1207static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001208{
1209 CacheModelResult IrRes, DwRes;
1210
1211 current_ii = ii;
1212 IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
weidendo0a1951d2009-06-15 00:16:36 +00001213 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001214
weidendo0a1951d2009-06-15 00:16:36 +00001215 CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n",
1216 bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
1217 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001218
1219 if (CLG_(current_state).collect) {
1220 ULong *cost_Ir, *cost_Dw;
1221
1222 if (CLG_(current_state).nonskipped) {
weidendo5bba5252010-06-09 22:32:53 +00001223 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
1224 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
weidendoa17f2a32006-03-20 10:27:30 +00001225 }
1226 else {
weidendo5bba5252010-06-09 22:32:53 +00001227 cost_Ir = cost_base + ii->cost_offset + ii->eventset->offset[EG_IR];
1228 cost_Dw = cost_base + ii->cost_offset + ii->eventset->offset[EG_DW];
weidendoa17f2a32006-03-20 10:27:30 +00001229 }
1230
1231 inc_costs(IrRes, cost_Ir,
weidendo5bba5252010-06-09 22:32:53 +00001232 CLG_(current_state).cost + fullOffset(EG_IR) );
weidendoa17f2a32006-03-20 10:27:30 +00001233 inc_costs(DwRes, cost_Dw,
weidendo5bba5252010-06-09 22:32:53 +00001234 CLG_(current_state).cost + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001235 }
1236}
1237
weidendo0a1951d2009-06-15 00:16:36 +00001238VG_REGPARM(3)
1239static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
weidendoa17f2a32006-03-20 10:27:30 +00001240{
1241 CacheModelResult DwRes;
1242
1243 current_ii = ii;
weidendo0a1951d2009-06-15 00:16:36 +00001244 DwRes = (*simulator.D1_Write)(data_addr, data_size);
weidendoa17f2a32006-03-20 10:27:30 +00001245
weidendo0a1951d2009-06-15 00:16:36 +00001246 CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n",
1247 data_addr, data_size, cacheRes(DwRes));
weidendoa17f2a32006-03-20 10:27:30 +00001248
1249 if (CLG_(current_state).collect) {
1250 ULong *cost_Dw;
1251
weidendo5bba5252010-06-09 22:32:53 +00001252 if (CLG_(current_state).nonskipped)
1253 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW);
1254 else
1255 cost_Dw = cost_base + ii->cost_offset + ii->eventset->offset[EG_DW];
weidendoa17f2a32006-03-20 10:27:30 +00001256
1257 inc_costs(DwRes, cost_Dw,
weidendo5bba5252010-06-09 22:32:53 +00001258 CLG_(current_state).cost + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001259 }
1260}
1261
weidendoa17f2a32006-03-20 10:27:30 +00001262
1263
1264/*------------------------------------------------------------*/
1265/*--- Cache configuration ---*/
1266/*------------------------------------------------------------*/
1267
/* Marker value for a cache configuration that has not been given on the
 * command line: all three initializers are -1.  configure_caches() treats
 * a configuration as defined as soon as size, assoc or line_size differs
 * from -1. */
#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })

/* Cache configurations requested via --I1=, --D1= and --L2= (filled in by
 * parse_opt()); they stay UNDEFINED_CACHE if the option was not given. */
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_L2_cache = UNDEFINED_CACHE;
1273
1274
1275/* Checks cache config is ok; makes it so if not. */
1276static
1277void check_cache(cache_t* cache, Char *name)
1278{
weidendo144b76c2009-01-26 22:56:14 +00001279 /* Simulator requires line size and set count to be powers of two */
1280 if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
1281 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
weidendoa17f2a32006-03-20 10:27:30 +00001282 VG_(message)(Vg_UserMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001283 "error: %s set count not a power of two; aborting.\n",
weidendo144b76c2009-01-26 22:56:14 +00001284 name);
weidendoa17f2a32006-03-20 10:27:30 +00001285 }
1286
weidendo144b76c2009-01-26 22:56:14 +00001287 if (-1 == VG_(log2)(cache->line_size)) {
weidendoa17f2a32006-03-20 10:27:30 +00001288 VG_(message)(Vg_UserMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001289 "error: %s line size of %dB not a power of two; aborting.\n",
weidendoa17f2a32006-03-20 10:27:30 +00001290 name, cache->line_size);
1291 VG_(exit)(1);
1292 }
1293
1294 // Then check line size >= 16 -- any smaller and a single instruction could
1295 // straddle three cache lines, which breaks a simulation assertion and is
1296 // stupid anyway.
1297 if (cache->line_size < MIN_LINE_SIZE) {
1298 VG_(message)(Vg_UserMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001299 "error: %s line size of %dB too small; aborting.\n",
weidendoa17f2a32006-03-20 10:27:30 +00001300 name, cache->line_size);
1301 VG_(exit)(1);
1302 }
1303
1304 /* Then check cache size > line size (causes seg faults if not). */
1305 if (cache->size <= cache->line_size) {
1306 VG_(message)(Vg_UserMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001307 "error: %s cache size of %dB <= line size of %dB; aborting.\n",
weidendoa17f2a32006-03-20 10:27:30 +00001308 name, cache->size, cache->line_size);
1309 VG_(exit)(1);
1310 }
1311
1312 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1313 if (cache->assoc > (cache->size / cache->line_size)) {
1314 VG_(message)(Vg_UserMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001315 "warning: %s associativity > (size / line size); aborting.\n", name);
weidendoa17f2a32006-03-20 10:27:30 +00001316 VG_(exit)(1);
1317 }
1318}
1319
1320static
1321void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)
1322{
1323#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1324
1325 Int n_clos = 0;
1326
1327 // Count how many were defined on the command line.
1328 if (DEFINED(clo_I1_cache)) { n_clos++; }
1329 if (DEFINED(clo_D1_cache)) { n_clos++; }
1330 if (DEFINED(clo_L2_cache)) { n_clos++; }
1331
1332 // Set the cache config (using auto-detection, if supported by the
1333 // architecture)
1334 VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );
1335
1336 // Then replace with any defined on the command line.
1337 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
1338 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
1339 if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }
1340
1341 // Then check values and fix if not acceptable.
1342 check_cache(I1c, "I1");
1343 check_cache(D1c, "D1");
1344 check_cache(L2c, "L2");
1345
1346 if (VG_(clo_verbosity) > 1) {
sewardj0f33adf2009-07-15 14:51:03 +00001347 VG_(message)(Vg_UserMsg, "Cache configuration used:\n");
1348 VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines\n",
weidendoa17f2a32006-03-20 10:27:30 +00001349 I1c->size, I1c->assoc, I1c->line_size);
sewardj0f33adf2009-07-15 14:51:03 +00001350 VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines\n",
weidendoa17f2a32006-03-20 10:27:30 +00001351 D1c->size, D1c->assoc, D1c->line_size);
sewardj0f33adf2009-07-15 14:51:03 +00001352 VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines\n",
weidendoa17f2a32006-03-20 10:27:30 +00001353 L2c->size, L2c->assoc, L2c->line_size);
1354 }
1355#undef CMD_LINE_DEFINED
1356}
1357
1358
1359/* Initialize and clear simulator state */
1360static void cachesim_post_clo_init(void)
1361{
1362 /* Cache configurations. */
1363 cache_t I1c, D1c, L2c;
1364
1365 /* Initialize access handlers */
1366 if (!CLG_(clo).simulate_cache) {
1367 CLG_(cachesim).log_1I0D = 0;
1368 CLG_(cachesim).log_1I0D_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001369 CLG_(cachesim).log_2I0D = 0;
1370 CLG_(cachesim).log_2I0D_name = "(no function)";
1371 CLG_(cachesim).log_3I0D = 0;
1372 CLG_(cachesim).log_3I0D_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001373
1374 CLG_(cachesim).log_1I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001375 CLG_(cachesim).log_1I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001376 CLG_(cachesim).log_1I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001377 CLG_(cachesim).log_1I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001378
1379 CLG_(cachesim).log_0I1Dr = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001380 CLG_(cachesim).log_0I1Dr_name = "(no function)";
weidendo0a1951d2009-06-15 00:16:36 +00001381 CLG_(cachesim).log_0I1Dw = 0;
weidendoa17f2a32006-03-20 10:27:30 +00001382 CLG_(cachesim).log_0I1Dw_name = "(no function)";
weidendoa17f2a32006-03-20 10:27:30 +00001383 return;
1384 }
1385
1386 /* Configuration of caches only needed with real cache simulation */
1387 configure_caches(&I1c, &D1c, &L2c);
1388
1389 I1.name = "I1";
1390 D1.name = "D1";
1391 L2.name = "L2";
1392
1393 cachesim_initcache(I1c, &I1);
1394 cachesim_initcache(D1c, &D1);
1395 cachesim_initcache(L2c, &L2);
1396
1397 /* the other cache simulators use the standard helpers
1398 * with dispatching via simulator struct */
1399
1400 CLG_(cachesim).log_1I0D = log_1I0D;
1401 CLG_(cachesim).log_1I0D_name = "log_1I0D";
weidendo0a1951d2009-06-15 00:16:36 +00001402 CLG_(cachesim).log_2I0D = log_2I0D;
1403 CLG_(cachesim).log_2I0D_name = "log_2I0D";
1404 CLG_(cachesim).log_3I0D = log_3I0D;
1405 CLG_(cachesim).log_3I0D_name = "log_3I0D";
weidendoa17f2a32006-03-20 10:27:30 +00001406
1407 CLG_(cachesim).log_1I1Dr = log_1I1Dr;
1408 CLG_(cachesim).log_1I1Dw = log_1I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001409 CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
1410 CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001411
1412 CLG_(cachesim).log_0I1Dr = log_0I1Dr;
1413 CLG_(cachesim).log_0I1Dw = log_0I1Dw;
weidendoa17f2a32006-03-20 10:27:30 +00001414 CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
1415 CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
weidendoa17f2a32006-03-20 10:27:30 +00001416
1417 if (clo_collect_cacheuse) {
1418
1419 /* Output warning for not supported option combinations */
1420 if (clo_simulate_hwpref) {
1421 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001422 "warning: prefetch simulation can not be "
1423 "used with cache usage\n");
weidendoa17f2a32006-03-20 10:27:30 +00001424 clo_simulate_hwpref = False;
1425 }
1426
1427 if (clo_simulate_writeback) {
1428 VG_(message)(Vg_DebugMsg,
sewardj0f33adf2009-07-15 14:51:03 +00001429 "warning: write-back simulation can not be "
1430 "used with cache usage\n");
weidendoa17f2a32006-03-20 10:27:30 +00001431 clo_simulate_writeback = False;
1432 }
1433
1434 simulator.I1_Read = cacheuse_I1_doRead;
1435 simulator.D1_Read = cacheuse_D1_doRead;
1436 simulator.D1_Write = cacheuse_D1_doRead;
1437 return;
1438 }
1439
1440 if (clo_simulate_hwpref) {
1441 prefetch_clear();
1442
1443 if (clo_simulate_writeback) {
1444 simulator.I1_Read = prefetch_I1_Read;
1445 simulator.D1_Read = prefetch_D1_Read;
1446 simulator.D1_Write = prefetch_D1_Write;
1447 }
1448 else {
1449 simulator.I1_Read = prefetch_I1_ref;
1450 simulator.D1_Read = prefetch_D1_ref;
1451 simulator.D1_Write = prefetch_D1_ref;
1452 }
1453
1454 return;
1455 }
1456
1457 if (clo_simulate_writeback) {
1458 simulator.I1_Read = cachesim_I1_Read;
1459 simulator.D1_Read = cachesim_D1_Read;
1460 simulator.D1_Write = cachesim_D1_Write;
1461 }
1462 else {
1463 simulator.I1_Read = cachesim_I1_ref;
1464 simulator.D1_Read = cachesim_D1_ref;
1465 simulator.D1_Write = cachesim_D1_ref;
1466 }
1467}
1468
1469
1470/* Clear simulator state. Has to be initialized before */
1471static
1472void cachesim_clear(void)
1473{
1474 cachesim_clearcache(&I1);
1475 cachesim_clearcache(&D1);
1476 cachesim_clearcache(&L2);
1477
1478 prefetch_clear();
1479}
1480
1481
1482static void cachesim_getdesc(Char* buf)
1483{
1484 Int p;
1485 p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line);
1486 p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line);
1487 VG_(sprintf)(buf+p, "desc: L2 cache: %s\n", L2.desc_line);
1488}
1489
1490static
1491void cachesim_print_opts(void)
1492{
1493 VG_(printf)(
1494"\n cache simulator options:\n"
1495" --simulate-cache=no|yes Do cache simulation [no]\n"
1496" --simulate-wb=no|yes Count write-back events [no]\n"
1497" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"
1498#if CLG_EXPERIMENTAL
1499" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"
1500#endif
1501" --cacheuse=no|yes Collect cache block use [no]\n"
1502" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1503" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1504" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"
1505 );
1506}
1507
njn83df0b62009-02-25 01:01:05 +00001508static void parse_opt ( cache_t* cache, char* opt )
weidendoa17f2a32006-03-20 10:27:30 +00001509{
njn83df0b62009-02-25 01:01:05 +00001510 Long i1, i2, i3;
1511 Char* endptr;
weidendoa17f2a32006-03-20 10:27:30 +00001512
njn83df0b62009-02-25 01:01:05 +00001513 // Option argument looks like "65536,2,64". Extract them.
1514 i1 = VG_(strtoll10)(opt, &endptr); if (*endptr != ',') goto bad;
1515 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
1516 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
weidendoa17f2a32006-03-20 10:27:30 +00001517
njn83df0b62009-02-25 01:01:05 +00001518 // Check for overflow.
1519 cache->size = (Int)i1;
1520 cache->assoc = (Int)i2;
1521 cache->line_size = (Int)i3;
1522 if (cache->size != i1) goto overflow;
1523 if (cache->assoc != i2) goto overflow;
1524 if (cache->line_size != i3) goto overflow;
weidendoa17f2a32006-03-20 10:27:30 +00001525
1526 return;
1527
njn83df0b62009-02-25 01:01:05 +00001528 overflow:
1529 VG_(message)(Vg_UserMsg,
1530 "one of the cache parameters was too large and overflowed\n");
weidendoa17f2a32006-03-20 10:27:30 +00001531 bad:
njn83df0b62009-02-25 01:01:05 +00001532 // XXX: this omits the "--I1/D1/L2=" part from the message, but that's
1533 // not a big deal.
1534 VG_(err_bad_option)(opt);
weidendoa17f2a32006-03-20 10:27:30 +00001535}
1536
1537/* Check for command line option for cache configuration.
1538 * Return False if unknown and not handled.
1539 *
1540 * Called from CLG_(process_cmd_line_option)() in clo.c
1541 */
1542static Bool cachesim_parse_opt(Char* arg)
1543{
njn83df0b62009-02-25 01:01:05 +00001544 Char* tmp_str;
weidendoa17f2a32006-03-20 10:27:30 +00001545
njn83df0b62009-02-25 01:01:05 +00001546 if VG_BOOL_CLO(arg, "--simulate-wb", clo_simulate_writeback) {}
1547 else if VG_BOOL_CLO(arg, "--simulate-hwpref", clo_simulate_hwpref) {}
1548 else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors) {}
weidendoa17f2a32006-03-20 10:27:30 +00001549
njn83df0b62009-02-25 01:01:05 +00001550 else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) {
1551 if (clo_collect_cacheuse) {
1552 /* Use counters only make sense with fine dumping */
1553 CLG_(clo).dump_instr = True;
1554 }
1555 }
weidendoa17f2a32006-03-20 10:27:30 +00001556
njn83df0b62009-02-25 01:01:05 +00001557 else if VG_STR_CLO(arg, "--I1", tmp_str)
1558 parse_opt(&clo_I1_cache, tmp_str);
1559 else if VG_STR_CLO(arg, "--D1", tmp_str)
1560 parse_opt(&clo_D1_cache, tmp_str);
1561 else if VG_STR_CLO(arg, "--L2", tmp_str)
1562 parse_opt(&clo_L2_cache, tmp_str);
weidendoa17f2a32006-03-20 10:27:30 +00001563 else
1564 return False;
1565
1566 return True;
1567}
1568
1569/* Adds commas to ULong, right justifying in a field field_width wide, returns
1570 * the string in buf. */
1571static
1572Int commify(ULong n, int field_width, char* buf)
1573{
1574 int len, n_commas, i, j, new_len, space;
1575
1576 VG_(sprintf)(buf, "%llu", n);
1577 len = VG_(strlen)(buf);
1578 n_commas = (len - 1) / 3;
1579 new_len = len + n_commas;
1580 space = field_width - new_len;
1581
1582 /* Allow for printing a number in a field_width smaller than it's size */
1583 if (space < 0) space = 0;
1584
1585 /* Make j = -1 because we copy the '\0' before doing the numbers in groups
1586 * of three. */
1587 for (j = -1, i = len ; i >= 0; i--) {
1588 buf[i + n_commas + space] = buf[i];
1589
1590 if ((i>0) && (3 == ++j)) {
1591 j = 0;
1592 n_commas--;
1593 buf[i + n_commas + space] = ',';
1594 }
1595 }
1596 /* Right justify in field. */
1597 for (i = 0; i < space; i++) buf[i] = ' ';
1598 return new_len;
1599}
1600
1601static
1602void percentify(Int n, Int ex, Int field_width, char buf[])
1603{
1604 int i, len, space;
1605
1606 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);
1607 len = VG_(strlen)(buf);
1608 space = field_width - len;
1609 if (space < 0) space = 0; /* Allow for v. small field_width */
1610 i = len;
1611
1612 /* Right justify in field */
1613 for ( ; i >= 0; i--) buf[i + space] = buf[i];
1614 for (i = 0; i < space; i++) buf[i] = ' ';
1615}
1616
1617static
1618void cachesim_printstat(void)
1619{
1620 FullCost total = CLG_(total_cost), D_total = 0;
1621 ULong L2_total_m, L2_total_mr, L2_total_mw,
1622 L2_total, L2_total_r, L2_total_w;
1623 char buf1[RESULTS_BUF_LEN],
1624 buf2[RESULTS_BUF_LEN],
1625 buf3[RESULTS_BUF_LEN];
1626 Int l1, l2, l3;
1627 Int p;
1628
1629 if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {
sewardj0f33adf2009-07-15 14:51:03 +00001630 VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001631 prefetch_up);
sewardj0f33adf2009-07-15 14:51:03 +00001632 VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu\n",
weidendoa17f2a32006-03-20 10:27:30 +00001633 prefetch_down);
sewardj0f33adf2009-07-15 14:51:03 +00001634 VG_(message)(Vg_DebugMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001635 }
1636
1637 /* I cache results. Use the I_refs value to determine the first column
1638 * width. */
weidendo5bba5252010-06-09 22:32:53 +00001639 l1 = commify(total[fullOffset(EG_IR)], 0, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001640 VG_(message)(Vg_UserMsg, "I refs: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001641
1642 if (!CLG_(clo).simulate_cache) return;
1643
weidendo5bba5252010-06-09 22:32:53 +00001644 commify(total[fullOffset(EG_IR) +1], l1, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001645 VG_(message)(Vg_UserMsg, "I1 misses: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001646
weidendo5bba5252010-06-09 22:32:53 +00001647 commify(total[fullOffset(EG_IR) +2], l1, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001648 VG_(message)(Vg_UserMsg, "L2i misses: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001649
1650 p = 100;
1651
weidendo5bba5252010-06-09 22:32:53 +00001652 if (0 == total[fullOffset(EG_IR)])
1653 total[fullOffset(EG_IR)] = 1;
weidendoa17f2a32006-03-20 10:27:30 +00001654
weidendo5bba5252010-06-09 22:32:53 +00001655 percentify(total[fullOffset(EG_IR)+1] * 100 * p /
1656 total[fullOffset(EG_IR)], p, l1+1, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001657 VG_(message)(Vg_UserMsg, "I1 miss rate: %s\n", buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001658
weidendo5bba5252010-06-09 22:32:53 +00001659 percentify(total[fullOffset(EG_IR)+2] * 100 * p /
1660 total[fullOffset(EG_IR)], p, l1+1, buf1);
sewardj0f33adf2009-07-15 14:51:03 +00001661 VG_(message)(Vg_UserMsg, "L2i miss rate: %s\n", buf1);
1662 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001663
1664 /* D cache results.
1665 Use the D_refs.rd and D_refs.wr values to determine the
1666 * width of columns 2 & 3. */
1667
1668 D_total = CLG_(get_eventset_cost)( CLG_(sets).full );
1669 CLG_(init_cost)( CLG_(sets).full, D_total);
weidendo5bba5252010-06-09 22:32:53 +00001670 // we only use the first 3 values of D_total, adding up Dr and Dw costs
1671 CLG_(copy_cost)( CLG_(get_event_set)(EG_DR), D_total, total + fullOffset(EG_DR) );
1672 CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) );
weidendoa17f2a32006-03-20 10:27:30 +00001673
1674 commify( D_total[0], l1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001675 l2 = commify(total[fullOffset(EG_DR)], 0, buf2);
1676 l3 = commify(total[fullOffset(EG_DW)], 0, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001677 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001678 buf1, buf2, buf3);
1679
1680 commify( D_total[1], l1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001681 commify(total[fullOffset(EG_DR)+1], l2, buf2);
1682 commify(total[fullOffset(EG_DW)+1], l3, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001683 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001684 buf1, buf2, buf3);
1685
1686 commify( D_total[2], l1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001687 commify(total[fullOffset(EG_DR)+2], l2, buf2);
1688 commify(total[fullOffset(EG_DW)+2], l3, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001689 VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001690 buf1, buf2, buf3);
1691
1692 p = 10;
1693
1694 if (0 == D_total[0]) D_total[0] = 1;
weidendo5bba5252010-06-09 22:32:53 +00001695 if (0 == total[fullOffset(EG_DR)]) total[fullOffset(EG_DR)] = 1;
1696 if (0 == total[fullOffset(EG_DW)]) total[fullOffset(EG_DW)] = 1;
weidendoa17f2a32006-03-20 10:27:30 +00001697
1698 percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001699 percentify(total[fullOffset(EG_DR)+1] * 100 * p /
1700 total[fullOffset(EG_DR)], p, l2+1, buf2);
1701 percentify(total[fullOffset(EG_DW)+1] * 100 * p /
1702 total[fullOffset(EG_DW)], p, l3+1, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001703 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )\n",
1704 buf1, buf2,buf3);
weidendoa17f2a32006-03-20 10:27:30 +00001705
1706 percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1);
weidendo5bba5252010-06-09 22:32:53 +00001707 percentify(total[fullOffset(EG_DR)+2] * 100 * p /
1708 total[fullOffset(EG_DR)], p, l2+1, buf2);
1709 percentify(total[fullOffset(EG_DW)+2] * 100 * p /
1710 total[fullOffset(EG_DW)], p, l3+1, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001711 VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )\n",
1712 buf1, buf2,buf3);
1713 VG_(message)(Vg_UserMsg, "\n");
weidendoa17f2a32006-03-20 10:27:30 +00001714
1715
1716
1717 /* L2 overall results */
1718
1719 L2_total =
weidendo5bba5252010-06-09 22:32:53 +00001720 total[fullOffset(EG_DR) +1] +
1721 total[fullOffset(EG_DW) +1] +
1722 total[fullOffset(EG_IR) +1];
weidendoa17f2a32006-03-20 10:27:30 +00001723 L2_total_r =
weidendo5bba5252010-06-09 22:32:53 +00001724 total[fullOffset(EG_DR) +1] +
1725 total[fullOffset(EG_IR) +1];
1726 L2_total_w = total[fullOffset(EG_DW) +1];
weidendoa17f2a32006-03-20 10:27:30 +00001727 commify(L2_total, l1, buf1);
1728 commify(L2_total_r, l2, buf2);
1729 commify(L2_total_w, l3, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001730 VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001731 buf1, buf2, buf3);
1732
1733 L2_total_m =
weidendo5bba5252010-06-09 22:32:53 +00001734 total[fullOffset(EG_DR) +2] +
1735 total[fullOffset(EG_DW) +2] +
1736 total[fullOffset(EG_IR) +2];
weidendoa17f2a32006-03-20 10:27:30 +00001737 L2_total_mr =
weidendo5bba5252010-06-09 22:32:53 +00001738 total[fullOffset(EG_DR) +2] +
1739 total[fullOffset(EG_IR) +2];
1740 L2_total_mw = total[fullOffset(EG_DW) +2];
weidendoa17f2a32006-03-20 10:27:30 +00001741 commify(L2_total_m, l1, buf1);
1742 commify(L2_total_mr, l2, buf2);
1743 commify(L2_total_mw, l3, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001744 VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)\n",
weidendoa17f2a32006-03-20 10:27:30 +00001745 buf1, buf2, buf3);
1746
1747 percentify(L2_total_m * 100 * p /
weidendo5bba5252010-06-09 22:32:53 +00001748 (total[fullOffset(EG_IR)] + D_total[0]), p, l1+1, buf1);
weidendoa17f2a32006-03-20 10:27:30 +00001749 percentify(L2_total_mr * 100 * p /
weidendo5bba5252010-06-09 22:32:53 +00001750 (total[fullOffset(EG_IR)] + total[fullOffset(EG_DR)]),
weidendoa17f2a32006-03-20 10:27:30 +00001751 p, l2+1, buf2);
1752 percentify(L2_total_mw * 100 * p /
weidendo5bba5252010-06-09 22:32:53 +00001753 total[fullOffset(EG_DW)], p, l3+1, buf3);
sewardj0f33adf2009-07-15 14:51:03 +00001754 VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )\n",
weidendoa17f2a32006-03-20 10:27:30 +00001755 buf1, buf2,buf3);
1756}
1757
1758
1759/*------------------------------------------------------------*/
1760/*--- Setup for Event set. ---*/
1761/*------------------------------------------------------------*/
1762
/* Global event sets of the tool. The "base" and "full" members are
 * assigned in CLG_(init_eventsets). */
1763struct event_sets CLG_(sets);
1764
weidendo5bba5252010-06-09 22:32:53 +00001765void CLG_(init_eventsets)()
weidendoa17f2a32006-03-20 10:27:30 +00001766{
weidendo5bba5252010-06-09 22:32:53 +00001767 // Event groups from which the event sets are composed
1768 // the "Use" group only is used with "cacheuse" simulation
1769 if (clo_collect_cacheuse)
1770 CLG_(register_event_group4)(EG_USE,
1771 "AcCost1", "SpLoss1", "AcCost2", "SpLoss2");
weidendoa17f2a32006-03-20 10:27:30 +00001772
weidendo5bba5252010-06-09 22:32:53 +00001773 if (!CLG_(clo).simulate_cache)
1774 CLG_(register_event_group)(EG_IR, "Ir");
1775 else if (!clo_simulate_writeback) {
1776 CLG_(register_event_group3)(EG_IR, "Ir", "I1mr", "I2mr");
1777 CLG_(register_event_group3)(EG_DR, "Dr", "D1mr", "D2mr");
1778 CLG_(register_event_group3)(EG_DW, "Dw", "D1mw", "D2mw");
weidendoa17f2a32006-03-20 10:27:30 +00001779 }
weidendo5bba5252010-06-09 22:32:53 +00001780 else { // clo_simulate_writeback
1781 CLG_(register_event_group4)(EG_IR, "Ir", "I1mr", "I2mr", "I2dmr");
1782 CLG_(register_event_group4)(EG_DR, "Dr", "D1mr", "D2mr", "I2dmr");
1783 CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw");
weidendoa17f2a32006-03-20 10:27:30 +00001784 }
weidendo5bba5252010-06-09 22:32:53 +00001785
1786 if (CLG_(clo).collect_alloc)
1787 CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
1788
1789 if (CLG_(clo).collect_systime)
1790 CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");
1791
1792 // event set used as base for instruction self cost
1793 CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);
1794
1795 // event set comprising all event groups, used for inclusive cost
1796 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
1797 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
1798
1799 CLG_DEBUGIF(1) {
1800 CLG_DEBUG(1, "EventSets:\n");
1801 CLG_(print_eventset)(-2, CLG_(sets).base);
1802 CLG_(print_eventset)(-2, CLG_(sets).full);
weidendoa17f2a32006-03-20 10:27:30 +00001803 }
weidendoa17f2a32006-03-20 10:27:30 +00001804
weidendo5bba5252010-06-09 22:32:53 +00001805 /* Not-existing events are silently ignored */
1806 CLG_(dumpmap) = CLG_(get_eventmapping)(CLG_(sets).full);
1807 CLG_(append_event)(CLG_(dumpmap), "Ir");
1808 CLG_(append_event)(CLG_(dumpmap), "Dr");
1809 CLG_(append_event)(CLG_(dumpmap), "Dw");
1810 CLG_(append_event)(CLG_(dumpmap), "I1mr");
1811 CLG_(append_event)(CLG_(dumpmap), "D1mr");
1812 CLG_(append_event)(CLG_(dumpmap), "D1mw");
1813 CLG_(append_event)(CLG_(dumpmap), "I2mr");
1814 CLG_(append_event)(CLG_(dumpmap), "D2mr");
1815 CLG_(append_event)(CLG_(dumpmap), "D2mw");
1816 CLG_(append_event)(CLG_(dumpmap), "I2dmr");
1817 CLG_(append_event)(CLG_(dumpmap), "D2dmr");
1818 CLG_(append_event)(CLG_(dumpmap), "D2dmw");
1819 CLG_(append_event)(CLG_(dumpmap), "AcCost1");
1820 CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
1821 CLG_(append_event)(CLG_(dumpmap), "AcCost2");
1822 CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
1823 CLG_(append_event)(CLG_(dumpmap), "allocCount");
1824 CLG_(append_event)(CLG_(dumpmap), "allocSize");
1825 CLG_(append_event)(CLG_(dumpmap), "sysCount");
1826 CLG_(append_event)(CLG_(dumpmap), "sysTime");
weidendoa17f2a32006-03-20 10:27:30 +00001827}
1828
1829
weidendoa17f2a32006-03-20 10:27:30 +00001830/* this is called at dump time for every instruction executed */
1831static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
1832 InstrInfo* ii, ULong exe_count)
1833{
weidendo5bba5252010-06-09 22:32:53 +00001834 if (!CLG_(clo).simulate_cache)
1835 cost[ fullOffset(EG_IR) ] += exe_count;
1836 else
1837 CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
1838 ii->eventset, bbcc->cost + ii->cost_offset);
weidendoa17f2a32006-03-20 10:27:30 +00001839}
1840
1841static
1842void cachesim_after_bbsetup(void)
1843{
1844 BBCC* bbcc = CLG_(current_state).bbcc;
1845
1846 if (CLG_(clo).simulate_cache) {
1847 BB* bb = bbcc->bb;
1848
1849 /* only needed if log_* functions are called */
1850 bb_base = bb->obj->offset + bb->offset;
1851 cost_base = bbcc->cost;
1852 }
1853}
1854
1855static
1856void cachesim_finish(void)
1857{
1858 if (clo_collect_cacheuse)
1859 cacheuse_finish();
1860}
1861
1862/*------------------------------------------------------------*/
1863/*--- The simulator defined in this file ---*/
1864/*------------------------------------------------------------*/
1865
1866struct cachesim_if CLG_(cachesim) = {
1867 .print_opts = cachesim_print_opts,
1868 .parse_opt = cachesim_parse_opt,
1869 .post_clo_init = cachesim_post_clo_init,
1870 .clear = cachesim_clear,
1871 .getdesc = cachesim_getdesc,
1872 .printstat = cachesim_printstat,
1873 .add_icost = cachesim_add_icost,
1874 .after_bbsetup = cachesim_after_bbsetup,
1875 .finish = cachesim_finish,
1876
1877 /* these will be set by cachesim_post_clo_init */
1878 .log_1I0D = 0,
weidendo0a1951d2009-06-15 00:16:36 +00001879 .log_2I0D = 0,
1880 .log_3I0D = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001881
1882 .log_1I1Dr = 0,
1883 .log_1I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001884
1885 .log_0I1Dr = 0,
1886 .log_0I1Dw = 0,
weidendoa17f2a32006-03-20 10:27:30 +00001887
1888 .log_1I0D_name = "(no function)",
weidendo0a1951d2009-06-15 00:16:36 +00001889 .log_2I0D_name = "(no function)",
1890 .log_3I0D_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001891
1892 .log_1I1Dr_name = "(no function)",
1893 .log_1I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001894
1895 .log_0I1Dr_name = "(no function)",
1896 .log_0I1Dw_name = "(no function)",
weidendoa17f2a32006-03-20 10:27:30 +00001897};
1898
1899
1900/*--------------------------------------------------------------------*/
1901/*--- end ct_sim.c ---*/
1902/*--------------------------------------------------------------------*/
1903