blob: 36b449b7c11aad40dcb37119209ea1cc1ebdbd0e [file] [log] [blame]
nethercote27fc1da2004-01-04 16:56:57 +00001
2/*--------------------------------------------------------------------*/
3/*--- Cache simulation cg_sim.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Cachegrind, a Valgrind tool for cache
8 profiling programs.
9
njn53612422005-03-12 16:22:54 +000010 Copyright (C) 2002-2005 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000011 njn@valgrind.org
nethercote27fc1da2004-01-04 16:56:57 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31/* Notes:
32 - simulates a write-allocate cache
33 - (block --> set) hash function uses simple bit selection
34 - handling of references straddling two cache blocks:
35 - counts as only one cache access (not two)
36 - both blocks hit --> one hit
37 - one block hits, the other misses --> one miss
38 - both blocks miss --> one miss (not two)
39*/
40
41typedef struct {
42 int size; /* bytes */
43 int assoc;
44 int line_size; /* bytes */
45 int sets;
46 int sets_min_1;
47 int assoc_bits;
48 int line_size_bits;
49 int tag_shift;
50 char desc_line[128];
51 int* tags;
52} cache_t2;
53
54/* By this point, the size/assoc/line_size has been checked. */
55static void cachesim_initcache(cache_t config, cache_t2* c)
56{
57 int i;
58
59 c->size = config.size;
60 c->assoc = config.assoc;
61 c->line_size = config.line_size;
62
63 c->sets = (c->size / c->line_size) / c->assoc;
64 c->sets_min_1 = c->sets - 1;
65 c->assoc_bits = VG_(log2)(c->assoc);
66 c->line_size_bits = VG_(log2)(c->line_size);
67 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
68
69 if (c->assoc == 1) {
70 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
71 c->size, c->line_size);
72 } else {
73 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
74 c->size, c->line_size, c->assoc);
75 }
76
77 c->tags = VG_(malloc)(sizeof(UInt) * c->sets * c->assoc);
78
79 for (i = 0; i < c->sets * c->assoc; i++)
80 c->tags[i] = 0;
81}
82
83#if 0
84static void print_cache(cache_t2* c)
85{
86 UInt set, way, i;
87
88 /* Note initialisation and update of 'i'. */
89 for (i = 0, set = 0; set < c->sets; set++) {
90 for (way = 0; way < c->assoc; way++, i++) {
91 VG_(printf)("%8x ", c->tags[i]);
92 }
93 VG_(printf)("\n");
94 }
95}
96#endif
97
98/* This is done as a macro rather than by passing in the cache_t2 as an
99 * arg because it slows things down by a small amount (3-5%) due to all
100 * that extra indirection. */
101
102#define CACHESIM(L, MISS_TREATMENT) \
103/* The cache and associated bits and pieces. */ \
104static cache_t2 L; \
105 \
106static void cachesim_##L##_initcache(cache_t config) \
107{ \
108 cachesim_initcache(config, &L); \
109} \
110 \
111static /* __inline__ */ \
112void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) \
113{ \
114 register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \
115 register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
116 register UInt tag = a >> L.tag_shift; \
117 int i, j; \
118 Bool is_miss = False; \
119 int* set; \
120 \
121 /* First case: word entirely within line. */ \
122 if (set1 == set2) { \
123 \
124 /* Shifting is a bit faster than multiplying */ \
125 set = &(L.tags[set1 << L.assoc_bits]); \
126 \
127 /* This loop is unrolled for just the first case, which is the most */\
128 /* common. We can't unroll any further because it would screw up */\
129 /* if we have a direct-mapped (1-way) cache. */\
130 if (tag == set[0]) { \
131 return; \
132 } \
133 /* If the tag is one other than the MRU, move it into the MRU spot */\
134 /* and shuffle the rest down. */\
135 for (i = 1; i < L.assoc; i++) { \
136 if (tag == set[i]) { \
137 for (j = i; j > 0; j--) { \
138 set[j] = set[j - 1]; \
139 } \
140 set[0] = tag; \
141 return; \
142 } \
143 } \
144 \
145 /* A miss; install this tag as MRU, shuffle rest down. */ \
146 for (j = L.assoc - 1; j > 0; j--) { \
147 set[j] = set[j - 1]; \
148 } \
149 set[0] = tag; \
150 MISS_TREATMENT; \
151 return; \
152 \
153 /* Second case: word straddles two lines. */ \
154 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
155 } else if (((set1 + 1) & (L.sets-1)) == set2) { \
156 set = &(L.tags[set1 << L.assoc_bits]); \
157 if (tag == set[0]) { \
158 goto block2; \
159 } \
160 for (i = 1; i < L.assoc; i++) { \
161 if (tag == set[i]) { \
162 for (j = i; j > 0; j--) { \
163 set[j] = set[j - 1]; \
164 } \
165 set[0] = tag; \
166 goto block2; \
167 } \
168 } \
169 for (j = L.assoc - 1; j > 0; j--) { \
170 set[j] = set[j - 1]; \
171 } \
172 set[0] = tag; \
173 is_miss = True; \
174block2: \
175 set = &(L.tags[set2 << L.assoc_bits]); \
176 if (tag == set[0]) { \
177 goto miss_treatment; \
178 } \
179 for (i = 1; i < L.assoc; i++) { \
180 if (tag == set[i]) { \
181 for (j = i; j > 0; j--) { \
182 set[j] = set[j - 1]; \
183 } \
184 set[0] = tag; \
185 goto miss_treatment; \
186 } \
187 } \
188 for (j = L.assoc - 1; j > 0; j--) { \
189 set[j] = set[j - 1]; \
190 } \
191 set[0] = tag; \
192 is_miss = True; \
193miss_treatment: \
194 if (is_miss) { MISS_TREATMENT; } \
195 \
196 } else { \
197 VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); \
njn67993252004-11-22 18:02:32 +0000198 VG_(tool_panic)("item straddles more than two cache sets"); \
nethercote27fc1da2004-01-04 16:56:57 +0000199 } \
200 return; \
201}
202
203CACHESIM(L2, (*m2)++ );
204CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
205CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
206
207/*--------------------------------------------------------------------*/
208/*--- end cg_sim.c ---*/
209/*--------------------------------------------------------------------*/
210