blob: 93af0d0611843b57ee62d0ef896048b4b80fb5e5 [file] [log] [blame]
weidendo23642272011-09-06 19:08:31 +00001/*--------------------------------------------------------------------*/
florian78627012012-10-07 19:47:04 +00002/*--- Cachegrind: cache configuration. cg-arch.c ---*/
weidendo23642272011-09-06 19:08:31 +00003/*--------------------------------------------------------------------*/
4
5/*
6 This file is part of Cachegrind, a Valgrind tool for cache
7 profiling programs.
8
sewardj03f8d3f2012-08-05 15:46:46 +00009 Copyright (C) 2011-2012 Nicholas Nethercote
weidendo23642272011-09-06 19:08:31 +000010 njn@valgrind.org
11
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 02111-1307, USA.
26
27 The GNU General Public License is contained in the file COPYING.
28*/
29
30#include "pub_tool_basics.h"
31#include "pub_tool_libcassert.h"
32#include "pub_tool_libcbase.h"
33#include "pub_tool_libcprint.h"
34#include "pub_tool_options.h"
florian78627012012-10-07 19:47:04 +000035#include "pub_tool_machine.h"
weidendo23642272011-09-06 19:08:31 +000036
37#include "cg_arch.h"
38
florian78627012012-10-07 19:47:04 +000039static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
40 Bool all_caches_clo_defined);
41
weidendo23642272011-09-06 19:08:31 +000042// Checks cache config is ok. Returns NULL if ok, or a pointer to an error
43// string otherwise.
44static Char* check_cache(cache_t* cache)
45{
46 // Simulator requires set count to be a power of two.
47 if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
48 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
49 {
50 return "Cache set count is not a power of two.\n";
51 }
52
53 // Simulator requires line size to be a power of two.
54 if (-1 == VG_(log2)(cache->line_size)) {
55 return "Cache line size is not a power of two.\n";
56 }
57
58 // Then check line size >= 16 -- any smaller and a single instruction could
59 // straddle three cache lines, which breaks a simulation assertion and is
60 // stupid anyway.
61 if (cache->line_size < MIN_LINE_SIZE) {
62 return "Cache line size is too small.\n";
63 }
64
65 /* Then check cache size > line size (causes seg faults if not). */
66 if (cache->size <= cache->line_size) {
67 return "Cache size <= line size.\n";
68 }
69
70 /* Then check assoc <= (size / line size) (seg faults otherwise). */
71 if (cache->assoc > (cache->size / cache->line_size)) {
72 return "Cache associativity > (size / line size).\n";
73 }
74
75 return NULL;
76}
77
78
79static void parse_cache_opt ( cache_t* cache, Char* opt, Char* optval )
80{
81 Long i1, i2, i3;
82 Char* endptr;
83 Char* checkRes;
84
85 // Option argument looks like "65536,2,64". Extract them.
86 i1 = VG_(strtoll10)(optval, &endptr); if (*endptr != ',') goto bad;
87 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
88 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
89
90 // Check for overflow.
91 cache->size = (Int)i1;
92 cache->assoc = (Int)i2;
93 cache->line_size = (Int)i3;
94 if (cache->size != i1) goto overflow;
95 if (cache->assoc != i2) goto overflow;
96 if (cache->line_size != i3) goto overflow;
97
98 checkRes = check_cache(cache);
99 if (checkRes) {
100 VG_(fmsg)("%s", checkRes);
101 goto bad;
102 }
103
104 return;
105
106 bad:
107 VG_(fmsg_bad_option)(opt, "");
108
109 overflow:
110 VG_(fmsg_bad_option)(opt,
111 "One of the cache parameters was too large and overflowed.\n");
112}
113
114
115Bool VG_(str_clo_cache_opt)(Char *arg,
116 cache_t* clo_I1c,
117 cache_t* clo_D1c,
118 cache_t* clo_LLc)
119{
120 Char* tmp_str;
121
122 if VG_STR_CLO(arg, "--I1", tmp_str) {
123 parse_cache_opt(clo_I1c, arg, tmp_str);
124 return True;
125 } else if VG_STR_CLO(arg, "--D1", tmp_str) {
126 parse_cache_opt(clo_D1c, arg, tmp_str);
127 return True;
128 } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
129 VG_STR_CLO(arg, "--LL", tmp_str)) {
130 parse_cache_opt(clo_LLc, arg, tmp_str);
131 return True;
132 } else
133 return False;
134}
135
136static void umsg_cache_img(Char* desc, cache_t* c)
137{
138 VG_(umsg)(" %s: %'d B, %d-way, %d B lines\n", desc,
139 c->size, c->assoc, c->line_size);
140}
141
142// Verifies if c is a valid cache.
143// An invalid value causes an assert, unless clo_redefined is True.
144static void check_cache_or_override(Char* desc, cache_t* c, Bool clo_redefined)
145{
146 Char* checkRes;
147
148 checkRes = check_cache(c);
149 if (checkRes) {
150 VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
151 desc, checkRes);
152 umsg_cache_img(desc, c);
153 if (!clo_redefined) {
154 VG_(umsg)("As it probably should be supported, please report a bug!\n");
155 VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
156 tl_assert(0);
157 }
158 }
159}
160
florian78627012012-10-07 19:47:04 +0000161
162/* If the LL cache config isn't something the simulation functions
163 can handle, try to adjust it so it is. Caches are characterised
164 by (total size T, line size L, associativity A), and then we
165 have
166
167 number of sets S = T / (L * A)
168
169 The required constraints are:
170
171 * L must be a power of 2, but it always is in practice, so
172 no problem there
173
174 * A can be any value >= 1
175
176 * T can be any value, but ..
177
178 * S must be a power of 2.
179
180 That sometimes gives a problem. For example, some Core iX based
181 Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
182 sets. The "fix" in this case is to increase the associativity
183 by 50% to 24, which reduces the number of sets to 8192, making
184 it a power of 2. That's what the following code does (handing
185 the "3/2 rescaling case".) We might need to deal with other
186 ratios later (5/4 ?).
187
188 The "fix" is "justified" (cough, cough) by alleging that
189 increases of associativity above about 4 have very little effect
190 on the actual miss rate. It would be far more inaccurate to
191 fudge this by changing the size of the simulated cache --
192 changing the associativity is a much better option.
193*/
194
195static void
196maybe_tweak_LLc(cache_t *LLc)
197{
198 if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
199 Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
200 if (/* stay sane */
201 nSets >= 4
202 /* nSets is not a power of 2 */
203 && VG_(log2_64)( (ULong)nSets ) == -1
204 /* nSets is 50% above a power of 2 */
205 && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
206 /* associativity can be increased by exactly 50% */
207 && (LLc->assoc % 2) == 0
208 ) {
209 /* # sets is 1.5 * a power of two, but the associativity is
210 even, so we can increase that up by 50% and implicitly
211 scale the # sets down accordingly. */
212 Int new_assoc = LLc->assoc + (LLc->assoc / 2);
213 VG_(dmsg)("warning: pretending that LL cache has associativity"
214 " %d instead of actual %d\n", new_assoc, LLc->assoc);
215 LLc->assoc = new_assoc;
216 }
217 }
218}
219
weidendo23642272011-09-06 19:08:31 +0000220void VG_(post_clo_init_configure_caches)(cache_t* I1c,
221 cache_t* D1c,
222 cache_t* LLc,
223 cache_t* clo_I1c,
224 cache_t* clo_D1c,
225 cache_t* clo_LLc)
226{
227#define DEFINED(L) (-1 != L->size || -1 != L->assoc || -1 != L->line_size)
228
229 // Count how many were defined on the command line.
230 Bool all_caches_clo_defined =
231 (DEFINED(clo_I1c) &&
232 DEFINED(clo_D1c) &&
233 DEFINED(clo_LLc));
234
235 // Set the cache config (using auto-detection, if supported by the
236 // architecture).
florian78627012012-10-07 19:47:04 +0000237 configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
238
239 maybe_tweak_LLc( LLc );
weidendo23642272011-09-06 19:08:31 +0000240
241 // Check the default/auto-detected values.
242 // Allow the user to override invalid auto-detected caches
243 // with command line.
244 check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
245 check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
246 check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));
247
248 // Then replace with any defined on the command line. (Already checked in
249 // VG(parse_clo_cache_opt)().)
250 if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
251 if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
252 if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }
253
254 if (VG_(clo_verbosity) >= 2) {
255 VG_(umsg)("Cache configuration used:\n");
256 umsg_cache_img ("I1", I1c);
257 umsg_cache_img ("D1", D1c);
258 umsg_cache_img ("LL", LLc);
259 }
260#undef DEFINED
261}
262
263void VG_(print_cache_clo_opts)()
264{
265 VG_(printf)(
266" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
267" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
268" --LL=<size>,<assoc>,<line_size> set LL cache manually\n"
269 );
270}
florian78627012012-10-07 19:47:04 +0000271
272
273// Traverse the cache info and return a cache of the given kind and level.
274// Return NULL if no such cache exists.
275static const VexCache *
276locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
277{
278 const VexCache *c;
279
280 for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
281 if (c->level == level && c->kind == kind) {
282 return c;
283 }
284 }
285 return NULL; // not found
286}
287
288
289// Gives the auto-detected configuration of I1, D1 and LL caches. They get
290// overridden by any cache configurations specified on the command line.
291static void
292configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
293 Bool all_caches_clo_defined)
294{
295 VexArchInfo vai;
296 const VexCacheInfo *ci;
297 const VexCache *i1, *d1, *ll;
298
299 VG_(machine_get_VexArchInfo)(NULL, &vai);
300 ci = &vai.hwcache_info;
301
302 // Extract what we need
303 i1 = locate_cache(ci, INSN_CACHE, 1);
304 d1 = locate_cache(ci, DATA_CACHE, 1);
florian78627012012-10-07 19:47:04 +0000305 ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
306
307 if (ll && ci->num_levels > 2) {
308 VG_(dmsg)("warning: L%u cache found, using its data for the "
309 "LL simulation.\n", ci->num_levels);
310 }
311
312 if (i1 && d1 && ll) {
313 *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
314 *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
315 *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
316
317 return;
318 }
319
320 // Cache information could not be queried; choose some default
321 // architecture specific default setting.
322
323#if defined(VGA_ppc32)
324
325 // Default cache configuration
326 *I1c = (cache_t) { 65536, 2, 64 };
327 *D1c = (cache_t) { 65536, 2, 64 };
328 *LLc = (cache_t) { 262144, 8, 64 };
329
330#elif defined(VGA_ppc64)
331
332 // Default cache configuration
333 *I1c = (cache_t) { 65536, 2, 64 };
334 *D1c = (cache_t) { 65536, 2, 64 };
335 *LLc = (cache_t) { 262144, 8, 64 };
336
337#elif defined(VGA_arm)
338
339 // Set caches to default (for Cortex-A8 ?)
340 *I1c = (cache_t) { 16384, 4, 64 };
341 *D1c = (cache_t) { 16384, 4, 64 };
342 *LLc = (cache_t) { 262144, 8, 64 };
343
344#elif defined(VGA_s390x)
345 // z900
346 //
347 // Source:
348 // The microarchitecture of the IBM eServer z900 processor
349 // IBM Journal of Research and Development
350 // Volume 46, Number 4/5, pp 381-395, July/September 2002
351 //
352 // Split L1 I/D cache
353 // Size: 256 kB each
354 // Line size: 256 bytes
355 // 4-way set associative
356 // L2 cache: 16 MB x 2 (16 MB per 10 CPs) (Charles Webb)
357
358 // z800
359 //
360 // Source: Charles Webb from IBM
361 //
362 // Split L1 I/D cache
363 // Size: 256 kB each
364 // Line size: 256 bytes
365 // 4-way set associative
366 // L2 cache: 16 MB (or half that size)
367
368 // z990
369 //
370 // The IBM eServer z990 microprocessor
371 // IBM Journal of Research and Development
372 // Volume 48, Number 3/4, pp 295-309, May/July 2004
373 //
374 // Split L1 I/D cache
375 // Size: 256 kB each
376 // Line size: 256 bytes
377 // 4-way set associative
378 // L2 cache: 32 MB x 4 (32 MB per book/node) (Charles Webb)
379
380 // z890
381 //
382 // Source: Charles Webb from IBM
383 //
384 // Split L1 I/D cache
385 // Size: 256 kB each
386 // Line size: 256 bytes
387 // 4-way set associative
388 // L2 cache: 32 MB (or half that size)
389
390 // z9
391 //
392 // Source: Charles Webb from IBM
393 //
394 // Split L1 I/D cache
395 // Size: 256 kB each
396 // Line size: 256 bytes
397 // 4-way set associative
398 // L2 cache: 40 MB x 4 (40 MB per book/node)
399
400 // fixs390: have a table for all models we support and check
401 // fixs390: VEX_S390X_MODEL(hwcaps)
402
403 // Default cache configuration is z10-EC (Source: ECAG insn)
404 *I1c = (cache_t) { 65536, 4, 256 };
405 *D1c = (cache_t) { 131072, 8, 256 };
406 *LLc = (cache_t) { 50331648, 24, 256 };
407
408#elif defined(VGA_mips32)
409
410 // Set caches to default (for MIPS32-r2(mips 74kc))
411 *I1c = (cache_t) { 32768, 4, 32 };
412 *D1c = (cache_t) { 32768, 4, 32 };
413 *L2c = (cache_t) { 524288, 8, 32 };
414
415#elif defined(VGA_x86) || defined(VGA_amd64)
416
417 *I1c = (cache_t) { 65536, 2, 64 };
418 *D1c = (cache_t) { 65536, 2, 64 };
419 *LLc = (cache_t) { 262144, 8, 64 };
420
421#else
422
423#error "Unknown arch"
424
425#endif
426
427 if (!all_caches_clo_defined) {
428 const char warning[] =
429 "Warning: Cannot auto-detect cache config, using defaults.\n"
430 " Run with -v to see.\n";
431 VG_(dmsg)("%s", warning);
432 }
433}
434
435/*--------------------------------------------------------------------*/
436/*--- end ---*/
437/*--------------------------------------------------------------------*/