blob: 6794319b7500b77dff33b9dd5d14055f328e12b6 [file] [log] [blame]
nethercoteb35a8b92004-09-11 16:45:27 +00001
2/*--------------------------------------------------------------------*/
njn8b68b642009-06-24 00:37:09 +00003/*--- x86- and AMD64-specific definitions. cg-x86-amd64.c ---*/
nethercoteb35a8b92004-09-11 16:45:27 +00004/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Cachegrind, a Valgrind tool for cache
8 profiling programs.
9
sewardj9eecbbb2010-05-03 21:37:12 +000010 Copyright (C) 2002-2010 Nicholas Nethercote
njn2bc10122005-05-08 02:10:27 +000011 njn@valgrind.org
nethercoteb35a8b92004-09-11 16:45:27 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
njn8b68b642009-06-24 00:37:09 +000031#if defined(VGA_x86) || defined(VGA_amd64)
32
njnc7561b92005-06-19 01:24:32 +000033#include "pub_tool_basics.h"
njn68980862005-06-18 18:31:26 +000034#include "pub_tool_cpuid.h"
njn97405b22005-06-02 03:39:33 +000035#include "pub_tool_libcbase.h"
njnf39e9a32005-06-12 02:43:17 +000036#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000037#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000038
nethercoteb35a8b92004-09-11 16:45:27 +000039#include "cg_arch.h"
40
// All CPUID info taken from sandpile.org/ia32/cpuid.htm
// Probably only works for Intel and AMD chips, and probably only for some of
// them.
44
45static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
46{
sewardjb2c985b2009-07-15 14:51:17 +000047 VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n",
48 actual_size);
49 VG_(dmsg)(" Simulating a %d KB I-cache with %d B lines\n",
50 used_size, line_size);
nethercoteb35a8b92004-09-11 16:45:27 +000051}
52
53/* Intel method is truly wretched. We have to do an insane indexing into an
54 * array of pre-defined configurations for various parts of the memory
weidendo1c3e3c52006-11-23 13:04:30 +000055 * hierarchy.
56 * According to Intel Processor Identification, App Note 485.
njn2d853a12010-10-06 22:46:31 +000057 *
58 * If a L3 cache is found, then data for it rather than the L2
59 * is returned via *LLc.
nethercoteb35a8b92004-09-11 16:45:27 +000060 */
61static
njn2d853a12010-10-06 22:46:31 +000062Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
nethercoteb35a8b92004-09-11 16:45:27 +000063{
weidendo1c3e3c52006-11-23 13:04:30 +000064 Int cpuid1_eax;
65 Int cpuid1_ignore;
66 Int family;
67 Int model;
nethercoteb35a8b92004-09-11 16:45:27 +000068 UChar info[16];
69 Int i, trials;
70 Bool L2_found = False;
njn2d853a12010-10-06 22:46:31 +000071 /* If we see L3 cache info, copy it into L3c. Then, at the end,
72 copy it into *LLc. Hence if a L3 cache is specified, *LLc will
73 eventually contain a description of it rather than the L2 cache.
74 The use of the L3c intermediary makes this process independent
75 of the order in which the cache specifications appear in
76 info[]. */
77 Bool L3_found = False;
78 cache_t L3c = { 0, 0, 0 };
nethercoteb35a8b92004-09-11 16:45:27 +000079
80 if (level < 2) {
sewardjb2c985b2009-07-15 14:51:17 +000081 VG_(dmsg)("warning: CPUID level < 2 for Intel processor (%d)\n", level);
nethercoteb35a8b92004-09-11 16:45:27 +000082 return -1;
83 }
84
weidendo1c3e3c52006-11-23 13:04:30 +000085 /* family/model needed to distinguish code reuse (currently 0x49) */
86 VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
87 &cpuid1_ignore, &cpuid1_ignore);
88 family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
89 model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
90
nethercoteb35a8b92004-09-11 16:45:27 +000091 VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
92 (Int*)&info[8], (Int*)&info[12]);
93 trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
94 info[0] = 0x0; /* reset AL */
95
96 if (0 != trials) {
sewardjb2c985b2009-07-15 14:51:17 +000097 VG_(dmsg)("warning: non-zero CPUID trials for Intel processor (%d)\n",
98 trials);
nethercoteb35a8b92004-09-11 16:45:27 +000099 return -1;
100 }
101
102 for (i = 0; i < 16; i++) {
103
104 switch (info[i]) {
105
106 case 0x0: /* ignore zeros */
107 break;
108
109 /* TLB info, ignore */
weidendo966b5bd2006-10-12 14:23:38 +0000110 case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
tom55b3a812009-10-28 09:21:53 +0000111 case 0x4f: case 0x50: case 0x51: case 0x52: case 0x55:
tom1e76ff52009-01-02 11:07:18 +0000112 case 0x56: case 0x57: case 0x59:
tom55b3a812009-10-28 09:21:53 +0000113 case 0x5a: case 0x5b: case 0x5c: case 0x5d:
114 case 0xb0: case 0xb1: case 0xb2:
tom1e76ff52009-01-02 11:07:18 +0000115 case 0xb3: case 0xb4: case 0xba: case 0xc0:
tom55b3a812009-10-28 09:21:53 +0000116 case 0xca:
nethercoteb35a8b92004-09-11 16:45:27 +0000117 break;
118
119 case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
120 case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
tom55b3a812009-10-28 09:21:53 +0000121 case 0x09: *I1c = (cache_t) { 32, 4, 64 }; break;
nethercoteb35a8b92004-09-11 16:45:27 +0000122 case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
123
124 case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
125 case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
weidendo144b76c2009-01-26 22:56:14 +0000126 case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
nethercoteb35a8b92004-09-11 16:45:27 +0000127 case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
128
129 /* IA-64 info -- panic! */
130 case 0x10: case 0x15: case 0x1a:
131 case 0x88: case 0x89: case 0x8a: case 0x8d:
132 case 0x90: case 0x96: case 0x9b:
njn67993252004-11-22 18:02:32 +0000133 VG_(tool_panic)("IA-64 cache detected?!");
nethercoteb35a8b92004-09-11 16:45:27 +0000134
njn2d853a12010-10-06 22:46:31 +0000135 /* L3 cache info. */
136 case 0x22: L3c = (cache_t) { 512, 4, 64 }; L3_found = True; break;
137 case 0x23: L3c = (cache_t) { 1024, 8, 64 }; L3_found = True; break;
138 case 0x25: L3c = (cache_t) { 2048, 8, 64 }; L3_found = True; break;
139 case 0x29: L3c = (cache_t) { 4096, 8, 64 }; L3_found = True; break;
140 case 0x46: L3c = (cache_t) { 4096, 4, 64 }; L3_found = True; break;
141 case 0x47: L3c = (cache_t) { 8192, 8, 64 }; L3_found = True; break;
142 case 0x4a: L3c = (cache_t) { 6144, 12, 64 }; L3_found = True; break;
143 case 0x4b: L3c = (cache_t) { 8192, 16, 64 }; L3_found = True; break;
144 case 0x4c: L3c = (cache_t) { 12288, 12, 64 }; L3_found = True; break;
145 case 0x4d: L3c = (cache_t) { 16384, 16, 64 }; L3_found = True; break;
146 case 0xd0: L3c = (cache_t) { 512, 4, 64 }; L3_found = True; break;
147 case 0xd1: L3c = (cache_t) { 1024, 4, 64 }; L3_found = True; break;
148 case 0xd2: L3c = (cache_t) { 2048, 4, 64 }; L3_found = True; break;
149 case 0xd6: L3c = (cache_t) { 1024, 8, 64 }; L3_found = True; break;
150 case 0xd7: L3c = (cache_t) { 2048, 8, 64 }; L3_found = True; break;
151 case 0xd8: L3c = (cache_t) { 4096, 8, 64 }; L3_found = True; break;
152 case 0xdc: L3c = (cache_t) { 1536, 12, 64 }; L3_found = True; break;
153 case 0xdd: L3c = (cache_t) { 3072, 12, 64 }; L3_found = True; break;
154 case 0xde: L3c = (cache_t) { 6144, 12, 64 }; L3_found = True; break;
155 case 0xe2: L3c = (cache_t) { 2048, 16, 64 }; L3_found = True; break;
156 case 0xe3: L3c = (cache_t) { 4096, 16, 64 }; L3_found = True; break;
157 case 0xe4: L3c = (cache_t) { 8192, 16, 64 }; L3_found = True; break;
158 case 0xea: L3c = (cache_t) { 12288, 24, 64 }; L3_found = True; break;
159 case 0xeb: L3c = (cache_t) { 18432, 24, 64 }; L3_found = True; break;
160 case 0xec: L3c = (cache_t) { 24576, 24, 64 }; L3_found = True; break;
nethercoteb35a8b92004-09-11 16:45:27 +0000161
tom55b3a812009-10-28 09:21:53 +0000162 /* Described as "MLC" in Intel documentation */
njn2d853a12010-10-06 22:46:31 +0000163 case 0x21: *LLc = (cache_t) { 256, 8, 64 }; L2_found = True; break;
tom55b3a812009-10-28 09:21:53 +0000164
nethercoteb35a8b92004-09-11 16:45:27 +0000165 /* These are sectored, whatever that means */
njn2d853a12010-10-06 22:46:31 +0000166 case 0x39: *LLc = (cache_t) { 128, 4, 64 }; L2_found = True; break;
167 case 0x3c: *LLc = (cache_t) { 256, 4, 64 }; L2_found = True; break;
nethercoteb35a8b92004-09-11 16:45:27 +0000168
169 /* If a P6 core, this means "no L2 cache".
170 If a P4 core, this means "no L3 cache".
171 We don't know what core it is, so don't issue a warning. To detect
172 a missing L2 cache, we use 'L2_found'. */
173 case 0x40:
174 break;
175
njn2d853a12010-10-06 22:46:31 +0000176 case 0x41: *LLc = (cache_t) { 128, 4, 32 }; L2_found = True; break;
177 case 0x42: *LLc = (cache_t) { 256, 4, 32 }; L2_found = True; break;
178 case 0x43: *LLc = (cache_t) { 512, 4, 32 }; L2_found = True; break;
179 case 0x44: *LLc = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
180 case 0x45: *LLc = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
181 case 0x48: *LLc = (cache_t) { 3072, 12, 64 }; L2_found = True; break;
182 case 0x4e: *LLc = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
weidendo1c3e3c52006-11-23 13:04:30 +0000183 case 0x49:
njn2d853a12010-10-06 22:46:31 +0000184 if (family == 15 && model == 6) {
185 /* On Xeon MP (family F, model 6), this is for L3 */
186 L3c = (cache_t) { 4096, 16, 64 }; L3_found = True;
187 } else {
188 *LLc = (cache_t) { 4096, 16, 64 }; L2_found = True;
189 }
190 break;
nethercoteb35a8b92004-09-11 16:45:27 +0000191
192 /* These are sectored, whatever that means */
nethercoteac7ecd72004-10-13 11:30:14 +0000193 case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */
nethercoteb35a8b92004-09-11 16:45:27 +0000194 case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
195 case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
196 case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
197
198 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
199 * conversion to byte size is a total guess; treat the 12K and 16K
200 * cases the same since the cache byte size must be a power of two for
201 * everything to work!. Also guessing 32 bytes for the line size...
202 */
203 case 0x70: /* 12K micro-ops, 8-way */
204 *I1c = (cache_t) { 16, 8, 32 };
205 micro_ops_warn(12, 16, 32);
206 break;
207 case 0x71: /* 16K micro-ops, 8-way */
208 *I1c = (cache_t) { 16, 8, 32 };
209 micro_ops_warn(16, 16, 32);
210 break;
211 case 0x72: /* 32K micro-ops, 8-way */
212 *I1c = (cache_t) { 32, 8, 32 };
213 micro_ops_warn(32, 32, 32);
214 break;
215
sewardjf91b0a32009-08-28 22:34:09 +0000216 /* not sectored, whatever that might mean */
njn2d853a12010-10-06 22:46:31 +0000217 case 0x78: *LLc = (cache_t) { 1024, 4, 64 }; L2_found = True; break;
sewardjf91b0a32009-08-28 22:34:09 +0000218
nethercoteb35a8b92004-09-11 16:45:27 +0000219 /* These are sectored, whatever that means */
njn2d853a12010-10-06 22:46:31 +0000220 case 0x79: *LLc = (cache_t) { 128, 8, 64 }; L2_found = True; break;
221 case 0x7a: *LLc = (cache_t) { 256, 8, 64 }; L2_found = True; break;
222 case 0x7b: *LLc = (cache_t) { 512, 8, 64 }; L2_found = True; break;
223 case 0x7c: *LLc = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
224 case 0x7d: *LLc = (cache_t) { 2048, 8, 64 }; L2_found = True; break;
225 case 0x7e: *LLc = (cache_t) { 256, 8, 128 }; L2_found = True; break;
226 case 0x7f: *LLc = (cache_t) { 512, 2, 64 }; L2_found = True; break;
227 case 0x80: *LLc = (cache_t) { 512, 8, 64 }; L2_found = True; break;
228 case 0x81: *LLc = (cache_t) { 128, 8, 32 }; L2_found = True; break;
229 case 0x82: *LLc = (cache_t) { 256, 8, 32 }; L2_found = True; break;
230 case 0x83: *LLc = (cache_t) { 512, 8, 32 }; L2_found = True; break;
231 case 0x84: *LLc = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
232 case 0x85: *LLc = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
233 case 0x86: *LLc = (cache_t) { 512, 4, 64 }; L2_found = True; break;
234 case 0x87: *LLc = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
nethercoteb35a8b92004-09-11 16:45:27 +0000235
tom942d9ef2005-07-27 22:59:50 +0000236 /* Ignore prefetch information */
237 case 0xf0: case 0xf1:
njn6f74a7e2009-03-12 00:06:45 +0000238 break;
tom942d9ef2005-07-27 22:59:50 +0000239
nethercoteb35a8b92004-09-11 16:45:27 +0000240 default:
sewardjb2c985b2009-07-15 14:51:17 +0000241 VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n",
242 info[i]);
njn6f74a7e2009-03-12 00:06:45 +0000243 break;
nethercoteb35a8b92004-09-11 16:45:27 +0000244 }
245 }
246
njn2d853a12010-10-06 22:46:31 +0000247 /* If we found a L3 cache, throw away the L2 data and use the L3's instead. */
248 if (L3_found) {
249 VG_(dmsg)("warning: L3 cache found, using its data for the LL simulation.\n");
250 *LLc = L3c;
251 L2_found = True;
252 }
253
nethercoteb35a8b92004-09-11 16:45:27 +0000254 if (!L2_found)
njn2d853a12010-10-06 22:46:31 +0000255 VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
nethercoteb35a8b92004-09-11 16:45:27 +0000256
257 return 0;
258}
259
/* AMD method is straightforward, just extract appropriate bits from the
 * result registers.
 *
 * Bits, for D1 and I1:
 *  31..24  data L1 cache size in KBs
 *  23..16  data L1 cache associativity (FFh=full)
 *  15.. 8  data L1 cache lines per tag
 *   7.. 0  data L1 cache line size in bytes
 *
 * Bits, for L2:
 *  31..16  unified L2 cache size in KBs
 *  15..12  unified L2 cache associativity (0=off, Fh=full)
 *  11.. 8  unified L2 cache lines per tag
 *   7.. 0  unified L2 cache line size in bytes
 *
 * #3  The AMD K7 processor's L2 cache must be configured prior to relying
 *     upon this information. (Whatever that means -- njn)
 *
 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
 * so we detect that.
 *
 * Returns 0 on success, non-zero on failure.  As with the Intel code
 * above, if a L3 cache is found, then data for it rather than the L2
 * is returned via *LLc.
 */
njn2d853a12010-10-06 22:46:31 +0000286
287/* A small helper */
288static Int decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 )
289{
290 /* Decode a L2/L3 associativity indication. It is encoded
291 differently from the I1/D1 associativity. Returns 1
292 (direct-map) as a safe but suboptimal result for unknown
293 encodings. */
294 switch (bits_15_12 & 0xF) {
295 case 1: return 1; case 2: return 2;
296 case 4: return 4; case 6: return 8;
297 case 8: return 16; case 0xA: return 32;
298 case 0xB: return 48; case 0xC: return 64;
299 case 0xD: return 96; case 0xE: return 128;
300 case 0xF: /* fully associative */
301 case 0: /* L2/L3 cache or TLB is disabled */
302 default:
303 return 1;
304 }
305}
306
nethercoteb35a8b92004-09-11 16:45:27 +0000307static
njn2d853a12010-10-06 22:46:31 +0000308Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc)
nethercoteb35a8b92004-09-11 16:45:27 +0000309{
310 UInt ext_level;
311 UInt dummy, model;
njn2d853a12010-10-06 22:46:31 +0000312 UInt I1i, D1i, L2i, L3i;
nethercoteb35a8b92004-09-11 16:45:27 +0000313
314 VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
315
316 if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
sewardjb2c985b2009-07-15 14:51:17 +0000317 VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n",
318 ext_level);
nethercoteb35a8b92004-09-11 16:45:27 +0000319 return -1;
320 }
321
322 VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
njn2d853a12010-10-06 22:46:31 +0000323 VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &L3i);
nethercoteb35a8b92004-09-11 16:45:27 +0000324
325 VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
326
327 /* Check for Duron bug */
328 if (model == 0x630) {
sewardjb2c985b2009-07-15 14:51:17 +0000329 VG_(dmsg)("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes\n");
nethercoteb35a8b92004-09-11 16:45:27 +0000330 L2i = (64 << 16) | (L2i & 0xffff);
331 }
332
333 D1c->size = (D1i >> 24) & 0xff;
334 D1c->assoc = (D1i >> 16) & 0xff;
335 D1c->line_size = (D1i >> 0) & 0xff;
336
337 I1c->size = (I1i >> 24) & 0xff;
338 I1c->assoc = (I1i >> 16) & 0xff;
339 I1c->line_size = (I1i >> 0) & 0xff;
340
njn2d853a12010-10-06 22:46:31 +0000341 LLc->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
342 LLc->assoc = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf);
343 LLc->line_size = (L2i >> 0) & 0xff;
344
345 if (((L3i >> 18) & 0x3fff) > 0) {
346 /* There's an L3 cache. Replace *LLc contents with this info. */
347 /* NB: the test in the if is "if L3 size > 0 ". I don't know if
348 this is the right way to test presence-vs-absence of L3. I
349 can't see any guidance on this in the AMD documentation. */
350 LLc->size = ((L3i >> 18) & 0x3fff) * 512;
351 LLc->assoc = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf);
352 LLc->line_size = (L3i >> 0) & 0xff;
353 VG_(dmsg)("warning: L3 cache found, using its data for the L2 simulation.\n");
354 }
nethercoteb35a8b92004-09-11 16:45:27 +0000355
356 return 0;
357}
358
nethercoteb35a8b92004-09-11 16:45:27 +0000359static
njn2d853a12010-10-06 22:46:31 +0000360Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc)
nethercoteb35a8b92004-09-11 16:45:27 +0000361{
sewardjb5f6f512005-03-10 23:59:00 +0000362 Int level, ret;
nethercoteb35a8b92004-09-11 16:45:27 +0000363 Char vendor_id[13];
nethercoteb35a8b92004-09-11 16:45:27 +0000364
sewardjb5f6f512005-03-10 23:59:00 +0000365 if (!VG_(has_cpuid)()) {
sewardjb2c985b2009-07-15 14:51:17 +0000366 VG_(dmsg)("CPUID instruction not supported\n");
nethercoteb35a8b92004-09-11 16:45:27 +0000367 return -1;
368 }
tomf4ed0592005-04-02 17:30:19 +0000369
sewardjb5f6f512005-03-10 23:59:00 +0000370 VG_(cpuid)(0, &level, (int*)&vendor_id[0],
371 (int*)&vendor_id[8], (int*)&vendor_id[4]);
372 vendor_id[12] = '\0';
nethercoteb35a8b92004-09-11 16:45:27 +0000373
374 if (0 == level) {
sewardjb2c985b2009-07-15 14:51:17 +0000375 VG_(dmsg)("CPUID level is 0, early Pentium?\n");
nethercoteb35a8b92004-09-11 16:45:27 +0000376 return -1;
377 }
378
379 /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
380 if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
njn2d853a12010-10-06 22:46:31 +0000381 ret = Intel_cache_info(level, I1c, D1c, LLc);
nethercoteb35a8b92004-09-11 16:45:27 +0000382
383 } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
njn2d853a12010-10-06 22:46:31 +0000384 ret = AMD_cache_info(I1c, D1c, LLc);
nethercoteb35a8b92004-09-11 16:45:27 +0000385
386 } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
387 /* Total kludge. Pretend to be a VIA Nehemiah. */
388 D1c->size = 64;
389 D1c->assoc = 16;
390 D1c->line_size = 16;
391 I1c->size = 64;
392 I1c->assoc = 4;
393 I1c->line_size = 16;
njn2d853a12010-10-06 22:46:31 +0000394 LLc->size = 64;
395 LLc->assoc = 16;
396 LLc->line_size = 16;
nethercoteb35a8b92004-09-11 16:45:27 +0000397 ret = 0;
398
399 } else {
sewardjb2c985b2009-07-15 14:51:17 +0000400 VG_(dmsg)("CPU vendor ID not recognised (%s)\n", vendor_id);
nethercoteb35a8b92004-09-11 16:45:27 +0000401 return -1;
402 }
403
404 /* Successful! Convert sizes from KB to bytes */
405 I1c->size *= 1024;
406 D1c->size *= 1024;
njn2d853a12010-10-06 22:46:31 +0000407 LLc->size *= 1024;
nethercoteb35a8b92004-09-11 16:45:27 +0000408
409 return ret;
410}
411
412
njn2d853a12010-10-06 22:46:31 +0000413void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
njnaf839f52005-06-23 03:27:57 +0000414 Bool all_caches_clo_defined)
nethercoteb35a8b92004-09-11 16:45:27 +0000415{
416 Int res;
417
418 // Set caches to default.
njna1d1a642004-11-26 18:36:02 +0000419 *I1c = (cache_t) { 65536, 2, 64 };
420 *D1c = (cache_t) { 65536, 2, 64 };
njn2d853a12010-10-06 22:46:31 +0000421 *LLc = (cache_t) { 262144, 8, 64 };
nethercoteb35a8b92004-09-11 16:45:27 +0000422
423 // Then replace with any info we can get from CPUID.
njn2d853a12010-10-06 22:46:31 +0000424 res = get_caches_from_CPUID(I1c, D1c, LLc);
nethercoteb35a8b92004-09-11 16:45:27 +0000425
426 // Warn if CPUID failed and config not completely specified from cmd line.
427 if (res != 0 && !all_caches_clo_defined) {
sewardjb2c985b2009-07-15 14:51:17 +0000428 VG_(dmsg)("Warning: Couldn't auto-detect cache config, using one "
429 "or more defaults \n");
nethercoteb35a8b92004-09-11 16:45:27 +0000430 }
431}
432
njn8b68b642009-06-24 00:37:09 +0000433#endif // defined(VGA_x86) || defined(VGA_amd64)
434
nethercoteb35a8b92004-09-11 16:45:27 +0000435/*--------------------------------------------------------------------*/
436/*--- end ---*/
437/*--------------------------------------------------------------------*/