nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
njn | 8b68b64 | 2009-06-24 00:37:09 +0000 | [diff] [blame] | 3 | /*--- x86- and AMD64-specific definitions. cg-x86-amd64.c ---*/ |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
sewardj | 9eecbbb | 2010-05-03 21:37:12 +0000 | [diff] [blame] | 10 | Copyright (C) 2002-2010 Nicholas Nethercote |
njn | 2bc1012 | 2005-05-08 02:10:27 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
njn | 8b68b64 | 2009-06-24 00:37:09 +0000 | [diff] [blame] | 31 | #if defined(VGA_x86) || defined(VGA_amd64) |
| 32 | |
njn | c7561b9 | 2005-06-19 01:24:32 +0000 | [diff] [blame] | 33 | #include "pub_tool_basics.h" |
njn | 6898086 | 2005-06-18 18:31:26 +0000 | [diff] [blame] | 34 | #include "pub_tool_cpuid.h" |
njn | 97405b2 | 2005-06-02 03:39:33 +0000 | [diff] [blame] | 35 | #include "pub_tool_libcbase.h" |
njn | f39e9a3 | 2005-06-12 02:43:17 +0000 | [diff] [blame] | 36 | #include "pub_tool_libcassert.h" |
njn | 36a20fa | 2005-06-03 03:08:39 +0000 | [diff] [blame] | 37 | #include "pub_tool_libcprint.h" |
njn | c7561b9 | 2005-06-19 01:24:32 +0000 | [diff] [blame] | 38 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 39 | #include "cg_arch.h" |
| 40 | |
// All CPUID info taken from sandpile.org/ia32/cpuid.htm
// Probably only works for Intel and AMD chips, and probably only for some of
// them.
| 44 | |
| 45 | static void micro_ops_warn(Int actual_size, Int used_size, Int line_size) |
| 46 | { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 47 | VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n", |
| 48 | actual_size); |
| 49 | VG_(dmsg)(" Simulating a %d KB I-cache with %d B lines\n", |
| 50 | used_size, line_size); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 51 | } |
| 52 | |
| 53 | /* Intel method is truly wretched. We have to do an insane indexing into an |
| 54 | * array of pre-defined configurations for various parts of the memory |
weidendo | 1c3e3c5 | 2006-11-23 13:04:30 +0000 | [diff] [blame] | 55 | * hierarchy. |
| 56 | * According to Intel Processor Identification, App Note 485. |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 57 | * |
| 58 | * If a L3 cache is found, then data for it rather than the L2 |
| 59 | * is returned via *LLc. |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 60 | */ |
| 61 | static |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 62 | Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc) |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 63 | { |
weidendo | 1c3e3c5 | 2006-11-23 13:04:30 +0000 | [diff] [blame] | 64 | Int cpuid1_eax; |
| 65 | Int cpuid1_ignore; |
| 66 | Int family; |
| 67 | Int model; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 68 | UChar info[16]; |
| 69 | Int i, trials; |
| 70 | Bool L2_found = False; |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 71 | /* If we see L3 cache info, copy it into L3c. Then, at the end, |
| 72 | copy it into *LLc. Hence if a L3 cache is specified, *LLc will |
| 73 | eventually contain a description of it rather than the L2 cache. |
| 74 | The use of the L3c intermediary makes this process independent |
| 75 | of the order in which the cache specifications appear in |
| 76 | info[]. */ |
| 77 | Bool L3_found = False; |
| 78 | cache_t L3c = { 0, 0, 0 }; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 79 | |
| 80 | if (level < 2) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 81 | VG_(dmsg)("warning: CPUID level < 2 for Intel processor (%d)\n", level); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 82 | return -1; |
| 83 | } |
| 84 | |
weidendo | 1c3e3c5 | 2006-11-23 13:04:30 +0000 | [diff] [blame] | 85 | /* family/model needed to distinguish code reuse (currently 0x49) */ |
| 86 | VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore, |
| 87 | &cpuid1_ignore, &cpuid1_ignore); |
| 88 | family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf); |
| 89 | model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf); |
| 90 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 91 | VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4], |
| 92 | (Int*)&info[8], (Int*)&info[12]); |
| 93 | trials = info[0] - 1; /* AL register - bits 0..7 of %eax */ |
| 94 | info[0] = 0x0; /* reset AL */ |
| 95 | |
| 96 | if (0 != trials) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 97 | VG_(dmsg)("warning: non-zero CPUID trials for Intel processor (%d)\n", |
| 98 | trials); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 99 | return -1; |
| 100 | } |
| 101 | |
| 102 | for (i = 0; i < 16; i++) { |
| 103 | |
| 104 | switch (info[i]) { |
| 105 | |
| 106 | case 0x0: /* ignore zeros */ |
| 107 | break; |
| 108 | |
| 109 | /* TLB info, ignore */ |
weidendo | 966b5bd | 2006-10-12 14:23:38 +0000 | [diff] [blame] | 110 | case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: |
tom | 55b3a81 | 2009-10-28 09:21:53 +0000 | [diff] [blame] | 111 | case 0x4f: case 0x50: case 0x51: case 0x52: case 0x55: |
tom | 1e76ff5 | 2009-01-02 11:07:18 +0000 | [diff] [blame] | 112 | case 0x56: case 0x57: case 0x59: |
tom | 55b3a81 | 2009-10-28 09:21:53 +0000 | [diff] [blame] | 113 | case 0x5a: case 0x5b: case 0x5c: case 0x5d: |
| 114 | case 0xb0: case 0xb1: case 0xb2: |
tom | 1e76ff5 | 2009-01-02 11:07:18 +0000 | [diff] [blame] | 115 | case 0xb3: case 0xb4: case 0xba: case 0xc0: |
tom | 55b3a81 | 2009-10-28 09:21:53 +0000 | [diff] [blame] | 116 | case 0xca: |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 117 | break; |
| 118 | |
| 119 | case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break; |
| 120 | case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break; |
tom | 55b3a81 | 2009-10-28 09:21:53 +0000 | [diff] [blame] | 121 | case 0x09: *I1c = (cache_t) { 32, 4, 64 }; break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 122 | case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break; |
| 123 | |
| 124 | case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break; |
| 125 | case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break; |
weidendo | 144b76c | 2009-01-26 22:56:14 +0000 | [diff] [blame] | 126 | case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 127 | case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break; |
| 128 | |
| 129 | /* IA-64 info -- panic! */ |
| 130 | case 0x10: case 0x15: case 0x1a: |
| 131 | case 0x88: case 0x89: case 0x8a: case 0x8d: |
| 132 | case 0x90: case 0x96: case 0x9b: |
njn | 6799325 | 2004-11-22 18:02:32 +0000 | [diff] [blame] | 133 | VG_(tool_panic)("IA-64 cache detected?!"); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 134 | |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 135 | /* L3 cache info. */ |
| 136 | case 0x22: L3c = (cache_t) { 512, 4, 64 }; L3_found = True; break; |
| 137 | case 0x23: L3c = (cache_t) { 1024, 8, 64 }; L3_found = True; break; |
| 138 | case 0x25: L3c = (cache_t) { 2048, 8, 64 }; L3_found = True; break; |
| 139 | case 0x29: L3c = (cache_t) { 4096, 8, 64 }; L3_found = True; break; |
| 140 | case 0x46: L3c = (cache_t) { 4096, 4, 64 }; L3_found = True; break; |
| 141 | case 0x47: L3c = (cache_t) { 8192, 8, 64 }; L3_found = True; break; |
| 142 | case 0x4a: L3c = (cache_t) { 6144, 12, 64 }; L3_found = True; break; |
| 143 | case 0x4b: L3c = (cache_t) { 8192, 16, 64 }; L3_found = True; break; |
| 144 | case 0x4c: L3c = (cache_t) { 12288, 12, 64 }; L3_found = True; break; |
| 145 | case 0x4d: L3c = (cache_t) { 16384, 16, 64 }; L3_found = True; break; |
| 146 | case 0xd0: L3c = (cache_t) { 512, 4, 64 }; L3_found = True; break; |
| 147 | case 0xd1: L3c = (cache_t) { 1024, 4, 64 }; L3_found = True; break; |
| 148 | case 0xd2: L3c = (cache_t) { 2048, 4, 64 }; L3_found = True; break; |
| 149 | case 0xd6: L3c = (cache_t) { 1024, 8, 64 }; L3_found = True; break; |
| 150 | case 0xd7: L3c = (cache_t) { 2048, 8, 64 }; L3_found = True; break; |
| 151 | case 0xd8: L3c = (cache_t) { 4096, 8, 64 }; L3_found = True; break; |
| 152 | case 0xdc: L3c = (cache_t) { 1536, 12, 64 }; L3_found = True; break; |
| 153 | case 0xdd: L3c = (cache_t) { 3072, 12, 64 }; L3_found = True; break; |
| 154 | case 0xde: L3c = (cache_t) { 6144, 12, 64 }; L3_found = True; break; |
| 155 | case 0xe2: L3c = (cache_t) { 2048, 16, 64 }; L3_found = True; break; |
| 156 | case 0xe3: L3c = (cache_t) { 4096, 16, 64 }; L3_found = True; break; |
| 157 | case 0xe4: L3c = (cache_t) { 8192, 16, 64 }; L3_found = True; break; |
| 158 | case 0xea: L3c = (cache_t) { 12288, 24, 64 }; L3_found = True; break; |
| 159 | case 0xeb: L3c = (cache_t) { 18432, 24, 64 }; L3_found = True; break; |
| 160 | case 0xec: L3c = (cache_t) { 24576, 24, 64 }; L3_found = True; break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 161 | |
tom | 55b3a81 | 2009-10-28 09:21:53 +0000 | [diff] [blame] | 162 | /* Described as "MLC" in Intel documentation */ |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 163 | case 0x21: *LLc = (cache_t) { 256, 8, 64 }; L2_found = True; break; |
tom | 55b3a81 | 2009-10-28 09:21:53 +0000 | [diff] [blame] | 164 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 165 | /* These are sectored, whatever that means */ |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 166 | case 0x39: *LLc = (cache_t) { 128, 4, 64 }; L2_found = True; break; |
| 167 | case 0x3c: *LLc = (cache_t) { 256, 4, 64 }; L2_found = True; break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 168 | |
| 169 | /* If a P6 core, this means "no L2 cache". |
| 170 | If a P4 core, this means "no L3 cache". |
| 171 | We don't know what core it is, so don't issue a warning. To detect |
| 172 | a missing L2 cache, we use 'L2_found'. */ |
| 173 | case 0x40: |
| 174 | break; |
| 175 | |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 176 | case 0x41: *LLc = (cache_t) { 128, 4, 32 }; L2_found = True; break; |
| 177 | case 0x42: *LLc = (cache_t) { 256, 4, 32 }; L2_found = True; break; |
| 178 | case 0x43: *LLc = (cache_t) { 512, 4, 32 }; L2_found = True; break; |
| 179 | case 0x44: *LLc = (cache_t) { 1024, 4, 32 }; L2_found = True; break; |
| 180 | case 0x45: *LLc = (cache_t) { 2048, 4, 32 }; L2_found = True; break; |
| 181 | case 0x48: *LLc = (cache_t) { 3072, 12, 64 }; L2_found = True; break; |
| 182 | case 0x4e: *LLc = (cache_t) { 6144, 24, 64 }; L2_found = True; break; |
weidendo | 1c3e3c5 | 2006-11-23 13:04:30 +0000 | [diff] [blame] | 183 | case 0x49: |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 184 | if (family == 15 && model == 6) { |
| 185 | /* On Xeon MP (family F, model 6), this is for L3 */ |
| 186 | L3c = (cache_t) { 4096, 16, 64 }; L3_found = True; |
| 187 | } else { |
| 188 | *LLc = (cache_t) { 4096, 16, 64 }; L2_found = True; |
| 189 | } |
| 190 | break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 191 | |
| 192 | /* These are sectored, whatever that means */ |
nethercote | ac7ecd7 | 2004-10-13 11:30:14 +0000 | [diff] [blame] | 193 | case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */ |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 194 | case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */ |
| 195 | case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */ |
| 196 | case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */ |
| 197 | |
| 198 | /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based. |
| 199 | * conversion to byte size is a total guess; treat the 12K and 16K |
| 200 | * cases the same since the cache byte size must be a power of two for |
| 201 | * everything to work!. Also guessing 32 bytes for the line size... |
| 202 | */ |
| 203 | case 0x70: /* 12K micro-ops, 8-way */ |
| 204 | *I1c = (cache_t) { 16, 8, 32 }; |
| 205 | micro_ops_warn(12, 16, 32); |
| 206 | break; |
| 207 | case 0x71: /* 16K micro-ops, 8-way */ |
| 208 | *I1c = (cache_t) { 16, 8, 32 }; |
| 209 | micro_ops_warn(16, 16, 32); |
| 210 | break; |
| 211 | case 0x72: /* 32K micro-ops, 8-way */ |
| 212 | *I1c = (cache_t) { 32, 8, 32 }; |
| 213 | micro_ops_warn(32, 32, 32); |
| 214 | break; |
| 215 | |
sewardj | f91b0a3 | 2009-08-28 22:34:09 +0000 | [diff] [blame] | 216 | /* not sectored, whatever that might mean */ |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 217 | case 0x78: *LLc = (cache_t) { 1024, 4, 64 }; L2_found = True; break; |
sewardj | f91b0a3 | 2009-08-28 22:34:09 +0000 | [diff] [blame] | 218 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 219 | /* These are sectored, whatever that means */ |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 220 | case 0x79: *LLc = (cache_t) { 128, 8, 64 }; L2_found = True; break; |
| 221 | case 0x7a: *LLc = (cache_t) { 256, 8, 64 }; L2_found = True; break; |
| 222 | case 0x7b: *LLc = (cache_t) { 512, 8, 64 }; L2_found = True; break; |
| 223 | case 0x7c: *LLc = (cache_t) { 1024, 8, 64 }; L2_found = True; break; |
| 224 | case 0x7d: *LLc = (cache_t) { 2048, 8, 64 }; L2_found = True; break; |
| 225 | case 0x7e: *LLc = (cache_t) { 256, 8, 128 }; L2_found = True; break; |
| 226 | case 0x7f: *LLc = (cache_t) { 512, 2, 64 }; L2_found = True; break; |
| 227 | case 0x80: *LLc = (cache_t) { 512, 8, 64 }; L2_found = True; break; |
| 228 | case 0x81: *LLc = (cache_t) { 128, 8, 32 }; L2_found = True; break; |
| 229 | case 0x82: *LLc = (cache_t) { 256, 8, 32 }; L2_found = True; break; |
| 230 | case 0x83: *LLc = (cache_t) { 512, 8, 32 }; L2_found = True; break; |
| 231 | case 0x84: *LLc = (cache_t) { 1024, 8, 32 }; L2_found = True; break; |
| 232 | case 0x85: *LLc = (cache_t) { 2048, 8, 32 }; L2_found = True; break; |
| 233 | case 0x86: *LLc = (cache_t) { 512, 4, 64 }; L2_found = True; break; |
| 234 | case 0x87: *LLc = (cache_t) { 1024, 8, 64 }; L2_found = True; break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 235 | |
tom | 942d9ef | 2005-07-27 22:59:50 +0000 | [diff] [blame] | 236 | /* Ignore prefetch information */ |
| 237 | case 0xf0: case 0xf1: |
njn | 6f74a7e | 2009-03-12 00:06:45 +0000 | [diff] [blame] | 238 | break; |
tom | 942d9ef | 2005-07-27 22:59:50 +0000 | [diff] [blame] | 239 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 240 | default: |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 241 | VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n", |
| 242 | info[i]); |
njn | 6f74a7e | 2009-03-12 00:06:45 +0000 | [diff] [blame] | 243 | break; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 244 | } |
| 245 | } |
| 246 | |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 247 | /* If we found a L3 cache, throw away the L2 data and use the L3's instead. */ |
| 248 | if (L3_found) { |
| 249 | VG_(dmsg)("warning: L3 cache found, using its data for the LL simulation.\n"); |
| 250 | *LLc = L3c; |
| 251 | L2_found = True; |
| 252 | } |
| 253 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 254 | if (!L2_found) |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 255 | VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n"); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 256 | |
| 257 | return 0; |
| 258 | } |
| 259 | |
/* AMD method is straightforward, just extract appropriate bits from the
 * result registers.
 *
 * Bits, for D1 and I1:
 *  31..24  data L1 cache size in KBs
 *  23..16  data L1 cache associativity (FFh=full)
 *  15.. 8  data L1 cache lines per tag
 *   7.. 0  data L1 cache line size in bytes
 *
 * Bits, for L2:
 *  31..16  unified L2 cache size in KBs
 *  15..12  unified L2 cache associativity (0=off, Fh=full)
 *  11.. 8  unified L2 cache lines per tag
 *   7.. 0  unified L2 cache line size in bytes
 *
 * #3  The AMD K7 processor's L2 cache must be configured prior to relying
 *     upon this information. (Whatever that means -- njn)
 *
 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
 * so we detect that.
 *
 * Returns 0 on success, non-zero on failure.  As with the Intel code
 * above, if a L3 cache is found, then data for it rather than the L2
 * is returned via *LLc.
 */
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 286 | |
| 287 | /* A small helper */ |
| 288 | static Int decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 ) |
| 289 | { |
| 290 | /* Decode a L2/L3 associativity indication. It is encoded |
| 291 | differently from the I1/D1 associativity. Returns 1 |
| 292 | (direct-map) as a safe but suboptimal result for unknown |
| 293 | encodings. */ |
| 294 | switch (bits_15_12 & 0xF) { |
| 295 | case 1: return 1; case 2: return 2; |
| 296 | case 4: return 4; case 6: return 8; |
| 297 | case 8: return 16; case 0xA: return 32; |
| 298 | case 0xB: return 48; case 0xC: return 64; |
| 299 | case 0xD: return 96; case 0xE: return 128; |
| 300 | case 0xF: /* fully associative */ |
| 301 | case 0: /* L2/L3 cache or TLB is disabled */ |
| 302 | default: |
| 303 | return 1; |
| 304 | } |
| 305 | } |
| 306 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 307 | static |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 308 | Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc) |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 309 | { |
| 310 | UInt ext_level; |
| 311 | UInt dummy, model; |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 312 | UInt I1i, D1i, L2i, L3i; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 313 | |
| 314 | VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy); |
| 315 | |
| 316 | if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 317 | VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n", |
| 318 | ext_level); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 319 | return -1; |
| 320 | } |
| 321 | |
| 322 | VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i); |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 323 | VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &L3i); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 324 | |
| 325 | VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy); |
| 326 | |
| 327 | /* Check for Duron bug */ |
| 328 | if (model == 0x630) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 329 | VG_(dmsg)("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes\n"); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 330 | L2i = (64 << 16) | (L2i & 0xffff); |
| 331 | } |
| 332 | |
| 333 | D1c->size = (D1i >> 24) & 0xff; |
| 334 | D1c->assoc = (D1i >> 16) & 0xff; |
| 335 | D1c->line_size = (D1i >> 0) & 0xff; |
| 336 | |
| 337 | I1c->size = (I1i >> 24) & 0xff; |
| 338 | I1c->assoc = (I1i >> 16) & 0xff; |
| 339 | I1c->line_size = (I1i >> 0) & 0xff; |
| 340 | |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 341 | LLc->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */ |
| 342 | LLc->assoc = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf); |
| 343 | LLc->line_size = (L2i >> 0) & 0xff; |
| 344 | |
| 345 | if (((L3i >> 18) & 0x3fff) > 0) { |
| 346 | /* There's an L3 cache. Replace *LLc contents with this info. */ |
| 347 | /* NB: the test in the if is "if L3 size > 0 ". I don't know if |
| 348 | this is the right way to test presence-vs-absence of L3. I |
| 349 | can't see any guidance on this in the AMD documentation. */ |
| 350 | LLc->size = ((L3i >> 18) & 0x3fff) * 512; |
| 351 | LLc->assoc = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf); |
| 352 | LLc->line_size = (L3i >> 0) & 0xff; |
| 353 | VG_(dmsg)("warning: L3 cache found, using its data for the L2 simulation.\n"); |
| 354 | } |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 355 | |
| 356 | return 0; |
| 357 | } |
| 358 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 359 | static |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 360 | Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc) |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 361 | { |
sewardj | b5f6f51 | 2005-03-10 23:59:00 +0000 | [diff] [blame] | 362 | Int level, ret; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 363 | Char vendor_id[13]; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 364 | |
sewardj | b5f6f51 | 2005-03-10 23:59:00 +0000 | [diff] [blame] | 365 | if (!VG_(has_cpuid)()) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 366 | VG_(dmsg)("CPUID instruction not supported\n"); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 367 | return -1; |
| 368 | } |
tom | f4ed059 | 2005-04-02 17:30:19 +0000 | [diff] [blame] | 369 | |
sewardj | b5f6f51 | 2005-03-10 23:59:00 +0000 | [diff] [blame] | 370 | VG_(cpuid)(0, &level, (int*)&vendor_id[0], |
| 371 | (int*)&vendor_id[8], (int*)&vendor_id[4]); |
| 372 | vendor_id[12] = '\0'; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 373 | |
| 374 | if (0 == level) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 375 | VG_(dmsg)("CPUID level is 0, early Pentium?\n"); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 376 | return -1; |
| 377 | } |
| 378 | |
| 379 | /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */ |
| 380 | if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) { |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 381 | ret = Intel_cache_info(level, I1c, D1c, LLc); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 382 | |
| 383 | } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) { |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 384 | ret = AMD_cache_info(I1c, D1c, LLc); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 385 | |
| 386 | } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) { |
| 387 | /* Total kludge. Pretend to be a VIA Nehemiah. */ |
| 388 | D1c->size = 64; |
| 389 | D1c->assoc = 16; |
| 390 | D1c->line_size = 16; |
| 391 | I1c->size = 64; |
| 392 | I1c->assoc = 4; |
| 393 | I1c->line_size = 16; |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 394 | LLc->size = 64; |
| 395 | LLc->assoc = 16; |
| 396 | LLc->line_size = 16; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 397 | ret = 0; |
| 398 | |
| 399 | } else { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 400 | VG_(dmsg)("CPU vendor ID not recognised (%s)\n", vendor_id); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 401 | return -1; |
| 402 | } |
| 403 | |
| 404 | /* Successful! Convert sizes from KB to bytes */ |
| 405 | I1c->size *= 1024; |
| 406 | D1c->size *= 1024; |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 407 | LLc->size *= 1024; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 408 | |
| 409 | return ret; |
| 410 | } |
| 411 | |
| 412 | |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 413 | void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc, |
njn | af839f5 | 2005-06-23 03:27:57 +0000 | [diff] [blame] | 414 | Bool all_caches_clo_defined) |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 415 | { |
| 416 | Int res; |
| 417 | |
| 418 | // Set caches to default. |
njn | a1d1a64 | 2004-11-26 18:36:02 +0000 | [diff] [blame] | 419 | *I1c = (cache_t) { 65536, 2, 64 }; |
| 420 | *D1c = (cache_t) { 65536, 2, 64 }; |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 421 | *LLc = (cache_t) { 262144, 8, 64 }; |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 422 | |
| 423 | // Then replace with any info we can get from CPUID. |
njn | 2d853a1 | 2010-10-06 22:46:31 +0000 | [diff] [blame] | 424 | res = get_caches_from_CPUID(I1c, D1c, LLc); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 425 | |
| 426 | // Warn if CPUID failed and config not completely specified from cmd line. |
| 427 | if (res != 0 && !all_caches_clo_defined) { |
sewardj | b2c985b | 2009-07-15 14:51:17 +0000 | [diff] [blame] | 428 | VG_(dmsg)("Warning: Couldn't auto-detect cache config, using one " |
| 429 | "or more defaults \n"); |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 430 | } |
| 431 | } |
| 432 | |
njn | 8b68b64 | 2009-06-24 00:37:09 +0000 | [diff] [blame] | 433 | #endif // defined(VGA_x86) || defined(VGA_amd64) |
| 434 | |
nethercote | b35a8b9 | 2004-09-11 16:45:27 +0000 | [diff] [blame] | 435 | /*--------------------------------------------------------------------*/ |
| 436 | /*--- end ---*/ |
| 437 | /*--------------------------------------------------------------------*/ |