Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 1 | // Copyright 2017 Google Inc. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "cpuinfo_x86.h" |
| 16 | #include "internal/bit_utils.h" |
| 17 | #include "internal/cpuid_x86.h" |
| 18 | |
| 19 | #include <stdbool.h> |
| 20 | #include <string.h> |
| 21 | |
| 22 | static const Leaf kEmptyLeaf; |
| 23 | |
| 24 | static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) { |
| 25 | if (leaf_id <= max_cpuid_leaf) { |
| 26 | return CpuId(leaf_id); |
| 27 | } else { |
| 28 | return kEmptyLeaf; |
| 29 | } |
| 30 | } |
| 31 | |
// Bit masks tested against the low 32 bits of XCR0 by the Has*OsXSave helpers.
#define MASK_XMM 0x2
#define MASK_YMM 0x4
#define MASK_MASKREG 0x20
#define MASK_ZMM0_15 0x40
#define MASK_ZMM16_31 0x80

// Returns true when every bit of `mask` is also set in `value`.
static bool HasMask(uint32_t value, uint32_t mask) {
  const uint32_t missing_bits = mask & ~value;
  return missing_bits == 0;
}
| 41 | |
| 42 | // Checks that operating system saves and restores xmm registers during context |
| 43 | // switches. |
| 44 | static bool HasXmmOsXSave(uint32_t xcr0_eax) { |
| 45 | return HasMask(xcr0_eax, MASK_XMM); |
| 46 | } |
| 47 | |
| 48 | // Checks that operating system saves and restores ymm registers during context |
| 49 | // switches. |
| 50 | static bool HasYmmOsXSave(uint32_t xcr0_eax) { |
| 51 | return HasMask(xcr0_eax, MASK_XMM | MASK_YMM); |
| 52 | } |
| 53 | |
| 54 | // Checks that operating system saves and restores zmm registers during context |
| 55 | // switches. |
| 56 | static bool HasZmmOsXSave(uint32_t xcr0_eax) { |
| 57 | return HasMask(xcr0_eax, MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | |
| 58 | MASK_ZMM16_31); |
| 59 | } |
| 60 | |
| 61 | static void SetVendor(const Leaf leaf, char* const vendor) { |
| 62 | *(uint32_t*)(vendor) = leaf.ebx; |
| 63 | *(uint32_t*)(vendor + 4) = leaf.edx; |
| 64 | *(uint32_t*)(vendor + 8) = leaf.ecx; |
| 65 | vendor[12] = '\0'; |
| 66 | } |
| 67 | |
| 68 | static int IsVendor(const Leaf leaf, const char* const name) { |
| 69 | const uint32_t ebx = *(const uint32_t*)(name); |
| 70 | const uint32_t edx = *(const uint32_t*)(name + 4); |
| 71 | const uint32_t ecx = *(const uint32_t*)(name + 8); |
| 72 | return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx; |
| 73 | } |
| 74 | |
| 75 | // Reference https://en.wikipedia.org/wiki/CPUID. |
| 76 | static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info) { |
| 77 | const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); |
| 78 | const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); |
| 79 | |
| 80 | const bool have_xsave = IsBitSet(leaf_1.ecx, 26); |
| 81 | const bool have_osxsave = IsBitSet(leaf_1.ecx, 27); |
| 82 | const uint32_t xcr0_eax = (have_xsave && have_osxsave) ? GetXCR0Eax() : 0; |
| 83 | const bool have_sse_os_support = HasXmmOsXSave(xcr0_eax); |
| 84 | const bool have_avx_os_support = HasYmmOsXSave(xcr0_eax); |
| 85 | const bool have_avx512_os_support = HasZmmOsXSave(xcr0_eax); |
| 86 | |
| 87 | const uint32_t family = ExtractBitRange(leaf_1.eax, 11, 8); |
| 88 | const uint32_t extended_family = ExtractBitRange(leaf_1.eax, 27, 20); |
| 89 | const uint32_t model = ExtractBitRange(leaf_1.eax, 7, 4); |
| 90 | const uint32_t extended_model = ExtractBitRange(leaf_1.eax, 19, 16); |
| 91 | |
| 92 | X86Features* const features = &info->features; |
| 93 | |
| 94 | info->family = extended_family + family; |
| 95 | info->model = (extended_model << 4) + model; |
| 96 | info->stepping = ExtractBitRange(leaf_1.eax, 3, 0); |
| 97 | |
Patrik Fiedler | 0f1f3ac | 2018-02-13 11:44:40 +0100 | [diff] [blame^] | 98 | features->smx = IsBitSet(leaf_1.ecx, 6); |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 99 | features->aes = IsBitSet(leaf_1.ecx, 25); |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 100 | features->f16c = IsBitSet(leaf_1.ecx, 29); |
Patrik Fiedler | 0f1f3ac | 2018-02-13 11:44:40 +0100 | [diff] [blame^] | 101 | features->sgx = IsBitSet(leaf_7.ebx, 2); |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 102 | features->bmi1 = IsBitSet(leaf_7.ebx, 3); |
| 103 | features->bmi2 = IsBitSet(leaf_7.ebx, 8); |
Patrik Fiedler | 0f1f3ac | 2018-02-13 11:44:40 +0100 | [diff] [blame^] | 104 | features->erms = IsBitSet(leaf_7.ebx, 9); |
Guillaume Chatelet | 11e3e20 | 2018-02-09 08:55:11 +0100 | [diff] [blame] | 105 | features->vpclmulqdq = IsBitSet(leaf_7.ecx, 10); |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 106 | |
| 107 | if (have_sse_os_support) { |
| 108 | features->ssse3 = IsBitSet(leaf_1.ecx, 9); |
| 109 | features->sse4_1 = IsBitSet(leaf_1.ecx, 19); |
| 110 | features->sse4_2 = IsBitSet(leaf_1.ecx, 20); |
| 111 | } |
| 112 | |
| 113 | if (have_avx_os_support) { |
| 114 | features->fma3 = IsBitSet(leaf_1.ecx, 12); |
| 115 | features->avx = IsBitSet(leaf_1.ecx, 28); |
| 116 | features->avx2 = IsBitSet(leaf_7.ebx, 5); |
| 117 | } |
| 118 | |
| 119 | if (have_avx512_os_support) { |
| 120 | features->avx512f = IsBitSet(leaf_7.ebx, 16); |
| 121 | features->avx512cd = IsBitSet(leaf_7.ebx, 28); |
| 122 | features->avx512er = IsBitSet(leaf_7.ebx, 27); |
| 123 | features->avx512pf = IsBitSet(leaf_7.ebx, 26); |
| 124 | features->avx512bw = IsBitSet(leaf_7.ebx, 30); |
| 125 | features->avx512dq = IsBitSet(leaf_7.ebx, 17); |
| 126 | features->avx512vl = IsBitSet(leaf_7.ebx, 31); |
| 127 | features->avx512ifma = IsBitSet(leaf_7.ebx, 21); |
| 128 | features->avx512vbmi = IsBitSet(leaf_7.ecx, 1); |
| 129 | features->avx512vbmi2 = IsBitSet(leaf_7.ecx, 6); |
| 130 | features->avx512vnni = IsBitSet(leaf_7.ecx, 11); |
| 131 | features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); |
| 132 | features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); |
| 133 | features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); |
| 134 | features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | static const X86Info kEmptyX86Info; |
| 139 | |
| 140 | X86Info GetX86Info(void) { |
| 141 | X86Info info = kEmptyX86Info; |
| 142 | const Leaf leaf_0 = CpuId(0); |
| 143 | const uint32_t max_cpuid_leaf = leaf_0.eax; |
| 144 | SetVendor(leaf_0, info.vendor); |
| 145 | if (IsVendor(leaf_0, "GenuineIntel") || IsVendor(leaf_0, "AuthenticAMD")) { |
| 146 | ParseCpuId(max_cpuid_leaf, &info); |
| 147 | } |
| 148 | return info; |
| 149 | } |
| 150 | |
| 151 | #define CPUID(FAMILY, MODEL) (((FAMILY & 0xFF) << 8) | (MODEL & 0xFF)) |
| 152 | |
| 153 | X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { |
| 154 | if (memcmp(info->vendor, "GenuineIntel", sizeof(info->vendor)) == 0) { |
| 155 | switch (CPUID(info->family, info->model)) { |
| 156 | case CPUID(0x06, 0x35): |
| 157 | case CPUID(0x06, 0x36): |
| 158 | // https://en.wikipedia.org/wiki/Bonnell_(microarchitecture) |
| 159 | return INTEL_ATOM_BNL; |
| 160 | case CPUID(0x06, 0x37): |
| 161 | case CPUID(0x06, 0x4C): |
| 162 | // https://en.wikipedia.org/wiki/Silvermont |
| 163 | return INTEL_ATOM_SMT; |
| 164 | case CPUID(0x06, 0x5C): |
| 165 | // https://en.wikipedia.org/wiki/Goldmont |
| 166 | return INTEL_ATOM_GMT; |
| 167 | case CPUID(0x06, 0x0F): |
| 168 | case CPUID(0x06, 0x16): |
| 169 | // https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture) |
| 170 | return INTEL_CORE; |
| 171 | case CPUID(0x06, 0x17): |
| 172 | case CPUID(0x06, 0x1D): |
| 173 | // https://en.wikipedia.org/wiki/Penryn_(microarchitecture) |
| 174 | return INTEL_PNR; |
| 175 | case CPUID(0x06, 0x1A): |
| 176 | case CPUID(0x06, 0x1E): |
| 177 | case CPUID(0x06, 0x1F): |
| 178 | case CPUID(0x06, 0x2E): |
| 179 | // https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) |
| 180 | return INTEL_NHM; |
| 181 | case CPUID(0x06, 0x25): |
| 182 | case CPUID(0x06, 0x2C): |
| 183 | case CPUID(0x06, 0x2F): |
| 184 | // https://en.wikipedia.org/wiki/Westmere_(microarchitecture) |
| 185 | return INTEL_WSM; |
| 186 | case CPUID(0x06, 0x2A): |
| 187 | case CPUID(0x06, 0x2D): |
| 188 | // https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings |
| 189 | return INTEL_SNB; |
| 190 | case CPUID(0x06, 0x3A): |
| 191 | case CPUID(0x06, 0x3E): |
| 192 | // https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings |
| 193 | return INTEL_IVB; |
| 194 | case CPUID(0x06, 0x3C): |
| 195 | case CPUID(0x06, 0x3F): |
| 196 | case CPUID(0x06, 0x45): |
| 197 | case CPUID(0x06, 0x46): |
| 198 | // https://en.wikipedia.org/wiki/Haswell_(microarchitecture) |
| 199 | return INTEL_HSW; |
| 200 | case CPUID(0x06, 0x3D): |
| 201 | case CPUID(0x06, 0x47): |
| 202 | case CPUID(0x06, 0x4F): |
| 203 | case CPUID(0x06, 0x56): |
| 204 | // https://en.wikipedia.org/wiki/Broadwell_(microarchitecture) |
| 205 | return INTEL_BDW; |
| 206 | case CPUID(0x06, 0x4E): |
| 207 | case CPUID(0x06, 0x55): |
| 208 | case CPUID(0x06, 0x5E): |
| 209 | // https://en.wikipedia.org/wiki/Skylake_(microarchitecture) |
| 210 | return INTEL_SKL; |
| 211 | case CPUID(0x06, 0x8E): |
| 212 | case CPUID(0x06, 0x9E): |
| 213 | // https://en.wikipedia.org/wiki/Kaby_Lake |
| 214 | return INTEL_KBL; |
| 215 | default: |
| 216 | return X86_UNKNOWN; |
| 217 | } |
| 218 | } |
| 219 | if (memcmp(info->vendor, "AuthenticAMD", sizeof(info->vendor)) == 0) { |
| 220 | switch (info->family) { |
| 221 | // https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures |
| 222 | case 0x0F: |
| 223 | return AMD_HAMMER; |
| 224 | case 0x10: |
| 225 | return AMD_K10; |
| 226 | case 0x14: |
| 227 | return AMD_BOBCAT; |
| 228 | case 0x15: |
| 229 | return AMD_BULLDOZER; |
| 230 | case 0x16: |
| 231 | return AMD_JAGUAR; |
| 232 | case 0x17: |
| 233 | return AMD_ZEN; |
| 234 | default: |
| 235 | return X86_UNKNOWN; |
| 236 | } |
| 237 | } |
| 238 | return X86_UNKNOWN; |
| 239 | } |
| 240 | |
| 241 | static void SetString(const uint32_t max_cpuid_ext_leaf, const uint32_t leaf_id, |
| 242 | char* buffer) { |
| 243 | const Leaf leaf = SafeCpuId(max_cpuid_ext_leaf, leaf_id); |
| 244 | // We allow calling memcpy from SetString which is only called when requesting |
| 245 | // X86BrandString. |
| 246 | memcpy(buffer, &leaf, sizeof(Leaf)); |
| 247 | } |
| 248 | |
| 249 | void FillX86BrandString(char brand_string[49]) { |
| 250 | const Leaf leaf_ext_0 = CpuId(0x80000000); |
| 251 | const uint32_t max_cpuid_leaf_ext = leaf_ext_0.eax; |
| 252 | SetString(max_cpuid_leaf_ext, 0x80000002, brand_string); |
| 253 | SetString(max_cpuid_leaf_ext, 0x80000003, brand_string + 16); |
| 254 | SetString(max_cpuid_leaf_ext, 0x80000004, brand_string + 32); |
| 255 | brand_string[48] = '\0'; |
| 256 | } |
| 257 | |
| 258 | //////////////////////////////////////////////////////////////////////////////// |
| 259 | // Introspection functions |
| 260 | |
| 261 | int GetX86FeaturesEnumValue(const X86Features* features, |
| 262 | X86FeaturesEnum value) { |
| 263 | switch (value) { |
| 264 | case X86_AES: |
| 265 | return features->aes; |
| 266 | case X86_ERMS: |
| 267 | return features->erms; |
| 268 | case X86_F16C: |
| 269 | return features->f16c; |
| 270 | case X86_FMA3: |
| 271 | return features->fma3; |
Guillaume Chatelet | 11e3e20 | 2018-02-09 08:55:11 +0100 | [diff] [blame] | 272 | case X86_VPCLMULQDQ: |
| 273 | return features->vpclmulqdq; |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 274 | case X86_BMI1: |
| 275 | return features->bmi1; |
| 276 | case X86_BMI2: |
| 277 | return features->bmi2; |
| 278 | case X86_SSSE3: |
| 279 | return features->ssse3; |
| 280 | case X86_SSE4_1: |
| 281 | return features->sse4_1; |
| 282 | case X86_SSE4_2: |
| 283 | return features->sse4_2; |
| 284 | case X86_AVX: |
| 285 | return features->avx; |
| 286 | case X86_AVX2: |
| 287 | return features->avx2; |
| 288 | case X86_AVX512F: |
| 289 | return features->avx512f; |
| 290 | case X86_AVX512CD: |
| 291 | return features->avx512cd; |
| 292 | case X86_AVX512ER: |
| 293 | return features->avx512er; |
| 294 | case X86_AVX512PF: |
| 295 | return features->avx512pf; |
| 296 | case X86_AVX512BW: |
| 297 | return features->avx512bw; |
| 298 | case X86_AVX512DQ: |
| 299 | return features->avx512dq; |
| 300 | case X86_AVX512VL: |
| 301 | return features->avx512vl; |
| 302 | case X86_AVX512IFMA: |
| 303 | return features->avx512ifma; |
| 304 | case X86_AVX512VBMI: |
| 305 | return features->avx512vbmi; |
| 306 | case X86_AVX512VBMI2: |
| 307 | return features->avx512vbmi2; |
| 308 | case X86_AVX512VNNI: |
| 309 | return features->avx512vnni; |
| 310 | case X86_AVX512BITALG: |
| 311 | return features->avx512bitalg; |
| 312 | case X86_AVX512VPOPCNTDQ: |
| 313 | return features->avx512vpopcntdq; |
| 314 | case X86_AVX512_4VNNIW: |
| 315 | return features->avx512_4vnniw; |
| 316 | case X86_AVX512_4VBMI2: |
| 317 | return features->avx512_4vbmi2; |
Patrik Fiedler | 3ee0d62 | 2018-02-13 11:14:32 +0100 | [diff] [blame] | 318 | case X86_SMX: |
| 319 | return features->smx; |
| 320 | case X86_SGX: |
| 321 | return features->sgx; |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 322 | case X86_LAST_: |
| 323 | break; |
| 324 | } |
| 325 | return false; |
| 326 | } |
| 327 | |
| 328 | const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { |
| 329 | switch (value) { |
| 330 | case X86_AES: |
| 331 | return "aes"; |
| 332 | case X86_ERMS: |
| 333 | return "erms"; |
| 334 | case X86_F16C: |
| 335 | return "f16c"; |
| 336 | case X86_FMA3: |
| 337 | return "fma3"; |
Guillaume Chatelet | 11e3e20 | 2018-02-09 08:55:11 +0100 | [diff] [blame] | 338 | case X86_VPCLMULQDQ: |
| 339 | return "vpclmulqdq"; |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 340 | case X86_BMI1: |
| 341 | return "bmi1"; |
| 342 | case X86_BMI2: |
| 343 | return "bmi2"; |
| 344 | case X86_SSSE3: |
| 345 | return "ssse3"; |
| 346 | case X86_SSE4_1: |
| 347 | return "sse4_1"; |
| 348 | case X86_SSE4_2: |
| 349 | return "sse4_2"; |
| 350 | case X86_AVX: |
| 351 | return "avx"; |
| 352 | case X86_AVX2: |
| 353 | return "avx2"; |
| 354 | case X86_AVX512F: |
| 355 | return "avx512f"; |
| 356 | case X86_AVX512CD: |
| 357 | return "avx512cd"; |
| 358 | case X86_AVX512ER: |
| 359 | return "avx512er"; |
| 360 | case X86_AVX512PF: |
| 361 | return "avx512pf"; |
| 362 | case X86_AVX512BW: |
| 363 | return "avx512bw"; |
| 364 | case X86_AVX512DQ: |
| 365 | return "avx512dq"; |
| 366 | case X86_AVX512VL: |
| 367 | return "avx512vl"; |
| 368 | case X86_AVX512IFMA: |
| 369 | return "avx512ifma"; |
| 370 | case X86_AVX512VBMI: |
| 371 | return "avx512vbmi"; |
| 372 | case X86_AVX512VBMI2: |
| 373 | return "avx512vbmi2"; |
| 374 | case X86_AVX512VNNI: |
| 375 | return "avx512vnni"; |
| 376 | case X86_AVX512BITALG: |
| 377 | return "avx512bitalg"; |
| 378 | case X86_AVX512VPOPCNTDQ: |
| 379 | return "avx512vpopcntdq"; |
| 380 | case X86_AVX512_4VNNIW: |
| 381 | return "avx512_4vnniw"; |
| 382 | case X86_AVX512_4VBMI2: |
| 383 | return "avx512_4vbmi2"; |
Patrik Fiedler | 3ee0d62 | 2018-02-13 11:14:32 +0100 | [diff] [blame] | 384 | case X86_SMX: |
| 385 | return "smx"; |
| 386 | case X86_SGX: |
| 387 | return "sgx"; |
Guillaume Chatelet | 439d371 | 2018-02-01 10:03:09 +0100 | [diff] [blame] | 388 | case X86_LAST_: |
| 389 | break; |
| 390 | } |
| 391 | return "unknown_feature"; |
| 392 | } |
| 393 | |
| 394 | const char* GetX86MicroarchitectureName(X86Microarchitecture uarch) { |
| 395 | switch (uarch) { |
| 396 | case X86_UNKNOWN: |
| 397 | return "X86_UNKNOWN"; |
| 398 | case INTEL_CORE: |
| 399 | return "INTEL_CORE"; |
| 400 | case INTEL_PNR: |
| 401 | return "INTEL_PNR"; |
| 402 | case INTEL_NHM: |
| 403 | return "INTEL_NHM"; |
| 404 | case INTEL_ATOM_BNL: |
| 405 | return "INTEL_ATOM_BNL"; |
| 406 | case INTEL_WSM: |
| 407 | return "INTEL_WSM"; |
| 408 | case INTEL_SNB: |
| 409 | return "INTEL_SNB"; |
| 410 | case INTEL_IVB: |
| 411 | return "INTEL_IVB"; |
| 412 | case INTEL_ATOM_SMT: |
| 413 | return "INTEL_ATOM_SMT"; |
| 414 | case INTEL_HSW: |
| 415 | return "INTEL_HSW"; |
| 416 | case INTEL_BDW: |
| 417 | return "INTEL_BDW"; |
| 418 | case INTEL_SKL: |
| 419 | return "INTEL_SKL"; |
| 420 | case INTEL_ATOM_GMT: |
| 421 | return "INTEL_ATOM_GMT"; |
| 422 | case INTEL_KBL: |
| 423 | return "INTEL_KBL"; |
| 424 | case INTEL_CFL: |
| 425 | return "INTEL_CFL"; |
| 426 | case INTEL_CNL: |
| 427 | return "INTEL_CNL"; |
| 428 | case AMD_HAMMER: |
| 429 | return "AMD_HAMMER"; |
| 430 | case AMD_K10: |
| 431 | return "AMD_K10"; |
| 432 | case AMD_BOBCAT: |
| 433 | return "AMD_BOBCAT"; |
| 434 | case AMD_BULLDOZER: |
| 435 | return "AMD_BULLDOZER"; |
| 436 | case AMD_JAGUAR: |
| 437 | return "AMD_JAGUAR"; |
| 438 | case AMD_ZEN: |
| 439 | return "AMD_ZEN"; |
| 440 | } |
| 441 | return "unknown microarchitecture"; |
| 442 | } |