//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file is based on LLVM's lib/Support/Host.cpp.
//  It implements the operating system Host concept and builtin
//  __cpu_model for the compiler_rt library, for x86 only.
//
//===----------------------------------------------------------------------===//
15
16#if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
19
20#include <assert.h>
21
22#define bool int
23#define true 1
24#define false 0
25
26#ifdef _MSC_VER
27#include <intrin.h>
28#endif
29
Reid Kleckner3ae87c42017-04-07 16:54:32 +000030#ifndef __has_attribute
31#define __has_attribute(attr) 0
32#endif
33
// Vendor signatures as returned in EBX by CPUID leaf 0: the first four
// characters of the vendor string packed into a little-endian 32-bit value.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547 /* Genu */,
  SIG_AMD = 0x68747541 /* Auth */
};
38
// Values stored in __cpu_model.__cpu_vendor. Zero is deliberately unused:
// __cpu_indicator_init treats a zero vendor as "not initialized yet".
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};
45
// Values stored in __cpu_model.__cpu_type. Zero means "unknown".
// NOTE(review): the numeric values are presumably part of the ABI shared
// with compiler-generated __builtin_cpu_is() checks -- confirm before
// reordering; new entries should only be appended before CPU_TYPE_MAX.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  CPU_TYPE_MAX
};
59
// Values stored in __cpu_model.__cpu_subtype. Zero means "unknown".
// NOTE(review): the interleaved Intel/AMD order looks historical and is
// presumably ABI -- confirm before reordering; append new entries before
// CPU_SUBTYPE_MAX.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  CPU_SUBTYPE_MAX
};
79
// Bit indices into __cpu_model.__cpu_features[0]: feature F is reported as
// available when bit (1 << F) is set. All indices must stay below 32 because
// the mask is a single unsigned int.
// NOTE(review): the order is presumably ABI shared with
// __builtin_cpu_supports() -- confirm before reordering.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ
};
113
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true when the CPUID instruction may be executed. Only 32-bit x86
// built with a GNU-compatible compiler performs a real probe; every other
// configuration assumes CPUID is present.
static bool isCpuIdSupported(void) {
#if defined(__i386__) && (defined(__GNUC__) || defined(__clang__))
  int CpuidSupported;
  // Try to flip bit 21 (0x00200000) of EFLAGS and read it back; the output is
  // 1 only if the flipped value sticks.
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(CpuidSupported)
          :
          : "eax", "ecx");
  if (!CpuidSupported)
    return false;
#endif
  return true;
}
145
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
///
/// Note the inverted convention: false means success and all four outputs
/// were written; true means the outputs were left untouched.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually:
  // stash rbx in rsi, run cpuid, then swap so rsi holds cpuid's EBX result
  // and rbx holds its original value again.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
186
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
///
/// `value` is loaded into EAX and `subleaf` into ECX before cpuid executes.
/// As with getX86CpuIDAndInfo, false means success; on a true return the
/// outputs are left untouched.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
225
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// Writes the low half of XCR0 to *rEAX and the high half to *rEDX and returns
// false; returns true (outputs untouched) when no way of reading the register
// is available for this compiler.
//
// The instruction is executed unconditionally, so callers must first confirm
// via CPUID that it is usable (getAvailableFeatures only calls this after
// checking leaf 1 ECX bits 27 and 28).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
243
// Decode the display family and model from the EAX value of CPUID leaf 1.
//
// The base family (bits 8-11) and base model (bits 4-7) are extended as
// follows:
//   - family 0xF: the extended family (bits 20-27) is added to the family;
//   - family 6 or 0xF: the extended model (bits 16-19) supplies the upper
//     four bits of the model.
// For every other family the extended fields are ignored.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  unsigned BaseFamily = (EAX >> 8) & 0xf; // Bits 8 - 11
  unsigned Fam = BaseFamily;
  unsigned Mod = (EAX >> 4) & 0xf; // Bits 4 - 7

  if (BaseFamily == 6 || BaseFamily == 0xf) {
    if (BaseFamily == 0xf)
      // Examine extended family ID if family ID is F.
      Fam += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    Mod += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }

  *Family = Fam;
  *Model = Mod;
}
256
Craig Topper76ac8132017-07-10 17:47:23 +0000257static void
258getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
259 unsigned Brand_id, unsigned Features,
260 unsigned *Type, unsigned *Subtype) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000261 if (Brand_id != 0)
262 return;
263 switch (Family) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000264 case 6:
265 switch (Model) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000266 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
267 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
268 // mobile processor, Intel Core 2 Extreme processor, Intel
269 // Pentium Dual-Core processor, Intel Xeon processor, model
270 // 0Fh. All processors are manufactured using the 65 nm process.
271 case 0x16: // Intel Celeron processor model 16h. All processors are
272 // manufactured using the 65 nm process
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000273 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
274 // 17h. All processors are manufactured using the 45 nm process.
275 //
276 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
277 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
278 // the 45 nm process.
279 *Type = INTEL_CORE2; // "penryn"
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000280 break;
281 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
282 // processors are manufactured using the 45 nm process.
283 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
284 // As found in a Summer 2010 model iMac.
285 case 0x1f:
Craig Topper76ac8132017-07-10 17:47:23 +0000286 case 0x2e: // Nehalem EX
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000287 *Type = INTEL_COREI7; // "nehalem"
288 *Subtype = INTEL_COREI7_NEHALEM;
289 break;
290 case 0x25: // Intel Core i7, laptop version.
291 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
292 // processors are manufactured using the 32 nm process.
293 case 0x2f: // Westmere EX
294 *Type = INTEL_COREI7; // "westmere"
295 *Subtype = INTEL_COREI7_WESTMERE;
296 break;
297 case 0x2a: // Intel Core i7 processor. All processors are manufactured
298 // using the 32 nm process.
299 case 0x2d:
300 *Type = INTEL_COREI7; //"sandybridge"
301 *Subtype = INTEL_COREI7_SANDYBRIDGE;
302 break;
303 case 0x3a:
Craig Topper76ac8132017-07-10 17:47:23 +0000304 case 0x3e: // Ivy Bridge EP
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000305 *Type = INTEL_COREI7; // "ivybridge"
306 *Subtype = INTEL_COREI7_IVYBRIDGE;
307 break;
308
309 // Haswell:
310 case 0x3c:
311 case 0x3f:
312 case 0x45:
313 case 0x46:
314 *Type = INTEL_COREI7; // "haswell"
315 *Subtype = INTEL_COREI7_HASWELL;
316 break;
317
318 // Broadwell:
319 case 0x3d:
320 case 0x47:
321 case 0x4f:
322 case 0x56:
323 *Type = INTEL_COREI7; // "broadwell"
324 *Subtype = INTEL_COREI7_BROADWELL;
325 break;
326
327 // Skylake:
Craig Topper61d84502017-07-10 17:30:20 +0000328 case 0x4e: // Skylake mobile
329 case 0x5e: // Skylake desktop
330 case 0x8e: // Kaby Lake mobile
331 case 0x9e: // Kaby Lake desktop
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000332 *Type = INTEL_COREI7; // "skylake"
333 *Subtype = INTEL_COREI7_SKYLAKE;
334 break;
335
Craig Topper61d84502017-07-10 17:30:20 +0000336 // Skylake Xeon:
337 case 0x55:
Craig Topper76ac8132017-07-10 17:47:23 +0000338 *Type = INTEL_COREI7;
Craig Topper61d84502017-07-10 17:30:20 +0000339 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
340 break;
341
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000342 case 0x1c: // Most 45 nm Intel Atom processors
343 case 0x26: // 45 nm Atom Lincroft
344 case 0x27: // 32 nm Atom Medfield
345 case 0x35: // 32 nm Atom Midview
346 case 0x36: // 32 nm Atom Midview
Craig Toppere6b07072017-07-13 02:56:24 +0000347 *Type = INTEL_BONNELL;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000348 break; // "bonnell"
349
350 // Atom Silvermont codes from the Intel software optimization guide.
351 case 0x37:
352 case 0x4a:
353 case 0x4d:
354 case 0x5a:
355 case 0x5d:
356 case 0x4c: // really airmont
Craig Toppere6b07072017-07-13 02:56:24 +0000357 *Type = INTEL_SILVERMONT;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000358 break; // "silvermont"
359
360 case 0x57:
Craig Toppere6b07072017-07-13 02:56:24 +0000361 *Type = INTEL_KNL; // knl
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000362 break;
363
Craig Toppere6b07072017-07-13 02:56:24 +0000364 default: // Unknown family 6 CPU.
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000365 break;
Craig Topper76ac8132017-07-10 17:47:23 +0000366 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000367 }
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000368 default:
Craig Toppere6b07072017-07-13 02:56:24 +0000369 break; // Unknown.
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000370 }
371}
372
Craig Topper61d84502017-07-10 17:30:20 +0000373static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
374 unsigned Features, unsigned *Type,
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000375 unsigned *Subtype) {
376 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
377 // appears to be no way to generate the wide variety of AMD-specific targets
378 // from the information returned from CPUID.
379 switch (Family) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000380 case 16:
381 *Type = AMDFAM10H; // "amdfam10"
382 switch (Model) {
383 case 2:
384 *Subtype = AMDFAM10H_BARCELONA;
385 break;
386 case 4:
387 *Subtype = AMDFAM10H_SHANGHAI;
388 break;
389 case 8:
390 *Subtype = AMDFAM10H_ISTANBUL;
391 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000392 }
Craig Topper76ac8132017-07-10 17:47:23 +0000393 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000394 case 20:
Craig Toppere6b07072017-07-13 02:56:24 +0000395 *Type = AMD_BTVER1;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000396 break; // "btver1";
397 case 21:
398 *Type = AMDFAM15H;
Craig Topper76ac8132017-07-10 17:47:23 +0000399 if (Model >= 0x60 && Model <= 0x7f) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000400 *Subtype = AMDFAM15H_BDVER4;
Craig Toppere6b07072017-07-13 02:56:24 +0000401 break; // "bdver4"; 60h-7Fh: Excavator
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000402 }
403 if (Model >= 0x30 && Model <= 0x3f) {
404 *Subtype = AMDFAM15H_BDVER3;
405 break; // "bdver3"; 30h-3Fh: Steamroller
406 }
407 if (Model >= 0x10 && Model <= 0x1f) {
408 *Subtype = AMDFAM15H_BDVER2;
409 break; // "bdver2"; 10h-1Fh: Piledriver
410 }
411 if (Model <= 0x0f) {
412 *Subtype = AMDFAM15H_BDVER1;
413 break; // "bdver1"; 00h-0Fh: Bulldozer
414 }
415 break;
416 case 22:
Craig Topper76ac8132017-07-10 17:47:23 +0000417 *Type = AMD_BTVER2;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000418 break; // "btver2"
Craig Topper61d84502017-07-10 17:30:20 +0000419 case 23:
420 *Type = AMDFAM17H;
Craig Topper76ac8132017-07-10 17:47:23 +0000421 *Subtype = AMDFAM17H_ZNVER1;
Craig Topper61d84502017-07-10 17:30:20 +0000422 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000423 default:
424 break; // "generic"
425 }
426}
427
Craig Toppere6b07072017-07-13 02:56:24 +0000428static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
429 unsigned *FeaturesOut) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000430 unsigned Features = 0;
Craig Topper61d84502017-07-10 17:30:20 +0000431 unsigned EAX, EBX;
Craig Toppere6b07072017-07-13 02:56:24 +0000432
433 if ((EDX >> 15) & 1)
434 Features |= 1 << FEATURE_CMOV;
435 if ((EDX >> 23) & 1)
436 Features |= 1 << FEATURE_MMX;
437 if ((EDX >> 25) & 1)
438 Features |= 1 << FEATURE_SSE;
439 if ((EDX >> 26) & 1)
440 Features |= 1 << FEATURE_SSE2;
441
442 if ((ECX >> 0) & 1)
443 Features |= 1 << FEATURE_SSE3;
444 if ((ECX >> 1) & 1)
445 Features |= 1 << FEATURE_PCLMUL;
446 if ((ECX >> 9) & 1)
447 Features |= 1 << FEATURE_SSSE3;
448 if ((ECX >> 12) & 1)
449 Features |= 1 << FEATURE_FMA;
450 if ((ECX >> 19) & 1)
451 Features |= 1 << FEATURE_SSE4_1;
452 if ((ECX >> 20) & 1)
453 Features |= 1 << FEATURE_SSE4_2;
454 if ((ECX >> 23) & 1)
455 Features |= 1 << FEATURE_POPCNT;
456 if ((ECX >> 25) & 1)
457 Features |= 1 << FEATURE_AES;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000458
459 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
460 // indicates that the AVX registers will be saved and restored on context
461 // switch, then we have full AVX support.
462 const unsigned AVXBits = (1 << 27) | (1 << 28);
463 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
464 ((EAX & 0x6) == 0x6);
465 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
Craig Toppere6b07072017-07-13 02:56:24 +0000466
467 if (HasAVX)
468 Features |= 1 << FEATURE_AVX;
469
Craig Topper76ac8132017-07-10 17:47:23 +0000470 bool HasLeaf7 =
471 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
Craig Toppere6b07072017-07-13 02:56:24 +0000472
473 if (HasLeaf7 && ((EBX >> 3) & 1))
474 Features |= 1 << FEATURE_BMI;
475 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
476 Features |= 1 << FEATURE_AVX2;
477 if (HasLeaf7 && ((EBX >> 9) & 1))
478 Features |= 1 << FEATURE_BMI2;
479 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
480 Features |= 1 << FEATURE_AVX512F;
481 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
482 Features |= 1 << FEATURE_AVX512DQ;
483 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
484 Features |= 1 << FEATURE_AVX512IFMA;
485 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
486 Features |= 1 << FEATURE_AVX512PF;
487 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
488 Features |= 1 << FEATURE_AVX512ER;
489 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
490 Features |= 1 << FEATURE_AVX512CD;
491 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
492 Features |= 1 << FEATURE_AVX512BW;
493 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
494 Features |= 1 << FEATURE_AVX512VL;
495
496 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
497 Features |= 1 << FEATURE_AVX512VBMI;
498 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
499 Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
500
501 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
502 Features |= 1 << FEATURE_AVX5124VNNIW;
503 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
504 Features |= 1 << FEATURE_AVX5124FMAPS;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000505
Craig Topper61d84502017-07-10 17:30:20 +0000506 unsigned MaxExtLevel;
507 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
508
509 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
510 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
Craig Toppere6b07072017-07-13 02:56:24 +0000511 if (HasExtLeaf1 && ((ECX >> 6) & 1))
512 Features |= 1 << FEATURE_SSE4_A;
513 if (HasExtLeaf1 && ((ECX >> 11) & 1))
514 Features |= 1 << FEATURE_XOP;
515 if (HasExtLeaf1 && ((ECX >> 16) & 1))
516 Features |= 1 << FEATURE_FMA4;
Craig Topper61d84502017-07-10 17:30:20 +0000517
Craig Toppere6b07072017-07-13 02:56:24 +0000518 *FeaturesOut = Features;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000519}
520
// CONSTRUCTOR_ATTRIBUTE marks __cpu_indicator_init to run automatically at
// program start-up where the compiler supports it. When init priorities are
// available the priority is passed as a parenthesized attribute argument.
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000532
// The CPU description filled in by __cpu_indicator_init.
//   __cpu_vendor   - a ProcessorVendors value; 0 until initialization ran.
//   __cpu_type     - a ProcessorTypes value, or 0 when unknown.
//   __cpu_subtype  - a ProcessorSubtypes value, or 0 when unknown.
//   __cpu_features - bit mask indexed by ProcessorFeatures.
struct __processor_model {
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};
539
540/* A constructor function that is sets __cpu_model and __cpu_features with
541 the right values. This needs to run only once. This constructor is
542 given the highest priority and it should run before constructors without
543 the priority set. However, it still runs after ifunc initializers and
544 needs to be called explicitly there. */
545
Reid Kleckner3ae87c42017-04-07 16:54:32 +0000546int CONSTRUCTOR_ATTRIBUTE
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000547__cpu_indicator_init(void) {
Craig Topper61d84502017-07-10 17:30:20 +0000548 unsigned EAX, EBX, ECX, EDX;
549 unsigned MaxLeaf = 5;
550 unsigned Vendor;
551 unsigned Model, Family, Brand_id;
552 unsigned Features = 0;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000553
554 /* This function needs to run just once. */
555 if (__cpu_model.__cpu_vendor)
556 return 0;
557
558 if (!isCpuIdSupported())
559 return -1;
560
561 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
Craig Topper61d84502017-07-10 17:30:20 +0000562 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000563 __cpu_model.__cpu_vendor = VENDOR_OTHER;
564 return -1;
565 }
566 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
567 detectX86FamilyModel(EAX, &Family, &Model);
568 Brand_id = EBX & 0xff;
569
570 /* Find available features. */
Craig Toppere6b07072017-07-13 02:56:24 +0000571 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000572 __cpu_model.__cpu_features[0] = Features;
573
574 if (Vendor == SIG_INTEL) {
575 /* Get CPU type. */
576 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
577 &(__cpu_model.__cpu_type),
578 &(__cpu_model.__cpu_subtype));
579 __cpu_model.__cpu_vendor = VENDOR_INTEL;
580 } else if (Vendor == SIG_AMD) {
581 /* Get CPU type. */
582 getAMDProcessorTypeAndSubtype(Family, Model, Features,
583 &(__cpu_model.__cpu_type),
584 &(__cpu_model.__cpu_subtype));
585 __cpu_model.__cpu_vendor = VENDOR_AMD;
586 } else
587 __cpu_model.__cpu_vendor = VENDOR_OTHER;
588
589 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
590 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
591 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
592
593 return 0;
594}
595
596#endif