// Source blob: 4c96e9cd85d59b411193a6e7e42d5ad955c2f9be
//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file is based on LLVM's lib/Support/Host.cpp.
//  It implements the operating system Host concept and builtin
//  __cpu_model for the compiler_rt library, for x86 only.
//
//===----------------------------------------------------------------------===//
15
16#if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
19
#include <assert.h>

// Minimal boolean vocabulary for this file: `bool` is a plain int here, and
// true/false are the integer constants 1 and 0.
#define bool int
#define true 1
#define false 0

#ifdef _MSC_VER
#include <intrin.h> // __cpuid, __cpuidex, _xgetbv
#endif

// Compilers that do not provide __has_attribute are treated as supporting no
// attributes at all.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif
33
// Vendor signatures as they appear in EBX after CPUID leaf 0: the first four
// ASCII characters of the vendor string, packed least-significant byte first.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, /* Genu */
  SIG_AMD = 0x68747541    /* Auth */
};
38
// Vendor codes stored in __cpu_model.__cpu_vendor. Zero is deliberately not a
// member: __cpu_indicator_init() treats a zero vendor as "not initialized yet".
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last valid vendor; used only in assertions.
};
45
46enum ProcessorTypes {
Craig Toppere6b07072017-07-13 02:56:24 +000047 INTEL_BONNELL = 1,
Alina Sbirlea36f57fb2016-07-14 22:02:35 +000048 INTEL_CORE2,
49 INTEL_COREI7,
50 AMDFAM10H,
51 AMDFAM15H,
Craig Toppere6b07072017-07-13 02:56:24 +000052 INTEL_SILVERMONT,
53 INTEL_KNL,
54 AMD_BTVER1,
55 AMD_BTVER2,
Craig Topper61d84502017-07-10 17:30:20 +000056 AMDFAM17H,
Craig Topper705b9692017-10-11 20:35:43 +000057 INTEL_KNM,
Alina Sbirlea36f57fb2016-07-14 22:02:35 +000058 CPU_TYPE_MAX
59};
60
// CPU subtype codes stored in __cpu_model.__cpu_subtype. Entries are listed in
// numeric order, which is why Intel and AMD entries are interleaved; the
// explicit values keep that numbering stable.
// NOTE(review): presumably these must stay in sync with the values the
// compiler's __builtin_cpu_is() lowering expects -- confirm before changing.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  CPU_SUBTYPE_MAX = 18 // One past the last valid subtype; used in assertions.
};
81
// Bit positions within __cpu_model.__cpu_features[0]: feature F is reported by
// setting bit number F. All indices must stay below 32 because the feature
// word is a single unsigned int.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30
};
115
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true when CPUID may be executed. Only 32-bit x86 builds with a
// GNU-style compiler perform a real probe; every other configuration simply
// reports true.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuidSupported;
  // Try to flip bit 21 (0x00200000) of EFLAGS and read the register back; the
  // result is 1 only if the value read back differs from the saved copy.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuidSupported)
          :
          : "eax", "ecx");
  if (!CpuidSupported)
    return false;
#endif
  return true;
}
147
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.

/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
///
/// \param value  CPUID leaf number, placed in EAX before the instruction.
/// \param rEAX,rEBX,rECX,rEDX  Receive the four result registers. They are
///        left untouched when the function returns true.
/// \returns false on success, true when no implementation is available for
///          the current compiler/architecture combination.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // The result that cpuid leaves in rbx is swapped into rsi, which is the
  // register bound to *rEBX, while rbx gets its original value back.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  // GNU-style compiler, but not targeting x86: cpuid cannot be executed.
  (void)value;
  (void)rEAX;
  (void)rEBX;
  (void)rECX;
  (void)rEDX;
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = (unsigned)registers[0];
  *rEBX = (unsigned)registers[1];
  *rECX = (unsigned)registers[2];
  *rEDX = (unsigned)registers[3];
  return false;
#else
  (void)value;
  (void)rEAX;
  (void)rEBX;
  (void)rECX;
  (void)rEDX;
  return true;
#endif
}
188
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
///
/// \param value    CPUID leaf number, placed in EAX before the instruction.
/// \param subleaf  Sub-leaf index, placed in ECX before the instruction.
/// \param rEAX,rEBX,rECX,rEDX  Receive the four result registers. They are
///        left untouched when the function returns true.
/// \returns false on success, true when no implementation is available for
///          the current compiler/architecture combination.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // The result that cpuid leaves in rbx is swapped into rsi, which is the
  // register bound to *rEBX, while rbx gets its original value back.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  // GNU-style compiler, but not targeting x86: cpuid cannot be executed.
  (void)value;
  (void)subleaf;
  (void)rEAX;
  (void)rEBX;
  (void)rECX;
  (void)rEDX;
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = (unsigned)registers[0];
  *rEBX = (unsigned)registers[1];
  *rECX = (unsigned)registers[2];
  *rEDX = (unsigned)registers[3];
  return false;
#else
  (void)value;
  (void)subleaf;
  (void)rEAX;
  (void)rEBX;
  (void)rECX;
  (void)rEDX;
  return true;
#endif
}
227
// Read control register 0 (XCR0). Used to detect features such as AVX.
//
// On success the low 32 bits are stored in *rEAX, the high 32 bits in *rEDX,
// and false is returned. Returns true (outputs untouched) when the compiler
// offers no way to issue xgetbv.
//
// NOTE(review): the instruction is executed unconditionally here; the visible
// caller only invokes this after checking CPUID.1:ECX bits 27 and 28 --
// confirm that is sufficient for every caller.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  // Split the 64-bit value into halves; the narrowing is intentional.
  *rEAX = (unsigned)Result;
  *rEDX = (unsigned)(Result >> 32);
  return false;
#else
  (void)rEAX;
  (void)rEDX;
  return true;
#endif
}
245
/// Decode the display family and model from the EAX value of CPUID leaf 1.
///
/// \param EAX     Raw EAX value returned by CPUID leaf 1.
/// \param Family  Receives the base family (bits 8-11), plus the extended
///                family (bits 20-27) when the base family is 0xF.
/// \param Model   Receives the base model (bits 4-7), with the extended model
///                (bits 16-19) as the high nibble when the base family is 6
///                or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  unsigned BaseFamily = (EAX >> 8) & 0xf; // Bits 8 - 11
  unsigned BaseModel = (EAX >> 4) & 0xf;  // Bits 4 - 7
  unsigned FullFamily = BaseFamily;
  unsigned FullModel = BaseModel;

  if (BaseFamily == 6 || BaseFamily == 0xf) {
    // Examine extended family ID if family ID is F.
    if (BaseFamily == 0xf)
      FullFamily += (EAX >> 20) & 0xff; // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.
    FullModel += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  }

  *Family = FullFamily;
  *Model = FullModel;
}
258
Craig Topper76ac8132017-07-10 17:47:23 +0000259static void
260getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
261 unsigned Brand_id, unsigned Features,
262 unsigned *Type, unsigned *Subtype) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000263 if (Brand_id != 0)
264 return;
265 switch (Family) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000266 case 6:
267 switch (Model) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000268 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
269 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
270 // mobile processor, Intel Core 2 Extreme processor, Intel
271 // Pentium Dual-Core processor, Intel Xeon processor, model
272 // 0Fh. All processors are manufactured using the 65 nm process.
273 case 0x16: // Intel Celeron processor model 16h. All processors are
274 // manufactured using the 65 nm process
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000275 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
276 // 17h. All processors are manufactured using the 45 nm process.
277 //
278 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
279 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
280 // the 45 nm process.
281 *Type = INTEL_CORE2; // "penryn"
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000282 break;
283 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
284 // processors are manufactured using the 45 nm process.
285 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
286 // As found in a Summer 2010 model iMac.
287 case 0x1f:
Craig Topper76ac8132017-07-10 17:47:23 +0000288 case 0x2e: // Nehalem EX
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000289 *Type = INTEL_COREI7; // "nehalem"
290 *Subtype = INTEL_COREI7_NEHALEM;
291 break;
292 case 0x25: // Intel Core i7, laptop version.
293 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
294 // processors are manufactured using the 32 nm process.
295 case 0x2f: // Westmere EX
296 *Type = INTEL_COREI7; // "westmere"
297 *Subtype = INTEL_COREI7_WESTMERE;
298 break;
299 case 0x2a: // Intel Core i7 processor. All processors are manufactured
300 // using the 32 nm process.
301 case 0x2d:
302 *Type = INTEL_COREI7; //"sandybridge"
303 *Subtype = INTEL_COREI7_SANDYBRIDGE;
304 break;
305 case 0x3a:
Craig Topper76ac8132017-07-10 17:47:23 +0000306 case 0x3e: // Ivy Bridge EP
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000307 *Type = INTEL_COREI7; // "ivybridge"
308 *Subtype = INTEL_COREI7_IVYBRIDGE;
309 break;
310
311 // Haswell:
312 case 0x3c:
313 case 0x3f:
314 case 0x45:
315 case 0x46:
316 *Type = INTEL_COREI7; // "haswell"
317 *Subtype = INTEL_COREI7_HASWELL;
318 break;
319
320 // Broadwell:
321 case 0x3d:
322 case 0x47:
323 case 0x4f:
324 case 0x56:
325 *Type = INTEL_COREI7; // "broadwell"
326 *Subtype = INTEL_COREI7_BROADWELL;
327 break;
328
329 // Skylake:
Craig Topper61d84502017-07-10 17:30:20 +0000330 case 0x4e: // Skylake mobile
331 case 0x5e: // Skylake desktop
332 case 0x8e: // Kaby Lake mobile
333 case 0x9e: // Kaby Lake desktop
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000334 *Type = INTEL_COREI7; // "skylake"
335 *Subtype = INTEL_COREI7_SKYLAKE;
336 break;
337
Craig Topper61d84502017-07-10 17:30:20 +0000338 // Skylake Xeon:
339 case 0x55:
Craig Topper76ac8132017-07-10 17:47:23 +0000340 *Type = INTEL_COREI7;
Craig Topper61d84502017-07-10 17:30:20 +0000341 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
342 break;
343
Craig Topper436c9262017-11-19 00:46:21 +0000344 // Cannonlake:
345 case 0x66:
346 *Type = INTEL_COREI7;
347 *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
348 break;
349
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000350 case 0x1c: // Most 45 nm Intel Atom processors
351 case 0x26: // 45 nm Atom Lincroft
352 case 0x27: // 32 nm Atom Medfield
353 case 0x35: // 32 nm Atom Midview
354 case 0x36: // 32 nm Atom Midview
Craig Toppere6b07072017-07-13 02:56:24 +0000355 *Type = INTEL_BONNELL;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000356 break; // "bonnell"
357
358 // Atom Silvermont codes from the Intel software optimization guide.
359 case 0x37:
360 case 0x4a:
361 case 0x4d:
362 case 0x5a:
363 case 0x5d:
364 case 0x4c: // really airmont
Craig Toppere6b07072017-07-13 02:56:24 +0000365 *Type = INTEL_SILVERMONT;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000366 break; // "silvermont"
367
368 case 0x57:
Craig Toppere6b07072017-07-13 02:56:24 +0000369 *Type = INTEL_KNL; // knl
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000370 break;
371
Craig Topper705b9692017-10-11 20:35:43 +0000372 case 0x85:
373 *Type = INTEL_KNM; // knm
374 break;
375
Craig Toppere6b07072017-07-13 02:56:24 +0000376 default: // Unknown family 6 CPU.
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000377 break;
Craig Topper76ac8132017-07-10 17:47:23 +0000378 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000379 }
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000380 default:
Craig Toppere6b07072017-07-13 02:56:24 +0000381 break; // Unknown.
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000382 }
383}
384
Craig Topper61d84502017-07-10 17:30:20 +0000385static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
386 unsigned Features, unsigned *Type,
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000387 unsigned *Subtype) {
388 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
389 // appears to be no way to generate the wide variety of AMD-specific targets
390 // from the information returned from CPUID.
391 switch (Family) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000392 case 16:
393 *Type = AMDFAM10H; // "amdfam10"
394 switch (Model) {
395 case 2:
396 *Subtype = AMDFAM10H_BARCELONA;
397 break;
398 case 4:
399 *Subtype = AMDFAM10H_SHANGHAI;
400 break;
401 case 8:
402 *Subtype = AMDFAM10H_ISTANBUL;
403 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000404 }
Craig Topper76ac8132017-07-10 17:47:23 +0000405 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000406 case 20:
Craig Toppere6b07072017-07-13 02:56:24 +0000407 *Type = AMD_BTVER1;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000408 break; // "btver1";
409 case 21:
410 *Type = AMDFAM15H;
Craig Topper76ac8132017-07-10 17:47:23 +0000411 if (Model >= 0x60 && Model <= 0x7f) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000412 *Subtype = AMDFAM15H_BDVER4;
Craig Toppere6b07072017-07-13 02:56:24 +0000413 break; // "bdver4"; 60h-7Fh: Excavator
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000414 }
415 if (Model >= 0x30 && Model <= 0x3f) {
416 *Subtype = AMDFAM15H_BDVER3;
417 break; // "bdver3"; 30h-3Fh: Steamroller
418 }
419 if (Model >= 0x10 && Model <= 0x1f) {
420 *Subtype = AMDFAM15H_BDVER2;
421 break; // "bdver2"; 10h-1Fh: Piledriver
422 }
423 if (Model <= 0x0f) {
424 *Subtype = AMDFAM15H_BDVER1;
425 break; // "bdver1"; 00h-0Fh: Bulldozer
426 }
427 break;
428 case 22:
Craig Topper76ac8132017-07-10 17:47:23 +0000429 *Type = AMD_BTVER2;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000430 break; // "btver2"
Craig Topper61d84502017-07-10 17:30:20 +0000431 case 23:
432 *Type = AMDFAM17H;
Craig Topper76ac8132017-07-10 17:47:23 +0000433 *Subtype = AMDFAM17H_ZNVER1;
Craig Topper61d84502017-07-10 17:30:20 +0000434 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000435 default:
436 break; // "generic"
437 }
438}
439
Craig Toppere6b07072017-07-13 02:56:24 +0000440static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
441 unsigned *FeaturesOut) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000442 unsigned Features = 0;
Craig Topper61d84502017-07-10 17:30:20 +0000443 unsigned EAX, EBX;
Craig Toppere6b07072017-07-13 02:56:24 +0000444
445 if ((EDX >> 15) & 1)
446 Features |= 1 << FEATURE_CMOV;
447 if ((EDX >> 23) & 1)
448 Features |= 1 << FEATURE_MMX;
449 if ((EDX >> 25) & 1)
450 Features |= 1 << FEATURE_SSE;
451 if ((EDX >> 26) & 1)
452 Features |= 1 << FEATURE_SSE2;
453
454 if ((ECX >> 0) & 1)
455 Features |= 1 << FEATURE_SSE3;
456 if ((ECX >> 1) & 1)
457 Features |= 1 << FEATURE_PCLMUL;
458 if ((ECX >> 9) & 1)
459 Features |= 1 << FEATURE_SSSE3;
460 if ((ECX >> 12) & 1)
461 Features |= 1 << FEATURE_FMA;
462 if ((ECX >> 19) & 1)
463 Features |= 1 << FEATURE_SSE4_1;
464 if ((ECX >> 20) & 1)
465 Features |= 1 << FEATURE_SSE4_2;
466 if ((ECX >> 23) & 1)
467 Features |= 1 << FEATURE_POPCNT;
468 if ((ECX >> 25) & 1)
469 Features |= 1 << FEATURE_AES;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000470
471 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
472 // indicates that the AVX registers will be saved and restored on context
473 // switch, then we have full AVX support.
474 const unsigned AVXBits = (1 << 27) | (1 << 28);
475 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
476 ((EAX & 0x6) == 0x6);
477 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
Craig Toppere6b07072017-07-13 02:56:24 +0000478
479 if (HasAVX)
480 Features |= 1 << FEATURE_AVX;
481
Craig Topper76ac8132017-07-10 17:47:23 +0000482 bool HasLeaf7 =
483 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
Craig Toppere6b07072017-07-13 02:56:24 +0000484
485 if (HasLeaf7 && ((EBX >> 3) & 1))
486 Features |= 1 << FEATURE_BMI;
487 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
488 Features |= 1 << FEATURE_AVX2;
489 if (HasLeaf7 && ((EBX >> 9) & 1))
490 Features |= 1 << FEATURE_BMI2;
491 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
492 Features |= 1 << FEATURE_AVX512F;
493 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
494 Features |= 1 << FEATURE_AVX512DQ;
495 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
496 Features |= 1 << FEATURE_AVX512IFMA;
497 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
498 Features |= 1 << FEATURE_AVX512PF;
499 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
500 Features |= 1 << FEATURE_AVX512ER;
501 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
502 Features |= 1 << FEATURE_AVX512CD;
503 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
504 Features |= 1 << FEATURE_AVX512BW;
505 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
506 Features |= 1 << FEATURE_AVX512VL;
507
508 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
509 Features |= 1 << FEATURE_AVX512VBMI;
510 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
511 Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
512
513 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
514 Features |= 1 << FEATURE_AVX5124VNNIW;
515 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
516 Features |= 1 << FEATURE_AVX5124FMAPS;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000517
Craig Topper61d84502017-07-10 17:30:20 +0000518 unsigned MaxExtLevel;
519 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
520
521 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
522 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
Craig Toppere6b07072017-07-13 02:56:24 +0000523 if (HasExtLeaf1 && ((ECX >> 6) & 1))
524 Features |= 1 << FEATURE_SSE4_A;
525 if (HasExtLeaf1 && ((ECX >> 11) & 1))
526 Features |= 1 << FEATURE_XOP;
527 if (HasExtLeaf1 && ((ECX >> 16) & 1))
528 Features |= 1 << FEATURE_FMA4;
Craig Topper61d84502017-07-10 17:30:20 +0000529
Craig Toppere6b07072017-07-13 02:56:24 +0000530 *FeaturesOut = Features;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000531}
532
// CONSTRUCTOR_ATTRIBUTE marks __cpu_indicator_init so that it runs
// automatically during program start-up where the compiler supports it.
#if defined(HAVE_INIT_PRIORITY)
// The priority is an argument of the attribute and must be parenthesized;
// `__constructor__ 101` without parentheses does not parse.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000544
// Processor description published as the global __cpu_model and filled in by
// __cpu_indicator_init().
struct __processor_model {
  unsigned int __cpu_vendor;      // enum ProcessorVendors; 0 = not detected yet
  unsigned int __cpu_type;        // enum ProcessorTypes; 0 = unknown
  unsigned int __cpu_subtype;     // enum ProcessorSubtypes; 0 = unknown
  unsigned int __cpu_features[1]; // Bitmask indexed by enum ProcessorFeatures
} __cpu_model = {0, 0, 0, {0}};
551
552/* A constructor function that is sets __cpu_model and __cpu_features with
553 the right values. This needs to run only once. This constructor is
554 given the highest priority and it should run before constructors without
555 the priority set. However, it still runs after ifunc initializers and
556 needs to be called explicitly there. */
557
Reid Kleckner3ae87c42017-04-07 16:54:32 +0000558int CONSTRUCTOR_ATTRIBUTE
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000559__cpu_indicator_init(void) {
Craig Topper61d84502017-07-10 17:30:20 +0000560 unsigned EAX, EBX, ECX, EDX;
561 unsigned MaxLeaf = 5;
562 unsigned Vendor;
563 unsigned Model, Family, Brand_id;
564 unsigned Features = 0;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000565
566 /* This function needs to run just once. */
567 if (__cpu_model.__cpu_vendor)
568 return 0;
569
570 if (!isCpuIdSupported())
571 return -1;
572
573 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
Craig Topper61d84502017-07-10 17:30:20 +0000574 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000575 __cpu_model.__cpu_vendor = VENDOR_OTHER;
576 return -1;
577 }
578 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
579 detectX86FamilyModel(EAX, &Family, &Model);
580 Brand_id = EBX & 0xff;
581
582 /* Find available features. */
Craig Toppere6b07072017-07-13 02:56:24 +0000583 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000584 __cpu_model.__cpu_features[0] = Features;
585
586 if (Vendor == SIG_INTEL) {
587 /* Get CPU type. */
588 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
589 &(__cpu_model.__cpu_type),
590 &(__cpu_model.__cpu_subtype));
591 __cpu_model.__cpu_vendor = VENDOR_INTEL;
592 } else if (Vendor == SIG_AMD) {
593 /* Get CPU type. */
594 getAMDProcessorTypeAndSubtype(Family, Model, Features,
595 &(__cpu_model.__cpu_type),
596 &(__cpu_model.__cpu_subtype));
597 __cpu_model.__cpu_vendor = VENDOR_AMD;
598 } else
599 __cpu_model.__cpu_vendor = VENDOR_OTHER;
600
601 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
602 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
603 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
604
605 return 0;
606}
607
608#endif