blob: 54f1add916793eaec8b2dd201d011ea5b7246ec0 [file] [log] [blame]
Alina Sbirlea36f57fb2016-07-14 22:02:35 +00001//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is based on LLVM's lib/Support/Host.cpp.
11// It implements the operating system Host concept and builtin
12// __cpu_model for the compiler_rt library, for x86 only.
13//
14//===----------------------------------------------------------------------===//
15
16#if (defined(__i386__) || defined(_M_IX86) || \
17 defined(__x86_64__) || defined(_M_X64)) && \
18 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
19
20#include <assert.h>
21
// Minimal boolean vocabulary for this file: 'bool' is a plain int and the
// two truth values are the integers 1 and 0.
#define bool int
#define true 1
#define false 0
25
26#ifdef _MSC_VER
27#include <intrin.h>
28#endif
29
// Compilers that do not provide __has_attribute get a fallback that reports
// every attribute as unsupported, so '#if __has_attribute(x)' stays valid.
#ifndef __has_attribute
#define __has_attribute(attr) 0
#endif
33
// Values compared against the EBX result of CPUID leaf 0 (the first four
// bytes of the vendor string, little-endian) to recognise the CPU vendor.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
38
// Vendor codes stored in __cpu_model.__cpu_vendor.  Zero is never assigned,
// which lets __cpu_indicator_init use "vendor != 0" as its "already
// initialised" test.  VENDOR_MAX is an upper bound, not a real vendor.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4
};
45
// CPU family codes stored in __cpu_model.__cpu_type.  Zero means "not
// identified".  The numeric values are spelled out because new entries must
// only ever be appended; CPU_TYPE_MAX is an upper bound, not a real type.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  INTEL_KNM = 11,
  INTEL_GOLDMONT = 12,
  INTEL_GOLDMONT_PLUS = 13,
  INTEL_TREMONT = 14,
  CPU_TYPE_MAX = 15
};
63
// Micro-architecture codes stored in __cpu_model.__cpu_subtype.  Zero means
// "not identified".  Values are listed explicitly; the Intel and AMD entries
// are interleaved, so the order below is by value, not by vendor.
// CPU_SUBTYPE_MAX is an upper bound, not a real subtype.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  INTEL_COREI7_CANNONLAKE = 17,
  INTEL_COREI7_ICELAKE_CLIENT = 18,
  INTEL_COREI7_ICELAKE_SERVER = 19,
  CPU_SUBTYPE_MAX = 20
};
86
// Bit indices of the detected ISA features.  getAvailableFeatures() places
// indices 0-31 in the word that ends up in __cpu_model.__cpu_features[0] and
// indices 32-63 in the word that ends up in __cpu_features2.
enum ProcessorFeatures {
  // First feature word.
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30,
  FEATURE_AVX512VBMI2 = 31,
  // Second feature word.
  FEATURE_GFNI = 32,
  FEATURE_VPCLMULQDQ = 33,
  FEATURE_AVX512VNNI = 34,
  FEATURE_AVX512BITALG = 35
};
125
/// isCpuIdSupported - Report whether the CPUID instruction may be executed.
///
/// Only 32-bit x86 built with a GNU-compatible compiler performs a real
/// probe: it tries to toggle bit 21 (0x00200000) of EFLAGS and reports CPUID
/// as present when the bit actually changed.  The probe was copied from
/// clang's cpuid.h (__get_cpuid_max) and was motivated by bug reports of
/// OpenSSL crashing on CPUs without CPUID support.  Every other configuration
/// simply returns true.
static bool isCpuIdSupported(void) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
157
158// This code is copied from lib/Support/Host.cpp.
159// Changes to either file should be mirrored in the other.
160
/// getX86CpuIDAndInfo - Issue CPUID for leaf \p value and store the resulting
/// EAX, EBX, ECX and EDX through the four output pointers.  Returns false
/// when the instruction was issued, and true when this compiler/target
/// combination offers no way to run CPUID (the outputs are then untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc is not told that cpuid overwrites rbx, so rbx is parked in rsi
  // around the instruction and the EBX result is handed back through rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same register dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
198
/// getX86CpuIDAndInfoEx - Issue CPUID for leaf \p value with ECX preloaded
/// with \p subleaf, and store the resulting EAX, EBX, ECX and EDX through the
/// four output pointers.  Returns false when the instruction was issued, and
/// true when this compiler/target combination offers no way to run CPUID
/// (the outputs are then untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc is not told that cpuid overwrites rbx, so rbx is parked in rsi
  // around the instruction and the EBX result is handed back through rsi.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same register dance as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
237
/// getX86XCR0 - Read extended control register 0 (XCR0) with XGETBV and
/// return its low half through \p rEAX and its high half through \p rEDX.
/// The result is used to detect features such as AVX.  Returns false on
/// success and true when no way of issuing XGETBV is available in this build
/// (the outputs are then untouched).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes (0f 01 d0) rather than by mnemonic: older
  // assemblers do not know the instruction and there is no easy way to
  // conditionally compile based on the assembler used.  ECX = 0 selects XCR0.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Xcr0;
  *rEDX = (unsigned)(Xcr0 >> 32);
  return false;
#else
  return true;
#endif
}
255
/// detectX86FamilyModel - Decode the family and model numbers from the EAX
/// value returned by CPUID leaf 1.
///
/// The base family is bits 8-11 and the base model is bits 4-7.  When the
/// base family is 0xf the extended family (bits 20-27) is added to it.  When
/// the base family is 6 or 0xf the extended model (bits 16-19) supplies the
/// upper four bits of the model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned BaseModel = (EAX >> 4) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = BaseModel;

  if (BaseFamily == 0xf)
    DecodedFamily += ExtFamily;
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ExtModel << 4;

  *Family = DecodedFamily;
  *Model = DecodedModel;
}
268
Craig Topper76ac8132017-07-10 17:47:23 +0000269static void
270getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
271 unsigned Brand_id, unsigned Features,
272 unsigned *Type, unsigned *Subtype) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000273 if (Brand_id != 0)
274 return;
275 switch (Family) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000276 case 6:
277 switch (Model) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000278 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
279 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
280 // mobile processor, Intel Core 2 Extreme processor, Intel
281 // Pentium Dual-Core processor, Intel Xeon processor, model
282 // 0Fh. All processors are manufactured using the 65 nm process.
283 case 0x16: // Intel Celeron processor model 16h. All processors are
284 // manufactured using the 65 nm process
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000285 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
286 // 17h. All processors are manufactured using the 45 nm process.
287 //
288 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
289 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
290 // the 45 nm process.
291 *Type = INTEL_CORE2; // "penryn"
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000292 break;
293 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
294 // processors are manufactured using the 45 nm process.
295 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
296 // As found in a Summer 2010 model iMac.
297 case 0x1f:
Craig Topper76ac8132017-07-10 17:47:23 +0000298 case 0x2e: // Nehalem EX
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000299 *Type = INTEL_COREI7; // "nehalem"
300 *Subtype = INTEL_COREI7_NEHALEM;
301 break;
302 case 0x25: // Intel Core i7, laptop version.
303 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
304 // processors are manufactured using the 32 nm process.
305 case 0x2f: // Westmere EX
306 *Type = INTEL_COREI7; // "westmere"
307 *Subtype = INTEL_COREI7_WESTMERE;
308 break;
309 case 0x2a: // Intel Core i7 processor. All processors are manufactured
310 // using the 32 nm process.
311 case 0x2d:
312 *Type = INTEL_COREI7; //"sandybridge"
313 *Subtype = INTEL_COREI7_SANDYBRIDGE;
314 break;
315 case 0x3a:
Craig Topper76ac8132017-07-10 17:47:23 +0000316 case 0x3e: // Ivy Bridge EP
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000317 *Type = INTEL_COREI7; // "ivybridge"
318 *Subtype = INTEL_COREI7_IVYBRIDGE;
319 break;
320
321 // Haswell:
322 case 0x3c:
323 case 0x3f:
324 case 0x45:
325 case 0x46:
326 *Type = INTEL_COREI7; // "haswell"
327 *Subtype = INTEL_COREI7_HASWELL;
328 break;
329
330 // Broadwell:
331 case 0x3d:
332 case 0x47:
333 case 0x4f:
334 case 0x56:
335 *Type = INTEL_COREI7; // "broadwell"
336 *Subtype = INTEL_COREI7_BROADWELL;
337 break;
338
339 // Skylake:
Craig Topper61d84502017-07-10 17:30:20 +0000340 case 0x4e: // Skylake mobile
341 case 0x5e: // Skylake desktop
342 case 0x8e: // Kaby Lake mobile
343 case 0x9e: // Kaby Lake desktop
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000344 *Type = INTEL_COREI7; // "skylake"
345 *Subtype = INTEL_COREI7_SKYLAKE;
346 break;
347
Craig Topper61d84502017-07-10 17:30:20 +0000348 // Skylake Xeon:
349 case 0x55:
Craig Topper76ac8132017-07-10 17:47:23 +0000350 *Type = INTEL_COREI7;
Craig Topper61d84502017-07-10 17:30:20 +0000351 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
352 break;
353
Craig Topper436c9262017-11-19 00:46:21 +0000354 // Cannonlake:
355 case 0x66:
356 *Type = INTEL_COREI7;
357 *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake"
358 break;
359
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000360 case 0x1c: // Most 45 nm Intel Atom processors
361 case 0x26: // 45 nm Atom Lincroft
362 case 0x27: // 32 nm Atom Medfield
363 case 0x35: // 32 nm Atom Midview
364 case 0x36: // 32 nm Atom Midview
Craig Toppere6b07072017-07-13 02:56:24 +0000365 *Type = INTEL_BONNELL;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000366 break; // "bonnell"
367
368 // Atom Silvermont codes from the Intel software optimization guide.
369 case 0x37:
370 case 0x4a:
371 case 0x4d:
372 case 0x5a:
373 case 0x5d:
374 case 0x4c: // really airmont
Craig Toppere6b07072017-07-13 02:56:24 +0000375 *Type = INTEL_SILVERMONT;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000376 break; // "silvermont"
Craig Topper3fe64702018-10-20 03:49:04 +0000377 // Goldmont:
378 case 0x5c: // Apollo Lake
379 case 0x5f: // Denverton
380 *Type = INTEL_GOLDMONT;
381 break; // "goldmont"
382 case 0x7a:
383 *Type = INTEL_GOLDMONT_PLUS;
384 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000385
386 case 0x57:
Craig Toppere6b07072017-07-13 02:56:24 +0000387 *Type = INTEL_KNL; // knl
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000388 break;
389
Craig Topper705b9692017-10-11 20:35:43 +0000390 case 0x85:
391 *Type = INTEL_KNM; // knm
392 break;
393
Craig Toppere6b07072017-07-13 02:56:24 +0000394 default: // Unknown family 6 CPU.
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000395 break;
Craig Topper76ac8132017-07-10 17:47:23 +0000396 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000397 }
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000398 default:
Craig Toppere6b07072017-07-13 02:56:24 +0000399 break; // Unknown.
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000400 }
401}
402
Craig Topper61d84502017-07-10 17:30:20 +0000403static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
404 unsigned Features, unsigned *Type,
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000405 unsigned *Subtype) {
406 // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
407 // appears to be no way to generate the wide variety of AMD-specific targets
408 // from the information returned from CPUID.
409 switch (Family) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000410 case 16:
411 *Type = AMDFAM10H; // "amdfam10"
412 switch (Model) {
413 case 2:
414 *Subtype = AMDFAM10H_BARCELONA;
415 break;
416 case 4:
417 *Subtype = AMDFAM10H_SHANGHAI;
418 break;
419 case 8:
420 *Subtype = AMDFAM10H_ISTANBUL;
421 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000422 }
Craig Topper76ac8132017-07-10 17:47:23 +0000423 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000424 case 20:
Craig Toppere6b07072017-07-13 02:56:24 +0000425 *Type = AMD_BTVER1;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000426 break; // "btver1";
427 case 21:
428 *Type = AMDFAM15H;
Craig Topper76ac8132017-07-10 17:47:23 +0000429 if (Model >= 0x60 && Model <= 0x7f) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000430 *Subtype = AMDFAM15H_BDVER4;
Craig Toppere6b07072017-07-13 02:56:24 +0000431 break; // "bdver4"; 60h-7Fh: Excavator
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000432 }
433 if (Model >= 0x30 && Model <= 0x3f) {
434 *Subtype = AMDFAM15H_BDVER3;
435 break; // "bdver3"; 30h-3Fh: Steamroller
436 }
Roman Lebedeve8e95b52018-05-01 18:40:15 +0000437 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000438 *Subtype = AMDFAM15H_BDVER2;
Roman Lebedeve8e95b52018-05-01 18:40:15 +0000439 break; // "bdver2"; 02h, 10h-1Fh: Piledriver
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000440 }
441 if (Model <= 0x0f) {
442 *Subtype = AMDFAM15H_BDVER1;
443 break; // "bdver1"; 00h-0Fh: Bulldozer
444 }
445 break;
446 case 22:
Craig Topper76ac8132017-07-10 17:47:23 +0000447 *Type = AMD_BTVER2;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000448 break; // "btver2"
Craig Topper61d84502017-07-10 17:30:20 +0000449 case 23:
450 *Type = AMDFAM17H;
Craig Topper76ac8132017-07-10 17:47:23 +0000451 *Subtype = AMDFAM17H_ZNVER1;
Craig Topper61d84502017-07-10 17:30:20 +0000452 break;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000453 default:
454 break; // "generic"
455 }
456}
457
Craig Toppere6b07072017-07-13 02:56:24 +0000458static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
Craig Topper3fe64702018-10-20 03:49:04 +0000459 unsigned *FeaturesOut,
460 unsigned *Features2Out) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000461 unsigned Features = 0;
Craig Topper3fe64702018-10-20 03:49:04 +0000462 unsigned Features2 = 0;
Craig Topper61d84502017-07-10 17:30:20 +0000463 unsigned EAX, EBX;
Craig Toppere6b07072017-07-13 02:56:24 +0000464
Craig Topper3fe64702018-10-20 03:49:04 +0000465#define setFeature(F) \
466 do { \
467 if (F < 32) \
468 Features |= 1 << F; \
469 else if (F < 64) \
470 Features2 |= 1 << (F - 32); \
471 } while (0)
472
Craig Toppere6b07072017-07-13 02:56:24 +0000473 if ((EDX >> 15) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000474 setFeature(FEATURE_CMOV);
Craig Toppere6b07072017-07-13 02:56:24 +0000475 if ((EDX >> 23) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000476 setFeature(FEATURE_MMX);
Craig Toppere6b07072017-07-13 02:56:24 +0000477 if ((EDX >> 25) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000478 setFeature(FEATURE_SSE);
Craig Toppere6b07072017-07-13 02:56:24 +0000479 if ((EDX >> 26) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000480 setFeature(FEATURE_SSE2);
Craig Toppere6b07072017-07-13 02:56:24 +0000481
482 if ((ECX >> 0) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000483 setFeature(FEATURE_SSE3);
Craig Toppere6b07072017-07-13 02:56:24 +0000484 if ((ECX >> 1) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000485 setFeature(FEATURE_PCLMUL);
Craig Toppere6b07072017-07-13 02:56:24 +0000486 if ((ECX >> 9) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000487 setFeature(FEATURE_SSSE3);
Craig Toppere6b07072017-07-13 02:56:24 +0000488 if ((ECX >> 12) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000489 setFeature(FEATURE_FMA);
Craig Toppere6b07072017-07-13 02:56:24 +0000490 if ((ECX >> 19) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000491 setFeature(FEATURE_SSE4_1);
Craig Toppere6b07072017-07-13 02:56:24 +0000492 if ((ECX >> 20) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000493 setFeature(FEATURE_SSE4_2);
Craig Toppere6b07072017-07-13 02:56:24 +0000494 if ((ECX >> 23) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000495 setFeature(FEATURE_POPCNT);
Craig Toppere6b07072017-07-13 02:56:24 +0000496 if ((ECX >> 25) & 1)
Craig Topper3fe64702018-10-20 03:49:04 +0000497 setFeature(FEATURE_AES);
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000498
499 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
500 // indicates that the AVX registers will be saved and restored on context
501 // switch, then we have full AVX support.
502 const unsigned AVXBits = (1 << 27) | (1 << 28);
503 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
504 ((EAX & 0x6) == 0x6);
505 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
Craig Toppere6b07072017-07-13 02:56:24 +0000506
507 if (HasAVX)
Craig Topper3fe64702018-10-20 03:49:04 +0000508 setFeature(FEATURE_AVX);
Craig Toppere6b07072017-07-13 02:56:24 +0000509
Craig Topper76ac8132017-07-10 17:47:23 +0000510 bool HasLeaf7 =
511 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
Craig Toppere6b07072017-07-13 02:56:24 +0000512
513 if (HasLeaf7 && ((EBX >> 3) & 1))
Craig Topper3fe64702018-10-20 03:49:04 +0000514 setFeature(FEATURE_BMI);
Craig Toppere6b07072017-07-13 02:56:24 +0000515 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
Craig Topper3fe64702018-10-20 03:49:04 +0000516 setFeature(FEATURE_AVX2);
Craig Toppere6b07072017-07-13 02:56:24 +0000517 if (HasLeaf7 && ((EBX >> 9) & 1))
Craig Topper3fe64702018-10-20 03:49:04 +0000518 setFeature(FEATURE_BMI2);
Craig Toppere6b07072017-07-13 02:56:24 +0000519 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000520 setFeature(FEATURE_AVX512F);
Craig Toppere6b07072017-07-13 02:56:24 +0000521 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000522 setFeature(FEATURE_AVX512DQ);
Craig Toppere6b07072017-07-13 02:56:24 +0000523 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000524 setFeature(FEATURE_AVX512IFMA);
Craig Toppere6b07072017-07-13 02:56:24 +0000525 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000526 setFeature(FEATURE_AVX512PF);
Craig Toppere6b07072017-07-13 02:56:24 +0000527 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000528 setFeature(FEATURE_AVX512ER);
Craig Toppere6b07072017-07-13 02:56:24 +0000529 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000530 setFeature(FEATURE_AVX512CD);
Craig Toppere6b07072017-07-13 02:56:24 +0000531 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000532 setFeature(FEATURE_AVX512BW);
Craig Toppere6b07072017-07-13 02:56:24 +0000533 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000534 setFeature(FEATURE_AVX512VL);
Craig Toppere6b07072017-07-13 02:56:24 +0000535
536 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000537 setFeature(FEATURE_AVX512VBMI);
538 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
539 setFeature(FEATURE_AVX512VBMI2);
540 if (HasLeaf7 && ((ECX >> 8) & 1))
541 setFeature(FEATURE_GFNI);
542 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
543 setFeature(FEATURE_VPCLMULQDQ);
544 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
545 setFeature(FEATURE_AVX512VNNI);
546 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
547 setFeature(FEATURE_AVX512BITALG);
Craig Toppere6b07072017-07-13 02:56:24 +0000548 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000549 setFeature(FEATURE_AVX512VPOPCNTDQ);
Craig Toppere6b07072017-07-13 02:56:24 +0000550
551 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000552 setFeature(FEATURE_AVX5124VNNIW);
Craig Toppere6b07072017-07-13 02:56:24 +0000553 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
Craig Topper3fe64702018-10-20 03:49:04 +0000554 setFeature(FEATURE_AVX5124FMAPS);
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000555
Craig Topper61d84502017-07-10 17:30:20 +0000556 unsigned MaxExtLevel;
557 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
558
559 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
560 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
Craig Toppere6b07072017-07-13 02:56:24 +0000561 if (HasExtLeaf1 && ((ECX >> 6) & 1))
Craig Topper3fe64702018-10-20 03:49:04 +0000562 setFeature(FEATURE_SSE4_A);
Craig Toppere6b07072017-07-13 02:56:24 +0000563 if (HasExtLeaf1 && ((ECX >> 11) & 1))
Craig Topper3fe64702018-10-20 03:49:04 +0000564 setFeature(FEATURE_XOP);
Craig Toppere6b07072017-07-13 02:56:24 +0000565 if (HasExtLeaf1 && ((ECX >> 16) & 1))
Craig Topper3fe64702018-10-20 03:49:04 +0000566 setFeature(FEATURE_FMA4);
Craig Topper61d84502017-07-10 17:30:20 +0000567
Craig Toppere6b07072017-07-13 02:56:24 +0000568 *FeaturesOut = Features;
Craig Topper3fe64702018-10-20 03:49:04 +0000569 *Features2Out = Features2;
570#undef setFeature
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000571}
572
// CONSTRUCTOR_ATTRIBUTE marks __cpu_indicator_init as a start-up constructor.
//  - With HAVE_INIT_PRIORITY the constructor is given priority 101.  The
//    priority is an attribute argument and must be written in parentheses;
//    the previous spelling '__constructor__ 101' was not valid syntax.
//  - Otherwise, if the compiler knows the attribute, a plain constructor is
//    used.
//  - Otherwise the macro is empty and nothing runs the function
//    automatically.
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
582
Reid Kleckner3ae87c42017-04-07 16:54:32 +0000583int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000584
// Detection results, filled in by __cpu_indicator_init().
//  __cpu_vendor    - one of ProcessorVendors; 0 until detection has run.
//  __cpu_type      - one of ProcessorTypes, or 0 when not identified.
//  __cpu_subtype   - one of ProcessorSubtypes, or 0 when not identified.
//  __cpu_features  - bitmask of feature indices 0-31 (ProcessorFeatures).
// NOTE(review): these are externally visible symbols; the layout presumably
// has to stay in sync with whatever the compiler emits for __builtin_cpu_*
// -- confirm before changing it.
struct __processor_model {
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};

// Bitmask of feature indices 32-63 (ProcessorFeatures), kept in a separate
// variable outside the struct.
unsigned int __cpu_features2;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000592
Craig Topper3fe64702018-10-20 03:49:04 +0000593/* A constructor function that is sets __cpu_model and __cpu_features2 with
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000594 the right values. This needs to run only once. This constructor is
595 given the highest priority and it should run before constructors without
596 the priority set. However, it still runs after ifunc initializers and
597 needs to be called explicitly there. */
598
Reid Kleckner3ae87c42017-04-07 16:54:32 +0000599int CONSTRUCTOR_ATTRIBUTE
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000600__cpu_indicator_init(void) {
Craig Topper61d84502017-07-10 17:30:20 +0000601 unsigned EAX, EBX, ECX, EDX;
602 unsigned MaxLeaf = 5;
603 unsigned Vendor;
604 unsigned Model, Family, Brand_id;
605 unsigned Features = 0;
Craig Topper3fe64702018-10-20 03:49:04 +0000606 unsigned Features2 = 0;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000607
608 /* This function needs to run just once. */
609 if (__cpu_model.__cpu_vendor)
610 return 0;
611
612 if (!isCpuIdSupported())
613 return -1;
614
615 /* Assume cpuid insn present. Run in level 0 to get vendor id. */
Craig Topper61d84502017-07-10 17:30:20 +0000616 if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000617 __cpu_model.__cpu_vendor = VENDOR_OTHER;
618 return -1;
619 }
620 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
621 detectX86FamilyModel(EAX, &Family, &Model);
622 Brand_id = EBX & 0xff;
623
624 /* Find available features. */
Craig Topper3fe64702018-10-20 03:49:04 +0000625 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2);
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000626 __cpu_model.__cpu_features[0] = Features;
Craig Topper3fe64702018-10-20 03:49:04 +0000627 __cpu_features2 = Features2;
Alina Sbirlea36f57fb2016-07-14 22:02:35 +0000628
629 if (Vendor == SIG_INTEL) {
630 /* Get CPU type. */
631 getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
632 &(__cpu_model.__cpu_type),
633 &(__cpu_model.__cpu_subtype));
634 __cpu_model.__cpu_vendor = VENDOR_INTEL;
635 } else if (Vendor == SIG_AMD) {
636 /* Get CPU type. */
637 getAMDProcessorTypeAndSubtype(Family, Model, Features,
638 &(__cpu_model.__cpu_type),
639 &(__cpu_model.__cpu_subtype));
640 __cpu_model.__cpu_vendor = VENDOR_AMD;
641 } else
642 __cpu_model.__cpu_vendor = VENDOR_OTHER;
643
644 assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
645 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
646 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
647
648 return 0;
649}
650
651#endif