[X86] Make _xgetbv/_xsetbv on non-windows platforms
Summary:
This patch attempts to redo what was tried in r278783, but was reverted.
These intrinsics should be available on non-windows platforms with "xsave" feature check. But on Windows platforms they shouldn't have feature check since that's how MSVC behaves.
To accomplish this I've added a MS builtin with no feature check. And a normal gcc builtin with a feature check. When _MSC_VER is not defined _xgetbv/_xsetbv will be macros pointing to the gcc builtin name.
I've moved the forward declarations from intrin.h to immintrin.h to match the MSDN documentation and used that as the header file for the MS builtin.
I'm not super happy with this implementation, and I'm open to suggestions for better ways to do it.
Reviewers: rnk, RKSimon, spatel
Reviewed By: rnk
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D56686
llvm-svn: 351160
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4ee2a70..e67e700 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -693,6 +693,10 @@
// XSAVE
TARGET_BUILTIN(__builtin_ia32_xsave, "vv*ULLi", "n", "xsave")
TARGET_BUILTIN(__builtin_ia32_xrstor, "vv*ULLi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xgetbv, "ULLiUi", "n", "xsave")
+TARGET_HEADER_BUILTIN(_xgetbv, "UWiUi", "nh", "immintrin.h", ALL_MS_LANGUAGES, "")
+TARGET_BUILTIN(__builtin_ia32_xsetbv, "vUiULLi", "n", "xsave")
+TARGET_HEADER_BUILTIN(_xsetbv, "vUiUWi", "nh", "immintrin.h", ALL_MS_LANGUAGES, "")
TARGET_BUILTIN(__builtin_ia32_xsaveopt, "vv*ULLi", "n", "xsaveopt")
TARGET_BUILTIN(__builtin_ia32_xrstors, "vv*ULLi", "n", "xsaves")
TARGET_BUILTIN(__builtin_ia32_xsavec, "vv*ULLi", "n", "xsavec")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4c17602..12234eb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9831,7 +9831,9 @@
case X86::BI__builtin_ia32_xsavec:
case X86::BI__builtin_ia32_xsavec64:
case X86::BI__builtin_ia32_xsaves:
- case X86::BI__builtin_ia32_xsaves64: {
+ case X86::BI__builtin_ia32_xsaves64:
+ case X86::BI__builtin_ia32_xsetbv:
+ case X86::BI_xsetbv: {
Intrinsic::ID ID;
#define INTRINSIC_X86_XSAVE_ID(NAME) \
case X86::BI__builtin_ia32_##NAME: \
@@ -9851,6 +9853,10 @@
INTRINSIC_X86_XSAVE_ID(xsavec64);
INTRINSIC_X86_XSAVE_ID(xsaves);
INTRINSIC_X86_XSAVE_ID(xsaves64);
+ INTRINSIC_X86_XSAVE_ID(xsetbv);
+ case X86::BI_xsetbv:
+ ID = Intrinsic::x86_xsetbv;
+ break;
}
#undef INTRINSIC_X86_XSAVE_ID
Value *Mhi = Builder.CreateTrunc(
@@ -9860,6 +9866,9 @@
Ops.push_back(Mlo);
return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
+ case X86::BI__builtin_ia32_xgetbv:
+ case X86::BI_xgetbv:
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
case X86::BI__builtin_ia32_storedqudi128_mask:
case X86::BI__builtin_ia32_storedqusi128_mask:
case X86::BI__builtin_ia32_storedquhi128_mask:
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 7d0722e..19edd4a 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -378,9 +378,8 @@
#include <fxsrintrin.h>
#endif
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
+/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>
-#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index c86f41f..acc08ab 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -200,10 +200,7 @@
_WriteBarrier(void);
unsigned __int32 xbegin(void);
void _xend(void);
-static __inline__
#define _XCR_XFEATURE_ENABLED_MASK 0
-unsigned __int64 __cdecl _xgetbv(unsigned int);
-void __cdecl _xsetbv(unsigned int, unsigned __int64);
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
#ifdef __x86_64__
@@ -539,12 +536,6 @@
__asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
: "a"(__level), "c"(__ecx));
}
-static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS
-_xgetbv(unsigned int __xcr_no) {
- unsigned int __eax, __edx;
- __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no));
- return ((unsigned __int64)__edx << 32) | __eax;
-}
static __inline__ void __DEFAULT_FN_ATTRS
__halt(void) {
__asm__ volatile ("hlt");
diff --git a/clang/lib/Headers/xsaveintrin.h b/clang/lib/Headers/xsaveintrin.h
index 16f3a78..788f9e0 100644
--- a/clang/lib/Headers/xsaveintrin.h
+++ b/clang/lib/Headers/xsaveintrin.h
@@ -28,6 +28,10 @@
#ifndef __XSAVEINTRIN_H
#define __XSAVEINTRIN_H
+#ifndef _MSC_VER
+#define _XCR_XFEATURE_ENABLED_MASK 0
+#endif
+
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsave")))
@@ -41,6 +45,20 @@
__builtin_ia32_xrstor(__p, __m);
}
+#ifndef _MSC_VER
+#define _xgetbv(A) __builtin_ia32_xgetbv((long long)(A))
+#define _xsetbv(A, B) __builtin_ia32_xsetbv((unsigned int)(A), (unsigned long long)(B));
+#else
+#ifdef __cplusplus
+extern "C" {
+#endif
+unsigned __int64 __cdecl _xgetbv(unsigned int);
+void __cdecl _xsetbv(unsigned int, unsigned __int64);
+#ifdef __cplusplus
+}
+#endif
+#endif /* _MSC_VER */
+
#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
_xsave64(void *__p, unsigned long long __m) {
@@ -51,6 +69,7 @@
_xrstor64(void *__p, unsigned long long __m) {
__builtin_ia32_xrstor64(__p, __m);
}
+
#endif
#undef __DEFAULT_FN_ATTRS
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index fd99dd2b..e237bc2 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -281,6 +281,8 @@
(void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi);
(void)__builtin_ia32_xsave64(tmp_vp, tmp_ULLi);
+ tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+ (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
(void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
(void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
(void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
diff --git a/clang/test/CodeGen/x86_32-xsave.c b/clang/test/CodeGen/x86_32-xsave.c
index f5d84e2..e1acdff 100644
--- a/clang/test/CodeGen/x86_32-xsave.c
+++ b/clang/test/CodeGen/x86_32-xsave.c
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
+// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XGETBV
+// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSETBV
+
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
@@ -10,9 +13,15 @@
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
void test() {
- unsigned long long tmp_ULLi = 0;
- void* tmp_vp = 0;
+ unsigned long long tmp_ULLi;
+ unsigned int tmp_Ui;
+ void* tmp_vp;
+ tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0;
#ifdef TEST_XSAVE
// XSAVE: [[tmp_vp_1:%[0-9a-zA-Z]+]] = load i8*, i8** %tmp_vp, align 4
@@ -30,6 +39,12 @@
// XSAVE: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
// XSAVE: call void @llvm.x86.xrstor(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
(void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xsave
+ (void)_xsave(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xrstor
+ (void)_xrstor(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEOPT
@@ -40,6 +55,9 @@
// XSAVEOPT: [[low32_1:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
// XSAVEOPT: call void @llvm.x86.xsaveopt(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
(void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: call void @llvm.x86.xsaveopt
+ (void)_xsaveopt(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEC
@@ -50,6 +68,9 @@
// XSAVEC: [[low32_1:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_1]] to i32
// XSAVEC: call void @llvm.x86.xsavec(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
(void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi);
+
+// XSAVEC: call void @llvm.x86.xsavec
+ (void)_xsavec(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVES
@@ -68,5 +89,34 @@
// XSAVES: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
// XSAVES: call void @llvm.x86.xrstors(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
(void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xsaves
+ (void)_xsaves(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xrstors
+ (void)_xrstors(tmp_vp, tmp_ULLi);
#endif
+
+#ifdef TEST_XGETBV
+// XGETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4
+// XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]])
+ tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+
+// XGETBV: call i64 @llvm.x86.xgetbv
+ tmp_ULLi = _xgetbv(tmp_Ui);
+#endif
+
+#ifdef TEST_XSETBV
+// XSETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4
+// XSETBV: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSETBV: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSETBV: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32
+// XSETBV: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui]], i32 [[high32_3]], i32 [[low32_3]])
+ (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
+
+ // XSETBV: call void @llvm.x86.xsetbv
+ (void)_xsetbv(tmp_Ui, tmp_ULLi);
+#endif
+
}
diff --git a/clang/test/CodeGen/x86_64-xsave.c b/clang/test/CodeGen/x86_64-xsave.c
index beb775c..cfc33cb 100644
--- a/clang/test/CodeGen/x86_64-xsave.c
+++ b/clang/test/CodeGen/x86_64-xsave.c
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE
+// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XGETBV
+// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSETBV
+
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT
@@ -10,9 +13,16 @@
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
+
void test() {
- unsigned long long tmp_ULLi = 0;
- void* tmp_vp = 0;
+ unsigned long long tmp_ULLi;
+ unsigned int tmp_Ui;
+ void* tmp_vp;
+ tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0;
#ifdef TEST_XSAVE
// XSAVE: [[tmp_vp_1:%[0-9a-zA-Z]+]] = load i8*, i8** %tmp_vp, align 8
@@ -46,6 +56,18 @@
// XSAVE: [[low32_4:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_4]] to i32
// XSAVE: call void @llvm.x86.xrstor64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
(void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xsave
+ (void)_xsave(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xsave64
+ (void)_xsave64(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xrstor
+ (void)_xrstor(tmp_vp, tmp_ULLi);
+
+// XSAVE: call void @llvm.x86.xrstor64
+ (void)_xrstor64(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEOPT
@@ -64,6 +86,12 @@
// XSAVEOPT: [[low32_2:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
// XSAVEOPT: call void @llvm.x86.xsaveopt64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
(void)__builtin_ia32_xsaveopt64(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: call void @llvm.x86.xsaveopt
+ (void)_xsaveopt(tmp_vp, tmp_ULLi);
+
+// XSAVEOPT: call void @llvm.x86.xsaveopt64
+ (void)_xsaveopt64(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVEC
@@ -82,6 +110,12 @@
// XSAVEC: [[low32_2:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_2]] to i32
// XSAVEC: call void @llvm.x86.xsavec64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
(void)__builtin_ia32_xsavec64(tmp_vp, tmp_ULLi);
+
+// XSAVEC: call void @llvm.x86.xsavec
+ (void)_xsavec(tmp_vp, tmp_ULLi);
+
+// XSAVEC: call void @llvm.x86.xsavec64
+ (void)_xsavec64(tmp_vp, tmp_ULLi);
#endif
#ifdef TEST_XSAVES
@@ -116,5 +150,39 @@
// XSAVES: [[low32_4:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_4]] to i32
// XSAVES: call void @llvm.x86.xrstors64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
(void)__builtin_ia32_xrstors64(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xsaves
+ (void)_xsaves(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xsaves64
+ (void)_xsaves64(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xrstors
+ (void)_xrstors(tmp_vp, tmp_ULLi);
+
+// XSAVES: call void @llvm.x86.xrstors64
+ (void)_xrstors64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XGETBV
+// XGETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4
+// XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]])
+ tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+
+// XGETBV: call i64 @llvm.x86.xgetbv
+ tmp_ULLi = _xgetbv(tmp_Ui);
+#endif
+
+#ifdef TEST_XSETBV
+// XSETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4
+// XSETBV: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8
+// XSETBV: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32
+// XSETBV: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32
+// XSETBV: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32
+// XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui]], i32 [[high32_3]], i32 [[low32_3]])
+ (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
+
+ // XSETBV: call void @llvm.x86.xsetbv
+ (void)_xsetbv(tmp_Ui, tmp_ULLi);
#endif
}
diff --git a/clang/test/Headers/ms-intrin.cpp b/clang/test/Headers/ms-intrin.cpp
index b0fef9c..18bb798 100644
--- a/clang/test/Headers/ms-intrin.cpp
+++ b/clang/test/Headers/ms-intrin.cpp
@@ -49,7 +49,9 @@
int info[4];
__cpuid(info, 0);
__cpuidex(info, 0, 0);
+#if defined(_M_X64) || defined(_M_IX86)
_xgetbv(0);
+#endif
__halt();
__nop();
__readmsr(0);