Add feature flags for AVX and FMA and fix some SSE4A feature flag
initialization problems.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74350 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8df138d..4d26364 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -55,6 +55,13 @@
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">;
+def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
+ "Enable AVX instructions">;
+def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
+ "Enable three-operand fused multiple-add">;
+def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
+ "Enable four-operand fused multiple-add">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@@ -82,6 +89,9 @@
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+// Sandy Bridge does not have FMA
+def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 201367e..a6b0880 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -236,6 +236,10 @@
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 56983ce..8506fa6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -207,6 +207,10 @@
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasAVX = ((ECX >> 28) & 0x1);
+
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
unsigned Family = 0;
@@ -217,6 +221,7 @@
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
+ HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
}
}
@@ -342,6 +347,10 @@
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
+ , HasSSE4A(false)
+ , HasAVX(false)
+ , HasFMA3(false)
+ , HasFMA4(false)
, IsBTMemSlow(false)
, DarwinVers(0)
, IsLinux(false)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 694b0eb..f4f6cce 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -64,12 +64,21 @@
///
bool HasX86_64;
- /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
- bool IsBTMemSlow;
-
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
+ /// HasAVX - Target has AVX instructions
+ bool HasAVX;
+
+ /// HasFMA3 - Target has 3-operand fused multiply-add
+ bool HasFMA3;
+
+ /// HasFMA4 - Target has 4-operand fused multiply-add
+ bool HasFMA4;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
@@ -133,6 +142,9 @@
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasAVX() const { return hasAVX(); }
+ bool hasFMA3() const { return HasFMA3; }
+ bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }