[X86] Support -march=x86-64-v[234]
PR47686. These micro-architecture levels are defined in the x86-64 psABI:
https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9
GCC 11 will support these levels.
Note, -mtune=x86-64-v[234] are invalid and __builtin_cpu_is cannot be
used on them.
Reviewed By: craig.topper, RKSimon
Differential Revision: https://reviews.llvm.org/D89197
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b81547d..d75245e 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -105,7 +105,7 @@
* The 'mpx' feature was removed from the backend. It had been removed from clang
frontend in 10.0. Mention of the 'mpx' feature in an IR file will print a
message to stderr, but IR should still compile.
-* Support for -march=sapphirerapids was added.
+* Support for ``-march=sapphirerapids`` and ``-march=x86-64-v[234]`` has been added.
* The assembler now has support for {disp32} and {disp8} pseudo prefixes for
controlling displacement size for memory operands and jump displacements. The
assembler also supports the .d32 and .d8 mnemonic suffixes to do the same.
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
index d97f620..e66a074 100644
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -121,17 +121,24 @@
CK_ZNVER1,
CK_ZNVER2,
CK_x86_64,
+ CK_x86_64_v2,
+ CK_x86_64_v3,
+ CK_x86_64_v4,
CK_Geode,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
/// \p Only64Bit is true.
CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false);
+CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false);
/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will
/// only contain 64-bit capable CPUs.
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit = false);
+/// Provide a list of valid -mtune names.
+void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+ bool Only64Bit = false);
/// Get the key feature prioritizing target multiversioning.
ProcessorFeatures getKeyFeature(CPUKind Kind);
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 1e8569b..35582a9 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -137,6 +137,15 @@
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
+constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
+ FeaturePOPCNT | FeatureSSE4_2 |
+ FeatureCMPXCHG16B;
+constexpr FeatureBitset FeaturesX86_64_V3 =
+ FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
+ FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+ FeatureAVX512BW | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512VL;
// Intel Core CPUs
constexpr FeatureBitset FeaturesCore2 =
@@ -383,10 +392,15 @@
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
+ { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
+ { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3 },
+ { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4 },
// Geode processors.
{ {"geode"}, CK_Geode, ~0U, FeaturesGeode },
};
+constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
+
X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
for (const auto &P : Processors)
if (P.Name == CPU && (P.Features[FEATURE_64BIT] || !Only64Bit))
@@ -395,6 +409,12 @@
return CK_None;
}
+X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) {
+ if (llvm::is_contained(NoTuneList, CPU))
+ return CK_None;
+ return parseArchX86(CPU, Only64Bit);
+}
+
void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const auto &P : Processors)
@@ -402,6 +422,14 @@
Values.emplace_back(P.Name);
}
+void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+ bool Only64Bit) {
+ for (const ProcInfo &P : Processors)
+ if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit) &&
+ !llvm::is_contained(NoTuneList, P.Name))
+ Values.emplace_back(P.Name);
+}
+
ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) {
// FIXME: Can we avoid a linear search here? The table might be sorted by
// CPUKind so we could binary search?
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index e5d47a0..5419c35 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -558,18 +558,27 @@
//===----------------------------------------------------------------------===//
def ProcessorFeatures {
+ // x86-64 and x86-64-v[234]
+ list<SubtargetFeature> X86_64V1Features = [
+ FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureNOPL, Feature64Bit
+ ];
+ list<SubtargetFeature> X86_64V2Features = !listconcat(
+ X86_64V1Features,
+ [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
+ list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
+ FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
+ FeatureMOVBE, FeatureXSAVE
+ ]);
+ list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+ FeatureBWI,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureVLX,
+ ]);
+
// Nehalem
- list<SubtargetFeature> NHMFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureLAHFSAHF];
+ list<SubtargetFeature> NHMFeatures = X86_64V2Features;
list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
FeatureInsertVZEROUPPER];
@@ -1350,16 +1359,7 @@
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
-def : ProcModel<"x86-64", SandyBridgeModel, [
- FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE2,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
-],
+def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
[
FeatureSlow3OpsLEA,
FeatureSlowDivide64,
@@ -1368,6 +1368,16 @@
FeatureInsertVZEROUPPER
]>;
+// x86-64 micro-architecture levels.
+def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
+ ProcessorFeatures.SNBTuning>;
+// Close to Haswell.
+def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
+ ProcessorFeatures.HSWTuning>;
+// Close to the AVX-512 level implemented by Xeon Scalable Processors.
+def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features,
+ ProcessorFeatures.SKXTuning>;
+
//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/cpus-other.ll b/llvm/test/CodeGen/X86/cpus-other.ll
index 89231bf..0b97be4 100644
--- a/llvm/test/CodeGen/X86/cpus-other.ll
+++ b/llvm/test/CodeGen/X86/cpus-other.ll
@@ -16,6 +16,11 @@
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+;; x86-64 micro-architecture levels.
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v2
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v3
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v4
+
define void @foo() {
ret void
}