Introduce a pass to insert vzeroupper instructions to avoid the AVX-to-SSE
transition penalty. The pass is enabled through the "x86-use-vzeroupper"
llc command-line option. This is only the first step (a very naive and
conservative one) to sketch out the idea; a proper DFA is coming next
to allow smarter decisions. Comments and ideas, now and in further commits,
will be much appreciated.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138317 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 569c040..95e7021 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -16,6 +16,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -92,6 +93,16 @@
}
//===----------------------------------------------------------------------===//
+// Command line options for x86
+//===----------------------------------------------------------------------===//
+bool UseVZeroUpper; // Storage for -x86-use-vzeroupper; off by default.
+
+static cl::opt<bool, true>
+VZeroUpper("x86-use-vzeroupper",
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::location(UseVZeroUpper), cl::init(false));
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -125,6 +136,11 @@
PM.add(createSSEDomainFixPass());
return true;
}
+
+ if (Subtarget.hasAVX() && UseVZeroUpper) {
+ PM.add(createX86IssueVZeroUpperPass());
+ return true;
+ }
return false;
}