[llvm-exegesis] ExegesisX86Target::setRegToConstant() should depend on the subtarget features.

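Summary: This fixes PR38008. setRegToConstant() now receives the MCSubtargetInfo, and the X86 implementation uses its feature bits to pick the load opcode for XMM/YMM registers (SSE, AVX or AVX-512 encodings); MMX registers are now handled as well.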

Reviewers: gchatelet, RKSimon

Subscribers: tschuett, craig.topper, llvm-commits

Differential Revision: https://reviews.llvm.org/D48820

llvm-svn: 336171
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index c7fc6bd..d2be7f4 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -30,12 +30,13 @@
 
 static std::vector<llvm::MCInst>
 generateSnippetSetupCode(const llvm::ArrayRef<unsigned> RegsToDef,
-                         const ExegesisTarget &ET, bool &IsComplete) {
+                         const ExegesisTarget &ET,
+                         const llvm::LLVMTargetMachine &TM, bool &IsComplete) {
   IsComplete = true;
   std::vector<llvm::MCInst> Result;
   for (const unsigned Reg : RegsToDef) {
     // Load a constant in the register.
-    const auto Code = ET.setRegToConstant(Reg);
+    const auto Code = ET.setRegToConstant(*TM.getMCSubtargetInfo(), Reg);
     if (Code.empty())
       IsComplete = false;
     Result.insert(Result.end(), Code.begin(), Code.end());
@@ -159,7 +160,7 @@
   Properties.reset(llvm::MachineFunctionProperties::Property::IsSSA);
   bool IsSnippetSetupComplete = false;
   std::vector<llvm::MCInst> SnippetWithSetup =
-      generateSnippetSetupCode(RegsToDef, ET, IsSnippetSetupComplete);
+      generateSnippetSetupCode(RegsToDef, ET, *TM, IsSnippetSetupComplete);
   if (!SnippetWithSetup.empty()) {
     SnippetWithSetup.insert(SnippetWithSetup.end(), Instructions.begin(),
                             Instructions.end());
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index cb87e3a..4f1f286 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -34,7 +34,8 @@
   virtual void addTargetSpecificPasses(llvm::PassManagerBase &PM) const {}
 
   // Generates code to move a constant into a the given register.
-  virtual std::vector<llvm::MCInst> setRegToConstant(unsigned Reg) const {
+  virtual std::vector<llvm::MCInst>
+  setRegToConstant(const llvm::MCSubtargetInfo &STI, unsigned Reg) const {
     return {};
   }
 
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 594c48b..f0b411c 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86.h"
 #include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
 #include "llvm/MC/MCInstBuilder.h"
 
 namespace exegesis {
@@ -130,8 +131,9 @@
     PM.add(llvm::createX86FloatingPointStackifierPass());
   }
 
-  std::vector<llvm::MCInst>
-  setRegToConstant(unsigned Reg) const override {
+  std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
+                                             unsigned Reg) const override {
+    // GPR.
     if (llvm::X86::GR8RegClass.contains(Reg))
       return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
     if (llvm::X86::GR16RegClass.contains(Reg))
@@ -140,12 +142,25 @@
       return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
     if (llvm::X86::GR64RegClass.contains(Reg))
       return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
-    if (llvm::X86::VR128XRegClass.contains(Reg))
-      return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
-    if (llvm::X86::VR256XRegClass.contains(Reg))
+    // MMX.
+    if (llvm::X86::VR64RegClass.contains(Reg))
+      return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
+    // {X,Y,Z}MM.
+    if (llvm::X86::VR128XRegClass.contains(Reg)) {
+      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
+      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
+        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
+      return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
+    }
+    if (llvm::X86::VR256XRegClass.contains(Reg)) {
+      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+        return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
       return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
+    }
     if (llvm::X86::VR512RegClass.contains(Reg))
-      return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU64Zrm);
+      return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
+    // X87.
     if (llvm::X86::RFP32RegClass.contains(Reg) ||
         llvm::X86::RFP64RegClass.contains(Reg) ||
         llvm::X86::RFP80RegClass.contains(Reg))
@@ -155,8 +170,7 @@
 
   std::unique_ptr<BenchmarkRunner>
   createLatencyBenchmarkRunner(const LLVMState &State) const override {
-    return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(
-        State);
+    return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
   }
 
   std::unique_ptr<BenchmarkRunner>