[llvm-exegesis] Improve Register Setup.

Summary:
Add a function to set a register to a particular value, plus tests.
Add an EFLAGS test and use the new setRegTo instead of setRegToConstant.

Reviewers: courbet, javed.absar

Subscribers: mgorny, tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D51856

llvm-svn: 342466
diff --git a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
index fe9d253..90c5927 100644
--- a/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp
@@ -9,6 +9,7 @@
 #include "../Target.h"
 #include "../Latency.h"
 #include "AArch64.h"
+#include "AArch64RegisterInfo.h"
 
 namespace exegesis {
 
@@ -26,33 +27,51 @@
   }
 };
 
+namespace {
+
+static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
+  switch (RegBitWidth) {
+  case 32:
+    return llvm::AArch64::MOVi32imm;
+  case 64:
+    return llvm::AArch64::MOVi64imm;
+  }
+  llvm_unreachable("Invalid Value Width");
+}
+
+// Generates instruction to load an immediate value into a register.
+static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
+                                  const llvm::APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the Register");
+  return llvm::MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
+      .addReg(Reg)
+      .addImm(Value.getZExtValue());
+}
+
+} // namespace
+
 class ExegesisAArch64Target : public ExegesisTarget {
   std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
-                                     const llvm::APInt &Value,
-                                     unsigned Reg) const override {
-    llvm_unreachable("Not yet implemented");
-  }
-
-  unsigned getScratchMemoryRegister(const llvm::Triple &) const override {
-    llvm_unreachable("Not yet implemented");
-  }
-
-  void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
-                          unsigned Offset) const override {
-    llvm_unreachable("Not yet implemented");
-  }
-
-  unsigned getMaxMemoryAccessSize() const override {
-    llvm_unreachable("Not yet implemented");
+                                     unsigned Reg,
+                                     const llvm::APInt &Value) const override {
+    if (llvm::AArch64::GPR32RegClass.contains(Reg))
+      return {loadImmediate(Reg, 32, Value)};
+    if (llvm::AArch64::GPR64RegClass.contains(Reg))
+      return {loadImmediate(Reg, 64, Value)};
+    llvm::errs() << "setRegTo is not implemented, results will be unreliable\n";
+    return {};
   }
 
   bool matchesArch(llvm::Triple::ArchType Arch) const override {
     return Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be;
   }
+
   void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
     // Function return is a pseudo-instruction that needs to be expanded
     PM.add(llvm::createAArch64ExpandPseudoPass());
   }
+
   std::unique_ptr<BenchmarkRunner>
   createLatencyBenchmarkRunner(const LLVMState &State) const override {
     return llvm::make_unique<AArch64LatencyBenchmarkRunner>(State);
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
index ec488d8..cb6e9e1 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp
@@ -29,18 +29,18 @@
 static constexpr const char FunctionID[] = "foo";
 
 static std::vector<llvm::MCInst>
-generateSnippetSetupCode(const llvm::ArrayRef<unsigned> RegsToDef,
-                         const ExegesisTarget &ET,
-                         const llvm::LLVMTargetMachine &TM, bool &IsComplete) {
-  IsComplete = true;
+generateSnippetSetupCode(const ExegesisTarget &ET,
+                         const llvm::MCSubtargetInfo *const MSI,
+                         llvm::ArrayRef<RegisterValue> RegisterInitialValues,
+                         bool &IsSnippetSetupComplete) {
   std::vector<llvm::MCInst> Result;
-  // for (const unsigned Reg : RegsToDef) {
-  //   // Load a constant in the register.
-  //   const auto Code = ET.setRegToConstant(*TM.getMCSubtargetInfo(), Reg);
-  //   if (Code.empty())
-  //     IsComplete = false;
-  //   Result.insert(Result.end(), Code.begin(), Code.end());
-  // }
+  for (const RegisterValue &RV : RegisterInitialValues) {
+    // Load a constant in the register.
+    const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
+    if (SetRegisterCode.empty())
+      IsSnippetSetupComplete = false;
+    Result.insert(Result.end(), SetRegisterCode.begin(), SetRegisterCode.end());
+  }
   return Result;
 }
 
@@ -149,7 +149,7 @@
 void assembleToStream(const ExegesisTarget &ET,
                       std::unique_ptr<llvm::LLVMTargetMachine> TM,
                       llvm::ArrayRef<unsigned> LiveIns,
-                      llvm::ArrayRef<unsigned> RegsToDef,
+                      llvm::ArrayRef<RegisterValue> RegisterInitialValues,
                       llvm::ArrayRef<llvm::MCInst> Instructions,
                       llvm::raw_pwrite_stream &AsmStream) {
   std::unique_ptr<llvm::LLVMContext> Context =
@@ -171,13 +171,12 @@
     MF.getRegInfo().addLiveIn(Reg);
 
-  bool IsSnippetSetupComplete = false;
-  std::vector<llvm::MCInst> SnippetWithSetup =
-      generateSnippetSetupCode(RegsToDef, ET, *TM, IsSnippetSetupComplete);
-  if (!SnippetWithSetup.empty()) {
-    SnippetWithSetup.insert(SnippetWithSetup.end(), Instructions.begin(),
-                            Instructions.end());
-    Instructions = SnippetWithSetup;
-  }
+  bool IsSnippetSetupComplete = true; // Cleared if a setRegTo call fails.
+  std::vector<llvm::MCInst> Code =
+      generateSnippetSetupCode(ET, TM->getMCSubtargetInfo(),
+                               RegisterInitialValues, IsSnippetSetupComplete);
+
+  Code.insert(Code.end(), Instructions.begin(), Instructions.end());
+
   // If the snippet setup is not complete, we disable liveliness tracking. This
   // means that we won't know what values are in the registers.
   if (!IsSnippetSetupComplete)
@@ -188,7 +187,7 @@
   MF.getRegInfo().freezeReservedRegs(MF);
 
   // Fill the MachineFunction from the instructions.
-  fillMachineFunction(MF, LiveIns, Instructions);
+  fillMachineFunction(MF, LiveIns, Code);
 
   // We create the pass manager, run the passes to populate AsmBuffer.
   llvm::MCContext &MCContext = MMI->getContext();
diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.h b/llvm/tools/llvm-exegesis/lib/Assembler.h
index d5b542b..76030ae 100644
--- a/llvm/tools/llvm-exegesis/lib/Assembler.h
+++ b/llvm/tools/llvm-exegesis/lib/Assembler.h
@@ -39,6 +39,12 @@
 // convention and target machine).
 llvm::BitVector getFunctionReservedRegs(const llvm::TargetMachine &TM);
 
+// A simple object storing the value for a particular register.
+struct RegisterValue {
+  unsigned Register;
+  llvm::APInt Value;
+};
+
 // Creates a temporary `void foo(char*)` function containing the provided
 // Instructions. Runs a set of llvm Passes to provide correct prologue and
 // epilogue. Once the MachineFunction is ready, it is assembled for TM to
@@ -46,7 +52,7 @@
 void assembleToStream(const ExegesisTarget &ET,
                       std::unique_ptr<llvm::LLVMTargetMachine> TM,
                       llvm::ArrayRef<unsigned> LiveIns,
-                      llvm::ArrayRef<unsigned> RegsToDef,
+                      llvm::ArrayRef<RegisterValue> RegisterInitialValues,
                       llvm::ArrayRef<llvm::MCInst> Instructions,
                       llvm::raw_pwrite_stream &AsmStream);
 
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h b/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h
index 1195adf4..0370868 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkCode.h
@@ -23,7 +23,7 @@
 
   // Before the code is executed some instructions are added to setup the
   // registers initial values.
-  std::vector<unsigned> RegsToDef;
+  std::vector<RegisterValue> RegisterInitialValues;
 
   // We also need to provide the registers that are live on entry for the
   // assembler to generate proper prologue/epilogue.
diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 2d82f59..6c22d1c 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -104,7 +104,7 @@
     return std::move(E);
   llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
   assembleToStream(State.getExegesisTarget(), State.createTargetMachine(),
-                   BC.LiveIns, BC.RegsToDef, Code, OFS);
+                   BC.LiveIns, BC.RegisterInitialValues, Code, OFS);
   return ResultPath.str();
 }
 
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index 5b51a09..cb58b2d 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -49,7 +49,7 @@
       }
       if (CT.ScratchSpacePointerInReg)
         BC.LiveIns.push_back(CT.ScratchSpacePointerInReg);
-      BC.RegsToDef = computeRegsToDef(CT.Instructions);
+      BC.RegisterInitialValues = computeRegisterInitialValues(CT.Instructions);
       Output.push_back(std::move(BC));
     }
     return Output;
@@ -57,14 +57,14 @@
     return E.takeError();
 }
 
-std::vector<unsigned> SnippetGenerator::computeRegsToDef(
+std::vector<RegisterValue> SnippetGenerator::computeRegisterInitialValues(
     const std::vector<InstructionBuilder> &Instructions) const {
   // Collect all register uses and create an assignment for each of them.
   // Ignore memory operands which are handled separately.
   // Loop invariant: DefinedRegs[i] is true iif it has been set at least once
   // before the current instruction.
   llvm::BitVector DefinedRegs = RATC.emptyRegisters();
-  std::vector<unsigned> RegsToDef;
+  std::vector<RegisterValue> RIV;
   for (const InstructionBuilder &IB : Instructions) {
     // Returns the register that this Operand sets or uses, or 0 if this is not
     // a register.
@@ -82,7 +82,7 @@
       if (!Op.IsDef) {
         const unsigned Reg = GetOpReg(Op);
         if (Reg > 0 && !DefinedRegs.test(Reg)) {
-          RegsToDef.push_back(Reg);
+          RIV.push_back(RegisterValue{Reg, llvm::APInt()});
           DefinedRegs.set(Reg);
         }
       }
@@ -96,7 +96,7 @@
       }
     }
   }
-  return RegsToDef;
+  return RIV;
 }
 
 llvm::Expected<CodeTemplate> SnippetGenerator::generateSelfAliasingCodeTemplate(
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
index ced8ebc..2a412ba 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
@@ -48,8 +48,8 @@
   generateConfigurations(unsigned Opcode) const;
 
   // Given a snippet, computes which registers the setup code needs to define.
-  std::vector<unsigned>
-  computeRegsToDef(const std::vector<InstructionBuilder> &Snippet) const;
+  std::vector<RegisterValue> computeRegisterInitialValues(
+      const std::vector<InstructionBuilder> &Snippet) const;
 
 protected:
   const LLVMState &State;
diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp
index ceca1fd..8baa849 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Target.cpp
@@ -90,21 +90,8 @@
 class ExegesisDefaultTarget : public ExegesisTarget {
 private:
   std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
-                                     const llvm::APInt &Value,
-                                     unsigned Reg) const override {
-    llvm_unreachable("Not yet implemented");
-  }
-
-  unsigned getScratchMemoryRegister(const llvm::Triple &) const override {
-    llvm_unreachable("Not yet implemented");
-  }
-
-  void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
-                          unsigned Offset) const override {
-    llvm_unreachable("Not yet implemented");
-  }
-
-  unsigned getMaxMemoryAccessSize() const override {
+                                     unsigned Reg,
+                                     const llvm::APInt &Value) const override {
     llvm_unreachable("Not yet implemented");
   }
 
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
index 41d77e3..342d18e 100644
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -36,25 +36,31 @@
   virtual void addTargetSpecificPasses(llvm::PassManagerBase &PM) const {}
 
   // Generates code to move a constant into a the given register.
-  virtual std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
-                                             const llvm::APInt &Value,
-                                             unsigned Reg) const = 0;
+  // Precondition: Value must fit into Reg.
+  virtual std::vector<llvm::MCInst>
+  setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg,
+           const llvm::APInt &Value) const = 0;
 
   // Returns the register pointing to scratch memory, or 0 if this target
   // does not support memory operands. The benchmark function uses the
   // default calling convention.
-  virtual unsigned getScratchMemoryRegister(const llvm::Triple &) const = 0;
+  virtual unsigned getScratchMemoryRegister(const llvm::Triple &) const {
+    return 0;
+  }
 
   // Fills memory operands with references to the address at [Reg] + Offset.
   virtual void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
-                                  unsigned Offset) const = 0;
+                                  unsigned Offset) const {
+    llvm_unreachable(
+        "fillMemoryOperands() requires getScratchMemoryRegister() > 0");
+  }
 
   // Returns the maximum number of bytes a load/store instruction can access at
   // once. This is typically the size of the largest register available on the
   // processor. Note that this only used as a hint to generate independant
   // load/stores to/from memory, so the exact returned value does not really
   // matter as long as it's large enough.
-  virtual unsigned getMaxMemoryAccessSize() const = 0;
+  virtual unsigned getMaxMemoryAccessSize() const { return 0; }
 
   // Creates a snippet generator for the given mode.
   std::unique_ptr<SnippetGenerator>
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index e682b98..2d8f958 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -101,8 +101,8 @@
   }
 };
 
-static unsigned GetLoadImmediateOpcode(const llvm::APInt &Value) {
-  switch (Value.getBitWidth()) {
+static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
+  switch (RegBitWidth) {
   case 8:
     return llvm::X86::MOV8ri;
   case 16:
@@ -115,10 +115,12 @@
   llvm_unreachable("Invalid Value Width");
 }
 
-static llvm::MCInst loadImmediate(unsigned Reg, const llvm::APInt &Value,
-                                  unsigned MaxBitWidth) {
-  assert(Value.getBitWidth() <= MaxBitWidth && "Value too big to fit register");
-  return llvm::MCInstBuilder(GetLoadImmediateOpcode(Value))
+// Generates instruction to load an immediate value into a register.
+static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
+                                  const llvm::APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the Register");
+  return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());
 }
@@ -165,6 +167,8 @@
       .addImm(Bytes);
 }
 
+// Reserves some space on the stack, fills it with the content of the provided
+// constant and provides methods to load the stack value into a register.
 struct ConstantInliner {
   explicit ConstantInliner(const llvm::APInt &Constant)
       : StackSize(Constant.getBitWidth() / 8) {
@@ -187,17 +191,19 @@
           Constant.extractBits(8, ByteOffset * 8).getZExtValue()));
   }
 
-  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned Opcode,
-                                            unsigned BitWidth) {
-    assert(StackSize * 8 == BitWidth && "Value does not have the correct size");
+  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
+                                            unsigned Opcode) {
+    assert(StackSize * 8 == RegBitWidth &&
+           "Value does not have the correct size");
     add(loadToReg(Reg, Opcode));
     add(releaseStackSpace(StackSize));
     return std::move(Instructions);
   }
 
-  std::vector<llvm::MCInst> loadX87AndFinalize(unsigned Reg, unsigned Opcode,
-                                               unsigned BitWidth) {
-    assert(StackSize * 8 == BitWidth && "Value does not have the correct size");
+  std::vector<llvm::MCInst>
+  loadX87AndFinalize(unsigned Reg, unsigned RegBitWidth, unsigned Opcode) {
+    assert(StackSize * 8 == RegBitWidth &&
+           "Value does not have the correct size");
     add(llvm::MCInstBuilder(Opcode)
             .addReg(llvm::X86::RSP) // BaseReg
             .addImm(1)              // ScaleAmt
@@ -211,7 +217,7 @@
   }
 
   std::vector<llvm::MCInst> popFlagAndFinalize() {
-    assert(StackSize * 8 == 32 && "Value does not have the correct size");
+    assert(StackSize * 8 == 64 && "Value does not have the correct size");
     add(llvm::MCInstBuilder(llvm::X86::POPF64));
     return std::move(Instructions);
   }
@@ -275,46 +281,46 @@
   }
 
   std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
-                                     const llvm::APInt &Value,
-                                     unsigned Reg) const override {
+                                     unsigned Reg,
+                                     const llvm::APInt &Value) const override {
     if (llvm::X86::GR8RegClass.contains(Reg))
-      return {loadImmediate(Reg, Value, 8)};
+      return {loadImmediate(Reg, 8, Value)};
     if (llvm::X86::GR16RegClass.contains(Reg))
-      return {loadImmediate(Reg, Value, 16)};
+      return {loadImmediate(Reg, 16, Value)};
     if (llvm::X86::GR32RegClass.contains(Reg))
-      return {loadImmediate(Reg, Value, 32)};
+      return {loadImmediate(Reg, 32, Value)};
     if (llvm::X86::GR64RegClass.contains(Reg))
-      return {loadImmediate(Reg, Value, 64)};
+      return {loadImmediate(Reg, 64, Value)};
     ConstantInliner CI(Value);
     if (llvm::X86::VR64RegClass.contains(Reg))
-      return CI.loadAndFinalize(Reg, llvm::X86::MMX_MOVQ64rm, 64);
+      return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
     if (llvm::X86::VR128XRegClass.contains(Reg)) {
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Z128rm, 128);
+        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
-        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQUrm, 128);
-      return CI.loadAndFinalize(Reg, llvm::X86::MOVDQUrm, 128);
+        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
+      return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
     }
     if (llvm::X86::VR256XRegClass.contains(Reg)) {
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Z256rm, 256);
+        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
-        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQUYrm, 256);
+        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
     }
     if (llvm::X86::VR512RegClass.contains(Reg))
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-        return CI.loadAndFinalize(Reg, llvm::X86::VMOVDQU32Zrm, 512);
+        return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
     if (llvm::X86::RSTRegClass.contains(Reg)) {
       if (Value.getBitWidth() == 32)
-        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F32m, 32);
+        return CI.loadX87AndFinalize(Reg, 32, llvm::X86::LD_F32m);
       if (Value.getBitWidth() == 64)
-        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F64m, 64);
+        return CI.loadX87AndFinalize(Reg, 64, llvm::X86::LD_F64m);
       if (Value.getBitWidth() == 80)
-        return CI.loadX87AndFinalize(Reg, llvm::X86::LD_F80m, 80);
+        return CI.loadX87AndFinalize(Reg, 80, llvm::X86::LD_F80m);
     }
     if (Reg == llvm::X86::EFLAGS)
       return CI.popFlagAndFinalize();
-    llvm_unreachable("Not yet implemented");
+    return {}; // Not yet implemented.
   }
 
   std::unique_ptr<SnippetGenerator>