[llvm-exegesis] Improve Register Setup (roll forward of D51856).

Summary:
Added function to set a register to a particular value + tests.
Add EFLAGS test, use new setRegTo instead of setRegToConstant.

Reviewers: courbet, javed.absar

Subscribers: llvm-commits, tschuett, mgorny

Differential Revision: https://reviews.llvm.org/D52297

llvm-svn: 342644
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index e55e77d..2d8f958 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -101,8 +101,8 @@
   }
 };
 
-static unsigned GetLoadImmediateOpcode(const llvm::APInt &Value) {
-  switch (Value.getBitWidth()) {
+static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
+  switch (RegBitWidth) {
   case 8:
     return llvm::X86::MOV8ri;
   case 16:
@@ -115,8 +115,12 @@
   llvm_unreachable("Invalid Value Width");
 }
 
-static llvm::MCInst loadImmediate(unsigned Reg, const llvm::APInt &Value) {
-  return llvm::MCInstBuilder(GetLoadImmediateOpcode(Value))
+// Generates instruction to load an immediate value into a register.
+static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
+                                  const llvm::APInt &Value) {
+  if (Value.getBitWidth() > RegBitWidth)
+    llvm_unreachable("Value must fit in the Register");
+  return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
       .addReg(Reg)
       .addImm(Value.getZExtValue());
 }
@@ -163,39 +167,67 @@
       .addImm(Bytes);
 }
 
+// Reserves some space on the stack, fills it with the content of the provided
+// constant and provide methods to load the stack value into a register.
 struct ConstantInliner {
   explicit ConstantInliner(const llvm::APInt &Constant)
       : StackSize(Constant.getBitWidth() / 8) {
     assert(Constant.getBitWidth() % 8 == 0 && "Must be a multiple of 8");
-    Add(allocateStackSpace(StackSize));
+    add(allocateStackSpace(StackSize));
     size_t ByteOffset = 0;
     for (; StackSize - ByteOffset >= 4; ByteOffset += 4)
-      Add(fillStackSpace(
+      add(fillStackSpace(
           llvm::X86::MOV32mi, ByteOffset,
           Constant.extractBits(32, ByteOffset * 8).getZExtValue()));
     if (StackSize - ByteOffset >= 2) {
-      Add(fillStackSpace(
+      add(fillStackSpace(
           llvm::X86::MOV16mi, ByteOffset,
           Constant.extractBits(16, ByteOffset * 8).getZExtValue()));
       ByteOffset += 2;
     }
     if (StackSize - ByteOffset >= 1)
-      Add(fillStackSpace(
+      add(fillStackSpace(
           llvm::X86::MOV8mi, ByteOffset,
           Constant.extractBits(8, ByteOffset * 8).getZExtValue()));
   }
 
-  ConstantInliner &Add(const llvm::MCInst &Inst) {
-    Instructions.push_back(Inst);
-    return *this;
+  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
+                                            unsigned Opcode) {
+    assert(StackSize * 8 == RegBitWidth &&
+           "Value does not have the correct size");
+    add(loadToReg(Reg, Opcode));
+    add(releaseStackSpace(StackSize));
+    return std::move(Instructions);
   }
 
-  std::vector<llvm::MCInst> finalize() {
-    Add(releaseStackSpace(StackSize));
+  std::vector<llvm::MCInst>
+  loadX87AndFinalize(unsigned Reg, unsigned RegBitWidth, unsigned Opcode) {
+    assert(StackSize * 8 == RegBitWidth &&
+           "Value does not have the correct size");
+    add(llvm::MCInstBuilder(Opcode)
+            .addReg(llvm::X86::RSP) // BaseReg
+            .addImm(1)              // ScaleAmt
+            .addReg(0)              // IndexReg
+            .addImm(0)              // Disp
+            .addReg(0));            // Segment
+    if (Reg != llvm::X86::ST0)
+      add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
+    add(releaseStackSpace(StackSize));
+    return std::move(Instructions);
+  }
+
+  std::vector<llvm::MCInst> popFlagAndFinalize() {
+    assert(StackSize * 8 == 64 && "Value does not have the correct size");
+    add(llvm::MCInstBuilder(llvm::X86::POPF64));
     return std::move(Instructions);
   }
 
 private:
+  ConstantInliner &add(const llvm::MCInst &Inst) {
+    Instructions.push_back(Inst);
+    return *this;
+  }
+
   const size_t StackSize;
   std::vector<llvm::MCInst> Instructions;
 };
@@ -248,64 +280,47 @@
     }
   }
 
-  std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
-                                             unsigned Reg) const override {
-    // GPR.
+  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
+                                     unsigned Reg,
+                                     const llvm::APInt &Value) const override {
     if (llvm::X86::GR8RegClass.contains(Reg))
-      return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
+      return {loadImmediate(Reg, 8, Value)};
     if (llvm::X86::GR16RegClass.contains(Reg))
-      return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
+      return {loadImmediate(Reg, 16, Value)};
     if (llvm::X86::GR32RegClass.contains(Reg))
-      return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
+      return {loadImmediate(Reg, 32, Value)};
     if (llvm::X86::GR64RegClass.contains(Reg))
-      return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
-    // MMX.
+      return {loadImmediate(Reg, 64, Value)};
+    ConstantInliner CI(Value);
     if (llvm::X86::VR64RegClass.contains(Reg))
-      return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
-    // {X,Y,Z}MM.
+      return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
     if (llvm::X86::VR128XRegClass.contains(Reg)) {
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
+        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
-        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
-      return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
+        return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
+      return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
     }
     if (llvm::X86::VR256XRegClass.contains(Reg)) {
       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-        return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
-      return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
+        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
+      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
+        return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
     }
     if (llvm::X86::VR512RegClass.contains(Reg))
-      return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
-    // X87.
-    if (llvm::X86::RFP32RegClass.contains(Reg) ||
-        llvm::X86::RFP64RegClass.contains(Reg) ||
-        llvm::X86::RFP80RegClass.contains(Reg))
-      return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
-    if (Reg == llvm::X86::EFLAGS) {
-      // Set all flags to 0 but the bits that are "reserved and set to 1".
-      constexpr const uint32_t kImmValue = 0x00007002u;
-      std::vector<llvm::MCInst> Result;
-      Result.push_back(allocateStackSpace(8));
-      Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
-      Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
-      return Result;
+      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+        return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
+    if (llvm::X86::RSTRegClass.contains(Reg)) {
+      if (Value.getBitWidth() == 32)
+        return CI.loadX87AndFinalize(Reg, 32, llvm::X86::LD_F32m);
+      if (Value.getBitWidth() == 64)
+        return CI.loadX87AndFinalize(Reg, 64, llvm::X86::LD_F64m);
+      if (Value.getBitWidth() == 80)
+        return CI.loadX87AndFinalize(Reg, 80, llvm::X86::LD_F80m);
     }
-    llvm_unreachable("Not yet implemented");
-  }
-
-  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
-                                     const llvm::APInt &Value,
-                                     unsigned Reg) const override {
-    if (llvm::X86::GR8RegClass.contains(Reg) ||
-        llvm::X86::GR16RegClass.contains(Reg) ||
-        llvm::X86::GR32RegClass.contains(Reg) ||
-        llvm::X86::GR64RegClass.contains(Reg))
-      return {loadImmediate(Reg, Value)};
-    ConstantInliner CI(Value);
-    if (llvm::X86::VR64RegClass.contains(Reg))
-      return CI.Add(loadToReg(Reg, llvm::X86::MMX_MOVQ64rm)).finalize();
-    llvm_unreachable("Not yet implemented");
+    if (Reg == llvm::X86::EFLAGS)
+      return CI.popFlagAndFinalize();
+    return {}; // Not yet implemented.
   }
 
   std::unique_ptr<SnippetGenerator>
@@ -321,31 +336,6 @@
   bool matchesArch(llvm::Triple::ArchType Arch) const override {
     return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
   }
-
-private:
-  // setRegToConstant() specialized for a vector register of size
-  // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
-  // register load.
-  static std::vector<llvm::MCInst>
-  setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
-                         const unsigned RMOpcode) {
-    // There is no instruction to directly set XMM, go through memory.
-    // Since vector values can be interpreted as integers of various sizes (8
-    // to 64 bits) as well as floats and double, so we chose an immediate
-    // value that has set bits for all byte values and is a normal float/
-    // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
-    // interpreted as a float.
-    constexpr const uint32_t kImmValue = 0x40404040u;
-    std::vector<llvm::MCInst> Result;
-    Result.push_back(allocateStackSpace(RegSizeBytes));
-    constexpr const unsigned kMov32NumBytes = 4;
-    for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
-      Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
-    }
-    Result.push_back(loadToReg(Reg, RMOpcode));
-    Result.push_back(releaseStackSpace(RegSizeBytes));
-    return Result;
-  }
 };
 
 } // namespace