[llvm-exegesis] Refactor how forbidden registers are computed.

Summary:
Right now, latency snippet generation can incorrectly select the scratch
register as a dependency-carrying register.
 - Move the logic that excludes the scratch register (and other forbidden
   registers) from register selection out of the Uops implementation and
   into the common SnippetGenerator class.
 - Aliasing detection now takes a set of forbidden registers, just like
   random register assignment already does (see the sketch below).

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68084

llvm-svn: 373048
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.cpp b/llvm/tools/llvm-exegesis/lib/Latency.cpp
index 2f3fbae..112d3ac 100644
--- a/llvm/tools/llvm-exegesis/lib/Latency.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Latency.cpp
@@ -39,7 +39,8 @@
 
 static std::vector<Instruction>
 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
-                            size_t MaxAliasingInstructions) {
+                            size_t MaxAliasingInstructions,
+                            const BitVector &ForbiddenRegisters) {
   // Randomly iterate the set of instructions.
   std::vector<unsigned> Opcodes;
   Opcodes.resize(State.getInstrInfo().getNumOpcodes());
@@ -53,15 +54,16 @@
     const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
     if (OtherInstr.hasMemoryOperands())
       continue;
-    if (Instr.hasAliasingRegistersThrough(OtherInstr))
-      AliasingInstructions.push_back(std::move(OtherInstr));
+    if (Instr.hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters))
+      AliasingInstructions.push_back(OtherInstr);
     if (AliasingInstructions.size() >= MaxAliasingInstructions)
       break;
   }
   return AliasingInstructions;
 }
 
-static ExecutionMode getExecutionModes(const Instruction &Instr) {
+static ExecutionMode getExecutionModes(const Instruction &Instr,
+                                       const BitVector &ForbiddenRegisters) {
   ExecutionMode EM = ExecutionMode::UNKNOWN;
   if (Instr.hasAliasingImplicitRegisters())
     EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
@@ -70,7 +72,7 @@
   if (Instr.hasMemoryOperands())
     EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
   else {
-    if (Instr.hasAliasingRegisters())
+    if (Instr.hasAliasingRegisters(ForbiddenRegisters))
       EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
     if (Instr.hasOneUseOrOneDef())
       EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
@@ -80,6 +82,7 @@
 
 static void appendCodeTemplates(const LLVMState &State,
                                 const Instruction &Instr,
+                                const BitVector &ForbiddenRegisters,
                                 ExecutionMode ExecutionModeBit,
                                 llvm::StringRef ExecutionClassDescription,
                                 std::vector<CodeTemplate> &CodeTemplates) {
@@ -122,8 +125,8 @@
   }
   case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
     // Select back-to-back non-memory instruction.
-    for (const auto OtherInstr :
-         computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
+    for (const auto OtherInstr : computeAliasingInstructions(
+             State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
       const AliasingConfigurations Forward(Instr, OtherInstr);
       const AliasingConfigurations Back(OtherInstr, Instr);
       InstructionTemplate ThisIT(Instr);
@@ -149,13 +152,14 @@
 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
 
 llvm::Expected<std::vector<CodeTemplate>>
-LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
+LatencySnippetGenerator::generateCodeTemplates(
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   std::vector<CodeTemplate> Results;
-  const ExecutionMode EM = getExecutionModes(Instr);
+  const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
   for (const auto EC : kExecutionClasses) {
     for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
-      appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
-                          Results);
+      appendCodeTemplates(State, Instr, ForbiddenRegisters, ExecutionModeBit,
+                          EC.Description, Results);
     if (!Results.empty())
       break;
   }
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.h b/llvm/tools/llvm-exegesis/lib/Latency.h
index 7d6d96a..9ffe2d7 100644
--- a/llvm/tools/llvm-exegesis/lib/Latency.h
+++ b/llvm/tools/llvm-exegesis/lib/Latency.h
@@ -27,7 +27,8 @@
   ~LatencySnippetGenerator() override;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 };
 
 class LatencyBenchmarkRunner : public BenchmarkRunner {
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index 3636a1d..f44dcbd 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -183,10 +183,29 @@
   return ImplDefRegs.anyCommon(ImplUseRegs);
 }
 
+// Returns true if there are registers that are both in `A` and `B` but not in
+// `Forbidden`.
+static bool anyCommonExcludingForbidden(const BitVector &A, const BitVector &B,
+                                        const BitVector &Forbidden) {
+  assert(A.size() == B.size() && B.size() == Forbidden.size());
+  const auto Size = A.size();
+  for (int AIndex = A.find_first(); AIndex != -1;) {
+    const int BIndex = B.find_first_in(AIndex, Size);
+    if (BIndex == -1)
+      return false;
+    if (AIndex == BIndex && !Forbidden.test(AIndex))
+      return true;
+    AIndex = A.find_first_in(BIndex + 1, Size);
+  }
+  return false;
+}
+
 bool Instruction::hasAliasingRegistersThrough(
-    const Instruction &OtherInstr) const {
-  return AllDefRegs.anyCommon(OtherInstr.AllUseRegs) &&
-         OtherInstr.AllDefRegs.anyCommon(AllUseRegs);
+    const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const {
+  return anyCommonExcludingForbidden(AllDefRegs, OtherInstr.AllUseRegs,
+                                     ForbiddenRegisters) &&
+         anyCommonExcludingForbidden(OtherInstr.AllDefRegs, AllUseRegs,
+                                     ForbiddenRegisters);
 }
 
 bool Instruction::hasTiedRegisters() const {
@@ -194,8 +213,10 @@
       Variables, [](const Variable &Var) { return Var.hasTiedOperands(); });
 }
 
-bool Instruction::hasAliasingRegisters() const {
-  return AllDefRegs.anyCommon(AllUseRegs);
+bool Instruction::hasAliasingRegisters(
+    const BitVector &ForbiddenRegisters) const {
+  return anyCommonExcludingForbidden(AllDefRegs, AllUseRegs,
+                                     ForbiddenRegisters);
 }
 
 bool Instruction::hasOneUseOrOneDef() const {
@@ -203,6 +224,7 @@
 }
 
 void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
+                       const RegisterAliasingTrackerCache &RATC,
                        llvm::raw_ostream &Stream) const {
   Stream << "- " << Name << "\n";
   for (const auto &Op : Operands) {
@@ -251,7 +273,7 @@
     Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n";
   if (hasTiedRegisters())
     Stream << "- hasTiedRegisters (execution is always serial)\n";
-  if (hasAliasingRegisters())
+  if (hasAliasingRegisters(RATC.emptyRegisters()))
     Stream << "- hasAliasingRegisters\n";
 }
 
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index 09bd1ce..1c67ef8 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -112,10 +112,11 @@
   // Repeating this instruction may execute sequentially by picking aliasing
   // Use and Def registers. It may also execute in parallel by picking non
   // aliasing Use and Def registers.
-  bool hasAliasingRegisters() const;
+  bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const;
 
   // Whether this instruction's registers alias with OtherInstr's registers.
-  bool hasAliasingRegistersThrough(const Instruction &OtherInstr) const;
+  bool hasAliasingRegistersThrough(const Instruction &OtherInstr,
+                                   const BitVector &ForbiddenRegisters) const;
 
   // Returns whether this instruction has Memory Operands.
   // Repeating this instruction executes sequentially with an instruction that
@@ -129,6 +130,7 @@
 
   // Convenient function to help with debugging.
   void dump(const llvm::MCRegisterInfo &RegInfo,
+            const RegisterAliasingTrackerCache &RATC,
             llvm::raw_ostream &Stream) const;
 
   const llvm::MCInstrDesc *Description; // Never nullptr.
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index d5c790d..cd93589 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -38,14 +38,33 @@
 
 llvm::Expected<std::vector<BenchmarkCode>>
 SnippetGenerator::generateConfigurations(const Instruction &Instr) const {
-  if (auto E = generateCodeTemplates(Instr)) {
-    const auto &RATC = State.getRATC();
+  llvm::BitVector ForbiddenRegs = State.getRATC().reservedRegisters();
+
+  // If the instruction has memory registers, prevent the generator from
+  // using the scratch register and its aliasing registers.
+  if (Instr.hasMemoryOperands()) {
+    const auto &ET = State.getExegesisTarget();
+    unsigned ScratchSpacePointerInReg =
+        ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
+    if (ScratchSpacePointerInReg == 0)
+      return llvm::make_error<BenchmarkFailure>(
+          "Infeasible : target does not support memory instructions");
+    const auto &ScratchRegAliases =
+        State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits();
+    // If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
+    // FIXME: We could make a copy of the scratch register.
+    for (const auto &Op : Instr.Operands) {
+      if (Op.isDef() && Op.isImplicitReg() &&
+          ScratchRegAliases.test(Op.getImplicitReg()))
+        return llvm::make_error<BenchmarkFailure>(
+            "Infeasible : memory instruction uses scratch memory register");
+    }
+    ForbiddenRegs |= ScratchRegAliases;
+  }
+
+  if (auto E = generateCodeTemplates(Instr, ForbiddenRegs)) {
     std::vector<BenchmarkCode> Output;
     for (CodeTemplate &CT : E.get()) {
-      const llvm::BitVector &ForbiddenRegs =
-          CT.ScratchSpacePointerInReg
-              ? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits()
-              : RATC.emptyRegisters();
       // TODO: Generate as many BenchmarkCode as needed.
       {
         BenchmarkCode BC;
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
index ef44bd9..966da2a 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
@@ -69,7 +69,8 @@
 private:
   // API to be implemented by subclasses.
   virtual llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const = 0;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const = 0;
 };
 
 // A global Random Number Generator to randomize configurations.
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.cpp b/llvm/tools/llvm-exegesis/lib/Uops.cpp
index 8fc8fd2..6823ec8 100644
--- a/llvm/tools/llvm-exegesis/lib/Uops.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Uops.cpp
@@ -126,7 +126,7 @@
 static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
     const LLVMState &State, const InstructionTemplate &IT,
     const ArrayRef<const Variable *> TiedVariables,
-    const BitVector *ScratchSpaceAliasedRegs) {
+    const BitVector &ForbiddenRegisters) {
   std::vector<InstructionTemplate> Instructions;
   // Assign registers to variables in a round-robin manner. This is simple but
   // ensures that the most register-constrained variable does not get starved.
@@ -135,12 +135,8 @@
     assert(Var);
     const Operand &Op = IT.Instr.getPrimaryOperand(*Var);
     assert(Op.isReg());
-    BitVector PossibleRegs = State.getRATC().emptyRegisters();
-    if (ScratchSpaceAliasedRegs) {
-      PossibleRegs |= *ScratchSpaceAliasedRegs;
-    }
-    PossibleRegs.flip();
-    PossibleRegs &= Op.getRegisterAliasing().sourceBits();
+    BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits();
+    remove(PossibleRegs, ForbiddenRegisters);
     PossibleRegsForVar.push_back(std::move(PossibleRegs));
   }
   SmallVector<int, 2> Iterators(TiedVariables.size(), 0);
@@ -168,28 +164,14 @@
 }
 
 llvm::Expected<std::vector<CodeTemplate>>
-UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
+UopsSnippetGenerator::generateCodeTemplates(
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   CodeTemplate CT;
-  const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
-  if (Instr.hasMemoryOperands()) {
-    const auto &ET = State.getExegesisTarget();
-    CT.ScratchSpacePointerInReg =
-        ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
-    if (CT.ScratchSpacePointerInReg == 0)
-      return llvm::make_error<BenchmarkFailure>(
-          "Infeasible : target does not support memory instructions");
-    ScratchSpaceAliasedRegs =
-        &State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
-    // If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
-    // FIXME: We could make a copy of the scratch register.
-    for (const auto &Op : Instr.Operands) {
-      if (Op.isDef() && Op.isImplicitReg() &&
-          ScratchSpaceAliasedRegs->test(Op.getImplicitReg()))
-        return llvm::make_error<BenchmarkFailure>(
-            "Infeasible : memory instruction uses scratch memory register");
-    }
-  }
-
+  CT.ScratchSpacePointerInReg =
+      Instr.hasMemoryOperands()
+          ? State.getExegesisTarget().getScratchMemoryRegister(
+                State.getTargetMachine().getTargetTriple())
+          : 0;
   const AliasingConfigurations SelfAliasing(Instr, Instr);
   InstructionTemplate IT(Instr);
   if (SelfAliasing.empty()) {
@@ -208,20 +190,17 @@
   if (!TiedVariables.empty()) {
     CT.Info = "instruction has tied variables, using static renaming.";
     CT.Instructions = generateSnippetUsingStaticRenaming(
-        State, IT, TiedVariables, ScratchSpaceAliasedRegs);
+        State, IT, TiedVariables, ForbiddenRegisters);
     instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
     return getSingleton(std::move(CT));
   }
-  const auto &ReservedRegisters = State.getRATC().reservedRegisters();
   // No tied variables, we pick random values for defs.
   llvm::BitVector Defs(State.getRegInfo().getNumRegs());
   for (const auto &Op : Instr.Operands) {
     if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
       auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
-      remove(PossibleRegisters, ReservedRegisters);
-      // Do not use the scratch memory address register.
-      if (ScratchSpaceAliasedRegs)
-        remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
+      // Do not use forbidden registers.
+      remove(PossibleRegisters, ForbiddenRegisters);
       assert(PossibleRegisters.any() && "No register left to choose from");
       const auto RandomReg = randomBit(PossibleRegisters);
       Defs.set(RandomReg);
@@ -233,10 +212,7 @@
   for (const auto &Op : Instr.Operands) {
     if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
       auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
-      remove(PossibleRegisters, ReservedRegisters);
-      // Do not use the scratch memory address register.
-      if (ScratchSpaceAliasedRegs)
-        remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
+      remove(PossibleRegisters, ForbiddenRegisters);
       remove(PossibleRegisters, DefAliases);
       assert(PossibleRegisters.any() && "No register left to choose from");
       const auto RandomReg = randomBit(PossibleRegisters);
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.h b/llvm/tools/llvm-exegesis/lib/Uops.h
index 077c9a8..23caff2 100644
--- a/llvm/tools/llvm-exegesis/lib/Uops.h
+++ b/llvm/tools/llvm-exegesis/lib/Uops.h
@@ -26,7 +26,8 @@
   ~UopsSnippetGenerator() override;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 
   static constexpr const size_t kMinNumDifferentAddresses = 6;
 
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index cebcb1c..45e3611 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -182,19 +182,21 @@
   using LatencySnippetGenerator::LatencySnippetGenerator;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 };
 } // namespace
 
 llvm::Expected<std::vector<CodeTemplate>>
 X86LatencySnippetGenerator::generateCodeTemplates(
-    const Instruction &Instr) const {
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   if (auto E = IsInvalidOpcode(Instr))
     return std::move(E);
 
   switch (getX86FPFlags(Instr)) {
   case llvm::X86II::NotFP:
-    return LatencySnippetGenerator::generateCodeTemplates(Instr);
+    return LatencySnippetGenerator::generateCodeTemplates(Instr,
+                                                          ForbiddenRegisters);
   case llvm::X86II::ZeroArgFP:
   case llvm::X86II::OneArgFP:
   case llvm::X86II::SpecialFP:
@@ -219,19 +221,21 @@
   using UopsSnippetGenerator::UopsSnippetGenerator;
 
   llvm::Expected<std::vector<CodeTemplate>>
-  generateCodeTemplates(const Instruction &Instr) const override;
+  generateCodeTemplates(const Instruction &Instr,
+                        const BitVector &ForbiddenRegisters) const override;
 };
 } // namespace
 
 llvm::Expected<std::vector<CodeTemplate>>
 X86UopsSnippetGenerator::generateCodeTemplates(
-    const Instruction &Instr) const {
+    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
   if (auto E = IsInvalidOpcode(Instr))
     return std::move(E);
 
   switch (getX86FPFlags(Instr)) {
   case llvm::X86II::NotFP:
-    return UopsSnippetGenerator::generateCodeTemplates(Instr);
+    return UopsSnippetGenerator::generateCodeTemplates(Instr,
+                                                       ForbiddenRegisters);
   case llvm::X86II::ZeroArgFP:
   case llvm::X86II::OneArgFP:
   case llvm::X86II::SpecialFP: