[llvm-mca] Introduce a new base class for mca::Instruction, and change how read/write information is stored.

This patch introduces a new base class for Instruction named InstructionBase.
Class InstructionBase is responsible for tracking data dependencies with the
help of ReadState and WriteState objects.  Class Instruction now derives from
InstructionBase, and adds extra information related to the `InstrStage` as well
as the `RCUTokenID`.

ReadState and WriteState objects are no longer held through unique pointers.
This avoids extra heap allocations and pointer checks that weren't really
needed.  Now, those objects are simply stored by value in SmallVectors; for that
reason, the deleted copy constructors and copy assignment operators of both
classes are removed.  We use a SmallVector instead of a std::vector because we
expect most instructions to have only a very small number of reads and writes.
By using a SmallVector we also avoid extra heap allocations most of the time.
In a debug build, this improves the performance of llvm-mca by roughly 10% (I
still have to verify the performance impact on a release build).

llvm-svn: 345280
diff --git a/llvm/tools/llvm-mca/include/Instruction.h b/llvm/tools/llvm-mca/include/Instruction.h
index a1d1082..9d1c91a 100644
--- a/llvm/tools/llvm-mca/include/Instruction.h
+++ b/llvm/tools/llvm-mca/include/Instruction.h
@@ -16,7 +16,9 @@
 #ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H
 #define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MathExtras.h"
 
 #ifndef NDEBUG
@@ -134,8 +136,6 @@
       : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
         ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
         IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
-  WriteState(const WriteState &Other) = delete;
-  WriteState &operator=(const WriteState &Other) = delete;
 
   int getCyclesLeft() const { return CyclesLeft; }
   unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; }
@@ -205,8 +205,6 @@
       : RD(Desc), RegisterID(RegID), DependentWrites(0),
         CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
         IndependentFromDef(false) {}
-  ReadState(const ReadState &Other) = delete;
-  ReadState &operator=(const ReadState &Other) = delete;
 
   const ReadDescriptor &getDescriptor() const { return RD; }
   unsigned getSchedClass() const { return RD.SchedClassID; }
@@ -313,13 +311,59 @@
   InstrDesc &operator=(const InstrDesc &Other) = delete;
 };
 
+/// Base class for instructions consumed by the simulation pipeline.
+///
+/// This class tracks data dependencies as well as generic properties
+/// of the instruction.
+class InstructionBase {
+  const InstrDesc &Desc;
+
+  // This field is set for instructions that are candidates for move
+  // elimination. For more information about move elimination, see the
+  // definition of RegisterMappingTracker in RegisterFile.h
+  bool IsOptimizableMove;
+
+  // Output dependencies.
+  // One entry per each implicit and explicit register definition.
+  llvm::SmallVector<WriteState, 4> Defs;
+
+  // Input dependencies.
+  // One entry per each implicit and explicit register use.
+  llvm::SmallVector<ReadState, 4> Uses;
+
+public:
+  InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
+
+  llvm::SmallVectorImpl<WriteState> &getDefs() { return Defs; }
+  const llvm::ArrayRef<WriteState> getDefs() const { return Defs; }
+  llvm::SmallVectorImpl<ReadState> &getUses() { return Uses; }
+  const llvm::ArrayRef<ReadState> getUses() const { return Uses; }
+  const InstrDesc &getDesc() const { return Desc; }
+
+  unsigned getLatency() const { return Desc.MaxLatency; }
+
+  bool hasDependentUsers() const {
+    return llvm::any_of(
+        Defs, [](const WriteState &Def) { return Def.getNumUsers() > 0; });
+  }
+
+  unsigned getNumUsers() const {
+    unsigned NumUsers = 0;
+    for (const WriteState &Def : Defs)
+      NumUsers += Def.getNumUsers();
+    return NumUsers;
+  }
+
+  // Returns true if this instruction is a candidate for move elimination.
+  bool isOptimizableMove() const { return IsOptimizableMove; }
+  void setOptimizableMove() { IsOptimizableMove = true; }
+};
+
 /// An instruction propagated through the simulated instruction pipeline.
 ///
 /// This class is used to monitor changes to the internal state of instructions
 /// that are sent to the various components of the simulated hardware pipeline.
-class Instruction {
-  const InstrDesc &Desc;
-
+class Instruction : public InstructionBase {
   enum InstrStage {
     IS_INVALID,   // Instruction in an invalid state.
     IS_AVAILABLE, // Instruction dispatched but operands are not ready.
@@ -339,51 +383,16 @@
   // Retire Unit token ID for this instruction.
   unsigned RCUTokenID;
 
-  // This field is set for instructions that are candidates for move
-  // elimination. For more information about move elimination, see the
-  // definition of RegisterMappingTracker in RegisterFile.h
-  bool IsOptimizableMove;
-
-  using UniqueDef = std::unique_ptr<WriteState>;
-  using UniqueUse = std::unique_ptr<ReadState>;
-  using VecDefs = std::vector<UniqueDef>;
-  using VecUses = std::vector<UniqueUse>;
-
-  // Output dependencies.
-  // One entry per each implicit and explicit register definition.
-  VecDefs Defs;
-
-  // Input dependencies.
-  // One entry per each implicit and explicit register use.
-  VecUses Uses;
-
 public:
   Instruction(const InstrDesc &D)
-      : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0),
-        IsOptimizableMove(false) {}
+      : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
+        RCUTokenID(0) {}
   Instruction(const Instruction &Other) = delete;
   Instruction &operator=(const Instruction &Other) = delete;
 
-  VecDefs &getDefs() { return Defs; }
-  const VecDefs &getDefs() const { return Defs; }
-  VecUses &getUses() { return Uses; }
-  const VecUses &getUses() const { return Uses; }
-  const InstrDesc &getDesc() const { return Desc; }
   unsigned getRCUTokenID() const { return RCUTokenID; }
   int getCyclesLeft() const { return CyclesLeft; }
 
-  bool hasDependentUsers() const {
-    return llvm::any_of(
-        Defs, [](const UniqueDef &Def) { return Def->getNumUsers() > 0; });
-  }
-
-  unsigned getNumUsers() const {
-    unsigned NumUsers = 0;
-    for (const UniqueDef &Def : Defs)
-      NumUsers += Def->getNumUsers();
-    return NumUsers;
-  }
-
   // Transition to the dispatch stage, and assign a RCUToken to this
   // instruction. The RCUToken is used to track the completion of every
   // register write performed by this instruction.
@@ -407,13 +416,10 @@
   bool isExecuted() const { return Stage == IS_EXECUTED; }
   bool isRetired() const { return Stage == IS_RETIRED; }
 
-  // Returns true if this instruction is a candidate for move elimination.
-  bool isOptimizableMove() const { return IsOptimizableMove; }
-  void setOptimizableMove() { IsOptimizableMove = true; }
   bool isEliminated() const {
-    return isReady() && Defs.size() &&
-           llvm::all_of(Defs,
-                        [](const UniqueDef &D) { return D->isEliminated(); });
+    return isReady() && getDefs().size() &&
+           llvm::all_of(getDefs(),
+                        [](const WriteState &W) { return W.isEliminated(); });
   }
 
   // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED.
diff --git a/llvm/tools/llvm-mca/lib/InstrBuilder.cpp b/llvm/tools/llvm-mca/lib/InstrBuilder.cpp
index 3768c2e..3704eaf 100644
--- a/llvm/tools/llvm-mca/lib/InstrBuilder.cpp
+++ b/llvm/tools/llvm-mca/lib/InstrBuilder.cpp
@@ -482,14 +482,15 @@
 
     // Okay, this is a register operand. Create a ReadState for it.
     assert(RegID > 0 && "Invalid register ID found!");
-    auto RS = llvm::make_unique<ReadState>(RD, RegID);
+    NewIS->getUses().emplace_back(RD, RegID);
+    ReadState &RS = NewIS->getUses().back();
 
     if (IsDepBreaking) {
       // A mask of all zeroes means: explicit input operands are not
       // independent.
       if (Mask.isNullValue()) {
         if (!RD.isImplicitRead())
-          RS->setIndependentFromDef();
+          RS.setIndependentFromDef();
       } else {
         // Check if this register operand is independent according to `Mask`.
         // Note that Mask may not have enough bits to describe all explicit and
@@ -499,11 +500,10 @@
         if (Mask.getBitWidth() > RD.UseIndex) {
           // Okay. This map describe register use `RD.UseIndex`.
           if (Mask[RD.UseIndex])
-            RS->setIndependentFromDef();
+            RS.setIndependentFromDef();
         }
       }
     }
-    NewIS->getUses().emplace_back(std::move(RS));
   }
 
   // Early exit if there are no writes.
@@ -530,9 +530,9 @@
     }
 
     assert(RegID && "Expected a valid register ID!");
-    NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(
+    NewIS->getDefs().emplace_back(
         WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex],
-        /* WritesZero */ IsZeroIdiom));
+        /* WritesZero */ IsZeroIdiom);
     ++WriteIndex;
   }
 
diff --git a/llvm/tools/llvm-mca/lib/Instruction.cpp b/llvm/tools/llvm-mca/lib/Instruction.cpp
index 511e7b2..12b6e18 100644
--- a/llvm/tools/llvm-mca/lib/Instruction.cpp
+++ b/llvm/tools/llvm-mca/lib/Instruction.cpp
@@ -120,10 +120,10 @@
   Stage = IS_EXECUTING;
 
   // Set the cycles left before the write-back stage.
-  CyclesLeft = Desc.MaxLatency;
+  CyclesLeft = getLatency();
 
-  for (UniqueDef &Def : Defs)
-    Def->onInstructionIssued();
+  for (WriteState &WS : getDefs())
+    WS.onInstructionIssued();
 
   // Transition to the "executed" stage if this is a zero-latency instruction.
   if (!CyclesLeft)
@@ -139,21 +139,21 @@
 void Instruction::update() {
   assert(isDispatched() && "Unexpected instruction stage found!");
 
-  if (!all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); }))
+  if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); }))
     return;
 
   // A partial register write cannot complete before a dependent write.
-  auto IsDefReady = [&](const UniqueDef &Def) {
-    if (const WriteState *Write = Def->getDependentWrite()) {
+  auto IsDefReady = [&](const WriteState &Def) {
+    if (const WriteState *Write = Def.getDependentWrite()) {
       int WriteLatency = Write->getCyclesLeft();
       if (WriteLatency == UNKNOWN_CYCLES)
         return false;
-      return static_cast<unsigned>(WriteLatency) < Desc.MaxLatency;
+      return static_cast<unsigned>(WriteLatency) < getLatency();
     }
     return true;
   };
 
-  if (all_of(Defs, IsDefReady))
+  if (all_of(getDefs(), IsDefReady))
     Stage = IS_READY;
 }
 
@@ -162,8 +162,8 @@
     return;
 
   if (isDispatched()) {
-    for (UniqueUse &Use : Uses)
-      Use->cycleEvent();
+    for (ReadState &Use : getUses())
+      Use.cycleEvent();
 
     update();
     return;
@@ -171,8 +171,8 @@
 
   assert(isExecuting() && "Instruction not in-flight?");
   assert(CyclesLeft && "Instruction already executed?");
-  for (UniqueDef &Def : Defs)
-    Def->cycleEvent();
+  for (WriteState &Def : getDefs())
+    Def.cycleEvent();
   CyclesLeft--;
   if (!CyclesLeft)
     Stage = IS_EXECUTED;
diff --git a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp
index 653f39b..0246151 100644
--- a/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp
+++ b/llvm/tools/llvm-mca/lib/Stages/DispatchStage.cpp
@@ -37,9 +37,8 @@
 
 bool DispatchStage::checkPRF(const InstRef &IR) const {
   SmallVector<unsigned, 4> RegDefs;
-  for (const std::unique_ptr<WriteState> &RegDef :
-       IR.getInstruction()->getDefs())
-    RegDefs.emplace_back(RegDef->getRegisterID());
+  for (const WriteState &RegDef : IR.getInstruction()->getDefs())
+    RegDefs.emplace_back(RegDef.getRegisterID());
 
   const unsigned RegisterMask = PRF.isAvailable(RegDefs);
   // A mask with all zeroes means: register files are available.
@@ -105,7 +104,7 @@
   if (IS.isOptimizableMove()) {
     assert(IS.getDefs().size() == 1 && "Expected a single input!");
     assert(IS.getUses().size() == 1 && "Expected a single output!");
-    IsEliminated = PRF.tryEliminateMove(*IS.getDefs()[0], *IS.getUses()[0]);
+    IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]);
   }
 
   // A dependency-breaking instruction doesn't have to wait on the register
@@ -118,9 +117,9 @@
   // We also don't update data dependencies for instructions that have been
   // eliminated at register renaming stage.
   if (!IsEliminated) {
-    for (std::unique_ptr<ReadState> &RS : IS.getUses()) {
-      if (!RS->isIndependentFromDef())
-        updateRAWDependencies(*RS, STI);
+    for (ReadState &RS : IS.getUses()) {
+      if (!RS.isIndependentFromDef())
+        updateRAWDependencies(RS, STI);
     }
   }
 
@@ -128,8 +127,8 @@
   // at register renaming stage. That means, no physical register is allocated
   // to the instruction.
   SmallVector<unsigned, 4> RegisterFiles(PRF.getNumRegisterFiles());
-  for (std::unique_ptr<WriteState> &WS : IS.getDefs())
-    PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), WS.get()),
+  for (WriteState &WS : IS.getDefs())
+    PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS),
                          RegisterFiles);
 
   // Reserve slots in the RCU, and notify the instruction that it has been
diff --git a/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp b/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp
index 3c923e4..8297c9c 100644
--- a/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp
+++ b/llvm/tools/llvm-mca/lib/Stages/RetireStage.cpp
@@ -52,8 +52,8 @@
   llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
   const Instruction &Inst = *IR.getInstruction();
 
-  for (const std::unique_ptr<WriteState> &WS : Inst.getDefs())
-    PRF.removeRegisterWrite(*WS.get(), FreedRegs);
+  for (const WriteState &WS : Inst.getDefs())
+    PRF.removeRegisterWrite(WS, FreedRegs);
   notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
 }