[llvm-mca] Move llvm-mca library to llvm/lib/MCA.

Summary: See PR38731.

Reviewers: andreadb

Subscribers: mgorny, javed.absar, tschuett, gbedwell, andreadb, RKSimon, llvm-commits

Differential Revision: https://reviews.llvm.org/D55557

llvm-svn: 349332
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
new file mode 100644
index 0000000..5e228a2
--- /dev/null
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -0,0 +1,675 @@
+//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstrBuilder interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
+                           const llvm::MCInstrInfo &mcii,
+                           const llvm::MCRegisterInfo &mri,
+                           const llvm::MCInstrAnalysis &mcia)
+    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+      FirstReturnInst(true) {
+  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
+}
+
+static void initializeUsedResources(InstrDesc &ID,
+                                    const MCSchedClassDesc &SCDesc,
+                                    const MCSubtargetInfo &STI,
+                                    ArrayRef<uint64_t> ProcResourceMasks) {
+  const MCSchedModel &SM = STI.getSchedModel();
+
+  // Populate resources consumed.
+  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
+  std::vector<ResourcePlusCycles> Worklist;
+
+  // Track cycles contributed by resources that are in a "Super" relationship.
+  // This is required if we want to correctly match the behavior of method
+// SubtargetEmitter::ExpandProcResource() in TableGen. When computing the set
+  // of "consumed" processor resources and resource cycles, the logic in
+  // ExpandProcResource() doesn't update the number of resource cycles
+  // contributed by a "Super" resource to a group.
+  // We need to take this into account when we find that a processor resource is
+  // part of a group, and it is also used as the "Super" of other resources.
+  // This map stores the number of cycles contributed by sub-resources that are
+  // part of a "Super" resource. The key value is the "Super" resource mask ID.
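+  // For example (hypothetical names): if sub-resource P0Div declares unit P0
+  // as its "Super", the cycles written for P0Div are also recorded here under
+  // P0's mask. Later on, when P0's cycles are subtracted from a group that
+  // contains P0, those super-contributed cycles are excluded, matching what
+  // ExpandProcResource() emits.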
+  DenseMap<uint64_t, unsigned> SuperResources;
+
+  unsigned NumProcResources = SM.getNumProcResourceKinds();
+  APInt Buffers(NumProcResources, 0);
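+  // One bit per processor resource kind: bit I is set if resource I is
+  // buffered (BufferSize != -1) and consumed, directly or through a super
+  // resource, by this scheduling class.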
+
+  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
+    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
+    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
+    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
+    if (PR.BufferSize != -1)
+      Buffers.setBit(PRE->ProcResourceIdx);
+    CycleSegment RCy(0, PRE->Cycles, false);
+    Worklist.emplace_back(Mask, ResourceUsage(RCy));
+    if (PR.SuperIdx) {
+      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
+      SuperResources[Super] += PRE->Cycles;
+    }
+  }
+
+  // Sort elements by mask popcount, so that we prioritize resource units over
+  // resource groups, and smaller groups over larger groups.
+  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
+    unsigned popcntA = countPopulation(A.first);
+    unsigned popcntB = countPopulation(B.first);
+    if (popcntA < popcntB)
+      return true;
+    if (popcntA > popcntB)
+      return false;
+    return A.first < B.first;
+  });
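+  // For example (hypothetical masks): with units P0=0b001 and P1=0b010, and
+  // group P01=0b111 (its own bit plus the bits of its members), the two units
+  // sort before the group because their masks have a smaller popcount.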
+
+  uint64_t UsedResourceUnits = 0;
+
+  // Remove cycles contributed by smaller resources.
+  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
+    ResourcePlusCycles &A = Worklist[I];
+    if (!A.second.size()) {
+      A.second.NumUnits = 0;
+      A.second.setReserved();
+      ID.Resources.emplace_back(A);
+      continue;
+    }
+
+    ID.Resources.emplace_back(A);
+    uint64_t NormalizedMask = A.first;
+    if (countPopulation(A.first) == 1) {
+      UsedResourceUnits |= A.first;
+    } else {
+      // Remove the leading 1 from the resource group mask.
+      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+    }
+
+    for (unsigned J = I + 1; J < E; ++J) {
+      ResourcePlusCycles &B = Worklist[J];
+      if ((NormalizedMask & B.first) == NormalizedMask) {
+        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
+        if (countPopulation(B.first) > 1)
+          B.second.NumUnits++;
+      }
+    }
+  }
+
+  // A SchedWrite may specify a number of cycles in which a resource group
+  // is reserved. For example (on target x86; cpu Haswell):
+  //
+  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
+  //    let ResourceCycles = [2, 2, 3];
+  //  }
+  //
+  // This means:
+  // Resource units HWPort0 and HWPort1 are both used for 2cy.
+  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
+  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
+  // will not be usable for 2 entire cycles from instruction issue.
+  //
+  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
+  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
+  // extra delay on top of the 2-cycle latency.
+  // During those extra cycles, HWPort01 is not usable by other instructions.
+  for (ResourcePlusCycles &RPC : ID.Resources) {
+    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
+      // Remove the leading 1 from the resource group mask.
+      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
+      if ((Mask & UsedResourceUnits) == Mask)
+        RPC.second.setReserved();
+    }
+  }
+
+  // Identify extra buffers that are consumed through super resources.
+  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
+    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
+      const MCProcResourceDesc &PR = *SM.getProcResource(I);
+      if (PR.BufferSize == -1)
+        continue;
+
+      uint64_t Mask = ProcResourceMasks[I];
+      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
+        Buffers.setBit(I);
+    }
+  }
+
+  // Now set the buffers.
+  if (unsigned NumBuffers = Buffers.countPopulation()) {
+    ID.Buffers.resize(NumBuffers);
+    for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
+      if (Buffers[I]) {
+        --NumBuffers;
+        ID.Buffers[NumBuffers] = ProcResourceMasks[I];
+      }
+    }
+  }
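+  // Note that the loop above fills ID.Buffers back-to-front, so the buffer
+  // masks end up ordered by decreasing processor resource index.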
+
+  LLVM_DEBUG({
+    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
+      dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
+    for (const uint64_t R : ID.Buffers)
+      dbgs() << "\t\tBuffer Mask=" << R << '\n';
+  });
+}
+
+static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
+                              const MCSchedClassDesc &SCDesc,
+                              const MCSubtargetInfo &STI) {
+  if (MCDesc.isCall()) {
+    // We cannot estimate how long this call will take.
+    // Artificially set an arbitrarily high latency (100cy).
+    ID.MaxLatency = 100U;
+    return;
+  }
+
+  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
+  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
+}
+
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+  // Count register definitions, and skip non register operands in the process.
+  unsigned I, E;
+  unsigned NumExplicitDefs = MCDesc.getNumDefs();
+  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+    const MCOperand &Op = MCI.getOperand(I);
+    if (Op.isReg())
+      --NumExplicitDefs;
+  }
+
+  if (NumExplicitDefs) {
+    return make_error<InstructionError<MCInst>>(
+        "Expected more register operand definitions.", MCI);
+  }
+
+  if (MCDesc.hasOptionalDef()) {
+    // Always assume that the optional definition is the last operand.
+    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+    if (I == MCI.getNumOperands() || !Op.isReg()) {
+      std::string Message =
+          "expected a register operand for an optional definition. Instruction "
+          "has not been correctly analyzed.";
+      return make_error<InstructionError<MCInst>>(Message, MCI);
+    }
+  }
+
+  return ErrorSuccess();
+}
+
+void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
+                                  unsigned SchedClassID) {
+  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+  const MCSchedModel &SM = STI.getSchedModel();
+  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+
+  // Assumptions made by this algorithm:
+  //  1. The number of explicit and implicit register definitions in a MCInst
+  //     matches the number of explicit and implicit definitions according to
+  //     the opcode descriptor (MCInstrDesc).
+  //  2. Uses start at index #(MCDesc.getNumDefs()).
+  //  3. There can only be a single optional register definition, and it is
+  //     always the last operand of the sequence (excluding extra operands
+  //     contributed by variadic opcodes).
+  //
+  // These assumptions work quite well for most out-of-order in-tree targets
+  // like x86. This is mainly because the vast majority of instructions are
+  // expanded to MCInst using straightforward lowering logic that preserves
+  // the ordering of the operands.
+  //
+  // About assumption 1.
+  // The algorithm allows non-register operands between register operand
+  // definitions. This helps to handle some special ARM instructions with
+  // implicit operand increment (-mtriple=armv7):
+  //
+  // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
+  //                             @  <MCOperand Reg:59>
+  //                             @  <MCOperand Imm:0>     (!!)
+  //                             @  <MCOperand Reg:67>
+  //                             @  <MCOperand Imm:0>
+  //                             @  <MCOperand Imm:14>
+  //                             @  <MCOperand Reg:0>>
+  //
+  // MCDesc reports:
+  //  6 explicit operands.
+  //  1 optional definition
+  //  2 explicit definitions (!!)
+  //
+  // The presence of an 'Imm' operand between the two register definitions
+  // breaks the assumption that "register definitions are always at the
+  // beginning of the operand sequence".
+  //
+  // To work around this issue, this algorithm ignores (i.e. skips) any
+  // non-register operands between register definitions.  The optional
+  // definition is still at index #(NumOperands-1).
+  //
+  // According to assumption 2, register reads start at index #(NumExplicitDefs-1).
+  // That means register R1 from the example is both read and written.
+  unsigned NumExplicitDefs = MCDesc.getNumDefs();
+  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
+  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
+  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
+  if (MCDesc.hasOptionalDef())
+    TotalDefs++;
+
+  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+  ID.Writes.resize(TotalDefs + NumVariadicOps);
+  // Iterate over the operands list, and skip non-register operands.
+  // The first NumExplicitDefs register operands are expected to be register
+  // definitions.
+  unsigned CurrentDef = 0;
+  unsigned i = 0;
+  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
+    const MCOperand &Op = MCI.getOperand(i);
+    if (!Op.isReg())
+      continue;
+
+    WriteDescriptor &Write = ID.Writes[CurrentDef];
+    Write.OpIndex = i;
+    if (CurrentDef < NumWriteLatencyEntries) {
+      const MCWriteLatencyEntry &WLE =
+          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
+      // Conservatively default to MaxLatency.
+      Write.Latency =
+          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+    } else {
+      // Assign a default latency for this write.
+      Write.Latency = ID.MaxLatency;
+      Write.SClassOrWriteResourceID = 0;
+    }
+    Write.IsOptionalDef = false;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+    CurrentDef++;
+  }
+
+  assert(CurrentDef == NumExplicitDefs &&
+         "Expected more register operand definitions.");
+  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
+    unsigned Index = NumExplicitDefs + CurrentDef;
+    WriteDescriptor &Write = ID.Writes[Index];
+    Write.OpIndex = ~CurrentDef;
+    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
+    if (Index < NumWriteLatencyEntries) {
+      const MCWriteLatencyEntry &WLE =
+          *STI.getWriteLatencyEntry(&SCDesc, Index);
+      // Conservatively default to MaxLatency.
+      Write.Latency =
+          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+    } else {
+      // Assign a default latency for this write.
+      Write.Latency = ID.MaxLatency;
+      Write.SClassOrWriteResourceID = 0;
+    }
+
+    Write.IsOptionalDef = false;
+    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
+             << ", PhysReg=" << MRI.getName(Write.RegisterID)
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  if (MCDesc.hasOptionalDef()) {
+    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+    Write.OpIndex = MCDesc.getNumOperands() - 1;
+    // Assign a default latency for this write.
+    Write.Latency = ID.MaxLatency;
+    Write.SClassOrWriteResourceID = 0;
+    Write.IsOptionalDef = true;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  if (!NumVariadicOps)
+    return;
+
+  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
+  // 'unmodeledSideEffects', then this logic optimistically assumes that any
+  // extra register operands in the variadic sequence are not register
+  // definitions.
+  //
+  // Otherwise, we conservatively assume that any register operand from the
+  // variadic sequence is both a register read and a register write.
+  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+                        !MCDesc.hasUnmodeledSideEffects();
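+  // A typical example is a store-multiple instruction (e.g. ARM's STM),
+  // where every variadic register operand is a source.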
+  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    WriteDescriptor &Write = ID.Writes[CurrentDef];
+    Write.OpIndex = OpIndex;
+    // Assign a default latency for this write.
+    Write.Latency = ID.MaxLatency;
+    Write.SClassOrWriteResourceID = 0;
+    Write.IsOptionalDef = false;
+    ++CurrentDef;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  ID.Writes.resize(CurrentDef);
+}
+
+void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
+                                 unsigned SchedClassID) {
+  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
+  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
+  // Remove the optional definition.
+  if (MCDesc.hasOptionalDef())
+    --NumExplicitUses;
+  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
+  ID.Reads.resize(TotalUses);
+  unsigned CurrentUse = 0;
+  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+       ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    ReadDescriptor &Read = ID.Reads[CurrentUse];
+    Read.OpIndex = OpIndex;
+    Read.UseIndex = I;
+    Read.SchedClassID = SchedClassID;
+    ++CurrentUse;
+    LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << '\n');
+  }
+
+  // For the purpose of ReadAdvance, implicit uses come directly after explicit
+  // uses. The "UseIndex" must be updated according to that implicit layout.
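+  // For example, an instruction with two explicit register uses and one
+  // implicit use gets UseIndex values 0 and 1 for the explicit uses, and
+  // UseIndex 2 for the implicit use.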
+  for (unsigned I = 0; I < NumImplicitUses; ++I) {
+    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
+    Read.OpIndex = ~I;
+    Read.UseIndex = NumExplicitUses + I;
+    Read.RegisterID = MCDesc.getImplicitUses()[I];
+    Read.SchedClassID = SchedClassID;
+    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
+                      << MRI.getName(Read.RegisterID) << '\n');
+  }
+
+  CurrentUse += NumImplicitUses;
+
+  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
+  // 'unmodeledSideEffects', then this logic optimistically assumes that any
+  // extra register operands in the variadic sequence are register
+  // definitions, and not register reads.
+  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+                        !MCDesc.hasUnmodeledSideEffects();
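+  // A typical example is a load-multiple instruction (e.g. ARM's LDM),
+  // where every variadic register operand is a definition.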
+  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    ReadDescriptor &Read = ID.Reads[CurrentUse];
+    Read.OpIndex = OpIndex;
+    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
+    Read.SchedClassID = SchedClassID;
+    ++CurrentUse;
+    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << '\n');
+  }
+
+  ID.Reads.resize(CurrentUse);
+}
+
+Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
+                                    const MCInst &MCI) const {
+  if (ID.NumMicroOps != 0)
+    return ErrorSuccess();
+
+  bool UsesMemory = ID.MayLoad || ID.MayStore;
+  bool UsesBuffers = !ID.Buffers.empty();
+  bool UsesResources = !ID.Resources.empty();
+  if (!UsesMemory && !UsesBuffers && !UsesResources)
+    return ErrorSuccess();
+
+  StringRef Message;
+  if (UsesMemory) {
+    Message = "found an inconsistent instruction that decodes "
+              "into zero opcodes and that consumes load/store "
+              "unit resources.";
+  } else {
+    Message = "found an inconsistent instruction that decodes "
+              "to zero opcodes and that consumes scheduler "
+              "resources.";
+  }
+
+  return make_error<InstructionError<MCInst>>(Message, MCI);
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
+  assert(STI.getSchedModel().hasInstrSchedModel() &&
+         "Itineraries are not yet supported!");
+
+  // Obtain the instruction descriptor from the opcode.
+  unsigned short Opcode = MCI.getOpcode();
+  const MCInstrDesc &MCDesc = MCII.get(Opcode);
+  const MCSchedModel &SM = STI.getSchedModel();
+
+  // Then obtain the scheduling class information from the instruction.
+  unsigned SchedClassID = MCDesc.getSchedClass();
+  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
+
+  // Try to solve variant scheduling classes.
+  if (IsVariant) {
+    unsigned CPUID = SM.getProcessorID();
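+    // A variant class may resolve to another variant class, so keep
+    // resolving until we reach a non-variant class (or fail with ID zero).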
+    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
+
+    if (!SchedClassID) {
+      return make_error<InstructionError<MCInst>>(
+          "unable to resolve scheduling class for write variant.", MCI);
+    }
+  }
+
+  // Check if this instruction is supported. Otherwise, report an error.
+  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
+    return make_error<InstructionError<MCInst>>(
+        "found an unsupported instruction in the input assembly sequence.",
+        MCI);
+  }
+
+  // Create a new empty descriptor.
+  std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
+  ID->NumMicroOps = SCDesc.NumMicroOps;
+
+  if (MCDesc.isCall() && FirstCallInst) {
+    // We don't correctly model calls.
+    WithColor::warning() << "found a call in the input assembly sequence.\n";
+    WithColor::note() << "call instructions are not correctly modeled. "
+                      << "Assume a latency of 100cy.\n";
+    FirstCallInst = false;
+  }
+
+  if (MCDesc.isReturn() && FirstReturnInst) {
+    WithColor::warning() << "found a return instruction in the input"
+                         << " assembly sequence.\n";
+    WithColor::note() << "program counter updates are ignored.\n";
+    FirstReturnInst = false;
+  }
+
+  ID->MayLoad = MCDesc.mayLoad();
+  ID->MayStore = MCDesc.mayStore();
+  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
+
+  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
+  computeMaxLatency(*ID, MCDesc, SCDesc, STI);
+
+  if (Error Err = verifyOperands(MCDesc, MCI))
+    return std::move(Err);
+
+  populateWrites(*ID, MCI, SchedClassID);
+  populateReads(*ID, MCI, SchedClassID);
+
+  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
+  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
+
+  // Sanity check on the instruction descriptor.
+  if (Error Err = verifyInstrDesc(*ID, MCI))
+    return std::move(Err);
+
+  // Now add the new descriptor.
+  SchedClassID = MCDesc.getSchedClass();
+  bool IsVariadic = MCDesc.isVariadic();
+  if (!IsVariadic && !IsVariant) {
+    Descriptors[MCI.getOpcode()] = std::move(ID);
+    return *Descriptors[MCI.getOpcode()];
+  }
+
+  VariantDescriptors[&MCI] = std::move(ID);
+  return *VariantDescriptors[&MCI];
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
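+  // Non-variant, non-variadic opcodes are cached by opcode in Descriptors;
+  // variant and variadic instructions get a per-MCInst descriptor instead
+  // (see the bottom of createInstrDescImpl).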
+  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
+    return *Descriptors[MCI.getOpcode()];
+
+  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
+    return *VariantDescriptors[&MCI];
+
+  return createInstrDescImpl(MCI);
+}
+
+Expected<std::unique_ptr<Instruction>>
+InstrBuilder::createInstruction(const MCInst &MCI) {
+  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
+  if (!DescOrErr)
+    return DescOrErr.takeError();
+  const InstrDesc &D = *DescOrErr;
+  std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
+
+  // Check if this is a dependency breaking instruction.
+  APInt Mask;
+
+  unsigned ProcID = STI.getSchedModel().getProcessorID();
+  bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID);
+  bool IsDepBreaking =
+      IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID);
+  if (MCIA.isOptimizableRegisterMove(MCI, ProcID))
+    NewIS->setOptimizableMove();
+
+  // Initialize Reads first.
+  for (const ReadDescriptor &RD : D.Reads) {
+    int RegID = -1;
+    if (!RD.isImplicitRead()) {
+      // Explicit read.
+      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
+      // Skip non-register operands.
+      if (!Op.isReg())
+        continue;
+      RegID = Op.getReg();
+    } else {
+      // Implicit read.
+      RegID = RD.RegisterID;
+    }
+
+    // Skip invalid register operands.
+    if (!RegID)
+      continue;
+
+    // Okay, this is a register operand. Create a ReadState for it.
+    assert(RegID > 0 && "Invalid register ID found!");
+    NewIS->getUses().emplace_back(RD, RegID);
+    ReadState &RS = NewIS->getUses().back();
+
+    if (IsDepBreaking) {
+      // A mask of all zeroes means that dependencies are broken for all
+      // the explicit input register operands.
+      if (Mask.isNullValue()) {
+        if (!RD.isImplicitRead())
+          RS.setIndependentFromDef();
+      } else {
+        // Check if this register operand is independent according to `Mask`.
+        // Note that Mask may not have enough bits to describe all explicit and
+        // implicit input operands. If this register operand doesn't have a
+        // corresponding bit in Mask, then conservatively assume that it is
+        // dependent.
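+        // For example, a (hypothetical) Mask value of 0b10 marks only the
+        // use with UseIndex 1 as independent from previous writes.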
+        if (Mask.getBitWidth() > RD.UseIndex) {
+          // Okay. This mask describes register use `RD.UseIndex`.
+          if (Mask[RD.UseIndex])
+            RS.setIndependentFromDef();
+        }
+      }
+    }
+  }
+
+  // Early exit if there are no writes.
+  if (D.Writes.empty())
+    return std::move(NewIS);
+
+  // Track register writes that implicitly clear the upper portion of the
+  // underlying super-registers using an APInt.
+  APInt WriteMask(D.Writes.size(), 0);
+
+  // Now query the MCInstrAnalysis object to obtain information about which
+  // register writes implicitly clear the upper portion of a super-register.
+  MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
+
+  // Initialize writes.
+  unsigned WriteIndex = 0;
+  for (const WriteDescriptor &WD : D.Writes) {
+    unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
+                                          : MCI.getOperand(WD.OpIndex).getReg();
+    // Check if this is an optional definition that references NoReg.
+    if (WD.IsOptionalDef && !RegID) {
+      ++WriteIndex;
+      continue;
+    }
+
+    assert(RegID && "Expected a valid register ID!");
+    NewIS->getDefs().emplace_back(WD, RegID,
+                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
+                                  /* WritesZero */ IsZeroIdiom);
+    ++WriteIndex;
+  }
+
+  return std::move(NewIS);
+}
+} // namespace mca
+} // namespace llvm