[llvm-mca] Move llvm-mca library to llvm/lib/MCA.
Summary: See PR38731.
Reviewers: andreadb
Subscribers: mgorny, javed.absar, tschuett, gbedwell, andreadb, RKSimon, llvm-commits
Differential Revision: https://reviews.llvm.org/D55557
llvm-svn: 349332
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
new file mode 100644
index 0000000..5e228a2
--- /dev/null
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -0,0 +1,675 @@
+//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstrBuilder interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
+ const llvm::MCInstrInfo &mcii,
+ const llvm::MCRegisterInfo &mri,
+ const llvm::MCInstrAnalysis &mcia)
+ : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+ FirstReturnInst(true) {
+ computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
+}
+
+static void initializeUsedResources(InstrDesc &ID,
+ const MCSchedClassDesc &SCDesc,
+ const MCSubtargetInfo &STI,
+ ArrayRef<uint64_t> ProcResourceMasks) {
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Populate resources consumed.
+ using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
+ std::vector<ResourcePlusCycles> Worklist;
+
+ // Track cycles contributed by resources that are in a "Super" relationship.
+ // This is required if we want to correctly match the behavior of method
+  // SubtargetEmitter::ExpandProcResource() in TableGen. When computing the set
+ // of "consumed" processor resources and resource cycles, the logic in
+ // ExpandProcResource() doesn't update the number of resource cycles
+ // contributed by a "Super" resource to a group.
+ // We need to take this into account when we find that a processor resource is
+ // part of a group, and it is also used as the "Super" of other resources.
+ // This map stores the number of cycles contributed by sub-resources that are
+ // part of a "Super" resource. The key value is the "Super" resource mask ID.
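+  //
+  // For example (hypothetical resources): if unit U names S as its "Super"
+  // resource, the cycles consumed on U are recorded against S's mask in this
+  // map. Later, when S's cycles are subtracted from a group that contains S,
+  // the cycles already accounted for through U are excluded from the
+  // subtraction (see the uses of SuperResources below).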
+ DenseMap<uint64_t, unsigned> SuperResources;
+
+ unsigned NumProcResources = SM.getNumProcResourceKinds();
+ APInt Buffers(NumProcResources, 0);
+
+ for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
+ const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
+ const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
+ uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
+ if (PR.BufferSize != -1)
+ Buffers.setBit(PRE->ProcResourceIdx);
+ CycleSegment RCy(0, PRE->Cycles, false);
+ Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
+ if (PR.SuperIdx) {
+ uint64_t Super = ProcResourceMasks[PR.SuperIdx];
+ SuperResources[Super] += PRE->Cycles;
+ }
+ }
+
+ // Sort elements by mask popcount, so that we prioritize resource units over
+ // resource groups, and smaller groups over larger groups.
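+  // (Illustrative masks: with units HWPort0=0b001 and HWPort1=0b010, the
+  // group HWPort01 gets mask 0b111 -- its own bit plus the bits of its units
+  // -- so the units sort before the group that contains them.)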
+ sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
+ unsigned popcntA = countPopulation(A.first);
+ unsigned popcntB = countPopulation(B.first);
+ if (popcntA < popcntB)
+ return true;
+ if (popcntA > popcntB)
+ return false;
+ return A.first < B.first;
+ });
+
+ uint64_t UsedResourceUnits = 0;
+
+ // Remove cycles contributed by smaller resources.
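+  // If a resource's cycle count has already dropped to zero because smaller
+  // resources processed earlier fully covered its cycles, the resource is only
+  // implicitly consumed; mark it as reserved with zero consumed units.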
+ for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
+ ResourcePlusCycles &A = Worklist[I];
+ if (!A.second.size()) {
+ A.second.NumUnits = 0;
+ A.second.setReserved();
+ ID.Resources.emplace_back(A);
+ continue;
+ }
+
+ ID.Resources.emplace_back(A);
+ uint64_t NormalizedMask = A.first;
+ if (countPopulation(A.first) == 1) {
+ UsedResourceUnits |= A.first;
+ } else {
+ // Remove the leading 1 from the resource group mask.
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ }
+
+ for (unsigned J = I + 1; J < E; ++J) {
+ ResourcePlusCycles &B = Worklist[J];
+ if ((NormalizedMask & B.first) == NormalizedMask) {
+ B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
+ if (countPopulation(B.first) > 1)
+ B.second.NumUnits++;
+ }
+ }
+ }
+
+ // A SchedWrite may specify a number of cycles in which a resource group
+ // is reserved. For example (on target x86; cpu Haswell):
+ //
+ // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
+ // let ResourceCycles = [2, 2, 3];
+ // }
+ //
+ // This means:
+ // Resource units HWPort0 and HWPort1 are both used for 2cy.
+ // Resource group HWPort01 is the union of HWPort0 and HWPort1.
+ // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
+ // will not be usable for 2 entire cycles from instruction issue.
+ //
+ // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
+ // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
+ // extra delay on top of the 2 cycles latency.
+ // During those extra cycles, HWPort01 is not usable by other instructions.
+ for (ResourcePlusCycles &RPC : ID.Resources) {
+ if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
+ // Remove the leading 1 from the resource group mask.
+ uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
+ if ((Mask & UsedResourceUnits) == Mask)
+ RPC.second.setReserved();
+ }
+ }
+
+ // Identify extra buffers that are consumed through super resources.
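+  // A buffered resource whose mask strictly contains the mask of a consumed
+  // "Super" resource also sees its buffer consumed by this instruction.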
+ for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
+ for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
+ const MCProcResourceDesc &PR = *SM.getProcResource(I);
+ if (PR.BufferSize == -1)
+ continue;
+
+ uint64_t Mask = ProcResourceMasks[I];
+ if (Mask != SR.first && ((Mask & SR.first) == SR.first))
+ Buffers.setBit(I);
+ }
+ }
+
+ // Now set the buffers.
+ if (unsigned NumBuffers = Buffers.countPopulation()) {
+ ID.Buffers.resize(NumBuffers);
+ for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
+ if (Buffers[I]) {
+ --NumBuffers;
+ ID.Buffers[NumBuffers] = ProcResourceMasks[I];
+ }
+ }
+ }
+
+ LLVM_DEBUG({
+ for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
+ dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
+ for (const uint64_t R : ID.Buffers)
+ dbgs() << "\t\tBuffer Mask=" << R << '\n';
+ });
+}
+
+static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
+ const MCSchedClassDesc &SCDesc,
+ const MCSubtargetInfo &STI) {
+ if (MCDesc.isCall()) {
+ // We cannot estimate how long this call will take.
+ // Artificially set an arbitrarily high latency (100cy).
+ ID.MaxLatency = 100U;
+ return;
+ }
+
+ int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
+ ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
+}
+
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+  // Count register definitions, and skip non-register operands in the process.
+ unsigned I, E;
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+ const MCOperand &Op = MCI.getOperand(I);
+ if (Op.isReg())
+ --NumExplicitDefs;
+ }
+
+ if (NumExplicitDefs) {
+ return make_error<InstructionError<MCInst>>(
+ "Expected more register operand definitions.", MCI);
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ // Always assume that the optional definition is the last operand.
+ const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+ if (I == MCI.getNumOperands() || !Op.isReg()) {
+ std::string Message =
+ "expected a register operand for an optional definition. Instruction "
+ "has not been correctly analyzed.";
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+ }
+ }
+
+ return ErrorSuccess();
+}
+
+void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
+ unsigned SchedClassID) {
+ const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+ const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+
+ // Assumptions made by this algorithm:
+ // 1. The number of explicit and implicit register definitions in a MCInst
+ // matches the number of explicit and implicit definitions according to
+ // the opcode descriptor (MCInstrDesc).
+ // 2. Uses start at index #(MCDesc.getNumDefs()).
+  // 3. There can only be a single optional register definition, and it is
+ // always the last operand of the sequence (excluding extra operands
+ // contributed by variadic opcodes).
+ //
+ // These assumptions work quite well for most out-of-order in-tree targets
+  // like x86. This is mainly because the vast majority of instructions are
+ // expanded to MCInst using a straightforward lowering logic that preserves
+ // the ordering of the operands.
+ //
+ // About assumption 1.
+ // The algorithm allows non-register operands between register operand
+ // definitions. This helps to handle some special ARM instructions with
+ // implicit operand increment (-mtriple=armv7):
+ //
+ // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
+ // @ <MCOperand Reg:59>
+ // @ <MCOperand Imm:0> (!!)
+ // @ <MCOperand Reg:67>
+ // @ <MCOperand Imm:0>
+ // @ <MCOperand Imm:14>
+ // @ <MCOperand Reg:0>>
+ //
+ // MCDesc reports:
+ // 6 explicit operands.
+ // 1 optional definition
+ // 2 explicit definitions (!!)
+ //
+ // The presence of an 'Imm' operand between the two register definitions
+ // breaks the assumption that "register definitions are always at the
+ // beginning of the operand sequence".
+ //
+ // To workaround this issue, this algorithm ignores (i.e. skips) any
+ // non-register operands between register definitions. The optional
+ // definition is still at index #(NumOperands-1).
+ //
+  // According to assumption 2, register reads start at index
+  // #(MCDesc.getNumDefs()). That means register R1 from the example is both
+  // read and written.
+ unsigned NumExplicitDefs = MCDesc.getNumDefs();
+ unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
+ unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
+ unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
+ if (MCDesc.hasOptionalDef())
+ TotalDefs++;
+
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ ID.Writes.resize(TotalDefs + NumVariadicOps);
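+  // Layout of ID.Writes: explicit definitions first, then implicit
+  // definitions, then the optional definition (if any), and finally any
+  // register definitions from the variadic sequence.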
+ // Iterate over the operands list, and skip non-register operands.
+ // The first NumExplictDefs register operands are expected to be register
+ // definitions.
+ unsigned CurrentDef = 0;
+ unsigned i = 0;
+ for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
+ const MCOperand &Op = MCI.getOperand(i);
+ if (!Op.isReg())
+ continue;
+
+ WriteDescriptor &Write = ID.Writes[CurrentDef];
+ Write.OpIndex = i;
+ if (CurrentDef < NumWriteLatencyEntries) {
+ const MCWriteLatencyEntry &WLE =
+ *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
+ // Conservatively default to MaxLatency.
+ Write.Latency =
+ WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+ Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+ } else {
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ }
+ Write.IsOptionalDef = false;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ CurrentDef++;
+ }
+
+ assert(CurrentDef == NumExplicitDefs &&
+ "Expected more register operand definitions.");
+ for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
+ unsigned Index = NumExplicitDefs + CurrentDef;
+ WriteDescriptor &Write = ID.Writes[Index];
+ Write.OpIndex = ~CurrentDef;
+ Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
+ if (Index < NumWriteLatencyEntries) {
+ const MCWriteLatencyEntry &WLE =
+ *STI.getWriteLatencyEntry(&SCDesc, Index);
+ // Conservatively default to MaxLatency.
+ Write.Latency =
+ WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+ Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+ } else {
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ }
+
+ Write.IsOptionalDef = false;
+ assert(Write.RegisterID != 0 && "Expected a valid phys register!");
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
+ << ", PhysReg=" << MRI.getName(Write.RegisterID)
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ if (MCDesc.hasOptionalDef()) {
+ WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+ Write.OpIndex = MCDesc.getNumOperands() - 1;
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ Write.IsOptionalDef = true;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
+ if (!NumVariadicOps)
+ return;
+
+  // FIXME: If an instruction opcode is flagged 'mayStore' (and not
+  // 'mayLoad'), and it has no 'unmodeledSideEffects', then this logic
+  // optimistically assumes that any extra register operands in the variadic
+  // sequence are not register definitions.
+ //
+ // Otherwise, we conservatively assume that any register operand from the
+ // variadic sequence is both a register read and a register write.
+ bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ WriteDescriptor &Write = ID.Writes[CurrentDef];
+ Write.OpIndex = OpIndex;
+ // Assign a default latency for this write.
+ Write.Latency = ID.MaxLatency;
+ Write.SClassOrWriteResourceID = 0;
+ Write.IsOptionalDef = false;
+ ++CurrentDef;
+ LLVM_DEBUG({
+ dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+ << ", Latency=" << Write.Latency
+ << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+ });
+ }
+
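+  // Trim any unused slots; variadic register operands that were skipped above
+  // (non-register operands, or operands assumed to be uses only) do not
+  // produce a write descriptor.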
+ ID.Writes.resize(CurrentDef);
+}
+
+void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
+ unsigned SchedClassID) {
+ const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+ unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
+ unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
+ // Remove the optional definition.
+ if (MCDesc.hasOptionalDef())
+ --NumExplicitUses;
+ unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+ unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
+ ID.Reads.resize(TotalUses);
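+  // Layout of ID.Reads: explicit uses first, then implicit uses, and finally
+  // any register reads from the variadic sequence.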
+ unsigned CurrentUse = 0;
+ for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+ ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ ReadDescriptor &Read = ID.Reads[CurrentUse];
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = I;
+ Read.SchedClassID = SchedClassID;
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << '\n');
+ }
+
+ // For the purpose of ReadAdvance, implicit uses come directly after explicit
+ // uses. The "UseIndex" must be updated according to that implicit layout.
+ for (unsigned I = 0; I < NumImplicitUses; ++I) {
+ ReadDescriptor &Read = ID.Reads[CurrentUse + I];
+ Read.OpIndex = ~I;
+ Read.UseIndex = NumExplicitUses + I;
+ Read.RegisterID = MCDesc.getImplicitUses()[I];
+ Read.SchedClassID = SchedClassID;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << ", RegisterID="
+ << MRI.getName(Read.RegisterID) << '\n');
+ }
+
+ CurrentUse += NumImplicitUses;
+
+  // FIXME: If an instruction opcode is marked as 'mayLoad' (and not
+  // 'mayStore'), and it has no 'unmodeledSideEffects', then this logic
+  // optimistically assumes that any extra register operands in the variadic
+  // sequence are register definitions rather than register reads.
+
+ bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+ !MCDesc.hasUnmodeledSideEffects();
+ for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+ I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+ const MCOperand &Op = MCI.getOperand(OpIndex);
+ if (!Op.isReg())
+ continue;
+
+ ReadDescriptor &Read = ID.Reads[CurrentUse];
+ Read.OpIndex = OpIndex;
+ Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
+ Read.SchedClassID = SchedClassID;
+ ++CurrentUse;
+ LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
+ << ", UseIndex=" << Read.UseIndex << '\n');
+ }
+
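+  // Trim any unused slots; operands that were skipped above (non-register
+  // operands, or variadic operands assumed to be definitions only) do not
+  // produce a read descriptor.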
+ ID.Reads.resize(CurrentUse);
+}
+
+Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
+ const MCInst &MCI) const {
+ if (ID.NumMicroOps != 0)
+ return ErrorSuccess();
+
+ bool UsesMemory = ID.MayLoad || ID.MayStore;
+ bool UsesBuffers = !ID.Buffers.empty();
+ bool UsesResources = !ID.Resources.empty();
+ if (!UsesMemory && !UsesBuffers && !UsesResources)
+ return ErrorSuccess();
+
+ StringRef Message;
+ if (UsesMemory) {
+    Message = "found an inconsistent instruction that decodes "
+              "to zero micro opcodes and that consumes load/store "
+              "unit resources.";
+  } else {
+    Message = "found an inconsistent instruction that decodes "
+              "to zero micro opcodes and that consumes scheduler "
+              "resources.";
+ }
+
+ return make_error<InstructionError<MCInst>>(Message, MCI);
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
+ assert(STI.getSchedModel().hasInstrSchedModel() &&
+ "Itineraries are not yet supported!");
+
+ // Obtain the instruction descriptor from the opcode.
+ unsigned short Opcode = MCI.getOpcode();
+ const MCInstrDesc &MCDesc = MCII.get(Opcode);
+ const MCSchedModel &SM = STI.getSchedModel();
+
+ // Then obtain the scheduling class information from the instruction.
+ unsigned SchedClassID = MCDesc.getSchedClass();
+ bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
+
+ // Try to solve variant scheduling classes.
+ if (IsVariant) {
+ unsigned CPUID = SM.getProcessorID();
+ while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+ SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
+
+ if (!SchedClassID) {
+ return make_error<InstructionError<MCInst>>(
+ "unable to resolve scheduling class for write variant.", MCI);
+ }
+ }
+
+ // Check if this instruction is supported. Otherwise, report an error.
+ const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+ if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
+ return make_error<InstructionError<MCInst>>(
+ "found an unsupported instruction in the input assembly sequence.",
+ MCI);
+ }
+
+ // Create a new empty descriptor.
+ std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
+ ID->NumMicroOps = SCDesc.NumMicroOps;
+
+ if (MCDesc.isCall() && FirstCallInst) {
+ // We don't correctly model calls.
+ WithColor::warning() << "found a call in the input assembly sequence.\n";
+ WithColor::note() << "call instructions are not correctly modeled. "
+ << "Assume a latency of 100cy.\n";
+ FirstCallInst = false;
+ }
+
+ if (MCDesc.isReturn() && FirstReturnInst) {
+ WithColor::warning() << "found a return instruction in the input"
+ << " assembly sequence.\n";
+ WithColor::note() << "program counter updates are ignored.\n";
+ FirstReturnInst = false;
+ }
+
+ ID->MayLoad = MCDesc.mayLoad();
+ ID->MayStore = MCDesc.mayStore();
+ ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
+
+ initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
+ computeMaxLatency(*ID, MCDesc, SCDesc, STI);
+
+ if (Error Err = verifyOperands(MCDesc, MCI))
+ return std::move(Err);
+
+ populateWrites(*ID, MCI, SchedClassID);
+ populateReads(*ID, MCI, SchedClassID);
+
+ LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
+
+ // Sanity check on the instruction descriptor.
+ if (Error Err = verifyInstrDesc(*ID, MCI))
+ return std::move(Err);
+
+ // Now add the new descriptor.
+ SchedClassID = MCDesc.getSchedClass();
+ bool IsVariadic = MCDesc.isVariadic();
+ if (!IsVariadic && !IsVariant) {
+ Descriptors[MCI.getOpcode()] = std::move(ID);
+ return *Descriptors[MCI.getOpcode()];
+ }
+
+ VariantDescriptors[&MCI] = std::move(ID);
+ return *VariantDescriptors[&MCI];
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
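+  // Descriptors for non-variant, non-variadic opcodes are cached by opcode;
+  // variant and variadic instructions get a per-MCInst descriptor instead
+  // (see the tail of createInstrDescImpl).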
+ if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
+ return *Descriptors[MCI.getOpcode()];
+
+ if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
+ return *VariantDescriptors[&MCI];
+
+ return createInstrDescImpl(MCI);
+}
+
+Expected<std::unique_ptr<Instruction>>
+InstrBuilder::createInstruction(const MCInst &MCI) {
+ Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
+ if (!DescOrErr)
+ return DescOrErr.takeError();
+ const InstrDesc &D = *DescOrErr;
+ std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);
+
+ // Check if this is a dependency breaking instruction.
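+  // Zero idioms (e.g. a register XOR'd with itself on x86) are a special case
+  // of dependency breaking instructions. When non-zero, Mask identifies which
+  // input operands have their dependency broken.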
+ APInt Mask;
+
+ unsigned ProcID = STI.getSchedModel().getProcessorID();
+ bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID);
+ bool IsDepBreaking =
+ IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID);
+ if (MCIA.isOptimizableRegisterMove(MCI, ProcID))
+ NewIS->setOptimizableMove();
+
+ // Initialize Reads first.
+ for (const ReadDescriptor &RD : D.Reads) {
+ int RegID = -1;
+ if (!RD.isImplicitRead()) {
+      // Explicit read.
+ const MCOperand &Op = MCI.getOperand(RD.OpIndex);
+ // Skip non-register operands.
+ if (!Op.isReg())
+ continue;
+ RegID = Op.getReg();
+ } else {
+ // Implicit read.
+ RegID = RD.RegisterID;
+ }
+
+ // Skip invalid register operands.
+ if (!RegID)
+ continue;
+
+ // Okay, this is a register operand. Create a ReadState for it.
+ assert(RegID > 0 && "Invalid register ID found!");
+ NewIS->getUses().emplace_back(RD, RegID);
+ ReadState &RS = NewIS->getUses().back();
+
+ if (IsDepBreaking) {
+      // An all-zero mask means that dependencies are broken for all the
+      // explicit input operands.
+ if (Mask.isNullValue()) {
+ if (!RD.isImplicitRead())
+ RS.setIndependentFromDef();
+ } else {
+ // Check if this register operand is independent according to `Mask`.
+ // Note that Mask may not have enough bits to describe all explicit and
+ // implicit input operands. If this register operand doesn't have a
+ // corresponding bit in Mask, then conservatively assume that it is
+ // dependent.
+ if (Mask.getBitWidth() > RD.UseIndex) {
+          // Okay. The mask describes register use `RD.UseIndex`.
+ if (Mask[RD.UseIndex])
+ RS.setIndependentFromDef();
+ }
+ }
+ }
+ }
+
+ // Early exit if there are no writes.
+ if (D.Writes.empty())
+ return std::move(NewIS);
+
+ // Track register writes that implicitly clear the upper portion of the
+ // underlying super-registers using an APInt.
+ APInt WriteMask(D.Writes.size(), 0);
+
+ // Now query the MCInstrAnalysis object to obtain information about which
+ // register writes implicitly clear the upper portion of a super-register.
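+  // (For example, on x86-64 a write to a 32-bit GPR such as EAX implicitly
+  // zeroes the upper half of the corresponding 64-bit register RAX.)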
+ MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);
+
+ // Initialize writes.
+ unsigned WriteIndex = 0;
+ for (const WriteDescriptor &WD : D.Writes) {
+ unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
+ : MCI.getOperand(WD.OpIndex).getReg();
+    // Check if this is an optional definition that references NoReg.
+ if (WD.IsOptionalDef && !RegID) {
+ ++WriteIndex;
+ continue;
+ }
+
+ assert(RegID && "Expected a valid register ID!");
+ NewIS->getDefs().emplace_back(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
+ ++WriteIndex;
+ }
+
+ return std::move(NewIS);
+}
+} // namespace mca
+} // namespace llvm