[SystemZ] Improve decoding in case of instructions with four register operands.
Since z13, the max group size will be 2 if any μop has more than 3 register
sources.
This has been ignored so far in the SystemZHazardRecognizer, but is now
handled by recognizing those instructions and adjusting the tracking of
decoding and the cost heuristic for grouping.
Review: Ulrich Weigand
https://reviews.llvm.org/D49847
llvm-svn: 338368
diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index d01dd9e..c7dd358 100644
--- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -81,6 +81,7 @@
void SystemZHazardRecognizer::Reset() {
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@
if (SC->BeginGroup)
return (CurrGroupSize == 0);
+ // An instruction with 4 register operands will not fit in last slot.
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+ "Current decoder group is already full!");
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
// Since a full group is handled immediately in EmitInstruction(),
// SU should fit into current group. NumSlots should be 1 or 0,
// since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@
return true;
}
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &MID = MI->getDesc();
+ unsigned Count = 0; // Number of operands counted so far.
+ for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ if (RC == nullptr) // No register class returned for this operand.
+ continue;
+ if (OpIdx >= MID.getNumDefs() &&
+ MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ continue; // Non-def operand with a TIED_TO constraint is not counted.
+ Count++;
+ }
+ return Count >= 4; // True when at least four operands were counted.
+}
+
void SystemZHazardRecognizer::nextGroup() {
if (CurrGroupSize == 0)
return;
@@ -119,6 +143,7 @@
// Reset counter for next group.
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
// Decrease counters for execution units by one.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -172,6 +197,8 @@
OS << "/EndsGroup";
if (SU->isUnbuffered)
OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
}
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@
dbgs() << "{ " << CurGroupDbg << " }";
dbgs() << " (" << CurrGroupSize << " decoder slot"
<< (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
<< ")\n";
}
}
@@ -294,11 +322,14 @@
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
- assert (CurrGroupSize <= 3);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim =
+ ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+ assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == GroupLim || SC->EndGroup)
nextGroup();
}
@@ -325,6 +356,10 @@
return -1;
}
+ // An instruction with 4 register operands will not fit in last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
// Most instructions can be placed in any decoder slot.
return 0;
}