Add a hybrid bottom up scheduler that reduce register usage while avoiding
pipeline stall. It's useful for targets like ARM cortex-a8. NEON has a lot
of long latency instructions so a strict register pressure reduction
scheduler does not work well.
Early experiments show this speeds up some NEON loops by over 30%.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@104216 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 68d0c80..f765409 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -347,7 +347,7 @@
const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+ ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -378,6 +378,10 @@
}
const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty()) {
+ SU->Latency = 1;
+ return;
+ }
// Compute the latency for the node. We use the sum of the latencies for
// all nodes flagged together into this SUnit.
@@ -389,6 +393,37 @@
}
}
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies())
+ return;
+
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (DefIdx >= II.getNumDefs())
+ return;
+ int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
+ if (DefCycle < 0)
+ return;
+ const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+ int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ if (UseCycle >= 0) {
+ int Latency = DefCycle - UseCycle + 1;
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+ }
+}
+
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";