- Allow the target to specify when register pressure is "too high". In most
  cases, it's too late to start backing off aggressive latency scheduling when
  most of the registers are already in use, so the threshold should be a bit
  tighter.
- Correctly handle live-outs, extract_subreg, etc.
- Enable register-pressure-aware scheduling by default for the hybrid
  scheduler. For ARM, this is almost always a win on the number of
  instructions. It's runtime-neutral for most of the tests, but for some
  kernels with high register pressure it can be a huge win; e.g. for
  464.h264ref the number of spills was reduced by 54 and it sped up by 20%.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109279 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 334ce58..2ffd350 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -28,16 +28,12 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <climits>
 using namespace llvm;
 
-static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched",
-                                      cl::init(false), cl::Hidden);
-
 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
 STATISTIC(NumDups,       "Number of duplicated nodes");
@@ -1075,7 +1071,7 @@
         std::fill(RegPressure.begin(), RegPressure.end(), 0);
         for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
                E = TRI->regclass_end(); I != E; ++I)
-          RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1;
+          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
       }
     }
     
@@ -1172,10 +1168,12 @@
       SU->NodeQueueId = 0;
     }
 
-    bool HighRegPressure(const SUnit *SU) const {
+    bool HighRegPressure(const SUnit *SU, unsigned &Excess) const {
       if (!TLI)
         return false;
 
+      bool High = false;
+      Excess = 0;
       for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
            I != E; ++I) {
         if (I->isCtrl())
@@ -1183,12 +1181,41 @@
         SUnit *PredSU = I->getSUnit();
         const SDNode *PN = PredSU->getNode();
         if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyToReg) {
-            EVT VT = PN->getOperand(1).getValueType();
+          if (PN->getOpcode() == ISD::CopyFromReg) {
+            EVT VT = PN->getValueType(0);
             unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
             unsigned Cost = TLI->getRepRegClassCostFor(VT);
-            if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
-              return true;
+            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+              High = true;
+              Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
+            }
+          }
+          continue;
+        }
+        unsigned POpc = PN->getMachineOpcode();
+        if (POpc == TargetOpcode::IMPLICIT_DEF)
+          continue;
+        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+          EVT VT = PN->getOperand(0).getValueType();
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          unsigned Cost = TLI->getRepRegClassCostFor(VT);
+          // Check if this increases register pressure of the specific register
+          // class to the point where it would cause spills.
+          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+            High = true;
+            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
+          }
+          continue;            
+        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+                   POpc == TargetOpcode::SUBREG_TO_REG) {
+          EVT VT = PN->getValueType(0);
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          unsigned Cost = TLI->getRepRegClassCostFor(VT);
+          // Check if this increases register pressure of the specific register
+          // class to the point where it would cause spills.
+          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+            High = true;
+            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
           }
           continue;
         }
@@ -1201,12 +1228,14 @@
           unsigned Cost = TLI->getRepRegClassCostFor(VT);
           // Check if this increases register pressure of the specific register
           // class to the point where it would cause spills.
-          if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
-            return true;
+          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+            High = true;
+            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
+          }
         }
       }
 
-      return false;
+      return High;
     }
 
     void ScheduledNode(SUnit *SU) {
@@ -1214,13 +1243,18 @@
         return;
 
       const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode())
-        return;
-      unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
-          Opc == TargetOpcode::REG_SEQUENCE ||
-          Opc == TargetOpcode::IMPLICIT_DEF)
-        return;
+      if (!N->isMachineOpcode()) {
+        if (N->getOpcode() != ISD::CopyToReg)
+          return;
+      } else {
+        unsigned Opc = N->getMachineOpcode();
+        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+            Opc == TargetOpcode::INSERT_SUBREG ||
+            Opc == TargetOpcode::SUBREG_TO_REG ||
+            Opc == TargetOpcode::REG_SEQUENCE ||
+            Opc == TargetOpcode::IMPLICIT_DEF)
+          return;
+      }
 
       for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
            I != E; ++I) {
@@ -1231,8 +1265,8 @@
           continue;
         const SDNode *PN = PredSU->getNode();
         if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyToReg) {
-            EVT VT = PN->getOperand(1).getValueType();
+          if (PN->getOpcode() == ISD::CopyFromReg) {
+            EVT VT = PN->getValueType(0);
             unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
             RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
           }
@@ -1241,6 +1275,18 @@
         unsigned POpc = PN->getMachineOpcode();
         if (POpc == TargetOpcode::IMPLICIT_DEF)
           continue;
+        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+          EVT VT = PN->getOperand(0).getValueType();
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;            
+        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+                   POpc == TargetOpcode::SUBREG_TO_REG) {
+          EVT VT = PN->getValueType(0);
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;
+        }
         unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
           EVT VT = PN->getValueType(i);
@@ -1251,19 +1297,19 @@
         }
       }
 
-      if (!SU->NumSuccs)
-        return;
-      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-      for (unsigned i = 0; i != NumDefs; ++i) {
-        EVT VT = N->getValueType(i);
-        if (!N->hasAnyUseOfValue(i))
-          continue;
-        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
-          // Register pressure tracking is imprecise. This can happen.
-          RegPressure[RCId] = 0;
-        else
-          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+      if (SU->NumSuccs) {
+        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+        for (unsigned i = 0; i != NumDefs; ++i) {
+          EVT VT = N->getValueType(i);
+          if (!N->hasAnyUseOfValue(i))
+            continue;
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+            // Register pressure tracking is imprecise. This can happen.
+            RegPressure[RCId] = 0;
+          else
+            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+        }
       }
 
       dumpRegPressure();
@@ -1274,10 +1320,14 @@
         return;
 
       const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode())
-        return;
+      if (!N->isMachineOpcode()) {
+        if (N->getOpcode() != ISD::CopyToReg)
+          return;
+      }
       unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
+      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+          Opc == TargetOpcode::INSERT_SUBREG ||
+          Opc == TargetOpcode::SUBREG_TO_REG ||
           Opc == TargetOpcode::REG_SEQUENCE ||
           Opc == TargetOpcode::IMPLICIT_DEF)
         return;
@@ -1291,8 +1341,8 @@
           continue;
         const SDNode *PN = PredSU->getNode();
         if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyToReg) {
-            EVT VT = PN->getOperand(1).getValueType();
+          if (PN->getOpcode() == ISD::CopyFromReg) {
+            EVT VT = PN->getValueType(0);
             unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
             RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
           }
@@ -1301,6 +1351,18 @@
         unsigned POpc = PN->getMachineOpcode();
         if (POpc == TargetOpcode::IMPLICIT_DEF)
           continue;
+        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+          EVT VT = PN->getOperand(0).getValueType();
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;            
+        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+                   POpc == TargetOpcode::SUBREG_TO_REG) {
+          EVT VT = PN->getValueType(0);
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;
+        }
         unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
           EVT VT = PN->getValueType(i);
@@ -1315,17 +1377,17 @@
         }
       }
 
-      if (!SU->NumSuccs)
-        return;
-      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
-        EVT VT = N->getValueType(i);
-        if (VT == MVT::Flag || VT == MVT::Other)
-          continue;
-        if (!N->hasAnyUseOfValue(i))
-          continue;
-        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      if (SU->NumSuccs) {
+        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+          EVT VT = N->getValueType(i);
+          if (VT == MVT::Flag || VT == MVT::Other)
+            continue;
+          if (!N->hasAnyUseOfValue(i))
+            continue;
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+        }
       }
 
       dumpRegPressure();
@@ -1464,13 +1526,20 @@
 }
 
 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
-  bool LHigh = SPQ->HighRegPressure(left);
-  bool RHigh = SPQ->HighRegPressure(right);
+  unsigned LExcess, RExcess;
+  bool LHigh = SPQ->HighRegPressure(left, LExcess);
+  bool RHigh = SPQ->HighRegPressure(right, RExcess);
   if (LHigh && !RHigh)
     return true;
   else if (!LHigh && RHigh)
     return false;
-  else if (!LHigh && !RHigh) {
+  else if (LHigh && RHigh) {
+    if (LExcess > RExcess)
+      return true;
+    else if (LExcess < RExcess)
+      return false;
+    // Otherwise schedule for register pressure reduction.
+  } else {
     // Low register pressure situation, schedule for latency if possible.
     bool LStall = left->SchedulingPref == Sched::Latency &&
       SPQ->getCurCycle() < left->getHeight();
@@ -1889,8 +1958,7 @@
   const TargetLowering *TLI = &IS->getTargetLowering();
   
   HybridBURRPriorityQueue *PQ =
-    new HybridBURRPriorityQueue(*IS->MF, RegPressureAware, TII, TRI,
-                                (RegPressureAware ? TLI : 0));
+    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
   PQ->setScheduleDAG(SD);
   return SD;  
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0e33758..1f9908c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -166,6 +166,7 @@
 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     : TargetLowering(TM, createTLOF(TM)) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  RegInfo = TM.getRegisterInfo();
 
   if (Subtarget->isTargetDarwin()) {
     // Uses VFP for Thumb libfuncs if available.
@@ -729,6 +730,23 @@
   return Sched::RegPressure;
 }
 
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+                                       MachineFunction &MF) const {
+  unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+  switch (RC->getID()) {
+  default:
+    return 0;
+  case ARM::tGPRRegClassID:
+    return 5 - FPDiff;
+  case ARM::GPRRegClassID:
+    return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
+  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
+  case ARM::DPRRegClassID:
+    return 32 - 10;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Lowering Code
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 05d7d5f..b544b5e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,6 +17,7 @@
 
 #include "ARMSubtarget.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -268,6 +269,9 @@
 
     Sched::Preference getSchedulingPreference(SDNode *N) const;
 
+    unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                 MachineFunction &MF) const;
+
     bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
@@ -285,6 +289,8 @@
     /// make the right decision when generating code for different targets.
     const ARMSubtarget *Subtarget;
 
+    const TargetRegisterInfo *RegInfo;
+
     /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
     ///
     unsigned ARMPCLabelIndex;