- Add a hook for target to determine whether an instruction def is "long latency" enough to hoist even if it may increase spilling. Reloading a value from spill slot is often cheaper than performing an expensive computation in the loop. For X86, that means machine LICM will hoist SQRT, DIV, etc. ARM will be somewhat aggressive with VFP and NEON instructions. - Enable register pressure aware machine LICM by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116781 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 11e8b74a7ae9ecd59b64180a59143e39bc3b9514 [log] [tgz]
author: Evan Cheng <evan.cheng@apple.com> Tue Oct 19 00:55:07 2010 +0000
committer: Evan Cheng <evan.cheng@apple.com> Tue Oct 19 00:55:07 2010 +0000
tree: 23c9e8ebbd1f15df2c90829c4bf6669a0aa0940b
parent: b5a2d3f8e3b40c3076d03c5db7c4f0387e58b53b [diff] [blame]
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 607e8f1..829fae6 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp

@@ -43,11 +43,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool>
-TrackRegPressure("rp-aware-machine-licm",
-                 cl::desc("Register pressure aware machine LICM"),
-                 cl::init(false), cl::Hidden);
-
 STATISTIC(NumHoisted,
           "Number of machine instructions hoisted out of loops");
 STATISTIC(NumLowRP,
@@ -124,6 +119,7 @@
       RegSeen.clear();
       RegPressure.clear();
       RegLimit.clear();
+      BackTrace.clear();
       for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
              CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
         CI->second.clear();
@@ -171,9 +167,10 @@
     /// 
     bool IsLoopInvariantInst(MachineInstr &I);
 
-    /// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
-    /// and an use in the current loop.
-    int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
+    /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+    /// and an use in the current loop, return true if the target considered
+    /// it 'high'.
+    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
 
     /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
     /// if hoisting an instruction of the given cost matrix can cause high
@@ -556,28 +553,24 @@
   if (!Preheader)
     return;
 
-  if (TrackRegPressure) {
-    if (IsHeader) {
-      // Compute registers which are liveout of preheader.
-      RegSeen.clear();
-      BackTrace.clear();
-      InitRegPressure(Preheader);
-    }
-
-    // Remember livein register pressure.
-    BackTrace.push_back(RegPressure);
+  if (IsHeader) {
+    // Compute registers which are liveout of preheader.
+    RegSeen.clear();
+    BackTrace.clear();
+    InitRegPressure(Preheader);
   }
 
+  // Remember livein register pressure.
+  BackTrace.push_back(RegPressure);
+
   for (MachineBasicBlock::iterator
          MII = BB->begin(), E = BB->end(); MII != E; ) {
     MachineBasicBlock::iterator NextMII = MII; ++NextMII;
     MachineInstr *MI = &*MII;
 
-    if (TrackRegPressure)
-      UpdateRegPressureBefore(MI);
+    UpdateRegPressureBefore(MI);
     Hoist(MI, Preheader);
-    if (TrackRegPressure)
-      UpdateRegPressureAfter(MI);
+    UpdateRegPressureAfter(MI);
 
     MII = NextMII;
   }
@@ -591,8 +584,7 @@
       HoistRegion(Children[I]);
   }
 
-  if (TrackRegPressure)
-    BackTrace.pop_back();
+  BackTrace.pop_back();
 }
 
 /// InitRegPressure - Find all virtual register references that are liveout of
@@ -788,15 +780,14 @@
   }
 }
 
-/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
-/// and an use in the current loop.
-int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
-                                       unsigned DefIdx, unsigned Reg) {
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and an use in the current loop, return true if the target considered
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+                                        unsigned DefIdx, unsigned Reg) {
   if (MRI->use_nodbg_empty(Reg))
-    // No use? Return arbitrary large number!
-    return 300;
+    return false;
 
-  int Latency = -1;
   for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
          E = MRI->use_nodbg_end(); I != E; ++I) {
     MachineInstr *UseMI = &*I;
@@ -810,18 +801,15 @@
       if (MOReg != Reg)
         continue;
 
-      int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i);
-      Latency = std::max(Latency, UseCycle);
+      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+        return true;
     }
 
-    if (Latency != -1)
-      break;
+    // Only look at the first in loop use.
+    break;
   }
 
-  if (Latency == -1)
-    Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx);
-
-  return Latency;
+  return false;
 }
 
 /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
@@ -855,19 +843,19 @@
   if (MI.isImplicitDef())
     return true;
 
-  // FIXME: For now, only hoist re-materilizable instructions. LICM will
-  // increase register pressure. We want to make sure it doesn't increase
-  // spilling.
+  // If the instruction is cheap, only hoist if it is re-materilizable. LICM
+  // will increase register pressure. It's probably not worth it if the
+  // instruction is cheap.
   // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
   // these tend to help performance in low register pressure situation. The
   // trade off is it may cause spill in high pressure situation. It will end up
   // adding a store in the loop preheader. But the reload is no more expensive.
   // The side benefit is these loads are frequently CSE'ed.
-  if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) {
-    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
-        !isLoadFromConstantMemory(&MI))
+  if (MI.getDesc().isAsCheapAsAMove()) {
+    if (!TII->isTriviallyReMaterializable(&MI, AA))
       return false;
   } else {
+    // Estimate register pressure to determine whether to LICM the instruction.
     // In low register pressure situation, we can be more aggressive about 
     // hoisting. Also, favors hoisting long latency instructions even in
     // moderately high pressure situation.
@@ -880,13 +868,9 @@
       if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
         continue;
       if (MO.isDef()) {
-        if (InstrItins && !InstrItins->isEmpty()) {
-          int Cycle = ComputeOperandLatency(MI, i, Reg);
-          if (Cycle > 3) {
-            // FIXME: Target specific high latency limit?
-            ++NumHighLatency;
-            return true;
-          }
+        if (HasHighOperandLatency(MI, i, Reg)) {
+          ++NumHighLatency;
+          return true;
         }
 
         const TargetRegisterClass *RC = MRI->getRegClass(Reg);
commit	11e8b74a7ae9ecd59b64180a59143e39bc3b9514	[log] [tgz]
author	Evan Cheng <evan.cheng@apple.com>	Tue Oct 19 00:55:07 2010 +0000
committer	Evan Cheng <evan.cheng@apple.com>	Tue Oct 19 00:55:07 2010 +0000
tree	23c9e8ebbd1f15df2c90829c4bf6669a0aa0940b
parent	b5a2d3f8e3b40c3076d03c5db7c4f0387e58b53b [diff] [blame]