[peephole] Enhance folding logic to work for STATEPOINTs

The general idea here is to get enough of the existing restrictions out of the way that the already existing folding logic in foldMemoryOperand can kick in for STATEPOINTs and fold references to immutable stack slots. The key changes are:

    Support for folding multiple operands at once which reference the same load
    Support for folding multiple loads into a single instruction
    Walk all the operands of the instruction for varidic instructions (this is a bug fix!)

Once this lands, I'll post another patch which refactors the TII interface here. There's nothing actually x86 specific about the x86 code used here.

Differential Revision: https://reviews.llvm.org/D24103

llvm-svn: 289510
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 3cf67ee..1b8106d 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1540,11 +1540,6 @@
       if (MI->isDebugValue())
           continue;
 
-      // If we run into an instruction we can't fold across, discard
-      // the load candidates.
-      if (MI->isLoadFoldBarrier())
-        FoldAsLoadDefCandidates.clear();
-
       if (MI->isPosition() || MI->isPHI())
         continue;
 
@@ -1588,7 +1583,6 @@
         DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
                      << '\n');
         NAPhysToVirtMIs.clear();
-        continue;
       }
 
       if ((isUncoalescableCopy(*MI) &&
@@ -1639,8 +1633,14 @@
       // earlier load into MI.
       if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
           !FoldAsLoadDefCandidates.empty()) {
+
+        // We visit each operand even after successfully folding a previous
+        // one.  This allows us to fold multiple loads into a single
+        // instruction.  We do assume that optimizeLoadInstr doesn't insert
+        // foldable uses earlier in the argument list.  Since we don't restart
+        // iteration, we'd miss such cases.
         const MCInstrDesc &MIDesc = MI->getDesc();
-        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+        for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
              ++i) {
           const MachineOperand &MOp = MI->getOperand(i);
           if (!MOp.isReg())
@@ -1667,13 +1667,23 @@
               MRI->markUsesInDebugValueAsUndef(FoldedReg);
               FoldAsLoadDefCandidates.erase(FoldedReg);
               ++NumLoadFold;
-              // MI is replaced with FoldMI.
+              
+              // MI is replaced with FoldMI so we can continue trying to fold
               Changed = true;
-              break;
+              MI = FoldMI;
             }
           }
         }
       }
+      
+      // If we run into an instruction we can't fold across, discard
+      // the load candidates.  Note: We might be able to fold *into* this
+      // instruction, so this needs to be after the folding logic.
+      if (MI->isLoadFoldBarrier()) {
+        DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
+        FoldAsLoadDefCandidates.clear();
+      }
+
     }
   }