implement rdar://6653118 - fastisel should fold loads where possible.

Since mem2reg isn't run at -O0, we get a ton of reloads from the stack,
for example, before, this code:

int foo(int x, int y, int z) {
  return x+y+z;
}

used to compile into:

_foo:                                   ## @foo
	subq	$12, %rsp
	movl	%edi, 8(%rsp)
	movl	%esi, 4(%rsp)
	movl	%edx, (%rsp)
	movl	8(%rsp), %edx
	movl	4(%rsp), %esi
	addl	%edx, %esi
	movl	(%rsp), %edx
	addl	%esi, %edx
	movl	%edx, %eax
	addq	$12, %rsp
	ret

Now we produce:

_foo:                                   ## @foo
	subq	$12, %rsp
	movl	%edi, 8(%rsp)
	movl	%esi, 4(%rsp)
	movl	%edx, (%rsp)
	movl	8(%rsp), %edx
	addl	4(%rsp), %edx    ## Folded load
	addl	(%rsp), %edx     ## Folded load
	movl	%edx, %eax
	addq	$12, %rsp
	ret

Fewer instructions and less register use = faster compiles.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113102 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 66cb5ce..dead5d5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -661,6 +661,43 @@
   }
 }
 
+
+
+  
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             FastISel *FastIS) {
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile()) return false;
+  
+  // Figure out which vreg this is going into.
+  unsigned LoadReg = FastIS->getRegForValue(LI);
+  assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+  // Check to see what the uses of this vreg are.  If it has no uses, or more
+  // than one use (at the machine instr level) then we can't fold it.
+  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+  if (RI == RegInfo->reg_end())
+    return false;
+  
+  // See if there is exactly one use of the vreg.  If there are multiple uses,
+  // then the instruction got lowered to multiple machine instructions or the
+  // use of the loaded value ended up being multiple operands of the result, in
+  // either case, we can't fold this.
+  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+  if (PostRI != RegInfo->reg_end())
+    return false;
+  
+  assert(RI.getOperand().isUse() &&
+         "The only use of the vreg must be a use, we haven't emitted the def!");
+
+  // Ask the target to try folding the load.
+  return FastIS->TryToFoldLoad(&*RI, RI.getOperandNo(), LI);
+}
+
+  
+
+
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = 0;
@@ -723,8 +760,21 @@
         FastIS->recomputeInsertPt();
 
         // Try to select the instruction with FastISel.
-        if (FastIS->SelectInstruction(Inst))
+        if (FastIS->SelectInstruction(Inst)) {
+          // If fast isel succeeded, check to see if there is a single-use
+          // non-volatile load right before the selected instruction, and see if
+          // the load is used by the instruction.  If so, try to fold it.
+          const Instruction *BeforeInst = 0;
+          if (Inst != Begin)
+            BeforeInst = llvm::prior(llvm::prior(BI));
+          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) {
+            // If we succeeded, don't re-select the load.
+            --BI;
+          }          
           continue;
+        }
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(Inst)) {