implement rdar://6653118 - fastisel should fold loads where possible.

Since mem2reg isn't run at -O0, we get a ton of reloads from the stack,
for example, before, this code:

int foo(int x, int y, int z) {
  return x+y+z;
}

used to compile into:

_foo:                                   ## @foo
	subq	$12, %rsp
	movl	%edi, 8(%rsp)
	movl	%esi, 4(%rsp)
	movl	%edx, (%rsp)
	movl	8(%rsp), %edx
	movl	4(%rsp), %esi
	addl	%edx, %esi
	movl	(%rsp), %edx
	addl	%esi, %edx
	movl	%edx, %eax
	addq	$12, %rsp
	ret

Now we produce:

_foo:                                   ## @foo
	subq	$12, %rsp
	movl	%edi, 8(%rsp)
	movl	%esi, 4(%rsp)
	movl	%edx, (%rsp)
	movl	8(%rsp), %edx
	addl	4(%rsp), %edx    ## Folded load
	addl	(%rsp), %edx     ## Folded load
	movl	%edx, %eax
	addq	$12, %rsp
	ret

Fewer instructions and less register use = faster compiles.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113102 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 0c70eec..9390ba9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -63,6 +63,13 @@
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                             const LoadInst *LI);
+  
 #include "X86GenFastISel.inc"
 
 private:
@@ -1941,6 +1948,34 @@
   return ResultReg;
 }
 
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction.  If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                                const LoadInst *LI) {
+  X86AddressMode AM;
+  if (!X86SelectAddress(LI->getOperand(0), AM))
+    return false;
+  
+  X86InstrInfo &XII = (X86InstrInfo&)TII;
+  
+  unsigned Size = TD.getTypeAllocSize(LI->getType());
+  unsigned Alignment = LI->getAlignment();
+
+  SmallVector<MachineOperand, 8> AddrOps;
+  AM.getFullAddress(AddrOps);
+  
+  MachineInstr *Result =
+    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+  if (Result == 0) return false;
+  
+  MI->getParent()->insert(MI, Result);
+  MI->eraseFromParent();
+  return true;
+}
+
+
 namespace llvm {
   llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);