[X86] Enable call frame optimization ("mov to push") not only for optsize (PR26325)

The code-size savings are significant, and from what I can tell, both ICC and GCC perform this mov-to-push transformation as well.

Differential Revision: http://reviews.llvm.org/D18573

llvm-svn: 264966
diff --git a/llvm/test/CodeGen/X86/mcu-abi.ll b/llvm/test/CodeGen/X86/mcu-abi.ll
index 263ddcf..1cc277c 100644
--- a/llvm/test/CodeGen/X86/mcu-abi.ll
+++ b/llvm/test/CodeGen/X86/mcu-abi.ll
@@ -93,14 +93,10 @@
 }
 
 ; CHECK-LABEL: test_fp128:
-; CHECK: movl    (%eax), %e[[CX:..]]
-; CHECK-NEXT: movl    4(%eax), %e[[DX:..]]
-; CHECK-NEXT: movl    8(%eax), %e[[SI:..]]
-; CHECK-NEXT: movl    12(%eax), %e[[AX:..]]
-; CHECK-NEXT: movl    %e[[AX]], 12(%esp)
-; CHECK-NEXT: movl    %e[[SI]], 8(%esp)
-; CHECK-NEXT: movl    %e[[DX]], 4(%esp)
-; CHECK-NEXT: movl    %e[[CX]], (%esp)
+; CHECK:      pushl   12(%eax)
+; CHECK-NEXT: pushl   8(%eax)
+; CHECK-NEXT: pushl   4(%eax)
+; CHECK-NEXT: pushl   (%eax)
 ; CHECK-NEXT: calll   __fixtfsi
 define i32 @test_fp128(fp128* %ptr) #0 {
   %v = load fp128, fp128* %ptr