Re-land r237175: [X86] Always return the sret parameter in eax/rax ...

This reverts commit r237210.

Also fix X86/complex-fca.ll to match the code that we used to generate
on win32 and now generate everywhere to conform to SysV.

llvm-svn: 237639
diff --git a/llvm/test/CodeGen/X86/cmovcmov.ll b/llvm/test/CodeGen/X86/cmovcmov.ll
index 982bc82..d3d9748 100644
--- a/llvm/test/CodeGen/X86/cmovcmov.ll
+++ b/llvm/test/CodeGen/X86/cmovcmov.ll
@@ -143,19 +143,19 @@
 ; NOCMOV-NEXT:   jp  [[TBB]]
 ; NOCMOV-NEXT:   leal  24(%esp), %eax
 ; NOCMOV-NEXT: [[TBB]]:
-; NOCMOV-NEXT:   movl  (%eax), %eax
-; NOCMOV-NEXT:   leal  44(%esp), %ecx
+; NOCMOV-NEXT:   movl  (%eax), %ecx
+; NOCMOV-NEXT:   leal  44(%esp), %edx
 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 ; NOCMOV-NEXT:   jp  [[TBB]]
-; NOCMOV-NEXT:   leal  28(%esp), %ecx
+; NOCMOV-NEXT:   leal  28(%esp), %edx
 ; NOCMOV-NEXT: [[TBB]]:
-; NOCMOV-NEXT:   movl  (%ecx), %ecx
+; NOCMOV-NEXT:   movl  12(%esp), %eax
+; NOCMOV-NEXT:   movl  (%edx), %edx
 ; NOCMOV-NEXT:   leal  48(%esp), %esi
 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
 ; NOCMOV-NEXT:   jp  [[TBB]]
 ; NOCMOV-NEXT:   leal  32(%esp), %esi
 ; NOCMOV-NEXT: [[TBB]]:
-; NOCMOV-NEXT:   movl  12(%esp), %edx
 ; NOCMOV-NEXT:   movl  (%esi), %esi
 ; NOCMOV-NEXT:   leal  52(%esp), %edi
 ; NOCMOV-NEXT:   jne  [[TBB:.LBB[0-9_]+]]
@@ -163,10 +163,10 @@
 ; NOCMOV-NEXT:   leal  36(%esp), %edi
 ; NOCMOV-NEXT: [[TBB]]:
 ; NOCMOV-NEXT:   movl  (%edi), %edi
-; NOCMOV-NEXT:   movl  %edi, 12(%edx)
-; NOCMOV-NEXT:   movl  %esi, 8(%edx)
-; NOCMOV-NEXT:   movl  %ecx, 4(%edx)
-; NOCMOV-NEXT:   movl  %eax, (%edx)
+; NOCMOV-NEXT:   movl  %edi, 12(%eax)
+; NOCMOV-NEXT:   movl  %esi, 8(%eax)
+; NOCMOV-NEXT:   movl  %edx, 4(%eax)
+; NOCMOV-NEXT:   movl  %ecx, (%eax)
 ; NOCMOV-NEXT:   popl  %esi
 ; NOCMOV-NEXT:   popl  %edi
 ; NOCMOV-NEXT:   retl  $4
diff --git a/llvm/test/CodeGen/X86/complex-fca.ll b/llvm/test/CodeGen/X86/complex-fca.ll
index 8ad38a4..78b27b7 100644
--- a/llvm/test/CodeGen/X86/complex-fca.ll
+++ b/llvm/test/CodeGen/X86/complex-fca.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -march=x86 | grep mov | count 2
-
-; Skip this on Windows as there is no ccosl and sret behaves differently.
-; XFAIL: pc-win32
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
 entry:
-	%z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
-	%z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
-	%0 = fsub x86_fp80 0xK80000000000000000000, %z9
-	%insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
-	%insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
-	call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
-	ret void
+  %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
+  %z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
+  %0 = fsub x86_fp80 0xK80000000000000000000, %z9
+  %insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
+  %insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
+  call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
+  ret void
 }
 
+; CHECK-LABEL: ccosl:
+; CHECK:         movl    {{[0-9]+}}(%esp), %[[sret_reg:[^ ]+]]
+; CHECK:         movl    %[[sret_reg]], (%esp)
+; CHECK:         calll   {{.*ccoshl.*}}
+; CHECK:         movl    %[[sret_reg]], %eax
+; CHECK:         retl
+
 declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret, { x86_fp80, x86_fp80 }) nounwind
diff --git a/llvm/test/CodeGen/X86/sret-implicit.ll b/llvm/test/CodeGen/X86/sret-implicit.ll
index 5680952..2a998fc 100644
--- a/llvm/test/CodeGen/X86/sret-implicit.ll
+++ b/llvm/test/CodeGen/X86/sret-implicit.ll
@@ -1,12 +1,34 @@
-; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin8 -terminal-rule < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-pc-linux -terminal-rule < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86
+; RUN: llc -mtriple=x86_64-apple-darwin8 -terminal-rule < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=x86_64-pc-linux -terminal-rule < %s | FileCheck %s --check-prefix=X64
 
-; CHECK-LABEL: return32
-; CHECK-DAG: movq	$0, (%rdi)
-; CHECK-DAG: movq	%rdi, %rax
-; CHECK: retq
-define i256 @return32() {
+define void @sret_void(i32* sret %p) {
+  store i32 0, i32* %p
+  ret void
+}
+
+; X64-LABEL: sret_void
+; X64-DAG: movl $0, (%rdi)
+; X64-DAG: movq %rdi, %rax
+; X64: retq
+
+; X86-LABEL: sret_void
+; X86: movl 4(%esp), %eax
+; X86: movl $0, (%eax)
+; X86: retl
+
+define i256 @sret_demoted() {
   ret i256 0
 }
+
+; X64-LABEL: sret_demoted
+; X64-DAG: movq $0, (%rdi)
+; X64-DAG: movq %rdi, %rax
+; X64: retq
+
+; X86-LABEL: sret_demoted
+; X86: movl 4(%esp), %eax
+; X86: movl $0, (%eax)
+; X86: retl