It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
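As a hedged sketch (not part of this commit's diff): when an alloca's requested alignment already matches the target's stack alignment, the backend can use the allocation as-is instead of rounding the stack adjustment up. The IR below is purely illustrative and assumes a target with 16-byte stack alignment; the function and value names are hypothetical.

define i8* @no_rounding_needed() {
entry:
        ; Requested alignment (16) equals the assumed stack alignment (16),
        ; so no additional rounding of the stack adjustment should be needed.
        %buf = alloca [64 x i8], align 16
        %p = getelementptr [64 x i8]* %buf, i32 0, i32 0
        ret i8* %p
}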
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
new file mode 100644
index 0000000..69c5fc5
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | \
+; RUN: llc -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
+; RUN: grep fldl %t | wc -l | grep 1
+; RUN: not grep xmm %t
+; RUN: grep {sub.*esp} %t | wc -l | grep 1
+
+; These testcases shouldn't require loading into an XMM register then storing
+; to memory, then reloading into an FPStack reg.
+
+define double @test1(double *%P) {
+ %A = load double* %P
+ ret double %A
+}
+
+; fastcc should return the value directly on the FP stack,
+; without an XMM round-trip through memory.
+define fastcc double @test2(<2 x double> %A) {
+ %B = extractelement <2 x double> %A, i32 0
+ ret double %B
+}
+
+define fastcc double @test3(<4 x float> %A) {
+ %B = bitcast <4 x float> %A to <2 x double>
+ %C = call fastcc double @test2(<2 x double> %B)
+ ret double %C
+}
+