My previous commit had an incomplete message, it should have been:
make the 'fp return in ST(0)' optimization smart enough to
look through token factor nodes. THis allows us to compile
testcases like CodeGen/X86/fp-stack-retcopy.ll into:
_carg:
subl $12, %esp
call L_foo$stub
fstpl (%esp)
fldl (%esp)
addl $12, %esp
ret
instead of:
_carg:
subl $28, %esp
call L_foo$stub
fstpl 16(%esp)
movsd 16(%esp), %xmm0
movsd %xmm0, 8(%esp)
fldl 8(%esp)
addl $28, %esp
ret
Still not optimal, but much better and this is a trivial patch. Fixing
the rest requires invasive surgery that is is not llvm 2.2 material.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46054 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/fp-stack-retcopy.ll b/test/CodeGen/X86/fp-stack-retcopy.ll
new file mode 100644
index 0000000..997f8df
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-retcopy.ll
@@ -0,0 +1,12 @@
+; This should not copy the result of foo into an xmm register.
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
+; rdar://5689903
+
+declare double @foo()
+
+define double @carg({ double, double }* byval %z) nounwind {
+entry:
+ %tmp5 = tail call double @foo() nounwind ; <double> [#uses=1]
+ ret double %tmp5
+}
+