For whatever the reason, x86 CallingConv::Fast (i.e. fastcc) was not passing scalar arguments in registers. This patch defines a new fastcc CC which is slightly different from the FastCall CC. In addition to passing integer arguments in ECX and EDX, it also specify doubles are passed in 8-byte slots which are 8-byte aligned (instead of 4-byte aligned). This avoids a potential performance hazard where doubles span cacheline boundaries.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55807 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
new file mode 100644
index 0000000..13068ba
--- /dev/null
+++ b/test/CodeGen/X86/fastcc.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep mov | grep ecx | grep 0
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep mov | grep xmm0 | grep 16
+
+@d = external global double		; <double*> [#uses=1]
+@c = external global double		; <double*> [#uses=1]
+@b = external global double		; <double*> [#uses=1]
+@a = external global double		; <double*> [#uses=1]
+
+define i32 @foo() nounwind {
+entry:
+	%0 = load double* @d, align 8		; <double> [#uses=1]
+	%1 = load double* @c, align 8		; <double> [#uses=1]
+	%2 = load double* @b, align 8		; <double> [#uses=1]
+	%3 = load double* @a, align 8		; <double> [#uses=1]
+	tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind
+	ret i32 0
+}
+
+declare fastcc void @bar(i32, i32, i32, double, double, double, double, double)