Revert r122955. It seems that using movups to lower memcpy can cause massive regressions (even on Nehalem) in edge cases. I also didn't see any real performance benefit.
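For reference, a minimal sketch (hypothetical, not taken from the reverted patch) of the kind of small fixed-size memcpy whose lowering r122955 changed: on subtargets reporting fast unaligned SSE accesses (e.g. Nehalem), such a copy would be expanded with movups pairs rather than scalar moves.

  ; Small fixed-size copy; the reverted patch affected how this is expanded.
  declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind

  define void @copy32(i8* %dst, i8* %src) nounwind {
  entry:
    ; 32-byte copy with only 4-byte known alignment.
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 32, i32 4, i1 false)
    ret void
  }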
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123015 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index 8f69b11..1b596b5 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -1,12 +1,8 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movsd | count 8
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 2
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind {
entry:
-; CHECK: ccosl:
-; CHECK: movaps
-; CHECK: movaps
-; CHECK: movups
-; CHECK: movups
%iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
%tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]