Use movups to lower memcpy and memset even on subtargets where unaligned memory access is not fast (i.e. not fast the way it is on corei7). The theory is that a single movups is still faster than a pair of movq or a quad of movl. This will probably hurt older chips like the P4, but it should run faster on current and future Intel processors. rdar://8817010 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122955 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 461f1fc359dff438dad25e809499845b10a3d032 [log] [tgz]
author: Evan Cheng <evan.cheng@apple.com> Thu Jan 06 07:58:36 2011 +0000
committer: Evan Cheng <evan.cheng@apple.com> Thu Jan 06 07:58:36 2011 +0000
tree: 143a2a682ffdd84409d6bd1673e22630d42d565e
parent: cce240d26bbf1c2bec9cfff4838d8d807b215586 [diff]
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f871b5a..ddec78b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp

@@ -1063,12 +1063,8 @@
   // linux.  This is because the stack realignment code can't handle certain
   // cases like PR2962.  This should be removed when PR2962 is fixed.
   const Function *F = MF.getFunction();
-  if (NonScalarIntSafe &&
-      !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+  if (NonScalarIntSafe && !F->hasFnAttr(Attribute::NoImplicitFloat)) {
     if (Size >= 16 &&
-        (Subtarget->isUnalignedMemAccessFast() ||
-         ((DstAlign == 0 || DstAlign >= 16) &&
-          (SrcAlign == 0 || SrcAlign >= 16))) &&
         Subtarget->getStackAlignment() >= 16) {
       if (Subtarget->hasSSE2())
         return MVT::v4i32;
commit	461f1fc359dff438dad25e809499845b10a3d032	[log] [tgz]
author	Evan Cheng <evan.cheng@apple.com>	Thu Jan 06 07:58:36 2011 +0000
committer	Evan Cheng <evan.cheng@apple.com>	Thu Jan 06 07:58:36 2011 +0000
tree	143a2a682ffdd84409d6bd1673e22630d42d565e
parent	cce240d26bbf1c2bec9cfff4838d8d807b215586 [diff]