Fix sdisel memcpy, memset, memmove lowering: 1. Makes it possible to lower with floating point loads and stores. 2. Avoid unaligned loads / stores unless it's fast. 3. Fix some memcpy lowering logic bug related to when to optimize a load from constant string into a constant. 4. Adjust x86 memcpy lowering threshold to make it more sane. 5. Fix x86 target hook so it uses vector and floating point memory ops more effectively. rdar://7774704 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100090 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 255f20f7f76e4ca1ac1c73294852cb6fcb18c77d [log] [tgz]
author: Evan Cheng <evan.cheng@apple.com> Thu Apr 01 06:04:33 2010 +0000
committer: Evan Cheng <evan.cheng@apple.com> Thu Apr 01 06:04:33 2010 +0000
tree: c3bd975c3254d60625b64fcff2c8b918060b6afa
parent: 48c58bb8610cd475d1acb073694e0d2b4dd7cc8c [diff]
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b94f76e..bd268ec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp

@@ -1012,7 +1012,7 @@
   // FIXME: These should be based on subtarget info. Plus, the values should
   // be smaller when we are in optimizing for size mode.
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
-  maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+  maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
   maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
   setPrefLoopAlignment(16);
   benefitFromCodePlacementOpt = true;
@@ -1074,19 +1074,27 @@
 /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
 /// determining it.
 EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                       bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+                                       unsigned DstAlign, unsigned SrcAlign,
                                        SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux.  This is because the stack realignment code can't handle certain
   // cases like PR2962.  This should be removed when PR2962 is fixed.
   const Function *F = DAG.getMachineFunction().getFunction();
-  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
-  if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
-    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
-      return MVT::v4i32;
-    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
-      return MVT::v4f32;
+  if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+    if (Size >= 16 &&
+        (Subtarget->isUnalignedMemAccessFast() ||
+         (DstAlign == 0 || DstAlign >= 16) &&
+         (SrcAlign == 0 || SrcAlign >= 16)) &&
+        Subtarget->getStackAlignment() >= 16) {
+      if (Subtarget->hasSSE2())
+        return MVT::v4i32;
+      if (Subtarget->hasSSE1())
+        return MVT::v4f32;
+    } else if (Size >= 8 &&
+               Subtarget->getStackAlignment() >= 8 &&
+               Subtarget->hasSSE2())
+      return MVT::f64;
   }
   if (Subtarget->is64Bit() && Size >= 8)
     return MVT::i64;

diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 46fa3ce..569dc1f 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h

@@ -423,8 +423,8 @@
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
     /// determining it.
-    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                    bool isSrcConst, bool isSrcStr,
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
                                     SelectionDAG &DAG) const;
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
commit	255f20f7f76e4ca1ac1c73294852cb6fcb18c77d	[log] [tgz]
author	Evan Cheng <evan.cheng@apple.com>	Thu Apr 01 06:04:33 2010 +0000
committer	Evan Cheng <evan.cheng@apple.com>	Thu Apr 01 06:04:33 2010 +0000
tree	c3bd975c3254d60625b64fcff2c8b918060b6afa
parent	48c58bb8610cd475d1acb073694e0d2b4dd7cc8c [diff]