Add support for byval function whose argument is not 32 bit aligned.
To do this it is necessary to add a "always inline" argument to the
memcpy node. For completeness I have also added this node to memmove
and memset.  I have also added getMem* functions, because the extra
argument makes it cumbersome to use getNode and because I get confused
by it :-)




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43172 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9c9c6a4..2d16c3c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1246,9 +1246,10 @@
 
     SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
     SDOperand  SizeNode = DAG.getConstant(Size, MVT::i32);
+    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
 
-    return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
-                       AlignNode);
+    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
+                         AlwaysInline);
   } else {
     return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
   }
@@ -4472,9 +4473,23 @@
   SDOperand SourceOp = Op.getOperand(2);
   SDOperand CountOp = Op.getOperand(3);
   SDOperand AlignOp = Op.getOperand(4);
+  SDOperand AlwaysInlineOp = Op.getOperand(5);
+
+  bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue();
   unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
   if (Align == 0) Align = 1;
 
+  // If size is unknown, call memcpy.
+  ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
+  if (!I) {
+    assert(!AlwaysInline && "Cannot inline copy of unknown size");
+    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
+  }
+  unsigned Size = I->getValue();
+
+  if (AlwaysInline)
+    return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
+
   // The libc version is likely to be faster for the following cases. It can
   // use the address value and run time information about the CPU.
   // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
@@ -4483,13 +4498,7 @@
   if ((Align & 3) != 0)
     return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
 
-  // If size is unknown, call memcpy.
-  ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
-  if (!I)
-    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
   // If size is more than the threshold, call memcpy.
-  unsigned Size = I->getValue();
   if (Size > Subtarget->getMinRepStrSizeThreshold())
     return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);