Add support for inlining small memcpys.
rdar://10412592
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144578 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 0b728a9..67ba14b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -185,6 +185,10 @@
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
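+    // Inline expansion of small, constant-length memory transfers.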
+ bool ARMIsMemXferSmall(uint64_t Len);
+ bool ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len,
+ bool isMemCpy);
unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
@@ -2193,18 +2196,80 @@
return true;
}
+bool ARMFastISel::ARMIsMemXferSmall(uint64_t Len) {
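+  // A 16-byte cap keeps the expansion to a few load/store pairs.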
+ return Len <= 16;
+}
+
+bool ARMFastISel::ARMTryEmitSmallMemXfer(Address Dest, Address Src,
+                                         uint64_t Len, bool isMemCpy) {
+  // FIXME: Memmoves require a little more care because their source and
+  // destination may overlap.
+ if (!isMemCpy)
+ return false;
+
+  // Make sure we don't bloat code by inlining very large memcpys.
+ if (!ARMIsMemXferSmall(Len))
+ return false;
+
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+      assert(Len == 1 && "Expected a length of 1!");
+ VT = MVT::i8;
+ }
+
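+    // Copy this chunk with a load from the source and a store to the dest.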
+    bool RV;
+    unsigned ResultReg;
+    RV = ARMEmitLoad(VT, ResultReg, Src);
+    assert(RV && "Should be able to handle this load.");
+    RV = ARMEmitStore(VT, ResultReg, Dest);
+    assert(RV && "Should be able to handle this store.");
+
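+    // Advance both addresses past the chunk that was just copied.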
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ Dest.Offset += Size;
+ Src.Offset += Size;
+ }
+
+ return true;
+}
+
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
case Intrinsic::memcpy:
case Intrinsic::memmove: {
- // FIXME: Small memcpy/memmove's are common enough that we want to do them
- // without a call if possible.
const MemTransferInst &MTI = cast<MemTransferInst>(I);
// Don't handle volatile.
if (MTI.isVolatile())
return false;
+
+    // Check for memmove before calling ComputeAddress below; otherwise we
+    // would emit dead address computations, since memmoves aren't inlined yet.
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+      // Small memcpys are common enough that we want to do them without a
+      // call if possible.
+ uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+ if (ARMIsMemXferSmall(Len)) {
+ Address Dest, Src;
+ if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+ !ARMComputeAddress(MTI.getRawSource(), Src))
+ return false;
+ if (ARMTryEmitSmallMemXfer(Dest, Src, Len, isMemCpy))
+ return true;
+ }
+ }
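+    // Inlining didn't apply; fall back to lowering the transfer as a call.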
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;