[Memcpy Loop Lowering] Insert loop BB inbetween the split BB.

The original memcpy expansion inserted the loop basic block inbetween
the 2 new basic blocks created by splitting the original block the memcpy
call was in. This commit makes the new memcpy expansion do the same to keep the
layout of the IR matching between the old and new implementations.

Differential Review: https://reviews.llvm.org/D41197

llvm-svn: 320848
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 900450b..33b8f62 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -174,7 +174,7 @@
   Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
 
   BasicBlock *LoopBB =
-      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
   IRBuilder<> LoopBuilder(LoopBB);
 
   PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
@@ -193,7 +193,8 @@
   if (LoopOpType != Int8Type) {
     // Loop body for the residual copy.
     BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
-                                               PreLoopBB->getParent(), nullptr);
+                                               PreLoopBB->getParent(),
+                                               PostLoopBB);
     // Residual loop header.
     BasicBlock *ResHeaderBB = BasicBlock::Create(
         Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);