Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 1 | //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 11 | #include "llvm/Analysis/TargetTransformInfo.h" |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 12 | #include "llvm/IR/IRBuilder.h" |
Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 13 | #include "llvm/IR/IntrinsicInst.h" |
| 14 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 15 | |
| 16 | using namespace llvm; |
| 17 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 18 | static unsigned getLoopOperandSizeInBytes(Type *Type) { |
| 19 | if (VectorType *VTy = dyn_cast<VectorType>(Type)) { |
| 20 | return VTy->getBitWidth() / 8; |
| 21 | } |
| 22 | |
| 23 | return Type->getPrimitiveSizeInBits() / 8; |
| 24 | } |
| 25 | |
| 26 | void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, |
| 27 | Value *DstAddr, ConstantInt *CopyLen, |
| 28 | unsigned SrcAlign, unsigned DestAlign, |
| 29 | bool SrcIsVolatile, bool DstIsVolatile, |
| 30 | const TargetTransformInfo &TTI) { |
| 31 | // No need to expand zero length copies. |
| 32 | if (CopyLen->isZero()) |
| 33 | return; |
| 34 | |
| 35 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
| 36 | BasicBlock *PostLoopBB = nullptr; |
| 37 | Function *ParentFunc = PreLoopBB->getParent(); |
| 38 | LLVMContext &Ctx = PreLoopBB->getContext(); |
| 39 | |
| 40 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 41 | Type *LoopOpType = |
| 42 | TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); |
| 43 | |
| 44 | unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); |
| 45 | uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; |
| 46 | |
| 47 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 48 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 49 | |
| 50 | if (LoopEndCount != 0) { |
| 51 | // Split |
| 52 | PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); |
| 53 | BasicBlock *LoopBB = |
| 54 | BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); |
| 55 | PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); |
| 56 | |
| 57 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
| 58 | |
| 59 | // Cast the Src and Dst pointers to pointers to the loop operand type (if |
| 60 | // needed). |
| 61 | PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); |
| 62 | PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); |
| 63 | if (SrcAddr->getType() != SrcOpType) { |
| 64 | SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); |
| 65 | } |
| 66 | if (DstAddr->getType() != DstOpType) { |
| 67 | DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); |
| 68 | } |
| 69 | |
| 70 | IRBuilder<> LoopBuilder(LoopBB); |
| 71 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); |
| 72 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); |
| 73 | // Loop Body |
| 74 | Value *SrcGEP = |
| 75 | LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); |
| 76 | Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 77 | Value *DstGEP = |
| 78 | LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); |
| 79 | LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 80 | |
| 81 | Value *NewIndex = |
| 82 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); |
| 83 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 84 | |
| 85 | // Create the loop branch condition. |
| 86 | Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); |
| 87 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), |
| 88 | LoopBB, PostLoopBB); |
| 89 | } |
| 90 | |
| 91 | uint64_t BytesCopied = LoopEndCount * LoopOpSize; |
| 92 | uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; |
| 93 | if (RemainingBytes) { |
| 94 | IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() |
| 95 | : InsertBefore); |
| 96 | |
| 97 | // Update the alignment based on the copy size used in the loop body. |
| 98 | SrcAlign = std::min(SrcAlign, LoopOpSize); |
| 99 | DestAlign = std::min(DestAlign, LoopOpSize); |
| 100 | |
| 101 | SmallVector<Type *, 5> RemainingOps; |
| 102 | TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, |
| 103 | SrcAlign, DestAlign); |
| 104 | |
| 105 | for (auto OpTy : RemainingOps) { |
| 106 | // Calaculate the new index |
| 107 | unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); |
| 108 | uint64_t GepIndex = BytesCopied / OperandSize; |
| 109 | assert(GepIndex * OperandSize == BytesCopied && |
| 110 | "Division should have no Remainder!"); |
| 111 | // Cast source to operand type and load |
| 112 | PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); |
| 113 | Value *CastedSrc = SrcAddr->getType() == SrcPtrType |
| 114 | ? SrcAddr |
| 115 | : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); |
| 116 | Value *SrcGEP = RBuilder.CreateInBoundsGEP( |
| 117 | OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); |
| 118 | Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 119 | |
| 120 | // Cast destination to operand type and store. |
| 121 | PointerType *DstPtrType = PointerType::get(OpTy, DstAS); |
| 122 | Value *CastedDst = DstAddr->getType() == DstPtrType |
| 123 | ? DstAddr |
| 124 | : RBuilder.CreateBitCast(DstAddr, DstPtrType); |
| 125 | Value *DstGEP = RBuilder.CreateInBoundsGEP( |
| 126 | OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); |
| 127 | RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 128 | |
| 129 | BytesCopied += OperandSize; |
| 130 | } |
| 131 | } |
| 132 | assert(BytesCopied == CopyLen->getZExtValue() && |
| 133 | "Bytes copied should match size in the call!"); |
| 134 | } |
| 135 | |
| 136 | void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, |
| 137 | Value *SrcAddr, Value *DstAddr, |
| 138 | Value *CopyLen, unsigned SrcAlign, |
| 139 | unsigned DestAlign, bool SrcIsVolatile, |
| 140 | bool DstIsVolatile, |
| 141 | const TargetTransformInfo &TTI) { |
| 142 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
| 143 | BasicBlock *PostLoopBB = |
| 144 | PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); |
| 145 | |
| 146 | Function *ParentFunc = PreLoopBB->getParent(); |
| 147 | LLVMContext &Ctx = PreLoopBB->getContext(); |
| 148 | |
| 149 | Type *LoopOpType = |
| 150 | TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); |
| 151 | unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); |
| 152 | |
| 153 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
| 154 | |
| 155 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 156 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 157 | PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); |
| 158 | PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); |
| 159 | if (SrcAddr->getType() != SrcOpType) { |
| 160 | SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); |
| 161 | } |
| 162 | if (DstAddr->getType() != DstOpType) { |
| 163 | DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); |
| 164 | } |
| 165 | |
| 166 | // Calculate the loop trip count, and remaining bytes to copy after the loop. |
| 167 | Type *CopyLenType = CopyLen->getType(); |
| 168 | IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); |
| 169 | assert(ILengthType && |
| 170 | "expected size argument to memcpy to be an integer type!"); |
| 171 | ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); |
| 172 | Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); |
| 173 | Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); |
| 174 | Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); |
| 175 | |
| 176 | BasicBlock *LoopBB = |
Sean Fertile | 42b1334 | 2017-12-15 19:29:12 +0000 | [diff] [blame^] | 177 | BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 178 | IRBuilder<> LoopBuilder(LoopBB); |
| 179 | |
| 180 | PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); |
| 181 | LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); |
| 182 | |
| 183 | Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); |
| 184 | Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 185 | Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); |
| 186 | LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 187 | |
| 188 | Value *NewIndex = |
| 189 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); |
| 190 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 191 | |
| 192 | Type *Int8Type = Type::getInt8Ty(Ctx); |
| 193 | if (LoopOpType != Int8Type) { |
| 194 | // Loop body for the residual copy. |
| 195 | BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", |
Sean Fertile | 42b1334 | 2017-12-15 19:29:12 +0000 | [diff] [blame^] | 196 | PreLoopBB->getParent(), |
| 197 | PostLoopBB); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 198 | // Residual loop header. |
| 199 | BasicBlock *ResHeaderBB = BasicBlock::Create( |
| 200 | Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); |
| 201 | |
| 202 | // Need to update the pre-loop basic block to branch to the correct place. |
| 203 | // branch to the main loop if the count is non-zero, branch to the residual |
| 204 | // loop if the copy size is smaller then 1 iteration of the main loop but |
| 205 | // non-zero and finally branch to after the residual loop if the memcpy |
| 206 | // size is zero. |
| 207 | ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); |
| 208 | PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), |
| 209 | LoopBB, ResHeaderBB); |
| 210 | PreLoopBB->getTerminator()->eraseFromParent(); |
| 211 | |
| 212 | LoopBuilder.CreateCondBr( |
| 213 | LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, |
| 214 | ResHeaderBB); |
| 215 | |
| 216 | // Determine if we need to branch to the residual loop or bypass it. |
| 217 | IRBuilder<> RHBuilder(ResHeaderBB); |
| 218 | RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), |
| 219 | ResLoopBB, PostLoopBB); |
| 220 | |
| 221 | // Copy the residual with single byte load/store loop. |
| 222 | IRBuilder<> ResBuilder(ResLoopBB); |
| 223 | PHINode *ResidualIndex = |
| 224 | ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); |
| 225 | ResidualIndex->addIncoming(Zero, ResHeaderBB); |
| 226 | |
| 227 | Value *SrcAsInt8 = |
| 228 | ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); |
| 229 | Value *DstAsInt8 = |
| 230 | ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); |
| 231 | Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); |
| 232 | Value *SrcGEP = |
| 233 | ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); |
| 234 | Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 235 | Value *DstGEP = |
| 236 | ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); |
| 237 | ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 238 | |
| 239 | Value *ResNewIndex = |
| 240 | ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); |
| 241 | ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); |
| 242 | |
| 243 | // Create the loop branch condition. |
| 244 | ResBuilder.CreateCondBr( |
| 245 | ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, |
| 246 | PostLoopBB); |
| 247 | } else { |
| 248 | // In this case the loop operand type was a byte, and there is no need for a |
| 249 | // residual loop to copy the remaining memory after the main loop. |
| 250 | // We do however need to patch up the control flow by creating the |
| 251 | // terminators for the preloop block and the memcpy loop. |
| 252 | ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); |
| 253 | PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), |
| 254 | LoopBB, PostLoopBB); |
| 255 | PreLoopBB->getTerminator()->eraseFromParent(); |
| 256 | LoopBuilder.CreateCondBr( |
| 257 | LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, |
| 258 | PostLoopBB); |
| 259 | } |
| 260 | } |
| 261 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 262 | void llvm::createMemCpyLoop(Instruction *InsertBefore, |
| 263 | Value *SrcAddr, Value *DstAddr, Value *CopyLen, |
| 264 | unsigned SrcAlign, unsigned DestAlign, |
| 265 | bool SrcIsVolatile, bool DstIsVolatile) { |
| 266 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 267 | |
| 268 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 269 | Function *F = OrigBB->getParent(); |
| 270 | BasicBlock *NewBB = |
| 271 | InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split"); |
| 272 | BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop", |
| 273 | F, NewBB); |
| 274 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 275 | IRBuilder<> Builder(OrigBB->getTerminator()); |
| 276 | |
| 277 | // SrcAddr and DstAddr are expected to be pointer types, |
| 278 | // so no check is made here. |
| 279 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 280 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 281 | |
| 282 | // Cast pointers to (char *) |
| 283 | SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); |
| 284 | DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); |
| 285 | |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 286 | Builder.CreateCondBr( |
| 287 | Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, |
| 288 | LoopBB); |
| 289 | OrigBB->getTerminator()->eraseFromParent(); |
| 290 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 291 | IRBuilder<> LoopBuilder(LoopBB); |
| 292 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 293 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); |
| 294 | |
| 295 | // load from SrcAddr+LoopIndex |
| 296 | // TODO: we can leverage the align parameter of llvm.memcpy for more efficient |
| 297 | // word-sized loads and stores. |
| 298 | Value *Element = |
| 299 | LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( |
| 300 | LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), |
| 301 | SrcIsVolatile); |
| 302 | // store at DstAddr+LoopIndex |
| 303 | LoopBuilder.CreateStore(Element, |
| 304 | LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), |
| 305 | DstAddr, LoopIndex), |
| 306 | DstIsVolatile); |
| 307 | |
| 308 | // The value for LoopIndex coming from backedge is (LoopIndex + 1) |
| 309 | Value *NewIndex = |
| 310 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); |
| 311 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 312 | |
| 313 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, |
| 314 | NewBB); |
| 315 | } |
| 316 | |
| 317 | // Lower memmove to IR. memmove is required to correctly copy overlapping memory |
| 318 | // regions; therefore, it has to check the relative positions of the source and |
| 319 | // destination pointers and choose the copy direction accordingly. |
| 320 | // |
| 321 | // The code below is an IR rendition of this C function: |
| 322 | // |
| 323 | // void* memmove(void* dst, const void* src, size_t n) { |
| 324 | // unsigned char* d = dst; |
| 325 | // const unsigned char* s = src; |
| 326 | // if (s < d) { |
| 327 | // // copy backwards |
| 328 | // while (n--) { |
| 329 | // d[n] = s[n]; |
| 330 | // } |
| 331 | // } else { |
| 332 | // // copy forward |
| 333 | // for (size_t i = 0; i < n; ++i) { |
| 334 | // d[i] = s[i]; |
| 335 | // } |
| 336 | // } |
| 337 | // return dst; |
| 338 | // } |
| 339 | static void createMemMoveLoop(Instruction *InsertBefore, |
| 340 | Value *SrcAddr, Value *DstAddr, Value *CopyLen, |
| 341 | unsigned SrcAlign, unsigned DestAlign, |
| 342 | bool SrcIsVolatile, bool DstIsVolatile) { |
| 343 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 344 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 345 | Function *F = OrigBB->getParent(); |
| 346 | |
| 347 | // Create the a comparison of src and dst, based on which we jump to either |
| 348 | // the forward-copy part of the function (if src >= dst) or the backwards-copy |
| 349 | // part (if src < dst). |
| 350 | // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else |
| 351 | // structure. Its block terminators (unconditional branches) are replaced by |
| 352 | // the appropriate conditional branches when the loop is built. |
| 353 | ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, |
| 354 | SrcAddr, DstAddr, "compare_src_dst"); |
| 355 | TerminatorInst *ThenTerm, *ElseTerm; |
| 356 | SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, |
| 357 | &ElseTerm); |
| 358 | |
| 359 | // Each part of the function consists of two blocks: |
| 360 | // copy_backwards: used to skip the loop when n == 0 |
| 361 | // copy_backwards_loop: the actual backwards loop BB |
| 362 | // copy_forward: used to skip the loop when n == 0 |
| 363 | // copy_forward_loop: the actual forward loop BB |
| 364 | BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); |
| 365 | CopyBackwardsBB->setName("copy_backwards"); |
| 366 | BasicBlock *CopyForwardBB = ElseTerm->getParent(); |
| 367 | CopyForwardBB->setName("copy_forward"); |
| 368 | BasicBlock *ExitBB = InsertBefore->getParent(); |
| 369 | ExitBB->setName("memmove_done"); |
| 370 | |
| 371 | // Initial comparison of n == 0 that lets us skip the loops altogether. Shared |
| 372 | // between both backwards and forward copy clauses. |
| 373 | ICmpInst *CompareN = |
| 374 | new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, |
| 375 | ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); |
| 376 | |
| 377 | // Copying backwards. |
| 378 | BasicBlock *LoopBB = |
| 379 | BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); |
| 380 | IRBuilder<> LoopBuilder(LoopBB); |
| 381 | PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 382 | Value *IndexPtr = LoopBuilder.CreateSub( |
| 383 | LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); |
| 384 | Value *Element = LoopBuilder.CreateLoad( |
| 385 | LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); |
| 386 | LoopBuilder.CreateStore(Element, |
| 387 | LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); |
| 388 | LoopBuilder.CreateCondBr( |
| 389 | LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), |
| 390 | ExitBB, LoopBB); |
| 391 | LoopPhi->addIncoming(IndexPtr, LoopBB); |
| 392 | LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); |
| 393 | BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); |
| 394 | ThenTerm->eraseFromParent(); |
| 395 | |
| 396 | // Copying forward. |
| 397 | BasicBlock *FwdLoopBB = |
| 398 | BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); |
| 399 | IRBuilder<> FwdLoopBuilder(FwdLoopBB); |
| 400 | PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); |
| 401 | Value *FwdElement = FwdLoopBuilder.CreateLoad( |
| 402 | FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); |
| 403 | FwdLoopBuilder.CreateStore( |
| 404 | FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); |
| 405 | Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( |
| 406 | FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); |
| 407 | FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), |
| 408 | ExitBB, FwdLoopBB); |
| 409 | FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); |
| 410 | FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); |
| 411 | |
| 412 | BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); |
| 413 | ElseTerm->eraseFromParent(); |
| 414 | } |
| 415 | |
| 416 | static void createMemSetLoop(Instruction *InsertBefore, |
| 417 | Value *DstAddr, Value *CopyLen, Value *SetValue, |
| 418 | unsigned Align, bool IsVolatile) { |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 419 | Type *TypeOfCopyLen = CopyLen->getType(); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 420 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 421 | Function *F = OrigBB->getParent(); |
| 422 | BasicBlock *NewBB = |
| 423 | OrigBB->splitBasicBlock(InsertBefore, "split"); |
| 424 | BasicBlock *LoopBB |
| 425 | = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); |
| 426 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 427 | IRBuilder<> Builder(OrigBB->getTerminator()); |
| 428 | |
| 429 | // Cast pointer to the type of value getting stored |
| 430 | unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 431 | DstAddr = Builder.CreateBitCast(DstAddr, |
| 432 | PointerType::get(SetValue->getType(), dstAS)); |
| 433 | |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 434 | Builder.CreateCondBr( |
| 435 | Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, |
| 436 | LoopBB); |
| 437 | OrigBB->getTerminator()->eraseFromParent(); |
| 438 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 439 | IRBuilder<> LoopBuilder(LoopBB); |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 440 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 441 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 442 | |
| 443 | LoopBuilder.CreateStore( |
| 444 | SetValue, |
| 445 | LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), |
| 446 | IsVolatile); |
| 447 | |
| 448 | Value *NewIndex = |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 449 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 450 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 451 | |
| 452 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, |
| 453 | NewBB); |
| 454 | } |
| 455 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 456 | void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, |
| 457 | const TargetTransformInfo &TTI) { |
| 458 | // Original implementation |
| 459 | if (!TTI.useWideIRMemcpyLoopLowering()) { |
| 460 | createMemCpyLoop(/* InsertBefore */ Memcpy, |
| 461 | /* SrcAddr */ Memcpy->getRawSource(), |
| 462 | /* DstAddr */ Memcpy->getRawDest(), |
| 463 | /* CopyLen */ Memcpy->getLength(), |
| 464 | /* SrcAlign */ Memcpy->getAlignment(), |
| 465 | /* DestAlign */ Memcpy->getAlignment(), |
| 466 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 467 | /* DstIsVolatile */ Memcpy->isVolatile()); |
| 468 | } else { |
| 469 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { |
| 470 | createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, |
| 471 | /* SrcAddr */ Memcpy->getRawSource(), |
| 472 | /* DstAddr */ Memcpy->getRawDest(), |
| 473 | /* CopyLen */ CI, |
| 474 | /* SrcAlign */ Memcpy->getAlignment(), |
| 475 | /* DestAlign */ Memcpy->getAlignment(), |
| 476 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 477 | /* DstIsVolatile */ Memcpy->isVolatile(), |
| 478 | /* TargetTransformInfo */ TTI); |
| 479 | } else { |
| 480 | createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, |
| 481 | /* SrcAddr */ Memcpy->getRawSource(), |
| 482 | /* DstAddr */ Memcpy->getRawDest(), |
| 483 | /* CopyLen */ Memcpy->getLength(), |
| 484 | /* SrcAlign */ Memcpy->getAlignment(), |
| 485 | /* DestAlign */ Memcpy->getAlignment(), |
| 486 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 487 | /* DstIsVolatile */ Memcpy->isVolatile(), |
| 488 | /* TargetTransfomrInfo */ TTI); |
| 489 | } |
| 490 | } |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 491 | } |
| 492 | |
| 493 | void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { |
| 494 | createMemMoveLoop(/* InsertBefore */ Memmove, |
| 495 | /* SrcAddr */ Memmove->getRawSource(), |
| 496 | /* DstAddr */ Memmove->getRawDest(), |
| 497 | /* CopyLen */ Memmove->getLength(), |
| 498 | /* SrcAlign */ Memmove->getAlignment(), |
| 499 | /* DestAlign */ Memmove->getAlignment(), |
| 500 | /* SrcIsVolatile */ Memmove->isVolatile(), |
| 501 | /* DstIsVolatile */ Memmove->isVolatile()); |
| 502 | } |
| 503 | |
| 504 | void llvm::expandMemSetAsLoop(MemSetInst *Memset) { |
| 505 | createMemSetLoop(/* InsertBefore */ Memset, |
| 506 | /* DstAddr */ Memset->getRawDest(), |
| 507 | /* CopyLen */ Memset->getLength(), |
| 508 | /* SetValue */ Memset->getValue(), |
| 509 | /* Alignment */ Memset->getAlignment(), |
| 510 | Memset->isVolatile()); |
| 511 | } |