Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 1 | //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 11 | #include "llvm/Analysis/TargetTransformInfo.h" |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 12 | #include "llvm/IR/IRBuilder.h" |
Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 13 | #include "llvm/IR/IntrinsicInst.h" |
| 14 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 15 | |
| 16 | using namespace llvm; |
| 17 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 18 | static unsigned getLoopOperandSizeInBytes(Type *Type) { |
| 19 | if (VectorType *VTy = dyn_cast<VectorType>(Type)) { |
| 20 | return VTy->getBitWidth() / 8; |
| 21 | } |
| 22 | |
| 23 | return Type->getPrimitiveSizeInBits() / 8; |
| 24 | } |
| 25 | |
| 26 | void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, |
| 27 | Value *DstAddr, ConstantInt *CopyLen, |
| 28 | unsigned SrcAlign, unsigned DestAlign, |
| 29 | bool SrcIsVolatile, bool DstIsVolatile, |
| 30 | const TargetTransformInfo &TTI) { |
| 31 | // No need to expand zero length copies. |
| 32 | if (CopyLen->isZero()) |
| 33 | return; |
| 34 | |
| 35 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
| 36 | BasicBlock *PostLoopBB = nullptr; |
| 37 | Function *ParentFunc = PreLoopBB->getParent(); |
| 38 | LLVMContext &Ctx = PreLoopBB->getContext(); |
| 39 | |
| 40 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 41 | Type *LoopOpType = |
| 42 | TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); |
| 43 | |
| 44 | unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); |
| 45 | uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; |
| 46 | |
| 47 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 48 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 49 | |
| 50 | if (LoopEndCount != 0) { |
| 51 | // Split |
| 52 | PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); |
| 53 | BasicBlock *LoopBB = |
| 54 | BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); |
| 55 | PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); |
| 56 | |
| 57 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
| 58 | |
| 59 | // Cast the Src and Dst pointers to pointers to the loop operand type (if |
| 60 | // needed). |
| 61 | PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); |
| 62 | PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); |
| 63 | if (SrcAddr->getType() != SrcOpType) { |
| 64 | SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); |
| 65 | } |
| 66 | if (DstAddr->getType() != DstOpType) { |
| 67 | DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); |
| 68 | } |
| 69 | |
| 70 | IRBuilder<> LoopBuilder(LoopBB); |
| 71 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); |
| 72 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); |
| 73 | // Loop Body |
| 74 | Value *SrcGEP = |
| 75 | LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); |
| 76 | Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 77 | Value *DstGEP = |
| 78 | LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); |
| 79 | LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 80 | |
| 81 | Value *NewIndex = |
| 82 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); |
| 83 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 84 | |
| 85 | // Create the loop branch condition. |
| 86 | Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); |
| 87 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), |
| 88 | LoopBB, PostLoopBB); |
| 89 | } |
| 90 | |
| 91 | uint64_t BytesCopied = LoopEndCount * LoopOpSize; |
| 92 | uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; |
| 93 | if (RemainingBytes) { |
| 94 | IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() |
| 95 | : InsertBefore); |
| 96 | |
| 97 | // Update the alignment based on the copy size used in the loop body. |
| 98 | SrcAlign = std::min(SrcAlign, LoopOpSize); |
| 99 | DestAlign = std::min(DestAlign, LoopOpSize); |
| 100 | |
| 101 | SmallVector<Type *, 5> RemainingOps; |
| 102 | TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, |
| 103 | SrcAlign, DestAlign); |
| 104 | |
| 105 | for (auto OpTy : RemainingOps) { |
| 106 | // Calaculate the new index |
| 107 | unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); |
| 108 | uint64_t GepIndex = BytesCopied / OperandSize; |
| 109 | assert(GepIndex * OperandSize == BytesCopied && |
| 110 | "Division should have no Remainder!"); |
| 111 | // Cast source to operand type and load |
| 112 | PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); |
| 113 | Value *CastedSrc = SrcAddr->getType() == SrcPtrType |
| 114 | ? SrcAddr |
| 115 | : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); |
| 116 | Value *SrcGEP = RBuilder.CreateInBoundsGEP( |
| 117 | OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); |
| 118 | Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 119 | |
| 120 | // Cast destination to operand type and store. |
| 121 | PointerType *DstPtrType = PointerType::get(OpTy, DstAS); |
| 122 | Value *CastedDst = DstAddr->getType() == DstPtrType |
| 123 | ? DstAddr |
| 124 | : RBuilder.CreateBitCast(DstAddr, DstPtrType); |
| 125 | Value *DstGEP = RBuilder.CreateInBoundsGEP( |
| 126 | OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); |
| 127 | RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 128 | |
| 129 | BytesCopied += OperandSize; |
| 130 | } |
| 131 | } |
| 132 | assert(BytesCopied == CopyLen->getZExtValue() && |
| 133 | "Bytes copied should match size in the call!"); |
| 134 | } |
| 135 | |
| 136 | void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, |
| 137 | Value *SrcAddr, Value *DstAddr, |
| 138 | Value *CopyLen, unsigned SrcAlign, |
| 139 | unsigned DestAlign, bool SrcIsVolatile, |
| 140 | bool DstIsVolatile, |
| 141 | const TargetTransformInfo &TTI) { |
| 142 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
| 143 | BasicBlock *PostLoopBB = |
| 144 | PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); |
| 145 | |
| 146 | Function *ParentFunc = PreLoopBB->getParent(); |
| 147 | LLVMContext &Ctx = PreLoopBB->getContext(); |
| 148 | |
| 149 | Type *LoopOpType = |
| 150 | TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); |
| 151 | unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); |
| 152 | |
| 153 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
| 154 | |
| 155 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 156 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 157 | PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); |
| 158 | PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); |
| 159 | if (SrcAddr->getType() != SrcOpType) { |
| 160 | SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); |
| 161 | } |
| 162 | if (DstAddr->getType() != DstOpType) { |
| 163 | DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); |
| 164 | } |
| 165 | |
| 166 | // Calculate the loop trip count, and remaining bytes to copy after the loop. |
| 167 | Type *CopyLenType = CopyLen->getType(); |
| 168 | IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); |
| 169 | assert(ILengthType && |
| 170 | "expected size argument to memcpy to be an integer type!"); |
Sean Fertile | 68d7f9d | 2017-12-16 22:41:39 +0000 | [diff] [blame] | 171 | Type *Int8Type = Type::getInt8Ty(Ctx); |
| 172 | bool LoopOpIsInt8 = LoopOpType == Int8Type; |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 173 | ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); |
Sean Fertile | 68d7f9d | 2017-12-16 22:41:39 +0000 | [diff] [blame] | 174 | Value *RuntimeLoopCount = LoopOpIsInt8 ? |
| 175 | CopyLen : |
| 176 | PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 177 | BasicBlock *LoopBB = |
Sean Fertile | 42b1334 | 2017-12-15 19:29:12 +0000 | [diff] [blame] | 178 | BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 179 | IRBuilder<> LoopBuilder(LoopBB); |
| 180 | |
| 181 | PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); |
| 182 | LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); |
| 183 | |
| 184 | Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); |
| 185 | Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 186 | Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); |
| 187 | LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 188 | |
| 189 | Value *NewIndex = |
| 190 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); |
| 191 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 192 | |
Sean Fertile | 68d7f9d | 2017-12-16 22:41:39 +0000 | [diff] [blame] | 193 | if (!LoopOpIsInt8) { |
| 194 | // Add in the |
| 195 | Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); |
| 196 | Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); |
| 197 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 198 | // Loop body for the residual copy. |
| 199 | BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", |
Sean Fertile | 42b1334 | 2017-12-15 19:29:12 +0000 | [diff] [blame] | 200 | PreLoopBB->getParent(), |
| 201 | PostLoopBB); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 202 | // Residual loop header. |
| 203 | BasicBlock *ResHeaderBB = BasicBlock::Create( |
| 204 | Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); |
| 205 | |
| 206 | // Need to update the pre-loop basic block to branch to the correct place. |
| 207 | // branch to the main loop if the count is non-zero, branch to the residual |
| 208 | // loop if the copy size is smaller then 1 iteration of the main loop but |
| 209 | // non-zero and finally branch to after the residual loop if the memcpy |
| 210 | // size is zero. |
| 211 | ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); |
| 212 | PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), |
| 213 | LoopBB, ResHeaderBB); |
| 214 | PreLoopBB->getTerminator()->eraseFromParent(); |
| 215 | |
| 216 | LoopBuilder.CreateCondBr( |
| 217 | LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, |
| 218 | ResHeaderBB); |
| 219 | |
| 220 | // Determine if we need to branch to the residual loop or bypass it. |
| 221 | IRBuilder<> RHBuilder(ResHeaderBB); |
| 222 | RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), |
| 223 | ResLoopBB, PostLoopBB); |
| 224 | |
| 225 | // Copy the residual with single byte load/store loop. |
| 226 | IRBuilder<> ResBuilder(ResLoopBB); |
| 227 | PHINode *ResidualIndex = |
| 228 | ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); |
| 229 | ResidualIndex->addIncoming(Zero, ResHeaderBB); |
| 230 | |
| 231 | Value *SrcAsInt8 = |
| 232 | ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); |
| 233 | Value *DstAsInt8 = |
| 234 | ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); |
| 235 | Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); |
| 236 | Value *SrcGEP = |
| 237 | ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); |
| 238 | Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 239 | Value *DstGEP = |
| 240 | ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); |
| 241 | ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 242 | |
| 243 | Value *ResNewIndex = |
| 244 | ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); |
| 245 | ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); |
| 246 | |
| 247 | // Create the loop branch condition. |
| 248 | ResBuilder.CreateCondBr( |
| 249 | ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, |
| 250 | PostLoopBB); |
| 251 | } else { |
| 252 | // In this case the loop operand type was a byte, and there is no need for a |
| 253 | // residual loop to copy the remaining memory after the main loop. |
| 254 | // We do however need to patch up the control flow by creating the |
| 255 | // terminators for the preloop block and the memcpy loop. |
| 256 | ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); |
| 257 | PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), |
| 258 | LoopBB, PostLoopBB); |
| 259 | PreLoopBB->getTerminator()->eraseFromParent(); |
| 260 | LoopBuilder.CreateCondBr( |
| 261 | LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, |
| 262 | PostLoopBB); |
| 263 | } |
| 264 | } |
| 265 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 266 | // Lower memmove to IR. memmove is required to correctly copy overlapping memory |
| 267 | // regions; therefore, it has to check the relative positions of the source and |
| 268 | // destination pointers and choose the copy direction accordingly. |
| 269 | // |
| 270 | // The code below is an IR rendition of this C function: |
| 271 | // |
| 272 | // void* memmove(void* dst, const void* src, size_t n) { |
| 273 | // unsigned char* d = dst; |
| 274 | // const unsigned char* s = src; |
| 275 | // if (s < d) { |
| 276 | // // copy backwards |
| 277 | // while (n--) { |
| 278 | // d[n] = s[n]; |
| 279 | // } |
| 280 | // } else { |
| 281 | // // copy forward |
| 282 | // for (size_t i = 0; i < n; ++i) { |
| 283 | // d[i] = s[i]; |
| 284 | // } |
| 285 | // } |
| 286 | // return dst; |
| 287 | // } |
| 288 | static void createMemMoveLoop(Instruction *InsertBefore, |
| 289 | Value *SrcAddr, Value *DstAddr, Value *CopyLen, |
| 290 | unsigned SrcAlign, unsigned DestAlign, |
| 291 | bool SrcIsVolatile, bool DstIsVolatile) { |
| 292 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 293 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 294 | Function *F = OrigBB->getParent(); |
| 295 | |
| 296 | // Create the a comparison of src and dst, based on which we jump to either |
| 297 | // the forward-copy part of the function (if src >= dst) or the backwards-copy |
| 298 | // part (if src < dst). |
| 299 | // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else |
| 300 | // structure. Its block terminators (unconditional branches) are replaced by |
| 301 | // the appropriate conditional branches when the loop is built. |
| 302 | ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, |
| 303 | SrcAddr, DstAddr, "compare_src_dst"); |
Chandler Carruth | 4a2d58e | 2018-10-15 09:34:05 +0000 | [diff] [blame^] | 304 | Instruction *ThenTerm, *ElseTerm; |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 305 | SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, |
| 306 | &ElseTerm); |
| 307 | |
| 308 | // Each part of the function consists of two blocks: |
| 309 | // copy_backwards: used to skip the loop when n == 0 |
| 310 | // copy_backwards_loop: the actual backwards loop BB |
| 311 | // copy_forward: used to skip the loop when n == 0 |
| 312 | // copy_forward_loop: the actual forward loop BB |
| 313 | BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); |
| 314 | CopyBackwardsBB->setName("copy_backwards"); |
| 315 | BasicBlock *CopyForwardBB = ElseTerm->getParent(); |
| 316 | CopyForwardBB->setName("copy_forward"); |
| 317 | BasicBlock *ExitBB = InsertBefore->getParent(); |
| 318 | ExitBB->setName("memmove_done"); |
| 319 | |
| 320 | // Initial comparison of n == 0 that lets us skip the loops altogether. Shared |
| 321 | // between both backwards and forward copy clauses. |
| 322 | ICmpInst *CompareN = |
| 323 | new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, |
| 324 | ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); |
| 325 | |
| 326 | // Copying backwards. |
| 327 | BasicBlock *LoopBB = |
| 328 | BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); |
| 329 | IRBuilder<> LoopBuilder(LoopBB); |
| 330 | PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 331 | Value *IndexPtr = LoopBuilder.CreateSub( |
| 332 | LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); |
| 333 | Value *Element = LoopBuilder.CreateLoad( |
| 334 | LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); |
| 335 | LoopBuilder.CreateStore(Element, |
| 336 | LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); |
| 337 | LoopBuilder.CreateCondBr( |
| 338 | LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), |
| 339 | ExitBB, LoopBB); |
| 340 | LoopPhi->addIncoming(IndexPtr, LoopBB); |
| 341 | LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); |
| 342 | BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); |
| 343 | ThenTerm->eraseFromParent(); |
| 344 | |
| 345 | // Copying forward. |
| 346 | BasicBlock *FwdLoopBB = |
| 347 | BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); |
| 348 | IRBuilder<> FwdLoopBuilder(FwdLoopBB); |
| 349 | PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); |
| 350 | Value *FwdElement = FwdLoopBuilder.CreateLoad( |
| 351 | FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); |
| 352 | FwdLoopBuilder.CreateStore( |
| 353 | FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); |
| 354 | Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( |
| 355 | FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); |
| 356 | FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), |
| 357 | ExitBB, FwdLoopBB); |
| 358 | FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); |
| 359 | FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); |
| 360 | |
| 361 | BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); |
| 362 | ElseTerm->eraseFromParent(); |
| 363 | } |
| 364 | |
| 365 | static void createMemSetLoop(Instruction *InsertBefore, |
| 366 | Value *DstAddr, Value *CopyLen, Value *SetValue, |
| 367 | unsigned Align, bool IsVolatile) { |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 368 | Type *TypeOfCopyLen = CopyLen->getType(); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 369 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 370 | Function *F = OrigBB->getParent(); |
| 371 | BasicBlock *NewBB = |
| 372 | OrigBB->splitBasicBlock(InsertBefore, "split"); |
| 373 | BasicBlock *LoopBB |
| 374 | = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); |
| 375 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 376 | IRBuilder<> Builder(OrigBB->getTerminator()); |
| 377 | |
| 378 | // Cast pointer to the type of value getting stored |
| 379 | unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 380 | DstAddr = Builder.CreateBitCast(DstAddr, |
| 381 | PointerType::get(SetValue->getType(), dstAS)); |
| 382 | |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 383 | Builder.CreateCondBr( |
| 384 | Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, |
| 385 | LoopBB); |
| 386 | OrigBB->getTerminator()->eraseFromParent(); |
| 387 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 388 | IRBuilder<> LoopBuilder(LoopBB); |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 389 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 390 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 391 | |
| 392 | LoopBuilder.CreateStore( |
| 393 | SetValue, |
| 394 | LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), |
| 395 | IsVolatile); |
| 396 | |
| 397 | Value *NewIndex = |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 398 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 399 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 400 | |
| 401 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, |
| 402 | NewBB); |
| 403 | } |
| 404 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 405 | void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, |
| 406 | const TargetTransformInfo &TTI) { |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 407 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { |
| 408 | createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, |
| 409 | /* SrcAddr */ Memcpy->getRawSource(), |
| 410 | /* DstAddr */ Memcpy->getRawDest(), |
| 411 | /* CopyLen */ CI, |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 412 | /* SrcAlign */ Memcpy->getSourceAlignment(), |
| 413 | /* DestAlign */ Memcpy->getDestAlignment(), |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 414 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 415 | /* DstIsVolatile */ Memcpy->isVolatile(), |
| 416 | /* TargetTransformInfo */ TTI); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 417 | } else { |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 418 | createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 419 | /* SrcAddr */ Memcpy->getRawSource(), |
| 420 | /* DstAddr */ Memcpy->getRawDest(), |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 421 | /* CopyLen */ Memcpy->getLength(), |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 422 | /* SrcAlign */ Memcpy->getSourceAlignment(), |
| 423 | /* DestAlign */ Memcpy->getDestAlignment(), |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 424 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 425 | /* DstIsVolatile */ Memcpy->isVolatile(), |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 426 | /* TargetTransfomrInfo */ TTI); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 427 | } |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 428 | } |
| 429 | |
| 430 | void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { |
| 431 | createMemMoveLoop(/* InsertBefore */ Memmove, |
| 432 | /* SrcAddr */ Memmove->getRawSource(), |
| 433 | /* DstAddr */ Memmove->getRawDest(), |
| 434 | /* CopyLen */ Memmove->getLength(), |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 435 | /* SrcAlign */ Memmove->getSourceAlignment(), |
| 436 | /* DestAlign */ Memmove->getDestAlignment(), |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 437 | /* SrcIsVolatile */ Memmove->isVolatile(), |
| 438 | /* DstIsVolatile */ Memmove->isVolatile()); |
| 439 | } |
| 440 | |
| 441 | void llvm::expandMemSetAsLoop(MemSetInst *Memset) { |
| 442 | createMemSetLoop(/* InsertBefore */ Memset, |
| 443 | /* DstAddr */ Memset->getRawDest(), |
| 444 | /* CopyLen */ Memset->getLength(), |
| 445 | /* SetValue */ Memset->getValue(), |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 446 | /* Alignment */ Memset->getDestAlignment(), |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 447 | Memset->isVolatile()); |
| 448 | } |