Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 1 | //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame^] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 10 | #include "llvm/Analysis/TargetTransformInfo.h" |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 11 | #include "llvm/IR/IRBuilder.h" |
Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 12 | #include "llvm/IR/IntrinsicInst.h" |
| 13 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 14 | |
| 15 | using namespace llvm; |
| 16 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 17 | static unsigned getLoopOperandSizeInBytes(Type *Type) { |
| 18 | if (VectorType *VTy = dyn_cast<VectorType>(Type)) { |
| 19 | return VTy->getBitWidth() / 8; |
| 20 | } |
| 21 | |
| 22 | return Type->getPrimitiveSizeInBits() / 8; |
| 23 | } |
| 24 | |
| 25 | void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, |
| 26 | Value *DstAddr, ConstantInt *CopyLen, |
| 27 | unsigned SrcAlign, unsigned DestAlign, |
| 28 | bool SrcIsVolatile, bool DstIsVolatile, |
| 29 | const TargetTransformInfo &TTI) { |
| 30 | // No need to expand zero length copies. |
| 31 | if (CopyLen->isZero()) |
| 32 | return; |
| 33 | |
| 34 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
| 35 | BasicBlock *PostLoopBB = nullptr; |
| 36 | Function *ParentFunc = PreLoopBB->getParent(); |
| 37 | LLVMContext &Ctx = PreLoopBB->getContext(); |
| 38 | |
| 39 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 40 | Type *LoopOpType = |
| 41 | TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); |
| 42 | |
| 43 | unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); |
| 44 | uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; |
| 45 | |
| 46 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 47 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 48 | |
| 49 | if (LoopEndCount != 0) { |
| 50 | // Split |
| 51 | PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); |
| 52 | BasicBlock *LoopBB = |
| 53 | BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); |
| 54 | PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); |
| 55 | |
| 56 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
| 57 | |
| 58 | // Cast the Src and Dst pointers to pointers to the loop operand type (if |
| 59 | // needed). |
| 60 | PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); |
| 61 | PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); |
| 62 | if (SrcAddr->getType() != SrcOpType) { |
| 63 | SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); |
| 64 | } |
| 65 | if (DstAddr->getType() != DstOpType) { |
| 66 | DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); |
| 67 | } |
| 68 | |
| 69 | IRBuilder<> LoopBuilder(LoopBB); |
| 70 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); |
| 71 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); |
| 72 | // Loop Body |
| 73 | Value *SrcGEP = |
| 74 | LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); |
| 75 | Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 76 | Value *DstGEP = |
| 77 | LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); |
| 78 | LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 79 | |
| 80 | Value *NewIndex = |
| 81 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); |
| 82 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 83 | |
| 84 | // Create the loop branch condition. |
| 85 | Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); |
| 86 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), |
| 87 | LoopBB, PostLoopBB); |
| 88 | } |
| 89 | |
| 90 | uint64_t BytesCopied = LoopEndCount * LoopOpSize; |
| 91 | uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; |
| 92 | if (RemainingBytes) { |
| 93 | IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() |
| 94 | : InsertBefore); |
| 95 | |
| 96 | // Update the alignment based on the copy size used in the loop body. |
| 97 | SrcAlign = std::min(SrcAlign, LoopOpSize); |
| 98 | DestAlign = std::min(DestAlign, LoopOpSize); |
| 99 | |
| 100 | SmallVector<Type *, 5> RemainingOps; |
| 101 | TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, |
| 102 | SrcAlign, DestAlign); |
| 103 | |
| 104 | for (auto OpTy : RemainingOps) { |
| 105 | // Calaculate the new index |
| 106 | unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); |
| 107 | uint64_t GepIndex = BytesCopied / OperandSize; |
| 108 | assert(GepIndex * OperandSize == BytesCopied && |
| 109 | "Division should have no Remainder!"); |
| 110 | // Cast source to operand type and load |
| 111 | PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); |
| 112 | Value *CastedSrc = SrcAddr->getType() == SrcPtrType |
| 113 | ? SrcAddr |
| 114 | : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); |
| 115 | Value *SrcGEP = RBuilder.CreateInBoundsGEP( |
| 116 | OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); |
| 117 | Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 118 | |
| 119 | // Cast destination to operand type and store. |
| 120 | PointerType *DstPtrType = PointerType::get(OpTy, DstAS); |
| 121 | Value *CastedDst = DstAddr->getType() == DstPtrType |
| 122 | ? DstAddr |
| 123 | : RBuilder.CreateBitCast(DstAddr, DstPtrType); |
| 124 | Value *DstGEP = RBuilder.CreateInBoundsGEP( |
| 125 | OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); |
| 126 | RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 127 | |
| 128 | BytesCopied += OperandSize; |
| 129 | } |
| 130 | } |
| 131 | assert(BytesCopied == CopyLen->getZExtValue() && |
| 132 | "Bytes copied should match size in the call!"); |
| 133 | } |
| 134 | |
| 135 | void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, |
| 136 | Value *SrcAddr, Value *DstAddr, |
| 137 | Value *CopyLen, unsigned SrcAlign, |
| 138 | unsigned DestAlign, bool SrcIsVolatile, |
| 139 | bool DstIsVolatile, |
| 140 | const TargetTransformInfo &TTI) { |
| 141 | BasicBlock *PreLoopBB = InsertBefore->getParent(); |
| 142 | BasicBlock *PostLoopBB = |
| 143 | PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); |
| 144 | |
| 145 | Function *ParentFunc = PreLoopBB->getParent(); |
| 146 | LLVMContext &Ctx = PreLoopBB->getContext(); |
| 147 | |
| 148 | Type *LoopOpType = |
| 149 | TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); |
| 150 | unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); |
| 151 | |
| 152 | IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); |
| 153 | |
| 154 | unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); |
| 155 | unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 156 | PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); |
| 157 | PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); |
| 158 | if (SrcAddr->getType() != SrcOpType) { |
| 159 | SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); |
| 160 | } |
| 161 | if (DstAddr->getType() != DstOpType) { |
| 162 | DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); |
| 163 | } |
| 164 | |
| 165 | // Calculate the loop trip count, and remaining bytes to copy after the loop. |
| 166 | Type *CopyLenType = CopyLen->getType(); |
| 167 | IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); |
| 168 | assert(ILengthType && |
| 169 | "expected size argument to memcpy to be an integer type!"); |
Sean Fertile | 68d7f9d | 2017-12-16 22:41:39 +0000 | [diff] [blame] | 170 | Type *Int8Type = Type::getInt8Ty(Ctx); |
| 171 | bool LoopOpIsInt8 = LoopOpType == Int8Type; |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 172 | ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); |
Sean Fertile | 68d7f9d | 2017-12-16 22:41:39 +0000 | [diff] [blame] | 173 | Value *RuntimeLoopCount = LoopOpIsInt8 ? |
| 174 | CopyLen : |
| 175 | PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 176 | BasicBlock *LoopBB = |
Sean Fertile | 42b1334 | 2017-12-15 19:29:12 +0000 | [diff] [blame] | 177 | BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 178 | IRBuilder<> LoopBuilder(LoopBB); |
| 179 | |
| 180 | PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); |
| 181 | LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); |
| 182 | |
| 183 | Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); |
| 184 | Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 185 | Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); |
| 186 | LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 187 | |
| 188 | Value *NewIndex = |
| 189 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); |
| 190 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 191 | |
Sean Fertile | 68d7f9d | 2017-12-16 22:41:39 +0000 | [diff] [blame] | 192 | if (!LoopOpIsInt8) { |
| 193 | // Add in the |
| 194 | Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); |
| 195 | Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); |
| 196 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 197 | // Loop body for the residual copy. |
| 198 | BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", |
Sean Fertile | 42b1334 | 2017-12-15 19:29:12 +0000 | [diff] [blame] | 199 | PreLoopBB->getParent(), |
| 200 | PostLoopBB); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 201 | // Residual loop header. |
| 202 | BasicBlock *ResHeaderBB = BasicBlock::Create( |
| 203 | Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); |
| 204 | |
| 205 | // Need to update the pre-loop basic block to branch to the correct place. |
| 206 | // branch to the main loop if the count is non-zero, branch to the residual |
| 207 | // loop if the copy size is smaller then 1 iteration of the main loop but |
| 208 | // non-zero and finally branch to after the residual loop if the memcpy |
| 209 | // size is zero. |
| 210 | ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); |
| 211 | PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), |
| 212 | LoopBB, ResHeaderBB); |
| 213 | PreLoopBB->getTerminator()->eraseFromParent(); |
| 214 | |
| 215 | LoopBuilder.CreateCondBr( |
| 216 | LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, |
| 217 | ResHeaderBB); |
| 218 | |
| 219 | // Determine if we need to branch to the residual loop or bypass it. |
| 220 | IRBuilder<> RHBuilder(ResHeaderBB); |
| 221 | RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), |
| 222 | ResLoopBB, PostLoopBB); |
| 223 | |
| 224 | // Copy the residual with single byte load/store loop. |
| 225 | IRBuilder<> ResBuilder(ResLoopBB); |
| 226 | PHINode *ResidualIndex = |
| 227 | ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); |
| 228 | ResidualIndex->addIncoming(Zero, ResHeaderBB); |
| 229 | |
| 230 | Value *SrcAsInt8 = |
| 231 | ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); |
| 232 | Value *DstAsInt8 = |
| 233 | ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); |
| 234 | Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); |
| 235 | Value *SrcGEP = |
| 236 | ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); |
| 237 | Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); |
| 238 | Value *DstGEP = |
| 239 | ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); |
| 240 | ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); |
| 241 | |
| 242 | Value *ResNewIndex = |
| 243 | ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); |
| 244 | ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); |
| 245 | |
| 246 | // Create the loop branch condition. |
| 247 | ResBuilder.CreateCondBr( |
| 248 | ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, |
| 249 | PostLoopBB); |
| 250 | } else { |
| 251 | // In this case the loop operand type was a byte, and there is no need for a |
| 252 | // residual loop to copy the remaining memory after the main loop. |
| 253 | // We do however need to patch up the control flow by creating the |
| 254 | // terminators for the preloop block and the memcpy loop. |
| 255 | ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); |
| 256 | PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), |
| 257 | LoopBB, PostLoopBB); |
| 258 | PreLoopBB->getTerminator()->eraseFromParent(); |
| 259 | LoopBuilder.CreateCondBr( |
| 260 | LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, |
| 261 | PostLoopBB); |
| 262 | } |
| 263 | } |
| 264 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 265 | // Lower memmove to IR. memmove is required to correctly copy overlapping memory |
| 266 | // regions; therefore, it has to check the relative positions of the source and |
| 267 | // destination pointers and choose the copy direction accordingly. |
| 268 | // |
| 269 | // The code below is an IR rendition of this C function: |
| 270 | // |
| 271 | // void* memmove(void* dst, const void* src, size_t n) { |
| 272 | // unsigned char* d = dst; |
| 273 | // const unsigned char* s = src; |
| 274 | // if (s < d) { |
| 275 | // // copy backwards |
| 276 | // while (n--) { |
| 277 | // d[n] = s[n]; |
| 278 | // } |
| 279 | // } else { |
| 280 | // // copy forward |
| 281 | // for (size_t i = 0; i < n; ++i) { |
| 282 | // d[i] = s[i]; |
| 283 | // } |
| 284 | // } |
| 285 | // return dst; |
| 286 | // } |
| 287 | static void createMemMoveLoop(Instruction *InsertBefore, |
| 288 | Value *SrcAddr, Value *DstAddr, Value *CopyLen, |
| 289 | unsigned SrcAlign, unsigned DestAlign, |
| 290 | bool SrcIsVolatile, bool DstIsVolatile) { |
| 291 | Type *TypeOfCopyLen = CopyLen->getType(); |
| 292 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 293 | Function *F = OrigBB->getParent(); |
| 294 | |
| 295 | // Create the a comparison of src and dst, based on which we jump to either |
| 296 | // the forward-copy part of the function (if src >= dst) or the backwards-copy |
| 297 | // part (if src < dst). |
| 298 | // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else |
| 299 | // structure. Its block terminators (unconditional branches) are replaced by |
| 300 | // the appropriate conditional branches when the loop is built. |
| 301 | ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, |
| 302 | SrcAddr, DstAddr, "compare_src_dst"); |
Chandler Carruth | 4a2d58e | 2018-10-15 09:34:05 +0000 | [diff] [blame] | 303 | Instruction *ThenTerm, *ElseTerm; |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 304 | SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, |
| 305 | &ElseTerm); |
| 306 | |
| 307 | // Each part of the function consists of two blocks: |
| 308 | // copy_backwards: used to skip the loop when n == 0 |
| 309 | // copy_backwards_loop: the actual backwards loop BB |
| 310 | // copy_forward: used to skip the loop when n == 0 |
| 311 | // copy_forward_loop: the actual forward loop BB |
| 312 | BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); |
| 313 | CopyBackwardsBB->setName("copy_backwards"); |
| 314 | BasicBlock *CopyForwardBB = ElseTerm->getParent(); |
| 315 | CopyForwardBB->setName("copy_forward"); |
| 316 | BasicBlock *ExitBB = InsertBefore->getParent(); |
| 317 | ExitBB->setName("memmove_done"); |
| 318 | |
| 319 | // Initial comparison of n == 0 that lets us skip the loops altogether. Shared |
| 320 | // between both backwards and forward copy clauses. |
| 321 | ICmpInst *CompareN = |
| 322 | new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, |
| 323 | ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); |
| 324 | |
| 325 | // Copying backwards. |
| 326 | BasicBlock *LoopBB = |
| 327 | BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); |
| 328 | IRBuilder<> LoopBuilder(LoopBB); |
| 329 | PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 330 | Value *IndexPtr = LoopBuilder.CreateSub( |
| 331 | LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); |
| 332 | Value *Element = LoopBuilder.CreateLoad( |
| 333 | LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); |
| 334 | LoopBuilder.CreateStore(Element, |
| 335 | LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); |
| 336 | LoopBuilder.CreateCondBr( |
| 337 | LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), |
| 338 | ExitBB, LoopBB); |
| 339 | LoopPhi->addIncoming(IndexPtr, LoopBB); |
| 340 | LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); |
| 341 | BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); |
| 342 | ThenTerm->eraseFromParent(); |
| 343 | |
| 344 | // Copying forward. |
| 345 | BasicBlock *FwdLoopBB = |
| 346 | BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); |
| 347 | IRBuilder<> FwdLoopBuilder(FwdLoopBB); |
| 348 | PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); |
| 349 | Value *FwdElement = FwdLoopBuilder.CreateLoad( |
| 350 | FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); |
| 351 | FwdLoopBuilder.CreateStore( |
| 352 | FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); |
| 353 | Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( |
| 354 | FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); |
| 355 | FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), |
| 356 | ExitBB, FwdLoopBB); |
| 357 | FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); |
| 358 | FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); |
| 359 | |
| 360 | BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); |
| 361 | ElseTerm->eraseFromParent(); |
| 362 | } |
| 363 | |
| 364 | static void createMemSetLoop(Instruction *InsertBefore, |
| 365 | Value *DstAddr, Value *CopyLen, Value *SetValue, |
| 366 | unsigned Align, bool IsVolatile) { |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 367 | Type *TypeOfCopyLen = CopyLen->getType(); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 368 | BasicBlock *OrigBB = InsertBefore->getParent(); |
| 369 | Function *F = OrigBB->getParent(); |
| 370 | BasicBlock *NewBB = |
| 371 | OrigBB->splitBasicBlock(InsertBefore, "split"); |
| 372 | BasicBlock *LoopBB |
| 373 | = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); |
| 374 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 375 | IRBuilder<> Builder(OrigBB->getTerminator()); |
| 376 | |
| 377 | // Cast pointer to the type of value getting stored |
| 378 | unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); |
| 379 | DstAddr = Builder.CreateBitCast(DstAddr, |
| 380 | PointerType::get(SetValue->getType(), dstAS)); |
| 381 | |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 382 | Builder.CreateCondBr( |
| 383 | Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, |
| 384 | LoopBB); |
| 385 | OrigBB->getTerminator()->eraseFromParent(); |
| 386 | |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 387 | IRBuilder<> LoopBuilder(LoopBB); |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 388 | PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); |
| 389 | LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 390 | |
| 391 | LoopBuilder.CreateStore( |
| 392 | SetValue, |
| 393 | LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), |
| 394 | IsVolatile); |
| 395 | |
| 396 | Value *NewIndex = |
Teresa Johnson | 32d9574 | 2017-07-01 03:24:10 +0000 | [diff] [blame] | 397 | LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 398 | LoopIndex->addIncoming(NewIndex, LoopBB); |
| 399 | |
| 400 | LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, |
| 401 | NewBB); |
| 402 | } |
| 403 | |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 404 | void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, |
| 405 | const TargetTransformInfo &TTI) { |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 406 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { |
| 407 | createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, |
| 408 | /* SrcAddr */ Memcpy->getRawSource(), |
| 409 | /* DstAddr */ Memcpy->getRawDest(), |
| 410 | /* CopyLen */ CI, |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 411 | /* SrcAlign */ Memcpy->getSourceAlignment(), |
| 412 | /* DestAlign */ Memcpy->getDestAlignment(), |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 413 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 414 | /* DstIsVolatile */ Memcpy->isVolatile(), |
| 415 | /* TargetTransformInfo */ TTI); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 416 | } else { |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 417 | createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 418 | /* SrcAddr */ Memcpy->getRawSource(), |
| 419 | /* DstAddr */ Memcpy->getRawDest(), |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 420 | /* CopyLen */ Memcpy->getLength(), |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 421 | /* SrcAlign */ Memcpy->getSourceAlignment(), |
| 422 | /* DestAlign */ Memcpy->getDestAlignment(), |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 423 | /* SrcIsVolatile */ Memcpy->isVolatile(), |
| 424 | /* DstIsVolatile */ Memcpy->isVolatile(), |
Sean Fertile | 5fb624a | 2017-12-18 15:31:14 +0000 | [diff] [blame] | 425 | /* TargetTransfomrInfo */ TTI); |
Sean Fertile | 9cd1cdf | 2017-07-07 02:00:06 +0000 | [diff] [blame] | 426 | } |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 427 | } |
| 428 | |
| 429 | void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { |
| 430 | createMemMoveLoop(/* InsertBefore */ Memmove, |
| 431 | /* SrcAddr */ Memmove->getRawSource(), |
| 432 | /* DstAddr */ Memmove->getRawDest(), |
| 433 | /* CopyLen */ Memmove->getLength(), |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 434 | /* SrcAlign */ Memmove->getSourceAlignment(), |
| 435 | /* DestAlign */ Memmove->getDestAlignment(), |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 436 | /* SrcIsVolatile */ Memmove->isVolatile(), |
| 437 | /* DstIsVolatile */ Memmove->isVolatile()); |
| 438 | } |
| 439 | |
| 440 | void llvm::expandMemSetAsLoop(MemSetInst *Memset) { |
| 441 | createMemSetLoop(/* InsertBefore */ Memset, |
| 442 | /* DstAddr */ Memset->getRawDest(), |
| 443 | /* CopyLen */ Memset->getLength(), |
| 444 | /* SetValue */ Memset->getValue(), |
Daniel Neilson | 3c23f66 | 2018-02-05 22:23:58 +0000 | [diff] [blame] | 445 | /* Alignment */ Memset->getDestAlignment(), |
Matt Arsenault | 5606652 | 2017-02-08 17:49:52 +0000 | [diff] [blame] | 446 | Memset->isVolatile()); |
| 447 | } |