|  | //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // \file | 
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
|  | // the size is large or is not a compile-time constant. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "NVPTXLowerAggrCopies.h" | 
|  | #include "llvm/CodeGen/StackProtector.h" | 
|  | #include "llvm/IR/Constants.h" | 
|  | #include "llvm/IR/DataLayout.h" | 
|  | #include "llvm/IR/Function.h" | 
|  | #include "llvm/IR/IRBuilder.h" | 
|  | #include "llvm/IR/Instructions.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/IR/Intrinsics.h" | 
|  | #include "llvm/IR/LLVMContext.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | 
|  | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" | 
|  |  | 
|  | #define DEBUG_TYPE "nvptx" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | namespace { | 
|  |  | 
// The lowering pass itself: a FunctionPass that rewrites large aggregate
// copies and llvm.mem* intrinsic calls into explicit loops.
|  | struct NVPTXLowerAggrCopies : public FunctionPass { | 
|  | static char ID; | 
|  |  | 
|  | NVPTXLowerAggrCopies() : FunctionPass(ID) {} | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addPreserved<StackProtector>(); | 
|  | } | 
|  |  | 
|  | bool runOnFunction(Function &F) override; | 
|  |  | 
|  | static const unsigned MaxAggrCopySize = 128; | 
|  |  | 
|  | StringRef getPassName() const override { | 
|  | return "Lower aggregate copies/intrinsics into loops"; | 
|  | } | 
|  | }; | 
|  |  | 
char NVPTXLowerAggrCopies::ID = 0; // Pass identification, replacement for typeid.
|  |  | 
|  | bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { | 
|  | SmallVector<LoadInst *, 4> AggrLoads; | 
|  | SmallVector<MemIntrinsic *, 4> MemCalls; | 
|  |  | 
|  | const DataLayout &DL = F.getParent()->getDataLayout(); | 
|  | LLVMContext &Context = F.getParent()->getContext(); | 
|  |  | 
|  | // Collect all aggregate loads and mem* calls. | 
|  | for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { | 
|  | for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; | 
|  | ++II) { | 
|  | if (LoadInst *LI = dyn_cast<LoadInst>(II)) { | 
|  | if (!LI->hasOneUse()) | 
|  | continue; | 
|  |  | 
|  | if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize) | 
|  | continue; | 
|  |  | 
|  | if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) { | 
|  | if (SI->getOperand(0) != LI) | 
|  | continue; | 
|  | AggrLoads.push_back(LI); | 
|  | } | 
|  | } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) { | 
|  | // Convert intrinsic calls with variable size or with constant size | 
|  | // larger than the MaxAggrCopySize threshold. | 
|  | if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) { | 
|  | if (LenCI->getZExtValue() >= MaxAggrCopySize) { | 
|  | MemCalls.push_back(IntrCall); | 
|  | } | 
|  | } else { | 
|  | MemCalls.push_back(IntrCall); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (AggrLoads.size() == 0 && MemCalls.size() == 0) { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // | 
|  | // Do the transformation of an aggr load/copy/set to a loop | 
|  | // | 
|  | for (LoadInst *LI : AggrLoads) { | 
|  | StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin()); | 
|  | Value *SrcAddr = LI->getOperand(0); | 
|  | Value *DstAddr = SI->getOperand(1); | 
|  | unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); | 
|  | Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); | 
|  |  | 
|  | createMemCpyLoop(/* ConvertedInst */ SI, | 
|  | /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, | 
|  | /* CopyLen */ CopyLen, | 
|  | /* SrcAlign */ LI->getAlignment(), | 
|  | /* DestAlign */ SI->getAlignment(), | 
|  | /* SrcIsVolatile */ LI->isVolatile(), | 
|  | /* DstIsVolatile */ SI->isVolatile()); | 
|  |  | 
|  | SI->eraseFromParent(); | 
|  | LI->eraseFromParent(); | 
|  | } | 
|  |  | 
|  | // Transform mem* intrinsic calls. | 
|  | for (MemIntrinsic *MemCall : MemCalls) { | 
|  | if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) { | 
|  | expandMemCpyAsLoop(Memcpy); | 
|  | } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) { | 
|  | expandMemMoveAsLoop(Memmove); | 
|  | } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) { | 
|  | expandMemSetAsLoop(Memset); | 
|  | } | 
|  | MemCall->eraseFromParent(); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | } // namespace | 
|  |  | 
namespace llvm {
// Forward declaration so the INITIALIZE_PASS expansion below can reference
// the registry hook it defines.
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
}
|  |  | 
// Register the pass under the command-line name "nvptx-lower-aggr-copies".
// It is neither a CFG-only nor an analysis pass (the two trailing flags).
INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
                "Lower aggregate copies, and llvm.mem* intrinsics into loops",
                false, false)
|  |  | 
// Factory function used by the NVPTX target to add this pass to its
// pipeline; the pass manager takes ownership of the returned pass.
FunctionPass *llvm::createLowerAggrCopies() {
  return new NVPTXLowerAggrCopies();
}