|  | //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// This pass adds amdgpu.uniform metadata to IR values so this information | 
|  | /// can be used during instruction selection. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
|  |  | 
|  | #define DEBUG_TYPE "amdgpu-annotate-uniform" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class AMDGPUAnnotateUniformValues : public FunctionPass, | 
|  | public InstVisitor<AMDGPUAnnotateUniformValues> { | 
|  | DivergenceAnalysis *DA; | 
|  | MemoryDependenceResults *MDR; | 
|  | LoopInfo *LI; | 
|  | DenseMap<Value*, GetElementPtrInst*> noClobberClones; | 
|  | bool isKernelFunc; | 
|  | AMDGPUAS AMDGPUASI; | 
|  |  | 
|  | public: | 
|  | static char ID; | 
|  | AMDGPUAnnotateUniformValues() : | 
|  | FunctionPass(ID) { } | 
|  | bool doInitialization(Module &M) override; | 
|  | bool runOnFunction(Function &F) override; | 
|  | StringRef getPassName() const override { | 
|  | return "AMDGPU Annotate Uniform Values"; | 
|  | } | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<DivergenceAnalysis>(); | 
|  | AU.addRequired<MemoryDependenceWrapperPass>(); | 
|  | AU.addRequired<LoopInfoWrapperPass>(); | 
|  | AU.setPreservesAll(); | 
|  | } | 
|  |  | 
|  | void visitBranchInst(BranchInst &I); | 
|  | void visitLoadInst(LoadInst &I); | 
|  | bool isClobberedInFunction(LoadInst * Load); | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace | 
|  |  | 
|  | INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, | 
|  | "Add AMDGPU uniform metadata", false, false) | 
|  | INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) | 
|  | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) | 
|  | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) | 
|  | INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, | 
|  | "Add AMDGPU uniform metadata", false, false) | 
|  |  | 
|  | char AMDGPUAnnotateUniformValues::ID = 0; | 
|  |  | 
|  | static void setUniformMetadata(Instruction *I) { | 
|  | I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); | 
|  | } | 
|  | static void setNoClobberMetadata(Instruction *I) { | 
|  | I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); | 
|  | } | 
|  |  | 
|  | static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) { | 
|  | for (auto I : predecessors(Root)) | 
|  | if (Set.insert(I)) | 
|  | DFS(I, Set); | 
|  | } | 
|  |  | 
|  | bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { | 
|  | // 1. get Loop for the Load->getparent(); | 
|  | // 2. if it exists, collect all the BBs from the most outer | 
|  | // loop and check for the writes. If NOT - start DFS over all preds. | 
|  | // 3. Start DFS over all preds from the most outer loop header. | 
|  | SetVector<BasicBlock *> Checklist; | 
|  | BasicBlock *Start = Load->getParent(); | 
|  | Checklist.insert(Start); | 
|  | const Value *Ptr = Load->getPointerOperand(); | 
|  | const Loop *L = LI->getLoopFor(Start); | 
|  | if (L) { | 
|  | const Loop *P = L; | 
|  | do { | 
|  | L = P; | 
|  | P = P->getParentLoop(); | 
|  | } while (P); | 
|  | Checklist.insert(L->block_begin(), L->block_end()); | 
|  | Start = L->getHeader(); | 
|  | } | 
|  |  | 
|  | DFS(Start, Checklist); | 
|  | for (auto &BB : Checklist) { | 
|  | BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? | 
|  | BasicBlock::iterator(Load) : BB->end(); | 
|  | auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, | 
|  | StartIt, BB, Load); | 
|  | if (Q.isClobber() || Q.isUnknown()) | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { | 
|  | if (I.isUnconditional()) | 
|  | return; | 
|  |  | 
|  | Value *Cond = I.getCondition(); | 
|  | if (!DA->isUniform(Cond)) | 
|  | return; | 
|  |  | 
|  | setUniformMetadata(I.getParent()->getTerminator()); | 
|  | } | 
|  |  | 
|  | void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { | 
|  | Value *Ptr = I.getPointerOperand(); | 
|  | if (!DA->isUniform(Ptr)) | 
|  | return; | 
|  | auto isGlobalLoad = [&](LoadInst &Load)->bool { | 
|  | return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; | 
|  | }; | 
|  | // We're tracking up to the Function boundaries | 
|  | // We cannot go beyond because of FunctionPass restrictions | 
|  | // Thus we can ensure that memory not clobbered for memory | 
|  | // operations that live in kernel only. | 
|  | bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I); | 
|  | Instruction *PtrI = dyn_cast<Instruction>(Ptr); | 
|  | if (!PtrI && NotClobbered && isGlobalLoad(I)) { | 
|  | if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { | 
|  | // Lookup for the existing GEP | 
|  | if (noClobberClones.count(Ptr)) { | 
|  | PtrI = noClobberClones[Ptr]; | 
|  | } else { | 
|  | // Create GEP of the Value | 
|  | Function *F = I.getParent()->getParent(); | 
|  | Value *Idx = Constant::getIntegerValue( | 
|  | Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); | 
|  | // Insert GEP at the entry to make it dominate all uses | 
|  | PtrI = GetElementPtrInst::Create( | 
|  | Ptr->getType()->getPointerElementType(), Ptr, | 
|  | ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); | 
|  | } | 
|  | I.replaceUsesOfWith(Ptr, PtrI); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (PtrI) { | 
|  | setUniformMetadata(PtrI); | 
|  | if (NotClobbered) | 
|  | setNoClobberMetadata(PtrI); | 
|  | } | 
|  | } | 
|  |  | 
|  | bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { | 
|  | AMDGPUASI = AMDGPU::getAMDGPUAS(M); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { | 
|  | if (skipFunction(F)) | 
|  | return false; | 
|  |  | 
|  | DA  = &getAnalysis<DivergenceAnalysis>(); | 
|  | MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); | 
|  | LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | 
|  | isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; | 
|  |  | 
|  | visit(F); | 
|  | noClobberClones.clear(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | FunctionPass * | 
|  | llvm::createAMDGPUAnnotateUniformValues() { | 
|  | return new AMDGPUAnnotateUniformValues(); | 
|  | } |