| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 1 | //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | /// \file | 
|  | 11 | /// This pass adds amdgpu.uniform metadata to IR values so this information | 
|  | 12 | /// can be used during instruction selection. | 
|  | 13 | // | 
|  | 14 | //===----------------------------------------------------------------------===// | 
|  | 15 |  | 
|  | 16 | #include "AMDGPU.h" | 
|  | 17 | #include "AMDGPUIntrinsicInfo.h" | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 18 | #include "llvm/ADT/SetVector.h" | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 19 | #include "llvm/Analysis/DivergenceAnalysis.h" | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 20 | #include "llvm/Analysis/LoopInfo.h" | 
|  | 21 | #include "llvm/Analysis/MemoryDependenceAnalysis.h" | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 22 | #include "llvm/IR/IRBuilder.h" | 
| Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 23 | #include "llvm/IR/InstVisitor.h" | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 24 | #include "llvm/Support/Debug.h" | 
|  | 25 | #include "llvm/Support/raw_ostream.h" | 
|  | 26 |  | 
|  | 27 | #define DEBUG_TYPE "amdgpu-annotate-uniform" | 
|  | 28 |  | 
|  | 29 | using namespace llvm; | 
|  | 30 |  | 
|  | 31 | namespace { | 
|  | 32 |  | 
|  | 33 | class AMDGPUAnnotateUniformValues : public FunctionPass, | 
|  | 34 | public InstVisitor<AMDGPUAnnotateUniformValues> { | 
|  | 35 | DivergenceAnalysis *DA; | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 36 | MemoryDependenceResults *MDR; | 
|  | 37 | LoopInfo *LI; | 
|  | 38 | DenseMap<Value*, GetElementPtrInst*> noClobberClones; | 
|  | 39 | bool isKernelFunc; | 
| Yaxun Liu | 1a14bfa | 2017-03-27 14:04:01 +0000 | [diff] [blame] | 40 | AMDGPUAS AMDGPUASI; | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 41 |  | 
|  | 42 | public: | 
|  | 43 | static char ID; | 
|  | 44 | AMDGPUAnnotateUniformValues() : | 
|  | 45 | FunctionPass(ID) { } | 
|  | 46 | bool doInitialization(Module &M) override; | 
|  | 47 | bool runOnFunction(Function &F) override; | 
| Mehdi Amini | 117296c | 2016-10-01 02:56:57 +0000 | [diff] [blame] | 48 | StringRef getPassName() const override { | 
|  | 49 | return "AMDGPU Annotate Uniform Values"; | 
|  | 50 | } | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 51 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | 52 | AU.addRequired<DivergenceAnalysis>(); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 53 | AU.addRequired<MemoryDependenceWrapperPass>(); | 
|  | 54 | AU.addRequired<LoopInfoWrapperPass>(); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 55 | AU.setPreservesAll(); | 
|  | 56 | } | 
|  | 57 |  | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 58 | void visitBranchInst(BranchInst &I); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 59 | void visitLoadInst(LoadInst &I); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 60 | bool isClobberedInFunction(LoadInst * Load); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 61 | }; | 
|  | 62 |  | 
|  | 63 | } // End anonymous namespace | 
|  | 64 |  | 
|  | 65 | INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, | 
|  | 66 | "Add AMDGPU uniform metadata", false, false) | 
|  | 67 | INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 68 | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) | 
|  | 69 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 70 | INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, | 
|  | 71 | "Add AMDGPU uniform metadata", false, false) | 
|  | 72 |  | 
|  | 73 | char AMDGPUAnnotateUniformValues::ID = 0; | 
|  | 74 |  | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 75 | static void setUniformMetadata(Instruction *I) { | 
|  | 76 | I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); | 
|  | 77 | } | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 78 | static void setNoClobberMetadata(Instruction *I) { | 
|  | 79 | I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); | 
|  | 80 | } | 
|  | 81 |  | 
|  | 82 | static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) { | 
|  | 83 | for (auto I : predecessors(Root)) | 
|  | 84 | if (Set.insert(I)) | 
|  | 85 | DFS(I, Set); | 
|  | 86 | } | 
|  | 87 |  | 
|  | 88 | bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { | 
|  | 89 | // 1. get Loop for the Load->getparent(); | 
|  | 90 | // 2. if it exists, collect all the BBs from the most outer | 
|  | 91 | // loop and check for the writes. If NOT - start DFS over all preds. | 
|  | 92 | // 3. Start DFS over all preds from the most outer loop header. | 
|  | 93 | SetVector<BasicBlock *> Checklist; | 
|  | 94 | BasicBlock *Start = Load->getParent(); | 
|  | 95 | Checklist.insert(Start); | 
|  | 96 | const Value *Ptr = Load->getPointerOperand(); | 
|  | 97 | const Loop *L = LI->getLoopFor(Start); | 
|  | 98 | if (L) { | 
|  | 99 | const Loop *P = L; | 
|  | 100 | do { | 
|  | 101 | L = P; | 
|  | 102 | P = P->getParentLoop(); | 
|  | 103 | } while (P); | 
|  | 104 | Checklist.insert(L->block_begin(), L->block_end()); | 
|  | 105 | Start = L->getHeader(); | 
|  | 106 | } | 
|  | 107 |  | 
|  | 108 | DFS(Start, Checklist); | 
|  | 109 | for (auto &BB : Checklist) { | 
| Alexander Timofeev | 0f9c84c | 2017-06-15 19:33:10 +0000 | [diff] [blame] | 110 | BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? | 
| Matt Arsenault | ce34ac5 | 2017-07-12 23:06:18 +0000 | [diff] [blame] | 111 | BasicBlock::iterator(Load) : BB->end(); | 
|  | 112 | auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, | 
|  | 113 | StartIt, BB, Load); | 
|  | 114 | if (Q.isClobber() || Q.isUnknown()) | 
|  | 115 | return true; | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 116 | } | 
|  | 117 | return false; | 
|  | 118 | } | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 119 |  | 
|  | 120 | void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { | 
|  | 121 | if (I.isUnconditional()) | 
|  | 122 | return; | 
|  | 123 |  | 
|  | 124 | Value *Cond = I.getCondition(); | 
|  | 125 | if (!DA->isUniform(Cond)) | 
|  | 126 | return; | 
|  | 127 |  | 
|  | 128 | setUniformMetadata(I.getParent()->getTerminator()); | 
|  | 129 | } | 
|  | 130 |  | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 131 | void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { | 
|  | 132 | Value *Ptr = I.getPointerOperand(); | 
|  | 133 | if (!DA->isUniform(Ptr)) | 
|  | 134 | return; | 
| Yaxun Liu | 1a14bfa | 2017-03-27 14:04:01 +0000 | [diff] [blame] | 135 | auto isGlobalLoad = [&](LoadInst &Load)->bool { | 
|  | 136 | return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 137 | }; | 
|  | 138 | // We're tracking up to the Function boundaries | 
|  | 139 | // We cannot go beyond because of FunctionPass restrictions | 
|  | 140 | // Thus we can ensure that memory not clobbered for memory | 
|  | 141 | // operations that live in kernel only. | 
|  | 142 | bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I); | 
|  | 143 | Instruction *PtrI = dyn_cast<Instruction>(Ptr); | 
|  | 144 | if (!PtrI && NotClobbered && isGlobalLoad(I)) { | 
|  | 145 | if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { | 
|  | 146 | // Lookup for the existing GEP | 
|  | 147 | if (noClobberClones.count(Ptr)) { | 
|  | 148 | PtrI = noClobberClones[Ptr]; | 
|  | 149 | } else { | 
|  | 150 | // Create GEP of the Value | 
|  | 151 | Function *F = I.getParent()->getParent(); | 
|  | 152 | Value *Idx = Constant::getIntegerValue( | 
|  | 153 | Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); | 
|  | 154 | // Insert GEP at the entry to make it dominate all uses | 
|  | 155 | PtrI = GetElementPtrInst::Create( | 
|  | 156 | Ptr->getType()->getPointerElementType(), Ptr, | 
|  | 157 | ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); | 
|  | 158 | } | 
|  | 159 | I.replaceUsesOfWith(Ptr, PtrI); | 
|  | 160 | } | 
|  | 161 | } | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 162 |  | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 163 | if (PtrI) { | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 164 | setUniformMetadata(PtrI); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 165 | if (NotClobbered) | 
|  | 166 | setNoClobberMetadata(PtrI); | 
|  | 167 | } | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 168 | } | 
|  | 169 |  | 
|  | 170 | bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { | 
| Yaxun Liu | 1a14bfa | 2017-03-27 14:04:01 +0000 | [diff] [blame] | 171 | AMDGPUASI = AMDGPU::getAMDGPUAS(M); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 172 | return false; | 
|  | 173 | } | 
|  | 174 |  | 
|  | 175 | bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { | 
| Andrew Kaylor | 7de74af | 2016-04-25 22:23:44 +0000 | [diff] [blame] | 176 | if (skipFunction(F)) | 
|  | 177 | return false; | 
|  | 178 |  | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 179 | DA  = &getAnalysis<DivergenceAnalysis>(); | 
|  | 180 | MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); | 
|  | 181 | LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | 
|  | 182 | isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 183 |  | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 184 | visit(F); | 
|  | 185 | noClobberClones.clear(); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 186 | return true; | 
|  | 187 | } | 
|  | 188 |  | 
|  | 189 | FunctionPass * | 
|  | 190 | llvm::createAMDGPUAnnotateUniformValues() { | 
|  | 191 | return new AMDGPUAnnotateUniformValues(); | 
|  | 192 | } |