| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 1 | //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// | 
|  | 2 | // | 
| Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|  | 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 6 | // | 
|  | 7 | //===----------------------------------------------------------------------===// | 
|  | 8 | // | 
|  | 9 | /// \file | 
|  | 10 | /// This pass adds amdgpu.uniform metadata to IR values so this information | 
|  | 11 | /// can be used during instruction selection. | 
|  | 12 | // | 
|  | 13 | //===----------------------------------------------------------------------===// | 
|  | 14 |  | 
|  | 15 | #include "AMDGPU.h" | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 16 | #include "llvm/ADT/SetVector.h" | 
| Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 17 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 18 | #include "llvm/Analysis/LoopInfo.h" | 
|  | 19 | #include "llvm/Analysis/MemoryDependenceAnalysis.h" | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 20 | #include "llvm/IR/IRBuilder.h" | 
| Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 21 | #include "llvm/IR/InstVisitor.h" | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 22 | #include "llvm/Support/Debug.h" | 
|  | 23 | #include "llvm/Support/raw_ostream.h" | 
|  | 24 |  | 
|  | 25 | #define DEBUG_TYPE "amdgpu-annotate-uniform" | 
|  | 26 |  | 
|  | 27 | using namespace llvm; | 
|  | 28 |  | 
|  | 29 | namespace { | 
|  | 30 |  | 
|  | 31 | class AMDGPUAnnotateUniformValues : public FunctionPass, | 
|  | 32 | public InstVisitor<AMDGPUAnnotateUniformValues> { | 
| Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 33 | LegacyDivergenceAnalysis *DA; | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 34 | MemoryDependenceResults *MDR; | 
|  | 35 | LoopInfo *LI; | 
|  | 36 | DenseMap<Value*, GetElementPtrInst*> noClobberClones; | 
|  | 37 | bool isKernelFunc; | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 38 |  | 
|  | 39 | public: | 
|  | 40 | static char ID; | 
|  | 41 | AMDGPUAnnotateUniformValues() : | 
|  | 42 | FunctionPass(ID) { } | 
|  | 43 | bool doInitialization(Module &M) override; | 
|  | 44 | bool runOnFunction(Function &F) override; | 
| Mehdi Amini | 117296c | 2016-10-01 02:56:57 +0000 | [diff] [blame] | 45 | StringRef getPassName() const override { | 
|  | 46 | return "AMDGPU Annotate Uniform Values"; | 
|  | 47 | } | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 48 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
| Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 49 | AU.addRequired<LegacyDivergenceAnalysis>(); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 50 | AU.addRequired<MemoryDependenceWrapperPass>(); | 
|  | 51 | AU.addRequired<LoopInfoWrapperPass>(); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 52 | AU.setPreservesAll(); | 
|  | 53 | } | 
|  | 54 |  | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 55 | void visitBranchInst(BranchInst &I); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 56 | void visitLoadInst(LoadInst &I); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 57 | bool isClobberedInFunction(LoadInst * Load); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 58 | }; | 
|  | 59 |  | 
|  | 60 | } // End anonymous namespace | 
|  | 61 |  | 
// Pass registration. The dependencies listed here are the same three analyses
// that getAnalysisUsage() marks as required.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of the INITIALIZE_PASS macros - confirm
// against PassSupport.h.
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

// Storage for the static ID member declared in the class; the constructor
// hands it to FunctionPass.
char AMDGPUAnnotateUniformValues::ID = 0;
|  | 71 |  | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 72 | static void setUniformMetadata(Instruction *I) { | 
|  | 73 | I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); | 
|  | 74 | } | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 75 | static void setNoClobberMetadata(Instruction *I) { | 
|  | 76 | I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); | 
|  | 77 | } | 
|  | 78 |  | 
|  | 79 | static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) { | 
|  | 80 | for (auto I : predecessors(Root)) | 
|  | 81 | if (Set.insert(I)) | 
|  | 82 | DFS(I, Set); | 
|  | 83 | } | 
|  | 84 |  | 
|  | 85 | bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { | 
|  | 86 | // 1. get Loop for the Load->getparent(); | 
|  | 87 | // 2. if it exists, collect all the BBs from the most outer | 
|  | 88 | // loop and check for the writes. If NOT - start DFS over all preds. | 
|  | 89 | // 3. Start DFS over all preds from the most outer loop header. | 
|  | 90 | SetVector<BasicBlock *> Checklist; | 
|  | 91 | BasicBlock *Start = Load->getParent(); | 
|  | 92 | Checklist.insert(Start); | 
|  | 93 | const Value *Ptr = Load->getPointerOperand(); | 
|  | 94 | const Loop *L = LI->getLoopFor(Start); | 
|  | 95 | if (L) { | 
|  | 96 | const Loop *P = L; | 
|  | 97 | do { | 
|  | 98 | L = P; | 
|  | 99 | P = P->getParentLoop(); | 
|  | 100 | } while (P); | 
|  | 101 | Checklist.insert(L->block_begin(), L->block_end()); | 
|  | 102 | Start = L->getHeader(); | 
|  | 103 | } | 
|  | 104 |  | 
|  | 105 | DFS(Start, Checklist); | 
|  | 106 | for (auto &BB : Checklist) { | 
| Alexander Timofeev | 0f9c84c | 2017-06-15 19:33:10 +0000 | [diff] [blame] | 107 | BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? | 
| Matt Arsenault | ce34ac5 | 2017-07-12 23:06:18 +0000 | [diff] [blame] | 108 | BasicBlock::iterator(Load) : BB->end(); | 
|  | 109 | auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, | 
|  | 110 | StartIt, BB, Load); | 
|  | 111 | if (Q.isClobber() || Q.isUnknown()) | 
|  | 112 | return true; | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 113 | } | 
|  | 114 | return false; | 
|  | 115 | } | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 116 |  | 
|  | 117 | void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { | 
| Rhys Perry | f77e2e8 | 2019-01-07 15:52:28 +0000 | [diff] [blame] | 118 | if (DA->isUniform(&I)) | 
|  | 119 | setUniformMetadata(I.getParent()->getTerminator()); | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 120 | } | 
|  | 121 |  | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 122 | void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { | 
|  | 123 | Value *Ptr = I.getPointerOperand(); | 
|  | 124 | if (!DA->isUniform(Ptr)) | 
|  | 125 | return; | 
| Yaxun Liu | 1a14bfa | 2017-03-27 14:04:01 +0000 | [diff] [blame] | 126 | auto isGlobalLoad = [&](LoadInst &Load)->bool { | 
| Matt Arsenault | 0da6350 | 2018-08-31 05:49:54 +0000 | [diff] [blame] | 127 | return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 128 | }; | 
|  | 129 | // We're tracking up to the Function boundaries | 
|  | 130 | // We cannot go beyond because of FunctionPass restrictions | 
|  | 131 | // Thus we can ensure that memory not clobbered for memory | 
|  | 132 | // operations that live in kernel only. | 
|  | 133 | bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I); | 
|  | 134 | Instruction *PtrI = dyn_cast<Instruction>(Ptr); | 
|  | 135 | if (!PtrI && NotClobbered && isGlobalLoad(I)) { | 
|  | 136 | if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { | 
|  | 137 | // Lookup for the existing GEP | 
|  | 138 | if (noClobberClones.count(Ptr)) { | 
|  | 139 | PtrI = noClobberClones[Ptr]; | 
|  | 140 | } else { | 
|  | 141 | // Create GEP of the Value | 
|  | 142 | Function *F = I.getParent()->getParent(); | 
|  | 143 | Value *Idx = Constant::getIntegerValue( | 
|  | 144 | Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); | 
|  | 145 | // Insert GEP at the entry to make it dominate all uses | 
|  | 146 | PtrI = GetElementPtrInst::Create( | 
|  | 147 | Ptr->getType()->getPointerElementType(), Ptr, | 
|  | 148 | ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); | 
|  | 149 | } | 
|  | 150 | I.replaceUsesOfWith(Ptr, PtrI); | 
|  | 151 | } | 
|  | 152 | } | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 153 |  | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 154 | if (PtrI) { | 
| Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 155 | setUniformMetadata(PtrI); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 156 | if (NotClobbered) | 
|  | 157 | setNoClobberMetadata(PtrI); | 
|  | 158 | } | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 159 | } | 
|  | 160 |  | 
|  | 161 | bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { | 
|  | 162 | return false; | 
|  | 163 | } | 
|  | 164 |  | 
|  | 165 | bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { | 
| Andrew Kaylor | 7de74af | 2016-04-25 22:23:44 +0000 | [diff] [blame] | 166 | if (skipFunction(F)) | 
|  | 167 | return false; | 
|  | 168 |  | 
| Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 169 | DA  = &getAnalysis<LegacyDivergenceAnalysis>(); | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 170 | MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); | 
|  | 171 | LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | 
|  | 172 | isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 173 |  | 
| Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 174 | visit(F); | 
|  | 175 | noClobberClones.clear(); | 
| Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 176 | return true; | 
|  | 177 | } | 
|  | 178 |  | 
|  | 179 | FunctionPass * | 
|  | 180 | llvm::createAMDGPUAnnotateUniformValues() { | 
|  | 181 | return new AMDGPUAnnotateUniformValues(); | 
|  | 182 | } |