Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 1 | //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame^] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// This pass adds amdgpu.uniform metadata to IR values so this information |
| 11 | /// can be used during instruction selection. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
#include "AMDGPU.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
| 24 | |
| 25 | #define DEBUG_TYPE "amdgpu-annotate-uniform" |
| 26 | |
| 27 | using namespace llvm; |
| 28 | |
| 29 | namespace { |
| 30 | |
| 31 | class AMDGPUAnnotateUniformValues : public FunctionPass, |
| 32 | public InstVisitor<AMDGPUAnnotateUniformValues> { |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 33 | LegacyDivergenceAnalysis *DA; |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 34 | MemoryDependenceResults *MDR; |
| 35 | LoopInfo *LI; |
| 36 | DenseMap<Value*, GetElementPtrInst*> noClobberClones; |
| 37 | bool isKernelFunc; |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 38 | |
| 39 | public: |
| 40 | static char ID; |
| 41 | AMDGPUAnnotateUniformValues() : |
| 42 | FunctionPass(ID) { } |
| 43 | bool doInitialization(Module &M) override; |
| 44 | bool runOnFunction(Function &F) override; |
Mehdi Amini | 117296c | 2016-10-01 02:56:57 +0000 | [diff] [blame] | 45 | StringRef getPassName() const override { |
| 46 | return "AMDGPU Annotate Uniform Values"; |
| 47 | } |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 48 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 49 | AU.addRequired<LegacyDivergenceAnalysis>(); |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 50 | AU.addRequired<MemoryDependenceWrapperPass>(); |
| 51 | AU.addRequired<LoopInfoWrapperPass>(); |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 52 | AU.setPreservesAll(); |
| 53 | } |
| 54 | |
Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 55 | void visitBranchInst(BranchInst &I); |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 56 | void visitLoadInst(LoadInst &I); |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 57 | bool isClobberedInFunction(LoadInst * Load); |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 58 | }; |
| 59 | |
| 60 | } // End anonymous namespace |
| 61 | |
| 62 | INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, |
| 63 | "Add AMDGPU uniform metadata", false, false) |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 64 | INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 65 | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) |
| 66 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 67 | INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, |
| 68 | "Add AMDGPU uniform metadata", false, false) |
| 69 | |
| 70 | char AMDGPUAnnotateUniformValues::ID = 0; |
| 71 | |
Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 72 | static void setUniformMetadata(Instruction *I) { |
| 73 | I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); |
| 74 | } |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 75 | static void setNoClobberMetadata(Instruction *I) { |
| 76 | I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); |
| 77 | } |
| 78 | |
| 79 | static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) { |
| 80 | for (auto I : predecessors(Root)) |
| 81 | if (Set.insert(I)) |
| 82 | DFS(I, Set); |
| 83 | } |
| 84 | |
| 85 | bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { |
| 86 | // 1. get Loop for the Load->getparent(); |
| 87 | // 2. if it exists, collect all the BBs from the most outer |
| 88 | // loop and check for the writes. If NOT - start DFS over all preds. |
| 89 | // 3. Start DFS over all preds from the most outer loop header. |
| 90 | SetVector<BasicBlock *> Checklist; |
| 91 | BasicBlock *Start = Load->getParent(); |
| 92 | Checklist.insert(Start); |
| 93 | const Value *Ptr = Load->getPointerOperand(); |
| 94 | const Loop *L = LI->getLoopFor(Start); |
| 95 | if (L) { |
| 96 | const Loop *P = L; |
| 97 | do { |
| 98 | L = P; |
| 99 | P = P->getParentLoop(); |
| 100 | } while (P); |
| 101 | Checklist.insert(L->block_begin(), L->block_end()); |
| 102 | Start = L->getHeader(); |
| 103 | } |
| 104 | |
| 105 | DFS(Start, Checklist); |
| 106 | for (auto &BB : Checklist) { |
Alexander Timofeev | 0f9c84c | 2017-06-15 19:33:10 +0000 | [diff] [blame] | 107 | BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? |
Matt Arsenault | ce34ac5 | 2017-07-12 23:06:18 +0000 | [diff] [blame] | 108 | BasicBlock::iterator(Load) : BB->end(); |
| 109 | auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, |
| 110 | StartIt, BB, Load); |
| 111 | if (Q.isClobber() || Q.isUnknown()) |
| 112 | return true; |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 113 | } |
| 114 | return false; |
| 115 | } |
Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 116 | |
| 117 | void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { |
Rhys Perry | f77e2e8 | 2019-01-07 15:52:28 +0000 | [diff] [blame] | 118 | if (DA->isUniform(&I)) |
| 119 | setUniformMetadata(I.getParent()->getTerminator()); |
Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 120 | } |
| 121 | |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 122 | void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { |
| 123 | Value *Ptr = I.getPointerOperand(); |
| 124 | if (!DA->isUniform(Ptr)) |
| 125 | return; |
Yaxun Liu | 1a14bfa | 2017-03-27 14:04:01 +0000 | [diff] [blame] | 126 | auto isGlobalLoad = [&](LoadInst &Load)->bool { |
Matt Arsenault | 0da6350 | 2018-08-31 05:49:54 +0000 | [diff] [blame] | 127 | return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 128 | }; |
| 129 | // We're tracking up to the Function boundaries |
| 130 | // We cannot go beyond because of FunctionPass restrictions |
| 131 | // Thus we can ensure that memory not clobbered for memory |
| 132 | // operations that live in kernel only. |
| 133 | bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I); |
| 134 | Instruction *PtrI = dyn_cast<Instruction>(Ptr); |
| 135 | if (!PtrI && NotClobbered && isGlobalLoad(I)) { |
| 136 | if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { |
| 137 | // Lookup for the existing GEP |
| 138 | if (noClobberClones.count(Ptr)) { |
| 139 | PtrI = noClobberClones[Ptr]; |
| 140 | } else { |
| 141 | // Create GEP of the Value |
| 142 | Function *F = I.getParent()->getParent(); |
| 143 | Value *Idx = Constant::getIntegerValue( |
| 144 | Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); |
| 145 | // Insert GEP at the entry to make it dominate all uses |
| 146 | PtrI = GetElementPtrInst::Create( |
| 147 | Ptr->getType()->getPointerElementType(), Ptr, |
| 148 | ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); |
| 149 | } |
| 150 | I.replaceUsesOfWith(Ptr, PtrI); |
| 151 | } |
| 152 | } |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 153 | |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 154 | if (PtrI) { |
Tom Stellard | bc4497b | 2016-02-12 23:45:29 +0000 | [diff] [blame] | 155 | setUniformMetadata(PtrI); |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 156 | if (NotClobbered) |
| 157 | setNoClobberMetadata(PtrI); |
| 158 | } |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 159 | } |
| 160 | |
| 161 | bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { |
| 162 | return false; |
| 163 | } |
| 164 | |
| 165 | bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { |
Andrew Kaylor | 7de74af | 2016-04-25 22:23:44 +0000 | [diff] [blame] | 166 | if (skipFunction(F)) |
| 167 | return false; |
| 168 | |
Nicolai Haehnle | 35617ed | 2018-08-30 14:21:36 +0000 | [diff] [blame] | 169 | DA = &getAnalysis<LegacyDivergenceAnalysis>(); |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 170 | MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); |
| 171 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 172 | isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 173 | |
Alexander Timofeev | 1800956 | 2016-12-08 17:28:47 +0000 | [diff] [blame] | 174 | visit(F); |
| 175 | noClobberClones.clear(); |
Tom Stellard | a6f24c6 | 2015-12-15 20:55:55 +0000 | [diff] [blame] | 176 | return true; |
| 177 | } |
| 178 | |
| 179 | FunctionPass * |
| 180 | llvm::createAMDGPUAnnotateUniformValues() { |
| 181 | return new AMDGPUAnnotateUniformValues(); |
| 182 | } |