blob: c837180334c096fcf54cb2f98ba38072dcdd45ce [file] [log] [blame]
//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
Matt Arsenaulte823d922017-02-18 18:29:53 +000016#include "AMDGPUSubtarget.h"
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +000017#include "llvm/ADT/Triple.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000018#include "llvm/Analysis/CallGraphSCCPass.h"
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +000019#include "llvm/CodeGen/TargetPassConfig.h"
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000020#include "llvm/IR/Constants.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000021#include "llvm/IR/InstIterator.h"
Matt Arsenault39319482015-11-06 18:01:57 +000022#include "llvm/IR/Instructions.h"
23#include "llvm/IR/Module.h"
24
25#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
26
27using namespace llvm;
28
namespace {

/// CallGraphSCC pass that annotates functions with string function attributes
/// (e.g. "amdgpu-queue-ptr", "amdgpu-work-item-id-y") recording which
/// implicit kernel inputs they require, so later calling convention lowering
/// can set up the corresponding inputs. Attributes are propagated from
/// callees to callers as the SCC walk visits each function.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Set in doInitialization() from the TargetPassConfig; never owned here.
  const TargetMachine *TM = nullptr;
  // Target address-space numbering, queried from the module in
  // doInitialization().
  AMDGPUAS AS;

  bool addFeatureAttributes(Function &F);

  void addAttrToCallers(Function &Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  /// Return true if \p CE is an addrspacecast whose source address space
  /// requires the queue pointer.
  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);

  /// Walk \p EntryC and all constant sub-expressions, returning true if any
  /// of them requires the queue pointer. \p ConstantExprVisited caches
  /// constants already examined across calls.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace
65
// Pass identification; the address of ID is the unique pass identity.
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Exported handle so other code can reference this pass by ID.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
Matt Arsenault39319482015-11-06 18:01:57 +000072
Matt Arsenault39319482015-11-06 18:01:57 +000073
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000074// The queue ptr is only needed when casting to flat, not from it.
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000075static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
76 return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
Matt Arsenault99c14522016-04-25 19:27:24 +000077}
78
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000079static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
80 const AMDGPUAS &AS) {
81 return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000082}
83
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000084bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
85 AMDGPUAS AS) {
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000086 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
87 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000088 return castRequiresQueuePtr(SrcAS, AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000089 }
90
91 return false;
92}
93
94bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
95 const Constant *EntryC,
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000096 SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
97 AMDGPUAS AS) {
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000098
99 if (!ConstantExprVisited.insert(EntryC).second)
100 return false;
101
102 SmallVector<const Constant *, 16> Stack;
103 Stack.push_back(EntryC);
104
105 while (!Stack.empty()) {
106 const Constant *C = Stack.pop_back_val();
107
108 // Check this constant expression.
109 if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
Yaxun Liu1a14bfa2017-03-27 14:04:01 +0000110 if (visitConstantExpr(CE, AS))
Matt Arsenault3b2e2a52016-06-06 20:03:31 +0000111 return true;
112 }
113
114 // Visit all sub-expressions.
115 for (const Use &U : C->operands()) {
116 const auto *OpC = dyn_cast<Constant>(U);
117 if (!OpC)
118 continue;
119
120 if (!ConstantExprVisited.insert(OpC).second)
121 continue;
122
123 Stack.push_back(OpC);
124 }
125 }
126
127 return false;
128}
129
Matt Arsenault6b930462017-07-13 21:43:42 +0000130// We do not need to note the x workitem or workgroup id because they are always
131// initialized.
132//
133// TODO: We should not add the attributes if the known compile time workgroup
134// size is 1 for y/z.
135static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) {
136 switch (ID) {
137 case Intrinsic::amdgcn_workitem_id_y:
138 case Intrinsic::r600_read_tidig_y:
139 return "amdgpu-work-item-id-y";
140 case Intrinsic::amdgcn_workitem_id_z:
141 case Intrinsic::r600_read_tidig_z:
142 return "amdgpu-work-item-id-z";
143 case Intrinsic::amdgcn_workgroup_id_y:
144 case Intrinsic::r600_read_tgid_y:
145 return "amdgpu-work-group-id-y";
146 case Intrinsic::amdgcn_workgroup_id_z:
147 case Intrinsic::r600_read_tgid_z:
148 return "amdgpu-work-group-id-z";
149 case Intrinsic::amdgcn_dispatch_ptr:
150 return "amdgpu-dispatch-ptr";
151 case Intrinsic::amdgcn_dispatch_id:
152 return "amdgpu-dispatch-id";
153 case Intrinsic::amdgcn_queue_ptr:
154 case Intrinsic::trap:
155 case Intrinsic::debugtrap:
156 IsQueuePtr = true;
157 return "amdgpu-queue-ptr";
158 default:
159 return "";
160 }
161}
162
163static bool handleAttr(Function &Parent, const Function &Callee,
164 StringRef Name) {
165 if (Callee.hasFnAttribute(Name)) {
166 Parent.addFnAttr(Name);
167 return true;
168 }
169
170 return false;
171}
172
173static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
174 bool &NeedQueuePtr) {
175
176 static const StringRef AttrNames[] = {
177 // .x omitted
178 { "amdgpu-work-item-id-y" },
179 { "amdgpu-work-item-id-z" },
180 // .x omitted
181 { "amdgpu-work-group-id-y" },
182 { "amdgpu-work-group-id-z" },
183 { "amdgpu-dispatch-ptr" },
184 { "amdgpu-dispatch-id" }
185 };
186
187 if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
188 NeedQueuePtr = true;
189
190 for (StringRef AttrName : AttrNames)
191 handleAttr(Parent, Callee, AttrName);
192}
193
/// Scan every instruction of \p F and add the feature attributes it needs:
/// attributes implied by intrinsic calls, attributes propagated from direct
/// non-intrinsic callees, and "amdgpu-queue-ptr" when an addrspacecast (as an
/// instruction or inside a constant expression) requires the queue pointer.
/// Returns true if any attribute may have been added.
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  // With aperture registers the queue pointer is never needed for casts.
  bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
  // Shared across the whole function so each constant is visited once.
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee)
          continue;

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          // Real call: inherit whatever the callee already requires.
          // NOTE: Changed is set conservatively even if nothing was copied.
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr);
          if (!AttrName.empty()) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      // Once the queue ptr is known to be needed (or never needed because
      // aperture registers exist), skip the cast analysis below.
      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      // Addrspacecasts may also hide inside constant-expression operands.
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  return Changed;
}
Matt Arsenault39319482015-11-06 18:01:57 +0000254
Matt Arsenault6b930462017-07-13 21:43:42 +0000255void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function &Intrin,
Matt Arsenault39319482015-11-06 18:01:57 +0000256 StringRef AttrName) {
257 SmallPtrSet<Function *, 4> SeenFuncs;
258
Matt Arsenault6b930462017-07-13 21:43:42 +0000259 for (User *U : Intrin.users()) {
Matt Arsenault39319482015-11-06 18:01:57 +0000260 // CallInst is the only valid user for an intrinsic.
261 CallInst *CI = cast<CallInst>(U);
262
263 Function *CallingFunction = CI->getParent()->getParent();
264 if (SeenFuncs.insert(CallingFunction).second)
265 CallingFunction->addFnAttr(AttrName);
266 }
267}
268
269bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
270 Module &M,
271 ArrayRef<StringRef[2]> IntrinsicToAttr) {
272 bool Changed = false;
273
274 for (const StringRef *Arr : IntrinsicToAttr) {
275 if (Function *Fn = M.getFunction(Arr[0])) {
Matt Arsenault6b930462017-07-13 21:43:42 +0000276 addAttrToCallers(*Fn, Arr[1]);
Matt Arsenault39319482015-11-06 18:01:57 +0000277 Changed = true;
278 }
279 }
280
281 return Changed;
282}
283
Matt Arsenault6b930462017-07-13 21:43:42 +0000284bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
285 Module &M = SCC.getCallGraph().getModule();
Matt Arsenault39319482015-11-06 18:01:57 +0000286 Triple TT(M.getTargetTriple());
287
Matt Arsenault6b930462017-07-13 21:43:42 +0000288 bool Changed = false;
289 for (CallGraphNode *I : SCC) {
290 Function *F = I->getFunction();
291 if (!F || F->isDeclaration())
292 continue;
Matt Arsenault43976df2016-01-30 04:25:19 +0000293
Matt Arsenault6b930462017-07-13 21:43:42 +0000294 Changed |= addFeatureAttributes(*F);
Matt Arsenault99c14522016-04-25 19:27:24 +0000295 }
296
Matt Arsenault6b930462017-07-13 21:43:42 +0000297
Matt Arsenault39319482015-11-06 18:01:57 +0000298 return Changed;
299}
300
Matt Arsenault6b930462017-07-13 21:43:42 +0000301bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
302 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
303 if (!TPC)
304 report_fatal_error("TargetMachine is required");
305
306 AS = AMDGPU::getAMDGPUAS(CG.getModule());
307 TM = &TPC->getTM<TargetMachine>();
308 return false;
309}
310
// Factory used by the AMDGPU target to add this pass to the pipeline.
Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}