blob: d1580ff4e19ce2568b096c9cbb5ab9431cf87f6f [file] [log] [blame]
Matt Arsenault39319482015-11-06 18:01:57 +00001//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file This pass adds target attributes to functions which use intrinsics
11/// which will impact calling convention lowering.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
Matt Arsenaulte823d922017-02-18 18:29:53 +000016#include "AMDGPUSubtarget.h"
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +000017#include "llvm/ADT/Triple.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000018#include "llvm/Analysis/CallGraphSCCPass.h"
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +000019#include "llvm/CodeGen/TargetPassConfig.h"
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000020#include "llvm/IR/Constants.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000021#include "llvm/IR/InstIterator.h"
Matt Arsenault39319482015-11-06 18:01:57 +000022#include "llvm/IR/Instructions.h"
23#include "llvm/IR/Module.h"
24
25#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
26
27using namespace llvm;
28
29namespace {
30
Matt Arsenault6b930462017-07-13 21:43:42 +000031class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
Matt Arsenault39319482015-11-06 18:01:57 +000032private:
Matt Arsenault6b930462017-07-13 21:43:42 +000033 const TargetMachine *TM = nullptr;
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000034 AMDGPUAS AS;
Matt Arsenault99c14522016-04-25 19:27:24 +000035
Matt Arsenault6b930462017-07-13 21:43:42 +000036 bool addFeatureAttributes(Function &F);
37
38 void addAttrToCallers(Function &Intrin, StringRef AttrName);
Matt Arsenault39319482015-11-06 18:01:57 +000039 bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
40
41public:
42 static char ID;
43
Matt Arsenault6b930462017-07-13 21:43:42 +000044 AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
45
46 bool doInitialization(CallGraph &CG) override;
47 bool runOnSCC(CallGraphSCC &SCC) override;
Mehdi Amini117296c2016-10-01 02:56:57 +000048 StringRef getPassName() const override {
Matt Arsenault39319482015-11-06 18:01:57 +000049 return "AMDGPU Annotate Kernel Features";
50 }
51
52 void getAnalysisUsage(AnalysisUsage &AU) const override {
53 AU.setPreservesAll();
Matt Arsenault6b930462017-07-13 21:43:42 +000054 CallGraphSCCPass::getAnalysisUsage(AU);
Matt Arsenault39319482015-11-06 18:01:57 +000055 }
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000056
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000057 static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000058 static bool visitConstantExprsRecursively(
59 const Constant *EntryC,
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000060 SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
61 AMDGPUAS AS);
Matt Arsenault39319482015-11-06 18:01:57 +000062};
63
64}
65
66char AMDGPUAnnotateKernelFeatures::ID = 0;
67
68char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
69
Matt Arsenault99c14522016-04-25 19:27:24 +000070INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
71 "Add AMDGPU function attributes", false, false)
Matt Arsenault39319482015-11-06 18:01:57 +000072
Matt Arsenault39319482015-11-06 18:01:57 +000073
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000074// The queue ptr is only needed when casting to flat, not from it.
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000075static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
76 return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
Matt Arsenault99c14522016-04-25 19:27:24 +000077}
78
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000079static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
80 const AMDGPUAS &AS) {
81 return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000082}
83
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000084bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
85 AMDGPUAS AS) {
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000086 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
87 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000088 return castRequiresQueuePtr(SrcAS, AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000089 }
90
91 return false;
92}
93
94bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
95 const Constant *EntryC,
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000096 SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
97 AMDGPUAS AS) {
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000098
99 if (!ConstantExprVisited.insert(EntryC).second)
100 return false;
101
102 SmallVector<const Constant *, 16> Stack;
103 Stack.push_back(EntryC);
104
105 while (!Stack.empty()) {
106 const Constant *C = Stack.pop_back_val();
107
108 // Check this constant expression.
109 if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
Yaxun Liu1a14bfa2017-03-27 14:04:01 +0000110 if (visitConstantExpr(CE, AS))
Matt Arsenault3b2e2a52016-06-06 20:03:31 +0000111 return true;
112 }
113
114 // Visit all sub-expressions.
115 for (const Use &U : C->operands()) {
116 const auto *OpC = dyn_cast<Constant>(U);
117 if (!OpC)
118 continue;
119
120 if (!ConstantExprVisited.insert(OpC).second)
121 continue;
122
123 Stack.push_back(OpC);
124 }
125 }
126
127 return false;
128}
129
Matt Arsenault6b930462017-07-13 21:43:42 +0000130// We do not need to note the x workitem or workgroup id because they are always
131// initialized.
132//
133// TODO: We should not add the attributes if the known compile time workgroup
134// size is 1 for y/z.
135static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) {
136 switch (ID) {
137 case Intrinsic::amdgcn_workitem_id_y:
138 case Intrinsic::r600_read_tidig_y:
139 return "amdgpu-work-item-id-y";
140 case Intrinsic::amdgcn_workitem_id_z:
141 case Intrinsic::r600_read_tidig_z:
142 return "amdgpu-work-item-id-z";
143 case Intrinsic::amdgcn_workgroup_id_y:
144 case Intrinsic::r600_read_tgid_y:
145 return "amdgpu-work-group-id-y";
146 case Intrinsic::amdgcn_workgroup_id_z:
147 case Intrinsic::r600_read_tgid_z:
148 return "amdgpu-work-group-id-z";
149 case Intrinsic::amdgcn_dispatch_ptr:
150 return "amdgpu-dispatch-ptr";
151 case Intrinsic::amdgcn_dispatch_id:
152 return "amdgpu-dispatch-id";
Matt Arsenault23e4df62017-07-14 00:11:13 +0000153 case Intrinsic::amdgcn_kernarg_segment_ptr:
154 case Intrinsic::amdgcn_implicitarg_ptr:
155 return "amdgpu-kernarg-segment-ptr";
Matt Arsenault6b930462017-07-13 21:43:42 +0000156 case Intrinsic::amdgcn_queue_ptr:
157 case Intrinsic::trap:
158 case Intrinsic::debugtrap:
159 IsQueuePtr = true;
160 return "amdgpu-queue-ptr";
161 default:
162 return "";
163 }
164}
165
166static bool handleAttr(Function &Parent, const Function &Callee,
167 StringRef Name) {
168 if (Callee.hasFnAttribute(Name)) {
169 Parent.addFnAttr(Name);
170 return true;
171 }
172
173 return false;
174}
175
176static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
177 bool &NeedQueuePtr) {
178
179 static const StringRef AttrNames[] = {
180 // .x omitted
181 { "amdgpu-work-item-id-y" },
182 { "amdgpu-work-item-id-z" },
183 // .x omitted
184 { "amdgpu-work-group-id-y" },
185 { "amdgpu-work-group-id-z" },
186 { "amdgpu-dispatch-ptr" },
Matt Arsenault23e4df62017-07-14 00:11:13 +0000187 { "amdgpu-dispatch-id" },
188 { "amdgpu-kernarg-segment-ptr" }
Matt Arsenault6b930462017-07-13 21:43:42 +0000189 };
190
191 if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
192 NeedQueuePtr = true;
193
194 for (StringRef AttrName : AttrNames)
195 handleAttr(Parent, Callee, AttrName);
196}
197
198bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
199 bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
Matt Arsenault3b2e2a52016-06-06 20:03:31 +0000200 SmallPtrSet<const Constant *, 8> ConstantExprVisited;
201
Matt Arsenault6b930462017-07-13 21:43:42 +0000202 bool Changed = false;
203 bool NeedQueuePtr = false;
204
205 for (BasicBlock &BB : F) {
206 for (Instruction &I : BB) {
207 CallSite CS(&I);
208 if (CS) {
209 Function *Callee = CS.getCalledFunction();
210
211 // TODO: Do something with indirect calls.
212 if (!Callee)
213 continue;
214
215 Intrinsic::ID IID = Callee->getIntrinsicID();
216 if (IID == Intrinsic::not_intrinsic) {
217 copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
218 Changed = true;
219 } else {
220 StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr);
221 if (!AttrName.empty()) {
222 F.addFnAttr(AttrName);
223 Changed = true;
224 }
225 }
226 }
227
228 if (NeedQueuePtr || HasApertureRegs)
229 continue;
230
Matt Arsenault99c14522016-04-25 19:27:24 +0000231 if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
Matt Arsenault6b930462017-07-13 21:43:42 +0000232 if (castRequiresQueuePtr(ASC, AS)) {
233 NeedQueuePtr = true;
234 continue;
235 }
Matt Arsenault99c14522016-04-25 19:27:24 +0000236 }
Matt Arsenault3b2e2a52016-06-06 20:03:31 +0000237
238 for (const Use &U : I.operands()) {
239 const auto *OpC = dyn_cast<Constant>(U);
240 if (!OpC)
241 continue;
242
Matt Arsenault6b930462017-07-13 21:43:42 +0000243 if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
244 NeedQueuePtr = true;
245 break;
246 }
Matt Arsenault3b2e2a52016-06-06 20:03:31 +0000247 }
Matt Arsenault99c14522016-04-25 19:27:24 +0000248 }
249 }
250
Matt Arsenault6b930462017-07-13 21:43:42 +0000251 if (NeedQueuePtr) {
252 F.addFnAttr("amdgpu-queue-ptr");
253 Changed = true;
254 }
255
256 return Changed;
Matt Arsenault99c14522016-04-25 19:27:24 +0000257}
Matt Arsenault39319482015-11-06 18:01:57 +0000258
Matt Arsenault6b930462017-07-13 21:43:42 +0000259void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function &Intrin,
Matt Arsenault39319482015-11-06 18:01:57 +0000260 StringRef AttrName) {
261 SmallPtrSet<Function *, 4> SeenFuncs;
262
Matt Arsenault6b930462017-07-13 21:43:42 +0000263 for (User *U : Intrin.users()) {
Matt Arsenault39319482015-11-06 18:01:57 +0000264 // CallInst is the only valid user for an intrinsic.
265 CallInst *CI = cast<CallInst>(U);
266
267 Function *CallingFunction = CI->getParent()->getParent();
268 if (SeenFuncs.insert(CallingFunction).second)
269 CallingFunction->addFnAttr(AttrName);
270 }
271}
272
273bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
274 Module &M,
275 ArrayRef<StringRef[2]> IntrinsicToAttr) {
276 bool Changed = false;
277
278 for (const StringRef *Arr : IntrinsicToAttr) {
279 if (Function *Fn = M.getFunction(Arr[0])) {
Matt Arsenault6b930462017-07-13 21:43:42 +0000280 addAttrToCallers(*Fn, Arr[1]);
Matt Arsenault39319482015-11-06 18:01:57 +0000281 Changed = true;
282 }
283 }
284
285 return Changed;
286}
287
Matt Arsenault6b930462017-07-13 21:43:42 +0000288bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
289 Module &M = SCC.getCallGraph().getModule();
Matt Arsenault39319482015-11-06 18:01:57 +0000290 Triple TT(M.getTargetTriple());
291
Matt Arsenault6b930462017-07-13 21:43:42 +0000292 bool Changed = false;
293 for (CallGraphNode *I : SCC) {
294 Function *F = I->getFunction();
295 if (!F || F->isDeclaration())
296 continue;
Matt Arsenault43976df2016-01-30 04:25:19 +0000297
Matt Arsenault6b930462017-07-13 21:43:42 +0000298 Changed |= addFeatureAttributes(*F);
Matt Arsenault99c14522016-04-25 19:27:24 +0000299 }
300
Matt Arsenault6b930462017-07-13 21:43:42 +0000301
Matt Arsenault39319482015-11-06 18:01:57 +0000302 return Changed;
303}
304
Matt Arsenault6b930462017-07-13 21:43:42 +0000305bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
306 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
307 if (!TPC)
308 report_fatal_error("TargetMachine is required");
309
310 AS = AMDGPU::getAMDGPUAS(CG.getModule());
311 TM = &TPC->getTM<TargetMachine>();
312 return false;
313}
314
315Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +0000316 return new AMDGPUAnnotateKernelFeatures();
Matt Arsenault39319482015-11-06 18:01:57 +0000317}