blob: 3bc73bf6e50709cf354929441c66bf5f03b92969 [file] [log] [blame]
//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

15#include "AMDGPU.h"
Matt Arsenaulte823d922017-02-18 18:29:53 +000016#include "AMDGPUSubtarget.h"
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +000017#include "llvm/ADT/Triple.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000018#include "llvm/Analysis/CallGraphSCCPass.h"
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +000019#include "llvm/CodeGen/TargetPassConfig.h"
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000020#include "llvm/IR/Constants.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000021#include "llvm/IR/InstIterator.h"
Matt Arsenault39319482015-11-06 18:01:57 +000022#include "llvm/IR/Instructions.h"
23#include "llvm/IR/Module.h"
24
25#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
26
27using namespace llvm;
28
namespace {

/// CallGraphSCCPass that annotates functions with "amdgpu-*" attributes
/// describing which implicit inputs they require (work-item/work-group IDs,
/// dispatch/queue/kernarg pointers), based on the intrinsics they call, the
/// attributes of their callees, and address space casts they perform.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Target machine, obtained from TargetPassConfig in doInitialization.
  const TargetMachine *TM = nullptr;
  // Target address space numbering, initialized in doInitialization.
  AMDGPUAS AS;

  // Scan F and add the feature attributes it needs; returns true if any
  // attribute was added.
  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  // Returns true if CE is an addrspacecast whose source address space
  // requires the queue pointer to resolve.
  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  // Walks EntryC and all of its constant operands (tracking visited nodes in
  // ConstantExprVisited); returns true if any visited constant expression
  // requires the queue pointer.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace
62
// Pass identification: the address of ID is the unique pass identifier.
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Handle exposed to the rest of the AMDGPU backend for pass lookup.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
Matt Arsenault39319482015-11-06 18:01:57 +000069
Matt Arsenault39319482015-11-06 18:01:57 +000070
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000071// The queue ptr is only needed when casting to flat, not from it.
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000072static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
73 return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
Matt Arsenault99c14522016-04-25 19:27:24 +000074}
75
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000076static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
77 const AMDGPUAS &AS) {
78 return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000079}
80
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000081bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
82 AMDGPUAS AS) {
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000083 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
84 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000085 return castRequiresQueuePtr(SrcAS, AS);
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000086 }
87
88 return false;
89}
90
91bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
92 const Constant *EntryC,
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000093 SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
94 AMDGPUAS AS) {
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000095
96 if (!ConstantExprVisited.insert(EntryC).second)
97 return false;
98
99 SmallVector<const Constant *, 16> Stack;
100 Stack.push_back(EntryC);
101
102 while (!Stack.empty()) {
103 const Constant *C = Stack.pop_back_val();
104
105 // Check this constant expression.
106 if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
Yaxun Liu1a14bfa2017-03-27 14:04:01 +0000107 if (visitConstantExpr(CE, AS))
Matt Arsenault3b2e2a52016-06-06 20:03:31 +0000108 return true;
109 }
110
111 // Visit all sub-expressions.
112 for (const Use &U : C->operands()) {
113 const auto *OpC = dyn_cast<Constant>(U);
114 if (!OpC)
115 continue;
116
117 if (!ConstantExprVisited.insert(OpC).second)
118 continue;
119
120 Stack.push_back(OpC);
121 }
122 }
123
124 return false;
125}
126
Matt Arsenault6b930462017-07-13 21:43:42 +0000127// We do not need to note the x workitem or workgroup id because they are always
128// initialized.
129//
130// TODO: We should not add the attributes if the known compile time workgroup
131// size is 1 for y/z.
132static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) {
133 switch (ID) {
134 case Intrinsic::amdgcn_workitem_id_y:
135 case Intrinsic::r600_read_tidig_y:
136 return "amdgpu-work-item-id-y";
137 case Intrinsic::amdgcn_workitem_id_z:
138 case Intrinsic::r600_read_tidig_z:
139 return "amdgpu-work-item-id-z";
140 case Intrinsic::amdgcn_workgroup_id_y:
141 case Intrinsic::r600_read_tgid_y:
142 return "amdgpu-work-group-id-y";
143 case Intrinsic::amdgcn_workgroup_id_z:
144 case Intrinsic::r600_read_tgid_z:
145 return "amdgpu-work-group-id-z";
146 case Intrinsic::amdgcn_dispatch_ptr:
147 return "amdgpu-dispatch-ptr";
148 case Intrinsic::amdgcn_dispatch_id:
149 return "amdgpu-dispatch-id";
Matt Arsenault23e4df62017-07-14 00:11:13 +0000150 case Intrinsic::amdgcn_kernarg_segment_ptr:
151 case Intrinsic::amdgcn_implicitarg_ptr:
152 return "amdgpu-kernarg-segment-ptr";
Matt Arsenault6b930462017-07-13 21:43:42 +0000153 case Intrinsic::amdgcn_queue_ptr:
154 case Intrinsic::trap:
155 case Intrinsic::debugtrap:
156 IsQueuePtr = true;
157 return "amdgpu-queue-ptr";
158 default:
159 return "";
160 }
161}
162
163static bool handleAttr(Function &Parent, const Function &Callee,
164 StringRef Name) {
165 if (Callee.hasFnAttribute(Name)) {
166 Parent.addFnAttr(Name);
167 return true;
168 }
169
170 return false;
171}
172
173static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
174 bool &NeedQueuePtr) {
175
176 static const StringRef AttrNames[] = {
177 // .x omitted
178 { "amdgpu-work-item-id-y" },
179 { "amdgpu-work-item-id-z" },
180 // .x omitted
181 { "amdgpu-work-group-id-y" },
182 { "amdgpu-work-group-id-z" },
183 { "amdgpu-dispatch-ptr" },
Matt Arsenault23e4df62017-07-14 00:11:13 +0000184 { "amdgpu-dispatch-id" },
185 { "amdgpu-kernarg-segment-ptr" }
Matt Arsenault6b930462017-07-13 21:43:42 +0000186 };
187
188 if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
189 NeedQueuePtr = true;
190
191 for (StringRef AttrName : AttrNames)
192 handleAttr(Parent, Callee, AttrName);
193}
194
/// Scans every instruction in \p F and adds the feature attributes it
/// requires. Returns true if any attribute was added.
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  // With aperture registers available, flat casts do not need the queue ptr.
  bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee)
          continue;

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          // External (non-intrinsic) call: inherit the callee's attributes.
          // NOTE: Changed is set conservatively even if nothing was copied.
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          // Intrinsic call: add the attribute it implies, if any.
          StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr);
          if (!AttrName.empty()) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      // Once the queue ptr is known to be needed (or never needed, because
      // aperture registers exist), skip the cast/constant scanning below.
      if (NeedQueuePtr || HasApertureRegs)
        continue;

      // A cast out of local/private address space needs the queue ptr.
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      // Such casts can also hide inside constant expression operands.
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  return Changed;
}
Matt Arsenault39319482015-11-06 18:01:57 +0000255
Matt Arsenault6b930462017-07-13 21:43:42 +0000256bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
257 Module &M = SCC.getCallGraph().getModule();
Matt Arsenault39319482015-11-06 18:01:57 +0000258 Triple TT(M.getTargetTriple());
259
Matt Arsenault6b930462017-07-13 21:43:42 +0000260 bool Changed = false;
261 for (CallGraphNode *I : SCC) {
262 Function *F = I->getFunction();
263 if (!F || F->isDeclaration())
264 continue;
Matt Arsenault43976df2016-01-30 04:25:19 +0000265
Matt Arsenault6b930462017-07-13 21:43:42 +0000266 Changed |= addFeatureAttributes(*F);
Matt Arsenault99c14522016-04-25 19:27:24 +0000267 }
268
Matt Arsenault6b930462017-07-13 21:43:42 +0000269
Matt Arsenault39319482015-11-06 18:01:57 +0000270 return Changed;
271}
272
Matt Arsenault6b930462017-07-13 21:43:42 +0000273bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
274 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
275 if (!TPC)
276 report_fatal_error("TargetMachine is required");
277
278 AS = AMDGPU::getAMDGPUAS(CG.getModule());
279 TM = &TPC->getTM<TargetMachine>();
280 return false;
281}
282
283Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +0000284 return new AMDGPUAnnotateKernelFeatures();
Matt Arsenault39319482015-11-06 18:01:57 +0000285}