//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// that will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

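// Return true if this constant expression needs the queue pointer, i.e. it is
// an addrspacecast from a local or private pointer to flat.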
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

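// Walk EntryC and every constant reachable through its operands using an
// explicit stack, returning true as soon as a cast requiring the queue pointer
// is found. ConstantExprVisited persists across calls, so shared constants are
// only scanned once per function.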
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

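// Illustrative example (simplified IR, hypothetical kernel name): a function
// containing
//   %id.y = call i32 @llvm.amdgcn.workitem.id.y()
// is tagged with "amdgpu-work-item-id-y", which calling convention lowering
// later uses to decide which implicit input registers must be set up.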
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

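// If Callee carries the attribute Name, copy it onto Parent. Returns true if
// the attribute was present.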
static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

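// Propagate the implicit-input attributes of a callee up to the calling
// function, tracking separately whether the queue pointer becomes needed.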
static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // X ids are unnecessarily propagated to kernels.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

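// Scan the body of F and add every input attribute it requires: attributes
// implied by the intrinsics it calls, attributes inherited from non-intrinsic
// callees, and the queue pointer for any addrspacecast from private/local to
// flat (skipped when the subtarget has aperture registers).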
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

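      // A cast requiring the queue pointer can also be buried inside a
      // constant expression operand, so scan those recursively as well.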
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

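// CallGraphSCCPass visits SCCs of the call graph bottom-up, so (outside of
// recursive cycles) callee attributes are already final when
// copyFeaturesToFunction runs for a caller.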
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

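// Cache the TargetMachine via TargetPassConfig; the subtarget feature queries
// in addFeatureAttributes depend on it, so the pass cannot run without one.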
bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}