//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
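/// For example, a function that calls llvm.amdgcn.workitem.id.y is marked
/// with the "amdgpu-work-item-id-y" attribute, and such attributes are
/// propagated from callees to their callers across the call graph.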
//
//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
Matt Arsenaulte823d922017-02-18 18:29:53 +000015#include "AMDGPUSubtarget.h"
Eugene Zelenkofa6434b2017-08-31 21:56:16 +000016#include "Utils/AMDGPUBaseInfo.h"
17#include "llvm/ADT/SmallPtrSet.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
Matt Arsenault2ffe8fd2016-08-11 19:18:50 +000020#include "llvm/ADT/Triple.h"
Eugene Zelenkofa6434b2017-08-31 21:56:16 +000021#include "llvm/Analysis/CallGraph.h"
Matt Arsenault6b930462017-07-13 21:43:42 +000022#include "llvm/Analysis/CallGraphSCCPass.h"
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +000023#include "llvm/CodeGen/TargetPassConfig.h"
Eugene Zelenkofa6434b2017-08-31 21:56:16 +000024#include "llvm/IR/CallSite.h"
25#include "llvm/IR/Constant.h"
Matt Arsenault3b2e2a52016-06-06 20:03:31 +000026#include "llvm/IR/Constants.h"
Eugene Zelenkofa6434b2017-08-31 21:56:16 +000027#include "llvm/IR/Function.h"
28#include "llvm/IR/Instruction.h"
Matt Arsenault39319482015-11-06 18:01:57 +000029#include "llvm/IR/Instructions.h"
Eugene Zelenkofa6434b2017-08-31 21:56:16 +000030#include "llvm/IR/Intrinsics.h"
Matt Arsenault39319482015-11-06 18:01:57 +000031#include "llvm/IR/Module.h"
Eugene Zelenkofa6434b2017-08-31 21:56:16 +000032#include "llvm/IR/Type.h"
33#include "llvm/IR/Use.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/Casting.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Target/TargetMachine.h"
Matt Arsenault39319482015-11-06 18:01:57 +000038
39#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
40
41using namespace llvm;
42
43namespace {
44
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace
Matt Arsenault39319482015-11-06 18:01:57 +000075
76char AMDGPUAnnotateKernelFeatures::ID = 0;
77
78char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
79
Matt Arsenault99c14522016-04-25 19:27:24 +000080INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
81 "Add AMDGPU function attributes", false, false)
Matt Arsenault39319482015-11-06 18:01:57 +000082
Matt Arsenault39319482015-11-06 18:01:57 +000083
// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

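// Returns true if this constant expression is an addrspacecast whose source
// address space requires the queue pointer.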
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

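// Walk a constant and all of its transitive constant operands with an
// explicit worklist, returning true if any visited constant expression
// requires the queue pointer. Already-visited constants are skipped via
// the shared ConstantExprVisited set.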
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile-time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

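// If the callee carries the attribute Name, add it to the parent as well.
// Returns true if the attribute was propagated.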
static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

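// Propagate the input-related attributes from a callee to its caller, and
// record whether the caller now needs the queue pointer.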
static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // Note that the x ids are unnecessarily propagated to kernels here.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

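// Scan a function's body for intrinsic calls, addrspacecasts, and constant
// expressions that imply hardware inputs, and add the corresponding function
// attributes. Returns true if any attribute was added.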
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

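// Add feature attributes to every defined function in the SCC. CallGraphSCCPass
// visits callees before callers, so propagated attributes are generally in
// place by the time a caller is processed.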
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

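// Grab the TargetMachine from TargetPassConfig; this pass cannot do anything
// useful without one, so it is a fatal error to run it outside codegen.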
bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  TM = &TPC->getTM<TargetMachine>();
  return false;
}

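// Factory function used when the AMDGPU target builds its codegen pipeline.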
Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}