//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// that will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  const TargetMachine *TM;
  AMDGPUAS AS;
  static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
    ModulePass(ID), TM(TM_) {}
  bool runOnModule(Module &M) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};


}

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
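
// Casting a local (LDS) or private (scratch) pointer to the flat address
// space needs that segment's aperture base. On subtargets without aperture
// registers the base is read from the queue pointer, which is why such casts
// force the "amdgpu-queue-ptr" attribute in runOnModule below.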
// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}
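
// Iteratively traverse the constants reachable from EntryC with an explicit
// worklist, visiting each constant at most once, and return true if any
// reachable constant expression is an addrspacecast that needs the queue ptr.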
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Return true if an addrspacecast is used that requires the queue ptr.
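// Casts can appear directly as instructions or buried inside constant
// expression operands; both are checked.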
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
                                                    AMDGPUAS AS) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
          return true;
      }
    }
  }

  return false;
}
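
// Add AttrName to every function that calls Intrin. SeenFuncs prevents
// adding the same attribute repeatedly when a function contains multiple
// calls to the intrinsic.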
void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}
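
// For each (intrinsic name, attribute) pair, annotate all callers of the
// intrinsic. Returns true if any of the listed intrinsics is declared in
// this module.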
bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}
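
// Entry point: annotate callers of the listed intrinsics, then scan HSA/Mesa
// functions for address space casts that need the queue pointer. As a
// hypothetical illustration, a function containing
//   %y = call i32 @llvm.amdgcn.workitem.id.y()
// ends up annotated with "amdgpu-work-item-id-y" so calling convention
// lowering knows the workitem Y id must be initialized.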
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());
  AS = AMDGPU::getAMDGPUAS(M);

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
    { "llvm.trap", "amdgpu-queue-ptr" },
    { "llvm.debugtrap", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      bool HasApertureRegs =
        TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
      if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}
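
// TM may be null (the default), presumably so the pass can still be
// default-constructed for registration; runOnModule then conservatively
// assumes no aperture registers.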
ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
  return new AMDGPUAnnotateKernelFeatures(TM);
}