//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which will affect calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

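// Module pass that adds target attributes to each function based on the
// intrinsics it calls and the address space casts it performs, so that later
// calling convention lowering knows which implicit kernel inputs (workitem
// and workgroup IDs, dispatch ptr, queue ptr, ...) must be made available.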
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  AMDGPUAS AS;
  static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {}
  bool runOnModule(Module &M) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
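// Casting a local or private pointer to flat needs the corresponding aperture
// base, which on targets without aperture registers is read through the
// queue ptr.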
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

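// Walk the operands of EntryC with an explicit worklist and return true if
// any reachable ConstantExpr is an addrspacecast that requires the queue ptr.
// ConstantExprVisited is shared across calls so each constant in a function
// is only processed once.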
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {
  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}


// Return true if an addrspacecast is used that requires the queue ptr.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
                                                    AMDGPUAS AS) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
          return true;
      }
    }
  }

  return false;
}

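// Add AttrName to every function containing a call to Intrin. SeenFuncs
// ensures each caller is tagged only once even if it calls the intrinsic
// multiple times.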
void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

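// For each (intrinsic name, attribute) pair, tag all callers of the intrinsic
// if it is declared in the module. Note this conservatively reports a change
// whenever a listed intrinsic is present, even if it has no remaining users.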
bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}

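// Annotate the module: map intrinsic uses to attributes, and on AMDHSA and
// Mesa3D additionally mark functions whose address space casts need the
// queue ptr.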
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());
  AS = AMDGPU::getAMDGPUAS(M);

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

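  // Intrinsics that are only lowered for the HSA and Mesa ABIs. llvm.trap and
  // llvm.debugtrap are included because their lowering reads the queue ptr.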
  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
    { "llvm.trap", "amdgpu-queue-ptr" },
    { "llvm.debugtrap", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

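    // Targets with aperture registers can lower flat address space casts
    // without the queue ptr; otherwise a cast from private or local to flat
    // requires it.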
    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
      bool HasApertureRegs = TPC && TPC->getTM<TargetMachine>()
                                       .getSubtarget<AMDGPUSubtarget>(F)
                                       .hasApertureRegs();
      if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) {
        F.addFnAttr("amdgpu-queue-ptr");
        Changed = true;
      }
    }
  }

  return Changed;
}

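// Create the pass for use with the legacy pass manager.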
ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}