blob: dae42acb41cb7d473ae832af8fb0f22e6ed77156 [file] [log] [blame]
Tom Stellard880a80a2014-06-17 16:53:14 +00001//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass eliminates allocas by either converting them into vectors or
11// by migrating them to local address space.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPU.h"
16#include "AMDGPUSubtarget.h"
17#include "llvm/Analysis/ValueTracking.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/InstVisitor.h"
20#include "llvm/Support/Debug.h"
Benjamin Kramer16132e62015-03-23 18:07:13 +000021#include "llvm/Support/raw_ostream.h"
Tom Stellard880a80a2014-06-17 16:53:14 +000022
23#define DEBUG_TYPE "amdgpu-promote-alloca"
24
25using namespace llvm;
26
27namespace {
28
29class AMDGPUPromoteAlloca : public FunctionPass,
30 public InstVisitor<AMDGPUPromoteAlloca> {
31
32 static char ID;
33 Module *Mod;
34 const AMDGPUSubtarget &ST;
35 int LocalMemAvailable;
36
37public:
38 AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
39 LocalMemAvailable(0) { }
Benjamin Kramer8c90fd72014-09-03 11:41:21 +000040 bool doInitialization(Module &M) override;
41 bool runOnFunction(Function &F) override;
42 const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
Tom Stellard880a80a2014-06-17 16:53:14 +000043 void visitAlloca(AllocaInst &I);
44};
45
46} // End anonymous namespace
47
48char AMDGPUPromoteAlloca::ID = 0;
49
50bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
51 Mod = &M;
52 return false;
53}
54
55bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
56
Craig Toppere3dcce92015-08-01 22:20:21 +000057 FunctionType *FTy = F.getFunctionType();
Tom Stellard880a80a2014-06-17 16:53:14 +000058
59 LocalMemAvailable = ST.getLocalMemorySize();
60
61
62 // If the function has any arguments in the local address space, then it's
63 // possible these arguments require the entire local memory space, so
64 // we cannot use local memory in the pass.
65 for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
Craig Toppere3dcce92015-08-01 22:20:21 +000066 Type *ParamTy = FTy->getParamType(i);
Tom Stellard880a80a2014-06-17 16:53:14 +000067 if (ParamTy->isPointerTy() &&
68 ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
69 LocalMemAvailable = 0;
70 DEBUG(dbgs() << "Function has local memory argument. Promoting to "
71 "local memory disabled.\n");
72 break;
73 }
74 }
75
76 if (LocalMemAvailable > 0) {
77 // Check how much local memory is being used by global objects
78 for (Module::global_iterator I = Mod->global_begin(),
79 E = Mod->global_end(); I != E; ++I) {
80 GlobalVariable *GV = I;
81 PointerType *GVTy = GV->getType();
82 if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
83 continue;
84 for (Value::use_iterator U = GV->use_begin(),
85 UE = GV->use_end(); U != UE; ++U) {
86 Instruction *Use = dyn_cast<Instruction>(*U);
87 if (!Use)
88 continue;
89 if (Use->getParent()->getParent() == &F)
90 LocalMemAvailable -=
Mehdi Amini46a43552015-03-04 18:43:29 +000091 Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
Tom Stellard880a80a2014-06-17 16:53:14 +000092 }
93 }
94 }
95
96 LocalMemAvailable = std::max(0, LocalMemAvailable);
97 DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n");
98
99 visit(F);
100
101 return false;
102}
103
Craig Toppere3dcce92015-08-01 22:20:21 +0000104static VectorType *arrayTypeToVecType(Type *ArrayTy) {
Tom Stellard880a80a2014-06-17 16:53:14 +0000105 return VectorType::get(ArrayTy->getArrayElementType(),
106 ArrayTy->getArrayNumElements());
107}
108
Benjamin Kramerc6cc58e2014-10-04 16:55:56 +0000109static Value *
110calculateVectorIndex(Value *Ptr,
111 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
Tom Stellard880a80a2014-06-17 16:53:14 +0000112 if (isa<AllocaInst>(Ptr))
113 return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
114
115 GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
116
Benjamin Kramerc6cc58e2014-10-04 16:55:56 +0000117 auto I = GEPIdx.find(GEP);
118 return I == GEPIdx.end() ? nullptr : I->second;
Tom Stellard880a80a2014-06-17 16:53:14 +0000119}
120
121static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
122 // FIXME we only support simple cases
123 if (GEP->getNumOperands() != 3)
124 return NULL;
125
126 ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
127 if (!I0 || !I0->isZero())
128 return NULL;
129
130 return GEP->getOperand(2);
131}
132
Matt Arsenault642d2e72014-06-27 16:52:49 +0000133// Not an instruction handled below to turn into a vector.
134//
135// TODO: Check isTriviallyVectorizable for calls and handle other
136// instructions.
Matt Arsenault7227cc12015-07-28 18:47:00 +0000137static bool canVectorizeInst(Instruction *Inst, User *User) {
Matt Arsenault642d2e72014-06-27 16:52:49 +0000138 switch (Inst->getOpcode()) {
139 case Instruction::Load:
Matt Arsenault642d2e72014-06-27 16:52:49 +0000140 case Instruction::BitCast:
141 case Instruction::AddrSpaceCast:
142 return true;
Matt Arsenault7227cc12015-07-28 18:47:00 +0000143 case Instruction::Store: {
144 // Must be the stored pointer operand, not a stored value.
145 StoreInst *SI = cast<StoreInst>(Inst);
146 return SI->getPointerOperand() == User;
147 }
Matt Arsenault642d2e72014-06-27 16:52:49 +0000148 default:
149 return false;
150 }
151}
152
Tom Stellard880a80a2014-06-17 16:53:14 +0000153static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
154 Type *AllocaTy = Alloca->getAllocatedType();
155
156 DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
157
158 // FIXME: There is no reason why we can't support larger arrays, we
159 // are just being conservative for now.
160 if (!AllocaTy->isArrayTy() ||
161 AllocaTy->getArrayElementType()->isVectorTy() ||
162 AllocaTy->getArrayNumElements() > 4) {
163
164 DEBUG(dbgs() << " Cannot convert type to vector");
165 return false;
166 }
167
168 std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
169 std::vector<Value*> WorkList;
170 for (User *AllocaUser : Alloca->users()) {
171 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
172 if (!GEP) {
Matt Arsenault7227cc12015-07-28 18:47:00 +0000173 if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
Matt Arsenault642d2e72014-06-27 16:52:49 +0000174 return false;
175
Tom Stellard880a80a2014-06-17 16:53:14 +0000176 WorkList.push_back(AllocaUser);
177 continue;
178 }
179
180 Value *Index = GEPToVectorIndex(GEP);
181
182 // If we can't compute a vector index from this GEP, then we can't
183 // promote this alloca to vector.
184 if (!Index) {
Matt Arsenault6f62cf82014-06-27 02:36:59 +0000185 DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
Tom Stellard880a80a2014-06-17 16:53:14 +0000186 return false;
187 }
188
189 GEPVectorIdx[GEP] = Index;
190 for (User *GEPUser : AllocaUser->users()) {
Matt Arsenault7227cc12015-07-28 18:47:00 +0000191 if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
Matt Arsenault642d2e72014-06-27 16:52:49 +0000192 return false;
193
Tom Stellard880a80a2014-06-17 16:53:14 +0000194 WorkList.push_back(GEPUser);
195 }
196 }
197
198 VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
199
Matt Arsenault6f62cf82014-06-27 02:36:59 +0000200 DEBUG(dbgs() << " Converting alloca to vector "
201 << *AllocaTy << " -> " << *VectorTy << '\n');
Tom Stellard880a80a2014-06-17 16:53:14 +0000202
203 for (std::vector<Value*>::iterator I = WorkList.begin(),
204 E = WorkList.end(); I != E; ++I) {
205 Instruction *Inst = cast<Instruction>(*I);
206 IRBuilder<> Builder(Inst);
207 switch (Inst->getOpcode()) {
208 case Instruction::Load: {
209 Value *Ptr = Inst->getOperand(0);
210 Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
211 Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
212 Value *VecValue = Builder.CreateLoad(BitCast);
213 Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
214 Inst->replaceAllUsesWith(ExtractElement);
215 Inst->eraseFromParent();
216 break;
217 }
218 case Instruction::Store: {
219 Value *Ptr = Inst->getOperand(1);
220 Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
221 Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
222 Value *VecValue = Builder.CreateLoad(BitCast);
223 Value *NewVecValue = Builder.CreateInsertElement(VecValue,
224 Inst->getOperand(0),
225 Index);
226 Builder.CreateStore(NewVecValue, BitCast);
227 Inst->eraseFromParent();
228 break;
229 }
230 case Instruction::BitCast:
Matt Arsenault642d2e72014-06-27 16:52:49 +0000231 case Instruction::AddrSpaceCast:
Tom Stellard880a80a2014-06-17 16:53:14 +0000232 break;
233
234 default:
235 Inst->dump();
Matt Arsenault642d2e72014-06-27 16:52:49 +0000236 llvm_unreachable("Inconsistency in instructions promotable to vector");
Tom Stellard880a80a2014-06-17 16:53:14 +0000237 }
238 }
239 return true;
240}
241
Tom Stellard5b2927f2014-10-31 20:52:04 +0000242static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
243 bool Success = true;
Tom Stellard880a80a2014-06-17 16:53:14 +0000244 for (User *User : Val->users()) {
245 if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
246 continue;
Matt Arsenaultfdcd39a2015-07-28 18:29:14 +0000247 if (CallInst *CI = dyn_cast<CallInst>(User)) {
248 // TODO: We might be able to handle some cases where the callee is a
249 // constantexpr bitcast of a function.
250 if (!CI->getCalledFunction())
251 return false;
252
Tom Stellard880a80a2014-06-17 16:53:14 +0000253 WorkList.push_back(User);
254 continue;
255 }
Tom Stellard5b2927f2014-10-31 20:52:04 +0000256
257 // FIXME: Correctly handle ptrtoint instructions.
258 Instruction *UseInst = dyn_cast<Instruction>(User);
259 if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
260 return false;
261
Matt Arsenault7227cc12015-07-28 18:47:00 +0000262 if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
263 // Reject if the stored value is not the pointer operand.
264 if (SI->getPointerOperand() != Val)
265 return false;
266 }
267
Tom Stellard880a80a2014-06-17 16:53:14 +0000268 if (!User->getType()->isPointerTy())
269 continue;
Tom Stellard5b2927f2014-10-31 20:52:04 +0000270
Tom Stellard880a80a2014-06-17 16:53:14 +0000271 WorkList.push_back(User);
Tom Stellard5b2927f2014-10-31 20:52:04 +0000272
273 Success &= collectUsesWithPtrTypes(User, WorkList);
Tom Stellard880a80a2014-06-17 16:53:14 +0000274 }
Tom Stellard5b2927f2014-10-31 20:52:04 +0000275 return Success;
Tom Stellard880a80a2014-06-17 16:53:14 +0000276}
277
278void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
279 IRBuilder<> Builder(&I);
280
281 // First try to replace the alloca with a vector
282 Type *AllocaTy = I.getAllocatedType();
283
Matt Arsenault6f62cf82014-06-27 02:36:59 +0000284 DEBUG(dbgs() << "Trying to promote " << I << '\n');
Tom Stellard880a80a2014-06-17 16:53:14 +0000285
286 if (tryPromoteAllocaToVector(&I))
287 return;
288
289 DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
290
291 // FIXME: This is the maximum work group size. We should try to get
292 // value from the reqd_work_group_size function attribute if it is
293 // available.
294 unsigned WorkGroupSize = 256;
Mehdi Amini46a43552015-03-04 18:43:29 +0000295 int AllocaSize =
296 WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
Tom Stellard880a80a2014-06-17 16:53:14 +0000297
298 if (AllocaSize > LocalMemAvailable) {
299 DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
300 return;
301 }
302
Tom Stellard5b2927f2014-10-31 20:52:04 +0000303 std::vector<Value*> WorkList;
304
305 if (!collectUsesWithPtrTypes(&I, WorkList)) {
306 DEBUG(dbgs() << " Do not know how to convert all uses\n");
307 return;
308 }
309
Tom Stellard880a80a2014-06-17 16:53:14 +0000310 DEBUG(dbgs() << "Promoting alloca to local memory\n");
311 LocalMemAvailable -= AllocaSize;
312
David Blaikie156d46e2015-03-24 23:34:31 +0000313 Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
Tom Stellard880a80a2014-06-17 16:53:14 +0000314 GlobalVariable *GV = new GlobalVariable(
David Blaikie156d46e2015-03-24 23:34:31 +0000315 *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
Tom Stellard880a80a2014-06-17 16:53:14 +0000316 GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
317
318 FunctionType *FTy = FunctionType::get(
319 Type::getInt32Ty(Mod->getContext()), false);
320 AttributeSet AttrSet;
321 AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
322
323 Value *ReadLocalSizeY = Mod->getOrInsertFunction(
324 "llvm.r600.read.local.size.y", FTy, AttrSet);
325 Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
326 "llvm.r600.read.local.size.z", FTy, AttrSet);
327 Value *ReadTIDIGX = Mod->getOrInsertFunction(
328 "llvm.r600.read.tidig.x", FTy, AttrSet);
329 Value *ReadTIDIGY = Mod->getOrInsertFunction(
330 "llvm.r600.read.tidig.y", FTy, AttrSet);
331 Value *ReadTIDIGZ = Mod->getOrInsertFunction(
332 "llvm.r600.read.tidig.z", FTy, AttrSet);
333
David Blaikieff6409d2015-05-18 22:13:54 +0000334 Value *TCntY = Builder.CreateCall(ReadLocalSizeY, {});
335 Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ, {});
336 Value *TIdX = Builder.CreateCall(ReadTIDIGX, {});
337 Value *TIdY = Builder.CreateCall(ReadTIDIGY, {});
338 Value *TIdZ = Builder.CreateCall(ReadTIDIGZ, {});
Tom Stellard880a80a2014-06-17 16:53:14 +0000339
340 Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
341 Tmp0 = Builder.CreateMul(Tmp0, TIdX);
342 Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
343 Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
344 TID = Builder.CreateAdd(TID, TIdZ);
345
346 std::vector<Value*> Indices;
347 Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
348 Indices.push_back(TID);
349
David Blaikie156d46e2015-03-24 23:34:31 +0000350 Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
Tom Stellard880a80a2014-06-17 16:53:14 +0000351 I.mutateType(Offset->getType());
352 I.replaceAllUsesWith(Offset);
353 I.eraseFromParent();
354
Tom Stellard880a80a2014-06-17 16:53:14 +0000355 for (std::vector<Value*>::iterator i = WorkList.begin(),
356 e = WorkList.end(); i != e; ++i) {
357 Value *V = *i;
358 CallInst *Call = dyn_cast<CallInst>(V);
359 if (!Call) {
360 Type *EltTy = V->getType()->getPointerElementType();
361 PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
Matt Arsenault65f67e42014-09-15 15:41:44 +0000362
363 // The operand's value should be corrected on its own.
364 if (isa<AddrSpaceCastInst>(V))
365 continue;
366
367 // FIXME: It doesn't really make sense to try to do this for all
368 // instructions.
Tom Stellard880a80a2014-06-17 16:53:14 +0000369 V->mutateType(NewTy);
370 continue;
371 }
372
373 IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
374 if (!Intr) {
375 std::vector<Type*> ArgTypes;
376 for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
377 ArgIdx != ArgEnd; ++ArgIdx) {
378 ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
379 }
380 Function *F = Call->getCalledFunction();
381 FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
382 F->isVarArg());
Yaron Keren75e0c4b2015-03-27 17:51:30 +0000383 Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
384 NewType, F->getAttributes());
Tom Stellard880a80a2014-06-17 16:53:14 +0000385 Function *NewF = cast<Function>(C);
386 Call->setCalledFunction(NewF);
387 continue;
388 }
389
390 Builder.SetInsertPoint(Intr);
391 switch (Intr->getIntrinsicID()) {
392 case Intrinsic::lifetime_start:
393 case Intrinsic::lifetime_end:
394 // These intrinsics are for address space 0 only
395 Intr->eraseFromParent();
396 continue;
397 case Intrinsic::memcpy: {
398 MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
399 Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
400 MemCpy->getLength(), MemCpy->getAlignment(),
401 MemCpy->isVolatile());
402 Intr->eraseFromParent();
403 continue;
404 }
405 case Intrinsic::memset: {
406 MemSetInst *MemSet = cast<MemSetInst>(Intr);
407 Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
408 MemSet->getLength(), MemSet->getAlignment(),
409 MemSet->isVolatile());
410 Intr->eraseFromParent();
411 continue;
412 }
413 default:
414 Intr->dump();
415 llvm_unreachable("Don't know how to promote alloca intrinsic use.");
416 }
417 }
418}
419
420FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
421 return new AMDGPUPromoteAlloca(ST);
422}