//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass eliminates allocas by either converting them into vectors or
// by migrating them to local address space.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-promote-alloca"

using namespace llvm;

namespace {

// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
  const TargetMachine *TM;
  Module *Mod;
  const DataLayout *DL;
  MDNode *MaxWorkGroupSizeRange;

  // FIXME: This should be per-kernel.
  uint32_t LocalMemLimit;
  uint32_t CurrentLocalMemUsage;

  bool IsAMDGCN;
  bool IsAMDHSA;

  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);

  /// BaseAlloca is the alloca root the search started from.
  /// Val may be that alloca or a recursive user of it.
  bool collectUsesWithPtrTypes(Value *BaseAlloca,
                               Value *Val,
                               std::vector<Value*> &WorkList) const;

  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
  /// Returns true if both operands are derived from the same alloca. Val should
  /// be the same value as one of the input operands of UseInst.
  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *UseInst,
                                       int OpIdx0, int OpIdx1) const;

public:
  static char ID;

  AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) :
    FunctionPass(ID),
    TM(TM_),
    Mod(nullptr),
    DL(nullptr),
    MaxWorkGroupSizeRange(nullptr),
    LocalMemLimit(0),
    CurrentLocalMemUsage(0),
    IsAMDGCN(false),
    IsAMDHSA(false) { }

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  const char *getPassName() const override {
    return "AMDGPU Promote Alloca";
  }

  void handleAlloca(AllocaInst &I);

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    FunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

char AMDGPUPromoteAlloca::ID = 0;

INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
                   "AMDGPU promote alloca to vector or LDS", false, false)

char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;

bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
  if (!TM)
    return false;

  Mod = &M;
  DL = &Mod->getDataLayout();

  // The maximum workitem id.
  //
  // FIXME: Should get as subtarget property. Usually runtime enforced max is
  // 256.
  MDBuilder MDB(Mod->getContext());
  MaxWorkGroupSizeRange = MDB.createRange(APInt(32, 0), APInt(32, 2048));

  const Triple &TT = TM->getTargetTriple();

  IsAMDGCN = TT.getArch() == Triple::amdgcn;
  IsAMDHSA = TT.getOS() == Triple::AMDHSA;

  return false;
}

bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  if (!TM || skipFunction(F))
    return false;

  FunctionType *FTy = F.getFunctionType();

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promotion to "
                      "local memory disabled.\n");
      return false;
    }
  }

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);

  LocalMemLimit = ST.getLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  const DataLayout &DL = Mod->getDataLayout();

  // Check how much local memory is already being used by global objects.
  CurrentLocalMemUsage = 0;
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    for (const User *U : GV.users()) {
      const Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F) {
        unsigned Align = GV.getAlignment();
        if (Align == 0)
          Align = DL.getABITypeAlignment(GV.getValueType());

        // FIXME: Try to account for padding here. The padding is currently
        // determined from the inverse order of uses in the function. I'm not
        // sure if the use list order is in any way connected to this, so the
        // total reported size is likely incorrect.
        uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
        CurrentLocalMemUsage += AllocSize;
        break;
      }
    }
  }

  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage);

  // Restrict local memory usage so that we don't drastically reduce occupancy,
  // unless it is already significantly reduced.

  // TODO: Have some sort of hint or other heuristics to guess occupancy based
  // on other factors.
  unsigned OccupancyHint
    = AMDGPU::getIntegerAttribute(F, "amdgpu-max-waves-per-eu", 0);
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to the maximum value.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerCU());

  // Check the hint but ignore it if it's obviously wrong from the existing LDS
  // usage.
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount
    = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy);

  // The program is possibly broken if it uses more local mem than is available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  DEBUG(
    dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n"
           << "  Rounding size to " << MaxSizeWithWaveCount
           << " with a maximum occupancy of " << MaxOccupancy << '\n'
           << " and " << (LocalMemLimit - CurrentLocalMemUsage)
           << " available for promotion\n"
  );

  BasicBlock &EntryBB = *F.begin();
  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
    AllocaInst *AI = dyn_cast<AllocaInst>(I);

    ++I;
    if (AI)
      handleAlloca(*AI);
  }

  return true;
}

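// A rough sketch of the IR getLocalSizeYZ produces (illustrative only; value
// names and metadata numbering are assumed). On non-HSA targets it is a pair
// of range-annotated intrinsic calls:
//
//   %y = call i32 @llvm.r600.read.local.size.y(), !range !0
//   %z = call i32 @llvm.r600.read.local.size.z(), !range !0
//
// On amdhsa the sizes are instead loaded as i32s out of the dispatch packet
// (see the struct layout in the comment below), with the y component shifted
// out of the packed x/y halfword pair.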
std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
  if (!IsAMDHSA) {
    Function *LocalSizeYFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
    Function *LocalSizeZFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);

    CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
    CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});

    LocalSizeY->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
    LocalSizeZ->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);

    return std::make_pair(LocalSizeY, LocalSizeZ);
  }

  // We must read the size out of the dispatch pointer.
  assert(IsAMDGCN);

  // We are indexing into this struct, and want to extract the workgroup_size_*
  // fields.
  //
  // typedef struct hsa_kernel_dispatch_packet_s {
  //   uint16_t header;
  //   uint16_t setup;
  //   uint16_t workgroup_size_x;
  //   uint16_t workgroup_size_y;
  //   uint16_t workgroup_size_z;
  //   uint16_t reserved0;
  //   uint32_t grid_size_x;
  //   uint32_t grid_size_y;
  //   uint32_t grid_size_z;
  //
  //   uint32_t private_segment_size;
  //   uint32_t group_segment_size;
  //   uint64_t kernel_object;
  //
  // #ifdef HSA_LARGE_MODEL
  //   void *kernarg_address;
  // #elif defined HSA_LITTLE_ENDIAN
  //   void *kernarg_address;
  //   uint32_t reserved1;
  // #else
  //   uint32_t reserved1;
  //   void *kernarg_address;
  // #endif
  //   uint64_t reserved2;
  //   hsa_signal_t completion_signal; // uint64_t wrapper
  // } hsa_kernel_dispatch_packet_t
  //
  Function *DispatchPtrFn
    = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);

  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
  DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
  DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);

  // Size of the dispatch packet struct.
  DispatchPtr->addDereferenceableAttr(AttributeSet::ReturnIndex, 64);

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
  Value *CastDispatchPtr = Builder.CreateBitCast(
    DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));

  // We could do a single 64-bit load here, but it's likely that the basic
  // 32-bit and extract sequence is already present, and it is probably easier
  // to CSE this. The loads should be mergeable later anyway.
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);

  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);

  MDNode *MD = llvm::MDNode::get(Mod->getContext(), None);
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);

  // Extract the y component. The upper half of LoadZU should be zero already.
  Value *Y = Builder.CreateLShr(LoadXY, 16);

  return std::make_pair(Y, LoadZU);
}

Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
  Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
                      : Intrinsic::r600_read_tidig_x;
    break;
  case 1:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
                      : Intrinsic::r600_read_tidig_y;
    break;
  case 2:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
                      : Intrinsic::r600_read_tidig_z;
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  CI->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);

  return CI;
}

static VectorType *arrayTypeToVecType(Type *ArrayTy) {
  return VectorType::get(ArrayTy->getArrayElementType(),
                         ArrayTy->getArrayNumElements());
}

static Value *
calculateVectorIndex(Value *Ptr,
                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  if (isa<AllocaInst>(Ptr))
    return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));

  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);

  auto I = GEPIdx.find(GEP);
  return I == GEPIdx.end() ? nullptr : I->second;
}

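// GEPToVectorIndex only matches the canonical two-index form with a zero
// first index, i.e. (illustrative IR; value names are assumed):
//
//   %gep = getelementptr [4 x i32], [4 x i32]* %alloca, i32 0, i32 %idx
//
// and returns %idx. Any other GEP shape is rejected.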
static Value *GEPToVectorIndex(GetElementPtrInst *GEP) {
  // FIXME: We only support simple cases.
  if (GEP->getNumOperands() != 3)
    return nullptr;

  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!I0 || !I0->isZero())
    return nullptr;

  return GEP->getOperand(2);
}

// Returns true if Inst is an instruction handled below when turning the
// alloca into a vector.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
  case Instruction::Store: {
    // Must be the stored pointer operand, not a stored value.
    StoreInst *SI = cast<StoreInst>(Inst);
    return SI->getPointerOperand() == User;
  }
  default:
    return false;
  }
}

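// A sketch of the rewrite performed below, assuming a [4 x float] alloca
// accessed only through simple GEPs, loads, and stores (illustrative IR;
// value names are assumed):
//
//   %gep = getelementptr [4 x float], [4 x float]* %alloca, i32 0, i32 %idx
//   %val = load float, float* %gep
//
// becomes roughly:
//
//   %vptr = bitcast [4 x float]* %alloca to <4 x float>*
//   %vec = load <4 x float>, <4 x float>* %vptr
//   %val = extractelement <4 x float> %vec, i32 %idx
//
// Stores are rewritten the same way, using insertelement followed by a store
// of the whole vector.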
static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
  ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());

  DEBUG(dbgs() << "Alloca candidate for vectorization\n");

  // FIXME: There is no reason why we can't support larger arrays; we
  // are just being conservative for now.
  if (!AllocaTy ||
      AllocaTy->getElementType()->isVectorTy() ||
      AllocaTy->getNumElements() > 4) {
    DEBUG(dbgs() << "  Cannot convert type to vector\n");
    return false;
  }

  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
  std::vector<Value*> WorkList;
  for (User *AllocaUser : Alloca->users()) {
    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
    if (!GEP) {
      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
        return false;

      WorkList.push_back(AllocaUser);
      continue;
    }

    Value *Index = GEPToVectorIndex(GEP);

    // If we can't compute a vector index from this GEP, then we can't
    // promote this alloca to vector.
    if (!Index) {
      DEBUG(dbgs() << "  Cannot compute vector index for GEP " << *GEP << '\n');
      return false;
    }

    GEPVectorIdx[GEP] = Index;
    for (User *GEPUser : AllocaUser->users()) {
      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
        return false;

      WorkList.push_back(GEPUser);
    }
  }

  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);

  DEBUG(dbgs() << "  Converting alloca to vector "
               << *AllocaTy << " -> " << *VectorTy << '\n');

  for (Value *V : WorkList) {
    Instruction *Inst = cast<Instruction>(V);
    IRBuilder<> Builder(Inst);
    switch (Inst->getOpcode()) {
    case Instruction::Load: {
      Value *Ptr = Inst->getOperand(0);
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
      Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
      Inst->replaceAllUsesWith(ExtractElement);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::Store: {
      Value *Ptr = Inst->getOperand(1);
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
      Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *NewVecValue = Builder.CreateInsertElement(VecValue,
                                                       Inst->getOperand(0),
                                                       Index);
      Builder.CreateStore(NewVecValue, BitCast);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      break;

    default:
      Inst->dump();
      llvm_unreachable("Inconsistency in instructions promotable to vector");
    }
  }
  return true;
}

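// As an example of a promotable call, a memset through a pointer derived from
// the alloca, such as (illustrative IR; the five-operand signature with an
// explicit i32 alignment argument matches the intrinsic form of this LLVM
// version):
//
//   call void @llvm.memset.p0i8.i64(i8* %cast, i8 0, i64 16, i32 4, i1 false)
//
// is accepted here and later re-emitted by handleAlloca with an addrspace(3)
// destination pointer.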
static bool isCallPromotable(CallInst *CI) {
  // TODO: We might be able to handle some cases where the callee is a
  // constantexpr bitcast of a function.
  if (!CI->getCalledFunction())
    return false;

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::invariant_group_barrier:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}

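// A sketch of the case this check guards against: an icmp or select mixing
// pointers into two different allocas (illustrative IR; names are assumed):
//
//   %p0 = getelementptr inbounds [4 x i32], [4 x i32]* %alloca0, i32 0, i32 %i
//   %p1 = getelementptr inbounds [4 x i32], [4 x i32]* %alloca1, i32 0, i32 %j
//   %c = icmp eq i32* %p0, %p1
//
// If only %alloca0 were promoted, %p0 and %p1 would end up in different
// address spaces and the icmp could not be rewritten consistently.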
bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
                                                          Value *Val,
                                                          Instruction *Inst,
                                                          int OpIdx0,
                                                          int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (Val == OtherOp)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // TODO: We should be able to replace undefs with the right pointer type.

  // TODO: If we know the other base object is another promotable
  // alloca, not necessarily this alloca, we can do this. The
  // important part is both must have the same address space at
  // the end.
  if (OtherObj != BaseAlloca) {
    DEBUG(dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
  Value *BaseAlloca,
  Value *Val,
  std::vector<Value*> &WorkList) const {

  for (User *User : Val->users()) {
    if (std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;

      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is not the pointer operand.
      if (SI->getPointerOperand() != Val)
        return false;
    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
    } else if (AtomicCmpXchgInst *CAS
                 = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
    }

    // Only promote an icmp if we know that the other operand is derived from
    // a pointer that will also be promoted.
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
    }

    if (!User->getType()->isPointerTy())
      continue;

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds of
      // the alloca.
      if (!GEP->isInBounds())
        return false;
    }

    // Only promote a select if we know that the other select operand is from
    // another pointer that will also be promoted.
    if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    }

    // Repeat for phis.
    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      // TODO: Handle more complex cases. We should be able to replace loops
      // over arrays.
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}

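// A sketch of the LDS rewrite performed below: a promotable entry-block
// alloca is replaced with a per-workitem slice of a new LDS array sized by
// the maximum workgroup size. Roughly (illustrative IR; the actual names and
// types are derived from the function and alloca being rewritten):
//
//   %alloca = alloca [4 x i32]
//
// becomes, with N the workgroup size and %tid the linearized workitem id:
//
//   @func.alloca = internal unnamed_addr addrspace(3) global [N x [4 x i32]] undef
//   %slice = getelementptr inbounds [N x [4 x i32]],
//            [N x [4 x i32]] addrspace(3)* @func.alloca, i32 0, i32 %tid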
// FIXME: Should try to pick the most likely to be profitable allocas first.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector.
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I)) {
    DEBUG(dbgs() << " alloca promoted to vector.\n");
    return;
  }

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << "  " << AllocSize
                 << " bytes of local memory not available to promote\n");
    return;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.

      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
           ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}

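// Factory hook used when building the target pass pipeline. As a sketch of
// typical usage (assuming the usual AMDGPUPassConfig setup; the exact call
// site is hypothetical):
//
//   addPass(createAMDGPUPromoteAlloca(&TM));
//
// A null TargetMachine is tolerated; the pass then simply does nothing.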
FunctionPass *llvm::createAMDGPUPromoteAlloca(const TargetMachine *TM) {
  return new AMDGPUPromoteAlloca(TM);
}