Blame - llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp - toolchain/llvm-project

blob: 2378465a23196383d8d464308669f952ba85b2ea [file] [log] [blame]

Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	1	//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// This pass does misc. AMDGPU optimizations on IR before instruction
				12	/// selection.
				13	//
				14	//===----------------------------------------------------------------------===//
				15
				16	#include "AMDGPU.h"
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	17	#include "AMDGPUIntrinsicInfo.h"
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	18	#include "AMDGPUSubtarget.h"
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	19	#include "AMDGPUTargetMachine.h"
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	20
				21	#include "llvm/Analysis/DivergenceAnalysis.h"
				22	#include "llvm/CodeGen/Passes.h"
				23	#include "llvm/IR/InstVisitor.h"
				24	#include "llvm/IR/IRBuilder.h"
				25	#include "llvm/Support/Debug.h"
				26	#include "llvm/Support/raw_ostream.h"
				27
				28	#define DEBUG_TYPE "amdgpu-codegenprepare"
				29
				30	using namespace llvm;
				31
				32	namespace {
				33
				34	class AMDGPUCodeGenPrepare : public FunctionPass,
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	35	public InstVisitor<AMDGPUCodeGenPrepare, bool> {
				36	const GCNTargetMachine *TM;
				37	const SISubtarget *ST;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	38	DivergenceAnalysis *DA;
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	39	Module *Mod;
				40	bool HasUnsafeFPMath;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	41
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	42	/// \brief Copies exact/nsw/nuw flags (if any) from binary operator \p I to
				43	/// binary operator \p V.
				44	///
				45	/// \returns Binary operator \p V.
				46	Value copyFlags(const BinaryOperator &I, Value V) const;
				47
				48	/// \returns Equivalent 16 bit integer type for given 32 bit integer type
				49	/// \p T.
				50	Type getI16Ty(IRBuilder<> &B, const Type T) const;
				51
				52	/// \returns Equivalent 32 bit integer type for given 16 bit integer type
				53	/// \p T.
				54	Type getI32Ty(IRBuilder<> &B, const Type T) const;
				55
				56	/// \returns True if the base element of type \p T is 16 bit integer, false
				57	/// otherwise.
				58	bool isI16Ty(const Type *T) const;
				59
				60	/// \returns True if the base element of type \p T is 32 bit integer, false
				61	/// otherwise.
				62	bool isI32Ty(const Type *T) const;
				63
				64	/// \returns True if binary operation \p I is a signed binary operation, false
				65	/// otherwise.
				66	bool isSigned(const BinaryOperator &I) const;
				67
				68	/// \returns True if the condition of 'select' operation \p I comes from a
				69	/// signed 'icmp' operation, false otherwise.
				70	bool isSigned(const SelectInst &I) const;
				71
				72	/// \brief Promotes uniform 16 bit binary operation \p I to equivalent 32 bit
				73	/// binary operation by sign or zero extending operands to 32 bits, replacing
				74	/// 16 bit operation with equivalent 32 bit operation, and truncating the
				75	/// result of 32 bit operation back to 16 bits. 16 bit division operation is
				76	/// not promoted.
				77	///
				78	/// \returns True if 16 bit binary operation is promoted to equivalent 32 bit
				79	/// binary operation, false otherwise.
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	80	bool promoteUniformI16OpToI32(BinaryOperator &I) const;
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	81
				82	/// \brief Promotes uniform 16 bit 'icmp' operation \p I to 32 bit 'icmp'
				83	/// operation by sign or zero extending operands to 32 bits, and replacing 16
				84	/// bit operation with 32 bit operation.
				85	///
				86	/// \returns True.
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	87	bool promoteUniformI16OpToI32(ICmpInst &I) const;
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	88
				89	/// \brief Promotes uniform 16 bit 'select' operation \p I to 32 bit 'select'
				90	/// operation by sign or zero extending operands to 32 bits, replacing 16 bit
				91	/// operation with 32 bit operation, and truncating the result of 32 bit
				92	/// operation back to 16 bits.
				93	///
				94	/// \returns True.
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	95	bool promoteUniformI16OpToI32(SelectInst &I) const;
				96
				97	/// \brief Promotes uniform 16 bit 'bitreverse' intrinsic \p I to 32 bit
				98	/// 'bitreverse' intrinsic by zero extending operand to 32 bits, replacing 16
				99	/// bit intrinsic with 32 bit intrinsic, shifting the result of 32 bit
				100	/// intrinsic 16 bits to the right with zero fill, and truncating the result
				101	/// of shift operation back to 16 bits.
				102	///
				103	/// \returns True.
				104	bool promoteUniformI16BitreverseIntrinsicToI32(IntrinsicInst &I) const;
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	105
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	106	public:
				107	static char ID;
				108	AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
				109	FunctionPass(ID),
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	110	TM(static_cast<const GCNTargetMachine *>(TM)),
				111	ST(nullptr),
				112	DA(nullptr),
				113	Mod(nullptr),
				114	HasUnsafeFPMath(false) { }
				115
				116	bool visitFDiv(BinaryOperator &I);
				117
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	118	bool visitInstruction(Instruction &I) { return false; }
				119	bool visitBinaryOperator(BinaryOperator &I);
				120	bool visitICmpInst(ICmpInst &I);
				121	bool visitSelectInst(SelectInst &I);
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	122
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	123	bool visitIntrinsicInst(IntrinsicInst &I);
				124	bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
				125
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	126	bool doInitialization(Module &M) override;
				127	bool runOnFunction(Function &F) override;
				128
Mehdi Amini	117296c	2016-10-01 02:56:57 +0000	[diff] [blame]	129	StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	130
				131	void getAnalysisUsage(AnalysisUsage &AU) const override {
				132	AU.addRequired<DivergenceAnalysis>();
				133	AU.setPreservesAll();
				134	}
				135	};
				136
				137	} // End anonymous namespace
				138
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	139	Value *AMDGPUCodeGenPrepare::copyFlags(
				140	const BinaryOperator &I, Value *V) const {
				141	assert(isa<BinaryOperator>(V) && "V must be binary operator");
				142
				143	BinaryOperator *BinOp = cast<BinaryOperator>(V);
				144	if (isa<OverflowingBinaryOperator>(BinOp)) {
				145	BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
				146	BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
				147	} else if (isa<PossiblyExactOperator>(BinOp)) {
				148	BinOp->setIsExact(I.isExact());
				149	}
				150
				151	return V;
				152	}
				153
				154	Type AMDGPUCodeGenPrepare::getI16Ty(IRBuilder<> &B, const Type T) const {
				155	assert(isI32Ty(T) && "T must be 32 bits");
				156
				157	if (T->isIntegerTy())
				158	return B.getInt16Ty();
				159	return VectorType::get(B.getInt16Ty(), cast<VectorType>(T)->getNumElements());
				160	}
				161
				162	Type AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type T) const {
				163	assert(isI16Ty(T) && "T must be 16 bits");
				164
				165	if (T->isIntegerTy())
				166	return B.getInt32Ty();
				167	return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
				168	}
				169
				170	bool AMDGPUCodeGenPrepare::isI16Ty(const Type *T) const {
				171	if (T->isIntegerTy(16))
				172	return true;
				173	if (!T->isVectorTy())
				174	return false;
				175	return cast<VectorType>(T)->getElementType()->isIntegerTy(16);
				176	}
				177
				178	bool AMDGPUCodeGenPrepare::isI32Ty(const Type *T) const {
				179	if (T->isIntegerTy(32))
				180	return true;
				181	if (!T->isVectorTy())
				182	return false;
				183	return cast<VectorType>(T)->getElementType()->isIntegerTy(32);
				184	}
				185
				186	bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
Konstantin Zhuravlyov	691e2e0	2016-10-03 18:29:01 +0000	[diff] [blame]	187	return I.getOpcode() == Instruction::AShr \|\|
				188	I.getOpcode() == Instruction::SDiv \|\| I.getOpcode() == Instruction::SRem;
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	189	}
				190
				191	bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
				192	return isa<ICmpInst>(I.getOperand(0)) ?
				193	cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
				194	}
				195
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	196	bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(BinaryOperator &I) const {
				197	assert(isI16Ty(I.getType()) && "I must be 16 bits");
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	198
				199	if (I.getOpcode() == Instruction::SDiv \|\| I.getOpcode() == Instruction::UDiv)
				200	return false;
				201
				202	IRBuilder<> Builder(&I);
				203	Builder.SetCurrentDebugLocation(I.getDebugLoc());
				204
				205	Type *I32Ty = getI32Ty(Builder, I.getType());
				206	Value *ExtOp0 = nullptr;
				207	Value *ExtOp1 = nullptr;
				208	Value *ExtRes = nullptr;
				209	Value *TruncRes = nullptr;
				210
				211	if (isSigned(I)) {
				212	ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
				213	ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
				214	} else {
				215	ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
				216	ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
				217	}
				218	ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
				219	TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));
				220
				221	I.replaceAllUsesWith(TruncRes);
				222	I.eraseFromParent();
				223
				224	return true;
				225	}
				226
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	227	bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(ICmpInst &I) const {
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	228	assert(isI16Ty(I.getOperand(0)->getType()) && "Op0 must be 16 bits");
				229	assert(isI16Ty(I.getOperand(1)->getType()) && "Op1 must be 16 bits");
				230
				231	IRBuilder<> Builder(&I);
				232	Builder.SetCurrentDebugLocation(I.getDebugLoc());
				233
				234	Type *I32TyOp0 = getI32Ty(Builder, I.getOperand(0)->getType());
				235	Type *I32TyOp1 = getI32Ty(Builder, I.getOperand(1)->getType());
				236	Value *ExtOp0 = nullptr;
				237	Value *ExtOp1 = nullptr;
				238	Value *NewICmp = nullptr;
				239
				240	if (I.isSigned()) {
				241	ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32TyOp0);
				242	ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32TyOp1);
				243	} else {
				244	ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32TyOp0);
				245	ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32TyOp1);
				246	}
				247	NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
				248
				249	I.replaceAllUsesWith(NewICmp);
				250	I.eraseFromParent();
				251
				252	return true;
				253	}
				254
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	255	bool AMDGPUCodeGenPrepare::promoteUniformI16OpToI32(SelectInst &I) const {
				256	assert(isI16Ty(I.getType()) && "I must be 16 bits");
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	257
				258	IRBuilder<> Builder(&I);
				259	Builder.SetCurrentDebugLocation(I.getDebugLoc());
				260
				261	Type *I32Ty = getI32Ty(Builder, I.getType());
				262	Value *ExtOp1 = nullptr;
				263	Value *ExtOp2 = nullptr;
				264	Value *ExtRes = nullptr;
				265	Value *TruncRes = nullptr;
				266
				267	if (isSigned(I)) {
				268	ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
				269	ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
				270	} else {
				271	ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
				272	ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
				273	}
				274	ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
				275	TruncRes = Builder.CreateTrunc(ExtRes, getI16Ty(Builder, ExtRes->getType()));
				276
				277	I.replaceAllUsesWith(TruncRes);
				278	I.eraseFromParent();
				279
				280	return true;
				281	}
				282
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	283	bool AMDGPUCodeGenPrepare::promoteUniformI16BitreverseIntrinsicToI32(
				284	IntrinsicInst &I) const {
				285	assert(I.getIntrinsicID() == Intrinsic::bitreverse && "I must be bitreverse");
				286	assert(isI16Ty(I.getType()) && "I must be 16 bits");
				287
				288	IRBuilder<> Builder(&I);
				289	Builder.SetCurrentDebugLocation(I.getDebugLoc());
				290
				291	Type *I32Ty = getI32Ty(Builder, I.getType());
				292	Function *I32 =
				293	Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });;
				294	Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
				295	Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
				296	Value *LShrOp = Builder.CreateLShr(ExtRes, 16);
				297	Value *TruncRes =
				298	Builder.CreateTrunc(LShrOp, getI16Ty(Builder, ExtRes->getType()));
				299
				300	I.replaceAllUsesWith(TruncRes);
				301	I.eraseFromParent();
				302
				303	return true;
				304	}
				305
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	306	static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
				307	const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
				308	if (!CNum)
				309	return false;
				310
				311	// Reciprocal f32 is handled separately without denormals.
Matt Arsenault	e3862cd	2016-07-26 23:25:44 +0000	[diff] [blame]	312	return UnsafeDiv \|\| CNum->isExactlyValue(+1.0);
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	313	}
				314
				315	// Insert an intrinsic for fast fdiv for safe math situations where we can
				316	// reduce precision. Leave fdiv for situations where the generic node is
				317	// expected to be optimized.
				318	bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
				319	Type *Ty = FDiv.getType();
				320
				321	// TODO: Handle half
				322	if (!Ty->getScalarType()->isFloatTy())
				323	return false;
				324
				325	MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
				326	if (!FPMath)
				327	return false;
				328
				329	const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
				330	float ULP = FPOp->getFPAccuracy();
				331	if (ULP < 2.5f)
				332	return false;
				333
				334	FastMathFlags FMF = FPOp->getFastMathFlags();
				335	bool UnsafeDiv = HasUnsafeFPMath \|\| FMF.unsafeAlgebra() \|\|
				336	FMF.allowReciprocal();
				337	if (ST->hasFP32Denormals() && !UnsafeDiv)
				338	return false;
				339
				340	IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
				341	Builder.setFastMathFlags(FMF);
				342	Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
				343
				344	const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
				345	Function *Decl
				346	= II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
				347
				348	Value *Num = FDiv.getOperand(0);
				349	Value *Den = FDiv.getOperand(1);
				350
				351	Value *NewFDiv = nullptr;
				352
				353	if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
				354	NewFDiv = UndefValue::get(VT);
				355
				356	// FIXME: Doesn't do the right thing for cases where the vector is partially
				357	// constant. This works when the scalarizer pass is run first.
				358	for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
				359	Value *NumEltI = Builder.CreateExtractElement(Num, I);
				360	Value *DenEltI = Builder.CreateExtractElement(Den, I);
				361	Value *NewElt;
				362
				363	if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
				364	NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
				365	} else {
				366	NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
				367	}
				368
				369	NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
				370	}
				371	} else {
				372	if (!shouldKeepFDivF32(Num, UnsafeDiv))
				373	NewFDiv = Builder.CreateCall(Decl, { Num, Den });
				374	}
				375
				376	if (NewFDiv) {
				377	FDiv.replaceAllUsesWith(NewFDiv);
				378	NewFDiv->takeName(&FDiv);
				379	FDiv.eraseFromParent();
				380	}
				381
				382	return true;
				383	}
				384
				385	static bool hasUnsafeFPMath(const Function &F) {
				386	Attribute Attr = F.getFnAttribute("unsafe-fp-math");
				387	return Attr.getValueAsString() == "true";
				388	}
				389
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	390	bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
				391	bool Changed = false;
				392
				393	// TODO: Should we promote smaller types that will be legalized to i16?
				394	if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	395	Changed \|= promoteUniformI16OpToI32(I);
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	396
				397	return Changed;
				398	}
				399
				400	bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
				401	bool Changed = false;
				402
				403	// TODO: Should we promote smaller types that will be legalized to i16?
				404	if (ST->has16BitInsts() && isI16Ty(I.getOperand(0)->getType()) &&
				405	isI16Ty(I.getOperand(1)->getType()) && DA->isUniform(&I))
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	406	Changed \|= promoteUniformI16OpToI32(I);
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	407
				408	return Changed;
				409	}
				410
				411	bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
				412	bool Changed = false;
				413
				414	// TODO: Should we promote smaller types that will be legalized to i16?
				415	if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
Konstantin Zhuravlyov	b4eb5d5	2016-10-06 02:20:46 +0000	[diff] [blame^]	416	Changed \|= promoteUniformI16OpToI32(I);
				417
				418	return Changed;
				419	}
				420
				421	bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
				422	switch (I.getIntrinsicID()) {
				423	case Intrinsic::bitreverse:
				424	return visitBitreverseIntrinsicInst(I);
				425	default:
				426	return false;
				427	}
				428	}
				429
				430	bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
				431	bool Changed = false;
				432
				433	// TODO: Should we promote smaller types that will be legalized to i16?
				434	if (ST->has16BitInsts() && isI16Ty(I.getType()) && DA->isUniform(&I))
				435	Changed \|= promoteUniformI16BitreverseIntrinsicToI32(I);
Konstantin Zhuravlyov	e14df4b	2016-09-28 20:05:39 +0000	[diff] [blame]	436
				437	return Changed;
				438	}
				439
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	440	bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	441	Mod = &M;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	442	return false;
				443	}
				444
				445	bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
				446	if (!TM \|\| skipFunction(F))
				447	return false;
				448
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	449	ST = &TM->getSubtarget<SISubtarget>(F);
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	450	DA = &getAnalysis<DivergenceAnalysis>();
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	451	HasUnsafeFPMath = hasUnsafeFPMath(F);
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	452
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	453	bool MadeChange = false;
				454
				455	for (BasicBlock &BB : F) {
				456	BasicBlock::iterator Next;
				457	for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
				458	Next = std::next(I);
				459	MadeChange \|= visit(*I);
				460	}
				461	}
				462
				463	return MadeChange;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	464	}
				465
				466	INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
				467	"AMDGPU IR optimizations", false, false)
				468	INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
				469	INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
				470	"AMDGPU IR optimizations", false, false)
				471
				472	char AMDGPUCodeGenPrepare::ID = 0;
				473
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	474	FunctionPass llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine TM) {
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	475	return new AMDGPUCodeGenPrepare(TM);
				476	}