Blame - llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp - toolchain/llvm-project

blob: b955e231699ad0da7d017fb11bc2c28297561f57 [file] [log] [blame]

Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	1	//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// This pass does misc. AMDGPU optimizations on IR before instruction
				12	/// selection.
				13	//
				14	//===----------------------------------------------------------------------===//
				15
				16	#include "AMDGPU.h"
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	17	#include "AMDGPUIntrinsicInfo.h"
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	18	#include "AMDGPUSubtarget.h"
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	19	#include "AMDGPUTargetMachine.h"
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	20
				21	#include "llvm/Analysis/DivergenceAnalysis.h"
				22	#include "llvm/CodeGen/Passes.h"
				23	#include "llvm/IR/InstVisitor.h"
				24	#include "llvm/IR/IRBuilder.h"
				25	#include "llvm/Support/Debug.h"
				26	#include "llvm/Support/raw_ostream.h"
				27
				28	#define DEBUG_TYPE "amdgpu-codegenprepare"
				29
				30	using namespace llvm;
				31
				32	namespace {
				33
				34	class AMDGPUCodeGenPrepare : public FunctionPass,
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	35	public InstVisitor<AMDGPUCodeGenPrepare, bool> {
				36	const GCNTargetMachine *TM;
				37	const SISubtarget *ST;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	38	DivergenceAnalysis *DA;
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	39	Module *Mod;
				40	bool HasUnsafeFPMath;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	41
				42	public:
				43	static char ID;
				44	AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
				45	FunctionPass(ID),
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	46	TM(static_cast<const GCNTargetMachine *>(TM)),
				47	ST(nullptr),
				48	DA(nullptr),
				49	Mod(nullptr),
				50	HasUnsafeFPMath(false) { }
				51
				52	bool visitFDiv(BinaryOperator &I);
				53
				54	bool visitInstruction(Instruction &I) {
				55	return false;
				56	}
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	57
				58	bool doInitialization(Module &M) override;
				59	bool runOnFunction(Function &F) override;
				60
				61	const char *getPassName() const override {
				62	return "AMDGPU IR optimizations";
				63	}
				64
				65	void getAnalysisUsage(AnalysisUsage &AU) const override {
				66	AU.addRequired<DivergenceAnalysis>();
				67	AU.setPreservesAll();
				68	}
				69	};
				70
				71	} // End anonymous namespace
				72
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	73	static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
				74	const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
				75	if (!CNum)
				76	return false;
				77
				78	// Reciprocal f32 is handled separately without denormals.
Matt Arsenault	e3862cd	2016-07-26 23:25:44 +0000	[diff] [blame^]	79	return UnsafeDiv \|\| CNum->isExactlyValue(+1.0);
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	80	}
				81
				82	// Insert an intrinsic for fast fdiv for safe math situations where we can
				83	// reduce precision. Leave fdiv for situations where the generic node is
				84	// expected to be optimized.
				85	bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
				86	Type *Ty = FDiv.getType();
				87
				88	// TODO: Handle half
				89	if (!Ty->getScalarType()->isFloatTy())
				90	return false;
				91
				92	MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
				93	if (!FPMath)
				94	return false;
				95
				96	const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
				97	float ULP = FPOp->getFPAccuracy();
				98	if (ULP < 2.5f)
				99	return false;
				100
				101	FastMathFlags FMF = FPOp->getFastMathFlags();
				102	bool UnsafeDiv = HasUnsafeFPMath \|\| FMF.unsafeAlgebra() \|\|
				103	FMF.allowReciprocal();
				104	if (ST->hasFP32Denormals() && !UnsafeDiv)
				105	return false;
				106
				107	IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
				108	Builder.setFastMathFlags(FMF);
				109	Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
				110
				111	const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
				112	Function *Decl
				113	= II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
				114
				115	Value *Num = FDiv.getOperand(0);
				116	Value *Den = FDiv.getOperand(1);
				117
				118	Value *NewFDiv = nullptr;
				119
				120	if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
				121	NewFDiv = UndefValue::get(VT);
				122
				123	// FIXME: Doesn't do the right thing for cases where the vector is partially
				124	// constant. This works when the scalarizer pass is run first.
				125	for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
				126	Value *NumEltI = Builder.CreateExtractElement(Num, I);
				127	Value *DenEltI = Builder.CreateExtractElement(Den, I);
				128	Value *NewElt;
				129
				130	if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
				131	NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
				132	} else {
				133	NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
				134	}
				135
				136	NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
				137	}
				138	} else {
				139	if (!shouldKeepFDivF32(Num, UnsafeDiv))
				140	NewFDiv = Builder.CreateCall(Decl, { Num, Den });
				141	}
				142
				143	if (NewFDiv) {
				144	FDiv.replaceAllUsesWith(NewFDiv);
				145	NewFDiv->takeName(&FDiv);
				146	FDiv.eraseFromParent();
				147	}
				148
				149	return true;
				150	}
				151
				152	static bool hasUnsafeFPMath(const Function &F) {
				153	Attribute Attr = F.getFnAttribute("unsafe-fp-math");
				154	return Attr.getValueAsString() == "true";
				155	}
				156
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	157	bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	158	Mod = &M;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	159	return false;
				160	}
				161
				162	bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
				163	if (!TM \|\| skipFunction(F))
				164	return false;
				165
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	166	ST = &TM->getSubtarget<SISubtarget>(F);
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	167	DA = &getAnalysis<DivergenceAnalysis>();
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	168	HasUnsafeFPMath = hasUnsafeFPMath(F);
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	169
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	170	bool MadeChange = false;
				171
				172	for (BasicBlock &BB : F) {
				173	BasicBlock::iterator Next;
				174	for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
				175	Next = std::next(I);
				176	MadeChange \|= visit(*I);
				177	}
				178	}
				179
				180	return MadeChange;
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	181	}
				182
				183	INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
				184	"AMDGPU IR optimizations", false, false)
				185	INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
				186	INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
				187	"AMDGPU IR optimizations", false, false)
				188
				189	char AMDGPUCodeGenPrepare::ID = 0;
				190
Matt Arsenault	a1fe17c	2016-07-19 23:16:53 +0000	[diff] [blame]	191	FunctionPass llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine TM) {
Matt Arsenault	86de486	2016-06-24 07:07:55 +0000	[diff] [blame]	192	return new AMDGPUCodeGenPrepare(TM);
				193	}