//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-codegenprepare"

using namespace llvm;

namespace {

class AMDGPUCodeGenPrepare : public FunctionPass,
                             public InstVisitor<AMDGPUCodeGenPrepare, bool> {
  const GCNTargetMachine *TM;
  const SISubtarget *ST;
  DivergenceAnalysis *DA;
  Module *Mod;
  bool HasUnsafeFPMath;

public:
  static char ID;
  AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
    FunctionPass(ID),
    TM(static_cast<const GCNTargetMachine *>(TM)),
    ST(nullptr),
    DA(nullptr),
    Mod(nullptr),
    HasUnsafeFPMath(false) { }

  bool visitFDiv(BinaryOperator &I);

  bool visitInstruction(Instruction &I) {
    return false;
  }

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  const char *getPassName() const override {
    return "AMDGPU IR optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DivergenceAnalysis>();
    AU.setPreservesAll();
  }
};

} // End anonymous namespace

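// Keep the plain fdiv when the numerator is a constant the normal lowering
// already handles well: 1.0 / x becomes a reciprocal (see the comment below),
// and under unsafe math any constant numerator is acceptable.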
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
  const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
  if (!CNum)
    return false;

  // Reciprocal f32 is handled separately without denormals.
  return UnsafeDiv || CNum->isExactlyValue(+1.0);
}

// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
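//
// For example, an fdiv whose !fpmath metadata permits 2.5 ulp of error
// (illustrative IR):
//   %d = fdiv float %x, %y, !fpmath !0   ; !0 = !{float 2.5}
// is rewritten to:
//   %d = call float @llvm.amdgcn.fdiv.fast(float %x, float %y)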
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  // TODO: Handle half
  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

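  // The fast expansion is only accurate to about 2.5 ulp, so require the
  // !fpmath metadata to allow at least that much error.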
  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
                   FMF.allowReciprocal();
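  // The fast expansion does not produce correct results for denormals, so
  // only use it when f32 denormals are disabled or unsafe math permits it.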
  if (ST->hasFP32Denormals() && !UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
  Function *Decl
    = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is
    // partially constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  // Only report a change when the fdiv was actually rewritten.
  return !!NewFDiv;
}

static bool hasUnsafeFPMath(const Function &F) {
  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
  return Attr.getValueAsString() == "true";
}

bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  return false;
}

bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
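  // TM may be null if the pass was default-constructed without a target
  // machine; in that case there is nothing useful to do.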
  if (!TM || skipFunction(F))
    return false;

  ST = &TM->getSubtarget<SISubtarget>(F);
  DA = &getAnalysis<DivergenceAnalysis>();
  HasUnsafeFPMath = hasUnsafeFPMath(F);

  bool MadeChange = false;

  for (BasicBlock &BB : F) {
    BasicBlock::iterator Next;
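    // Cache the next iterator before visiting: the visitor may erase the
    // current instruction (visitFDiv deletes the fdiv it replaces).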
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
      Next = std::next(I);
      MadeChange |= visit(*I);
    }
  }

  return MadeChange;
}

INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                         "AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                       "AMDGPU IR optimizations", false, false)

char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
  return new AMDGPUCodeGenPrepare(TM);
}