blob: b694779a5323721943c333d1ed6ed61f3d39d390 [file] [log] [blame]
Preston Gurd2e2efd92012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "bypass-slow-division"
19#include "llvm/Instructions.h"
20#include "llvm/Function.h"
21#include "llvm/IRBuilder.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/Transforms/Utils/BypassSlowDivision.h"
24
25using namespace llvm;
26
27namespace llvm {
28 struct DivOpInfo {
29 bool SignedOp;
30 Value *Dividend;
31 Value *Divisor;
32
33 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35 };
36
37 struct DivPhiNodes {
38 PHINode *Quotient;
39 PHINode *Remainder;
40
41 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42 : Quotient(InQuotient), Remainder(InRemainder) {}
43 };
44
45 template<>
46 struct DenseMapInfo<DivOpInfo> {
47 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
48 return Val1.SignedOp == Val2.SignedOp &&
49 Val1.Dividend == Val2.Dividend &&
50 Val1.Divisor == Val2.Divisor;
51 }
52
53 static DivOpInfo getEmptyKey() {
54 return DivOpInfo(false, 0, 0);
55 }
56
57 static DivOpInfo getTombstoneKey() {
58 return DivOpInfo(true, 0, 0);
59 }
60
61 static unsigned getHashValue(const DivOpInfo &Val) {
62 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
63 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000064 (unsigned)Val.SignedOp;
Preston Gurd2e2efd92012-09-04 18:22:17 +000065 }
66 };
67
68 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
69}
70
71// insertFastDiv - Substitutes the div/rem instruction with code that checks the
72// value of the operands and uses a shorter-faster div/rem instruction when
73// possible and the longer-slower div/rem instruction otherwise.
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000074static bool insertFastDiv(Function &F,
Preston Gurd2e2efd92012-09-04 18:22:17 +000075 Function::iterator &I,
76 BasicBlock::iterator &J,
77 IntegerType *BypassType,
78 bool UseDivOp,
79 bool UseSignedOp,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000080 DivCacheTy &PerBBDivCache) {
Preston Gurd2e2efd92012-09-04 18:22:17 +000081 // Get instruction operands
82 Instruction *Instr = J;
83 Value *Dividend = Instr->getOperand(0);
84 Value *Divisor = Instr->getOperand(1);
85
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000086 if (isa<ConstantInt>(Divisor) ||
87 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
Preston Gurd2e2efd92012-09-04 18:22:17 +000088 // Operations with immediate values should have
89 // been solved and replaced during compile time.
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000090 return false;
Preston Gurd2e2efd92012-09-04 18:22:17 +000091 }
92
93 // Basic Block is split before divide
94 BasicBlock *MainBB = I;
95 BasicBlock *SuccessorBB = I->splitBasicBlock(J);
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000096 ++I; //advance iterator I to successorBB
Preston Gurd2e2efd92012-09-04 18:22:17 +000097
98 // Add new basic block for slow divide operation
99 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
100 MainBB->getParent(), SuccessorBB);
101 SlowBB->moveBefore(SuccessorBB);
102 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
103 Value *SlowQuotientV;
104 Value *SlowRemainderV;
105 if (UseSignedOp) {
106 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
107 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
108 } else {
109 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
110 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
111 }
112 SlowBuilder.CreateBr(SuccessorBB);
113
114 // Add new basic block for fast divide operation
115 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
116 MainBB->getParent(), SuccessorBB);
117 FastBB->moveBefore(SlowBB);
118 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000119 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
120 BypassType);
121 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
122 BypassType);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000123
124 // udiv/urem because optimization only handles positive numbers
125 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000126 ShortDivisorV);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000127 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
128 ShortDivisorV);
129 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000130 ShortQuotientV,
131 Dividend->getType());
Preston Gurd2e2efd92012-09-04 18:22:17 +0000132 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
133 ShortRemainderV,
134 Dividend->getType());
135 FastBuilder.CreateBr(SuccessorBB);
136
137 // Phi nodes for result of div and rem
138 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
139 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
140 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
141 QuoPhi->addIncoming(FastQuotientV, FastBB);
142 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143 RemPhi->addIncoming(SlowRemainderV, SlowBB);
144 RemPhi->addIncoming(FastRemainderV, FastBB);
145
146 // Replace Instr with appropriate phi node
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000147 if (UseDivOp)
Preston Gurd2e2efd92012-09-04 18:22:17 +0000148 Instr->replaceAllUsesWith(QuoPhi);
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000149 else
Preston Gurd2e2efd92012-09-04 18:22:17 +0000150 Instr->replaceAllUsesWith(RemPhi);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000151 Instr->eraseFromParent();
152
153 // Combine operands into a single value with OR for value testing below
154 MainBB->getInstList().back().eraseFromParent();
155 IRBuilder<> MainBuilder(MainBB, MainBB->end());
156 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
157
158 // BitMask is inverted to check if the operands are
159 // larger than the bypass type
160 uint64_t BitMask = ~BypassType->getBitMask();
161 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
162
163 // Compare operand values and branch
164 Value *ZeroV = MainBuilder.getInt32(0);
165 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
166 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
167
168 // point iterator J at first instruction of successorBB
169 J = I->begin();
170
171 // Cache phi nodes to be used later in place of other instances
172 // of div or rem with the same sign, dividend, and divisor
173 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
174 DivPhiNodes Value(QuoPhi, RemPhi);
175 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000176 return true;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000177}
178
179// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
180// operands and operation are identical. Otherwise call insertFastDiv to perform
181// the optimization and cache the resulting dividend and remainder.
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000182static bool reuseOrInsertFastDiv(Function &F,
Preston Gurd2e2efd92012-09-04 18:22:17 +0000183 Function::iterator &I,
184 BasicBlock::iterator &J,
185 IntegerType *BypassType,
186 bool UseDivOp,
187 bool UseSignedOp,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000188 DivCacheTy &PerBBDivCache) {
Preston Gurd2e2efd92012-09-04 18:22:17 +0000189 // Get instruction operands
190 Instruction *Instr = J;
191 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
Jakub Staszakbe119912012-09-04 23:11:11 +0000192 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000193
194 if (CacheI == PerBBDivCache.end()) {
195 // If previous instance does not exist, insert fast div
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000196 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
197 PerBBDivCache);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000198 }
199
200 // Replace operation value with previously generated phi node
Jakub Staszakbe119912012-09-04 23:11:11 +0000201 DivPhiNodes &Value = CacheI->second;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000202 if (UseDivOp) {
203 // Replace all uses of div instruction with quotient phi node
204 J->replaceAllUsesWith(Value.Quotient);
205 } else {
206 // Replace all uses of rem instruction with remainder phi node
207 J->replaceAllUsesWith(Value.Remainder);
208 }
209
210 // Advance to next operation
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000211 ++J;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000212
213 // Remove redundant operation
214 Instr->eraseFromParent();
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000215 return true;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000216}
217
218// bypassSlowDivision - This optimization identifies DIV instructions that can
219// be profitably bypassed and carried out with a shorter, faster divide.
220bool bypassSlowDivision(Function &F,
221 Function::iterator &I,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000222 const llvm::DenseMap<Type *, Type *> &BypassTypeMap) {
Preston Gurd2e2efd92012-09-04 18:22:17 +0000223 DivCacheTy DivCache;
224
225 bool MadeChange = false;
226 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
227
228 // Get instruction details
229 unsigned Opcode = J->getOpcode();
230 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
231 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000232 bool UseSignedOp = Opcode == Instruction::SDiv ||
233 Opcode == Instruction::SRem;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000234
235 // Only optimize div or rem ops
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000236 if (!UseDivOp && !UseRemOp)
Preston Gurd2e2efd92012-09-04 18:22:17 +0000237 continue;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000238
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000239 // Continue if div/rem type is not bypassed
240 DenseMap<Type *, Type *>::const_iterator BT =
241 BypassTypeMap.find(J->getType());
242 if (BT == BypassTypeMap.end())
243 continue;
244
245 IntegerType *BypassType = cast<IntegerType>(BT->second);
246 MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp,
247 UseSignedOp, DivCache);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000248 }
249
250 return MadeChange;
251}