blob: f2d5e074503520eb8f0bbc54bedaa93182b8bc51 [file] [log] [blame]
Preston Gurd2e2efd92012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthd04a8d42012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000020#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
Preston Gurd2e2efd92012-09-04 18:22:17 +000023
24using namespace llvm;
25
Stephen Hinesdce4a402014-05-29 02:49:00 -070026#define DEBUG_TYPE "bypass-slow-division"
27
Benjamin Kramer04142bc2012-09-10 11:52:08 +000028namespace {
Preston Gurd2e2efd92012-09-04 18:22:17 +000029 struct DivOpInfo {
30 bool SignedOp;
31 Value *Dividend;
32 Value *Divisor;
33
34 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36 };
37
38 struct DivPhiNodes {
39 PHINode *Quotient;
40 PHINode *Remainder;
41
42 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43 : Quotient(InQuotient), Remainder(InRemainder) {}
44 };
Benjamin Kramer04142bc2012-09-10 11:52:08 +000045}
Preston Gurd2e2efd92012-09-04 18:22:17 +000046
Benjamin Kramer04142bc2012-09-10 11:52:08 +000047namespace llvm {
Preston Gurd2e2efd92012-09-04 18:22:17 +000048 template<>
49 struct DenseMapInfo<DivOpInfo> {
50 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51 return Val1.SignedOp == Val2.SignedOp &&
52 Val1.Dividend == Val2.Dividend &&
53 Val1.Divisor == Val2.Divisor;
54 }
55
56 static DivOpInfo getEmptyKey() {
Stephen Hinesdce4a402014-05-29 02:49:00 -070057 return DivOpInfo(false, nullptr, nullptr);
Preston Gurd2e2efd92012-09-04 18:22:17 +000058 }
59
60 static DivOpInfo getTombstoneKey() {
Stephen Hinesdce4a402014-05-29 02:49:00 -070061 return DivOpInfo(true, nullptr, nullptr);
Preston Gurd2e2efd92012-09-04 18:22:17 +000062 }
63
64 static unsigned getHashValue(const DivOpInfo &Val) {
65 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000067 (unsigned)Val.SignedOp;
Preston Gurd2e2efd92012-09-04 18:22:17 +000068 }
69 };
70
71 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
72}
73
74// insertFastDiv - Substitutes the div/rem instruction with code that checks the
75// value of the operands and uses a shorter-faster div/rem instruction when
76// possible and the longer-slower div/rem instruction otherwise.
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000077static bool insertFastDiv(Function &F,
Preston Gurd2e2efd92012-09-04 18:22:17 +000078 Function::iterator &I,
79 BasicBlock::iterator &J,
80 IntegerType *BypassType,
81 bool UseDivOp,
82 bool UseSignedOp,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000083 DivCacheTy &PerBBDivCache) {
Preston Gurd2e2efd92012-09-04 18:22:17 +000084 // Get instruction operands
85 Instruction *Instr = J;
86 Value *Dividend = Instr->getOperand(0);
87 Value *Divisor = Instr->getOperand(1);
88
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000089 if (isa<ConstantInt>(Divisor) ||
90 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
Preston Gurd2e2efd92012-09-04 18:22:17 +000091 // Operations with immediate values should have
92 // been solved and replaced during compile time.
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000093 return false;
Preston Gurd2e2efd92012-09-04 18:22:17 +000094 }
95
96 // Basic Block is split before divide
97 BasicBlock *MainBB = I;
98 BasicBlock *SuccessorBB = I->splitBasicBlock(J);
Jakub Staszak7b2d20d2012-09-04 20:48:24 +000099 ++I; //advance iterator I to successorBB
Preston Gurd2e2efd92012-09-04 18:22:17 +0000100
101 // Add new basic block for slow divide operation
102 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
103 MainBB->getParent(), SuccessorBB);
104 SlowBB->moveBefore(SuccessorBB);
105 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106 Value *SlowQuotientV;
107 Value *SlowRemainderV;
108 if (UseSignedOp) {
109 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111 } else {
112 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114 }
115 SlowBuilder.CreateBr(SuccessorBB);
116
117 // Add new basic block for fast divide operation
118 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
119 MainBB->getParent(), SuccessorBB);
120 FastBB->moveBefore(SlowBB);
121 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000122 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123 BypassType);
124 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125 BypassType);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000126
127 // udiv/urem because optimization only handles positive numbers
128 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000129 ShortDivisorV);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000130 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
131 ShortDivisorV);
132 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000133 ShortQuotientV,
134 Dividend->getType());
Preston Gurd2e2efd92012-09-04 18:22:17 +0000135 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
136 ShortRemainderV,
137 Dividend->getType());
138 FastBuilder.CreateBr(SuccessorBB);
139
140 // Phi nodes for result of div and rem
141 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
142 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
144 QuoPhi->addIncoming(FastQuotientV, FastBB);
145 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
146 RemPhi->addIncoming(SlowRemainderV, SlowBB);
147 RemPhi->addIncoming(FastRemainderV, FastBB);
148
149 // Replace Instr with appropriate phi node
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000150 if (UseDivOp)
Preston Gurd2e2efd92012-09-04 18:22:17 +0000151 Instr->replaceAllUsesWith(QuoPhi);
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000152 else
Preston Gurd2e2efd92012-09-04 18:22:17 +0000153 Instr->replaceAllUsesWith(RemPhi);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000154 Instr->eraseFromParent();
155
156 // Combine operands into a single value with OR for value testing below
157 MainBB->getInstList().back().eraseFromParent();
158 IRBuilder<> MainBuilder(MainBB, MainBB->end());
159 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
160
161 // BitMask is inverted to check if the operands are
162 // larger than the bypass type
163 uint64_t BitMask = ~BypassType->getBitMask();
164 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
165
166 // Compare operand values and branch
Preston Gurd9a2cfff2013-03-04 18:13:57 +0000167 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000168 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
169 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
170
171 // point iterator J at first instruction of successorBB
172 J = I->begin();
173
174 // Cache phi nodes to be used later in place of other instances
175 // of div or rem with the same sign, dividend, and divisor
176 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
177 DivPhiNodes Value(QuoPhi, RemPhi);
178 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000179 return true;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000180}
181
182// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
183// operands and operation are identical. Otherwise call insertFastDiv to perform
184// the optimization and cache the resulting dividend and remainder.
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000185static bool reuseOrInsertFastDiv(Function &F,
Preston Gurd2e2efd92012-09-04 18:22:17 +0000186 Function::iterator &I,
187 BasicBlock::iterator &J,
188 IntegerType *BypassType,
189 bool UseDivOp,
190 bool UseSignedOp,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000191 DivCacheTy &PerBBDivCache) {
Preston Gurd2e2efd92012-09-04 18:22:17 +0000192 // Get instruction operands
193 Instruction *Instr = J;
194 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
Jakub Staszakbe119912012-09-04 23:11:11 +0000195 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000196
197 if (CacheI == PerBBDivCache.end()) {
198 // If previous instance does not exist, insert fast div
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000199 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
200 PerBBDivCache);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000201 }
202
203 // Replace operation value with previously generated phi node
Jakub Staszakbe119912012-09-04 23:11:11 +0000204 DivPhiNodes &Value = CacheI->second;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000205 if (UseDivOp) {
206 // Replace all uses of div instruction with quotient phi node
207 J->replaceAllUsesWith(Value.Quotient);
208 } else {
209 // Replace all uses of rem instruction with remainder phi node
210 J->replaceAllUsesWith(Value.Remainder);
211 }
212
213 // Advance to next operation
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000214 ++J;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000215
216 // Remove redundant operation
217 Instr->eraseFromParent();
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000218 return true;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000219}
220
221// bypassSlowDivision - This optimization identifies DIV instructions that can
222// be profitably bypassed and carried out with a shorter, faster divide.
Benjamin Kramer04142bc2012-09-10 11:52:08 +0000223bool llvm::bypassSlowDivision(Function &F,
224 Function::iterator &I,
Preston Gurd8d662b52012-10-04 21:33:40 +0000225 const DenseMap<unsigned int, unsigned int> &BypassWidths) {
Preston Gurd2e2efd92012-09-04 18:22:17 +0000226 DivCacheTy DivCache;
227
228 bool MadeChange = false;
229 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
230
231 // Get instruction details
232 unsigned Opcode = J->getOpcode();
233 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
234 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000235 bool UseSignedOp = Opcode == Instruction::SDiv ||
236 Opcode == Instruction::SRem;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000237
238 // Only optimize div or rem ops
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000239 if (!UseDivOp && !UseRemOp)
Preston Gurd2e2efd92012-09-04 18:22:17 +0000240 continue;
Preston Gurd2e2efd92012-09-04 18:22:17 +0000241
Preston Gurdfcf06282012-10-03 16:11:44 +0000242 // Skip division on vector types, only optimize integer instructions
243 if (!J->getType()->isIntegerTy())
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000244 continue;
245
Preston Gurd8d662b52012-10-04 21:33:40 +0000246 // Get bitwidth of div/rem instruction
Preston Gurdfcf06282012-10-03 16:11:44 +0000247 IntegerType *T = cast<IntegerType>(J->getType());
Preston Gurd9a2cfff2013-03-04 18:13:57 +0000248 unsigned int bitwidth = T->getBitWidth();
Preston Gurdfcf06282012-10-03 16:11:44 +0000249
Preston Gurd8d662b52012-10-04 21:33:40 +0000250 // Continue if bitwidth is not bypassed
251 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
252 if (BI == BypassWidths.end())
Preston Gurdfcf06282012-10-03 16:11:44 +0000253 continue;
254
Preston Gurd8d662b52012-10-04 21:33:40 +0000255 // Get type for div/rem instruction with bypass bitwidth
256 IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
Preston Gurdfcf06282012-10-03 16:11:44 +0000257
258 MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
Jakub Staszak7b2d20d2012-09-04 20:48:24 +0000259 UseSignedOp, DivCache);
Preston Gurd2e2efd92012-09-04 18:22:17 +0000260 }
261
262 return MadeChange;
263}