blob: ed663de01998bc69300a79e0f2733d5dfc4b15af [file] [log] [blame]
Preston Gurdcdf540d2012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthed0881b2012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000020#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
Justin Lebar0ede5fb2016-10-28 21:43:54 +000023#include "llvm/Transforms/Utils/Local.h"
Preston Gurdcdf540d2012-09-04 18:22:17 +000024
25using namespace llvm;
26
Chandler Carruth964daaa2014-04-22 02:55:47 +000027#define DEBUG_TYPE "bypass-slow-division"
28
Benjamin Kramer1f66f882012-09-10 11:52:08 +000029namespace {
Preston Gurdcdf540d2012-09-04 18:22:17 +000030 struct DivOpInfo {
31 bool SignedOp;
32 Value *Dividend;
33 Value *Divisor;
34
35 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
36 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
37 };
38
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000039 struct QuotRemPair {
40 Value *Quotient;
41 Value *Remainder;
Preston Gurdcdf540d2012-09-04 18:22:17 +000042
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000043 QuotRemPair(Value *InQuotient, Value *InRemainder)
44 : Quotient(InQuotient), Remainder(InRemainder) {}
45 };
46
47 /// A quotient and remainder, plus a BB from which they logically "originate".
48 /// If you use Quotient or Remainder in a Phi node, you should use BB as its
49 /// corresponding predecessor.
50 struct QuotRemWithBB {
51 BasicBlock *BB = nullptr;
52 Value *Quotient = nullptr;
53 Value *Remainder = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000054 };
Alexander Kornienkof00654e2015-06-23 09:49:53 +000055}
Preston Gurdcdf540d2012-09-04 18:22:17 +000056
Benjamin Kramer1f66f882012-09-10 11:52:08 +000057namespace llvm {
Preston Gurdcdf540d2012-09-04 18:22:17 +000058 template<>
59 struct DenseMapInfo<DivOpInfo> {
60 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
61 return Val1.SignedOp == Val2.SignedOp &&
62 Val1.Dividend == Val2.Dividend &&
63 Val1.Divisor == Val2.Divisor;
64 }
65
66 static DivOpInfo getEmptyKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000067 return DivOpInfo(false, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000068 }
69
70 static DivOpInfo getTombstoneKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000071 return DivOpInfo(true, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000072 }
73
74 static unsigned getHashValue(const DivOpInfo &Val) {
75 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
76 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak46beca62012-09-04 20:48:24 +000077 (unsigned)Val.SignedOp;
Preston Gurdcdf540d2012-09-04 18:22:17 +000078 }
79 };
80
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000081 typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
82 typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
Alexander Kornienkof00654e2015-06-23 09:49:53 +000083}
Preston Gurdcdf540d2012-09-04 18:22:17 +000084
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000085namespace {
86class FastDivInsertionTask {
87 bool IsValidTask = false;
88 Instruction *SlowDivOrRem = nullptr;
89 IntegerType *BypassType = nullptr;
90 BasicBlock *MainBB = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000091
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000092 QuotRemWithBB createSlowBB(BasicBlock *Successor);
93 QuotRemWithBB createFastBB(BasicBlock *Successor);
94 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
95 BasicBlock *PhiBB);
96 Value *insertOperandRuntimeCheck();
97 Optional<QuotRemPair> insertFastDivAndRem();
98
99 bool isSignedOp() {
100 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
101 SlowDivOrRem->getOpcode() == Instruction::SRem;
102 }
103 bool isDivisionOp() {
104 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
105 SlowDivOrRem->getOpcode() == Instruction::UDiv;
106 }
107 Type *getSlowType() { return SlowDivOrRem->getType(); }
108
109public:
110 FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
111 Value *getReplacement(DivCacheTy &Cache);
112};
113} // anonymous namespace
114
115FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
116 const BypassWidthsTy &BypassWidths) {
117 switch (I->getOpcode()) {
118 case Instruction::UDiv:
119 case Instruction::SDiv:
120 case Instruction::URem:
121 case Instruction::SRem:
122 SlowDivOrRem = I;
123 break;
124 default:
125 // I is not a div/rem operation.
126 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000127 }
128
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000129 // Skip division on vector types. Only optimize integer instructions.
130 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
131 if (!SlowType)
132 return;
Justin Lebar28605732016-11-16 00:44:47 +0000133
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000134 // Skip if this bitwidth is not bypassed.
135 auto BI = BypassWidths.find(SlowType->getBitWidth());
136 if (BI == BypassWidths.end())
137 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000138
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000139 // Get type for div/rem instruction with bypass bitwidth.
140 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
141 BypassType = BT;
142
143 // The original basic block.
144 MainBB = I->getParent();
145
146 // The instruction is indeed a slow div or rem operation.
147 IsValidTask = true;
148}
149
150/// Reuses previously-computed dividend or remainder from the current BB if
151/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
152/// perform the optimization and caches the resulting dividend and remainder.
153/// If no replacement can be generated, nullptr is returned.
154Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
155 // First, make sure that the task is valid.
156 if (!IsValidTask)
157 return nullptr;
158
159 // Then, look for a value in Cache.
160 Value *Dividend = SlowDivOrRem->getOperand(0);
161 Value *Divisor = SlowDivOrRem->getOperand(1);
162 DivOpInfo Key(isSignedOp(), Dividend, Divisor);
163 auto CacheI = Cache.find(Key);
164
165 if (CacheI == Cache.end()) {
166 // If previous instance does not exist, try to insert fast div.
167 Optional<QuotRemPair> OptResult = insertFastDivAndRem();
168 // Bail out if insertFastDivAndRem has failed.
169 if (!OptResult)
170 return nullptr;
171 CacheI = Cache.insert({Key, *OptResult}).first;
172 }
173
174 QuotRemPair &Value = CacheI->second;
175 return isDivisionOp() ? Value.Quotient : Value.Remainder;
176}
177
178/// Add new basic block for slow div and rem operations and put it before
179/// SuccessorBB.
180QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
181 QuotRemWithBB DivRemPair;
182 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
183 MainBB->getParent(), SuccessorBB);
184 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
185
186 Value *Dividend = SlowDivOrRem->getOperand(0);
187 Value *Divisor = SlowDivOrRem->getOperand(1);
188
189 if (isSignedOp()) {
190 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
191 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000192 } else {
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000193 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
194 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000195 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000196
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000197 Builder.CreateBr(SuccessorBB);
198 return DivRemPair;
199}
Preston Gurdcdf540d2012-09-04 18:22:17 +0000200
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000201/// Add new basic block for fast div and rem operations and put it before
202/// SuccessorBB.
203QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
204 QuotRemWithBB DivRemPair;
205 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
206 MainBB->getParent(), SuccessorBB);
207 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000208
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000209 Value *Dividend = SlowDivOrRem->getOperand(0);
210 Value *Divisor = SlowDivOrRem->getOperand(1);
211 Value *ShortDivisorV =
212 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
213 Value *ShortDividendV =
214 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000215
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000216 // udiv/urem because this optimization only handles positive numbers.
217 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
218 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
219 DivRemPair.Quotient =
220 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
221 DivRemPair.Remainder =
222 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
223 Builder.CreateBr(SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000224
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000225 return DivRemPair;
226}
227
228/// Creates Phi nodes for result of Div and Rem.
229QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
230 QuotRemWithBB &RHS,
231 BasicBlock *PhiBB) {
232 IRBuilder<> Builder(PhiBB, PhiBB->begin());
233 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
234 QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
235 QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
236 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
237 RemPhi->addIncoming(LHS.Remainder, LHS.BB);
238 RemPhi->addIncoming(RHS.Remainder, RHS.BB);
239 return QuotRemPair(QuoPhi, RemPhi);
240}
241
242/// Creates a runtime check to test whether both the divisor and dividend fit
243/// into BypassType. The check is inserted at the end of MainBB. True return
244/// value means that the operands fit.
245Value *FastDivInsertionTask::insertOperandRuntimeCheck() {
246 IRBuilder<> Builder(MainBB, MainBB->end());
247 Value *Dividend = SlowDivOrRem->getOperand(0);
248 Value *Divisor = SlowDivOrRem->getOperand(1);
Justin Lebar28605732016-11-16 00:44:47 +0000249
250 // We should have bailed out above if the divisor is a constant, but the
251 // dividend may still be a constant. Set OrV to our non-constant operands
252 // OR'ed together.
253 assert(!isa<ConstantInt>(Divisor));
254
255 Value *OrV;
256 if (!isa<ConstantInt>(Dividend))
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000257 OrV = Builder.CreateOr(Dividend, Divisor);
Justin Lebar28605732016-11-16 00:44:47 +0000258 else
259 OrV = Divisor;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000260
261 // BitMask is inverted to check if the operands are
262 // larger than the bypass type
263 uint64_t BitMask = ~BypassType->getBitMask();
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000264 Value *AndV = Builder.CreateAnd(OrV, BitMask);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000265
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000266 // Compare operand values
267 Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
268 return Builder.CreateICmpEQ(AndV, ZeroV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000269}
270
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000271/// Substitutes the div/rem instruction with code that checks the value of the
272/// operands and uses a shorter-faster div/rem instruction when possible.
273Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
274 Value *Dividend = SlowDivOrRem->getOperand(0);
275 Value *Divisor = SlowDivOrRem->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000276
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000277 if (isa<ConstantInt>(Divisor)) {
278 // Keep division by a constant for DAGCombiner.
279 return None;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000280 }
281
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000282 // If the numerator is a constant, bail if it doesn't fit into BypassType.
283 if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
284 if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
285 return None;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000286
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000287 // Split the basic block before the div/rem.
288 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
289 // Remove the unconditional branch from MainBB to SuccessorBB.
290 MainBB->getInstList().back().eraseFromParent();
291 QuotRemWithBB Fast = createFastBB(SuccessorBB);
292 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
293 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
294 Value *CmpV = insertOperandRuntimeCheck();
295 IRBuilder<> Builder(MainBB, MainBB->end());
296 Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
297 return Result;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000298}
299
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000300/// This optimization identifies DIV/REM instructions in a BB that can be
301/// profitably bypassed and carried out with a shorter, faster divide.
302bool llvm::bypassSlowDivision(BasicBlock *BB,
303 const BypassWidthsTy &BypassWidths) {
304 DivCacheTy PerBBDivCache;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000305
306 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000307 Instruction* Next = &*BB->begin();
308 while (Next != nullptr) {
309 // We may add instructions immediately after I, but we want to skip over
310 // them.
311 Instruction* I = Next;
312 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000313
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000314 FastDivInsertionTask Task(I, BypassWidths);
315 if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
316 I->replaceAllUsesWith(Replacement);
317 I->eraseFromParent();
318 MadeChange = true;
319 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000320 }
321
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000322 // Above we eagerly create divs and rems, as pairs, so that we can efficiently
323 // create divrem machine instructions. Now erase any unused divs / rems so we
324 // don't leave extra instructions sitting around.
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000325 for (auto &KV : PerBBDivCache)
326 for (Value *V : {KV.second.Quotient, KV.second.Remainder})
327 RecursivelyDeleteTriviallyDeadInstructions(V);
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000328
Preston Gurdcdf540d2012-09-04 18:22:17 +0000329 return MadeChange;
330}