blob: f711b192f6044694fd70e131503858bdb07902b3 [file] [log] [blame]
Eugene Zelenko6cadde72017-10-17 21:27:42 +00001//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
Preston Gurdcdf540d2012-09-04 18:22:17 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthed0881b2012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000020#include "llvm/ADT/None.h"
21#include "llvm/ADT/Optional.h"
22#include "llvm/ADT/STLExtras.h"
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000023#include "llvm/ADT/SmallPtrSet.h"
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000024#include "llvm/Analysis/ValueTracking.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000025#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000030#include "llvm/IR/Instruction.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000031#include "llvm/IR/Instructions.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000032#include "llvm/IR/Module.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Value.h"
35#include "llvm/Support/Casting.h"
Craig Topperb45eabc2017-04-26 16:39:58 +000036#include "llvm/Support/KnownBits.h"
Justin Lebar0ede5fb2016-10-28 21:43:54 +000037#include "llvm/Transforms/Utils/Local.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000038#include <cassert>
39#include <cstdint>
Preston Gurdcdf540d2012-09-04 18:22:17 +000040
41using namespace llvm;
42
Chandler Carruth964daaa2014-04-22 02:55:47 +000043#define DEBUG_TYPE "bypass-slow-division"
44
Benjamin Kramer1f66f882012-09-10 11:52:08 +000045namespace {
Eugene Zelenko6cadde72017-10-17 21:27:42 +000046
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000047 struct QuotRemPair {
48 Value *Quotient;
49 Value *Remainder;
Preston Gurdcdf540d2012-09-04 18:22:17 +000050
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000051 QuotRemPair(Value *InQuotient, Value *InRemainder)
52 : Quotient(InQuotient), Remainder(InRemainder) {}
53 };
54
55 /// A quotient and remainder, plus a BB from which they logically "originate".
56 /// If you use Quotient or Remainder in a Phi node, you should use BB as its
57 /// corresponding predecessor.
58 struct QuotRemWithBB {
59 BasicBlock *BB = nullptr;
60 Value *Quotient = nullptr;
61 Value *Remainder = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000062 };
63
Eugene Zelenko6cadde72017-10-17 21:27:42 +000064using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
65using BypassWidthsTy = DenseMap<unsigned, unsigned>;
66using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
Preston Gurdcdf540d2012-09-04 18:22:17 +000067
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000068enum ValueRange {
69 /// Operand definitely fits into BypassType. No runtime checks are needed.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000070 VALRNG_KNOWN_SHORT,
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000071 /// A runtime check is required, as value range is unknown.
72 VALRNG_UNKNOWN,
73 /// Operand is unlikely to fit into BypassType. The bypassing should be
74 /// disabled.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000075 VALRNG_LIKELY_LONG
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000076};
77
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000078class FastDivInsertionTask {
79 bool IsValidTask = false;
80 Instruction *SlowDivOrRem = nullptr;
81 IntegerType *BypassType = nullptr;
82 BasicBlock *MainBB = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000083
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000084 bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
85 ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000086 QuotRemWithBB createSlowBB(BasicBlock *Successor);
87 QuotRemWithBB createFastBB(BasicBlock *Successor);
88 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
89 BasicBlock *PhiBB);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000090 Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000091 Optional<QuotRemPair> insertFastDivAndRem();
92
93 bool isSignedOp() {
94 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
95 SlowDivOrRem->getOpcode() == Instruction::SRem;
96 }
Eugene Zelenko6cadde72017-10-17 21:27:42 +000097
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000098 bool isDivisionOp() {
99 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
100 SlowDivOrRem->getOpcode() == Instruction::UDiv;
101 }
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000102
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000103 Type *getSlowType() { return SlowDivOrRem->getType(); }
104
105public:
106 FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000107
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000108 Value *getReplacement(DivCacheTy &Cache);
109};
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000110
111} // end anonymous namespace
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000112
113FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
114 const BypassWidthsTy &BypassWidths) {
115 switch (I->getOpcode()) {
116 case Instruction::UDiv:
117 case Instruction::SDiv:
118 case Instruction::URem:
119 case Instruction::SRem:
120 SlowDivOrRem = I;
121 break;
122 default:
123 // I is not a div/rem operation.
124 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000125 }
126
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000127 // Skip division on vector types. Only optimize integer instructions.
128 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
129 if (!SlowType)
130 return;
Justin Lebar28605732016-11-16 00:44:47 +0000131
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000132 // Skip if this bitwidth is not bypassed.
133 auto BI = BypassWidths.find(SlowType->getBitWidth());
134 if (BI == BypassWidths.end())
135 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000136
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000137 // Get type for div/rem instruction with bypass bitwidth.
138 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
139 BypassType = BT;
140
141 // The original basic block.
142 MainBB = I->getParent();
143
144 // The instruction is indeed a slow div or rem operation.
145 IsValidTask = true;
146}
147
148/// Reuses previously-computed dividend or remainder from the current BB if
149/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
150/// perform the optimization and caches the resulting dividend and remainder.
151/// If no replacement can be generated, nullptr is returned.
152Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
153 // First, make sure that the task is valid.
154 if (!IsValidTask)
155 return nullptr;
156
157 // Then, look for a value in Cache.
158 Value *Dividend = SlowDivOrRem->getOperand(0);
159 Value *Divisor = SlowDivOrRem->getOperand(1);
Sanjay Patel5d67d892017-08-24 14:43:33 +0000160 DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000161 auto CacheI = Cache.find(Key);
162
163 if (CacheI == Cache.end()) {
164 // If previous instance does not exist, try to insert fast div.
165 Optional<QuotRemPair> OptResult = insertFastDivAndRem();
166 // Bail out if insertFastDivAndRem has failed.
167 if (!OptResult)
168 return nullptr;
169 CacheI = Cache.insert({Key, *OptResult}).first;
170 }
171
172 QuotRemPair &Value = CacheI->second;
173 return isDivisionOp() ? Value.Quotient : Value.Remainder;
174}
175
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000176/// \brief Check if a value looks like a hash.
177///
178/// The routine is expected to detect values computed using the most common hash
179/// algorithms. Typically, hash computations end with one of the following
180/// instructions:
181///
182/// 1) MUL with a constant wider than BypassType
183/// 2) XOR instruction
184///
185/// And even if we are wrong and the value is not a hash, it is still quite
186/// unlikely that such values will fit into BypassType.
187///
188/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
189/// It is implemented as a depth-first search for values that look neither long
190/// nor hash-like.
191bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
192 Instruction *I = dyn_cast<Instruction>(V);
193 if (!I)
194 return false;
195
196 switch (I->getOpcode()) {
197 case Instruction::Xor:
198 return true;
199 case Instruction::Mul: {
200 // After Constant Hoisting pass, long constants may be represented as
201 // bitcast instructions. As a result, some constants may look like an
202 // instruction at first, and an additional check is necessary to find out if
203 // an operand is actually a constant.
204 Value *Op1 = I->getOperand(1);
205 ConstantInt *C = dyn_cast<ConstantInt>(Op1);
206 if (!C && isa<BitCastInst>(Op1))
207 C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
208 return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
209 }
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000210 case Instruction::PHI:
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000211 // Stop IR traversal in case of a crazy input code. This limits recursion
212 // depth.
213 if (Visited.size() >= 16)
214 return false;
215 // Do not visit nodes that have been visited already. We return true because
216 // it means that we couldn't find any value that doesn't look hash-like.
217 if (Visited.find(I) != Visited.end())
218 return true;
219 Visited.insert(I);
220 return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
221 // Ignore undef values as they probably don't affect the division
222 // operands.
223 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
224 isa<UndefValue>(V);
225 });
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000226 default:
227 return false;
228 }
229}
230
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000231/// Check if an integer value fits into our bypass type.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000232ValueRange FastDivInsertionTask::getValueRange(Value *V,
233 VisitedSetTy &Visited) {
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000234 unsigned ShortLen = BypassType->getBitWidth();
235 unsigned LongLen = V->getType()->getIntegerBitWidth();
236
237 assert(LongLen > ShortLen && "Value type must be wider than BypassType");
238 unsigned HiBits = LongLen - ShortLen;
239
240 const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
Craig Topperb45eabc2017-04-26 16:39:58 +0000241 KnownBits Known(LongLen);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000242
Craig Topperb45eabc2017-04-26 16:39:58 +0000243 computeKnownBits(V, Known, DL);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000244
Craig Topper8df66c62017-05-12 17:20:30 +0000245 if (Known.countMinLeadingZeros() >= HiBits)
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000246 return VALRNG_KNOWN_SHORT;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000247
Craig Topper8df66c62017-05-12 17:20:30 +0000248 if (Known.countMaxLeadingZeros() < HiBits)
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000249 return VALRNG_LIKELY_LONG;
250
251 // Long integer divisions are often used in hashtable implementations. It's
252 // not worth bypassing such divisions because hash values are extremely
253 // unlikely to have enough leading zeros. The call below tries to detect
254 // values that are unlikely to fit BypassType (including hashes).
255 if (isHashLikeValue(V, Visited))
256 return VALRNG_LIKELY_LONG;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000257
258 return VALRNG_UNKNOWN;
259}
260
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000261/// Add new basic block for slow div and rem operations and put it before
262/// SuccessorBB.
263QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
264 QuotRemWithBB DivRemPair;
265 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
266 MainBB->getParent(), SuccessorBB);
267 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
268
269 Value *Dividend = SlowDivOrRem->getOperand(0);
270 Value *Divisor = SlowDivOrRem->getOperand(1);
271
272 if (isSignedOp()) {
273 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
274 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000275 } else {
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000276 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
277 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000278 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000279
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000280 Builder.CreateBr(SuccessorBB);
281 return DivRemPair;
282}
Preston Gurdcdf540d2012-09-04 18:22:17 +0000283
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000284/// Add new basic block for fast div and rem operations and put it before
285/// SuccessorBB.
286QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
287 QuotRemWithBB DivRemPair;
288 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
289 MainBB->getParent(), SuccessorBB);
290 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000291
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000292 Value *Dividend = SlowDivOrRem->getOperand(0);
293 Value *Divisor = SlowDivOrRem->getOperand(1);
294 Value *ShortDivisorV =
295 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
296 Value *ShortDividendV =
297 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000298
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000299 // udiv/urem because this optimization only handles positive numbers.
300 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
301 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
302 DivRemPair.Quotient =
303 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
304 DivRemPair.Remainder =
305 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
306 Builder.CreateBr(SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000307
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000308 return DivRemPair;
309}
310
311/// Creates Phi nodes for result of Div and Rem.
312QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
313 QuotRemWithBB &RHS,
314 BasicBlock *PhiBB) {
315 IRBuilder<> Builder(PhiBB, PhiBB->begin());
316 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
317 QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
318 QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
319 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
320 RemPhi->addIncoming(LHS.Remainder, LHS.BB);
321 RemPhi->addIncoming(RHS.Remainder, RHS.BB);
322 return QuotRemPair(QuoPhi, RemPhi);
323}
324
325/// Creates a runtime check to test whether both the divisor and dividend fit
326/// into BypassType. The check is inserted at the end of MainBB. True return
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000327/// value means that the operands fit. Either of the operands may be NULL if it
328/// doesn't need a runtime check.
329Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
330 assert((Op1 || Op2) && "Nothing to check");
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000331 IRBuilder<> Builder(MainBB, MainBB->end());
Justin Lebar28605732016-11-16 00:44:47 +0000332
333 Value *OrV;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000334 if (Op1 && Op2)
335 OrV = Builder.CreateOr(Op1, Op2);
Justin Lebar28605732016-11-16 00:44:47 +0000336 else
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000337 OrV = Op1 ? Op1 : Op2;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000338
339 // BitMask is inverted to check if the operands are
340 // larger than the bypass type
341 uint64_t BitMask = ~BypassType->getBitMask();
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000342 Value *AndV = Builder.CreateAnd(OrV, BitMask);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000343
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000344 // Compare operand values
345 Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
346 return Builder.CreateICmpEQ(AndV, ZeroV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000347}
348
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000349/// Substitutes the div/rem instruction with code that checks the value of the
350/// operands and uses a shorter-faster div/rem instruction when possible.
351Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
352 Value *Dividend = SlowDivOrRem->getOperand(0);
353 Value *Divisor = SlowDivOrRem->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000354
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000355 VisitedSetTy SetL;
356 ValueRange DividendRange = getValueRange(Dividend, SetL);
357 if (DividendRange == VALRNG_LIKELY_LONG)
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000358 return None;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000359
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000360 VisitedSetTy SetR;
361 ValueRange DivisorRange = getValueRange(Divisor, SetR);
362 if (DivisorRange == VALRNG_LIKELY_LONG)
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000363 return None;
364
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000365 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
366 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000367
368 if (DividendShort && DivisorShort) {
369 // If both operands are known to be short then just replace the long
Sanjoy Dasaa92cae2017-12-04 19:21:58 +0000370 // division with a short one in-place. Since we're not introducing control
371 // flow in this case, narrowing the division is always a win, even if the
372 // divisor is a constant (and will later get replaced by a multiplication).
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000373
374 IRBuilder<> Builder(SlowDivOrRem);
375 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
376 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
377 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
378 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
379 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
380 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
381 return QuotRemPair(ExtDiv, ExtRem);
Sanjoy Dasaa92cae2017-12-04 19:21:58 +0000382 }
383
384 if (isa<ConstantInt>(Divisor)) {
385 // If the divisor is not a constant, DAGCombiner will convert it to a
386 // multiplication by a magic constant. It isn't clear if it is worth
387 // introducing control flow to get a narrower multiply.
388 return None;
389 }
390
391 if (DividendShort && !isSignedOp()) {
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000392 // If the division is unsigned and Dividend is known to be short, then
393 // either
394 // 1) Divisor is less or equal to Dividend, and the result can be computed
395 // with a short division.
396 // 2) Divisor is greater than Dividend. In this case, no division is needed
397 // at all: The quotient is 0 and the remainder is equal to Dividend.
398 //
399 // So instead of checking at runtime whether Divisor fits into BypassType,
400 // we emit a runtime check to differentiate between these two cases. This
401 // lets us entirely avoid a long div.
402
403 // Split the basic block before the div/rem.
404 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
405 // Remove the unconditional branch from MainBB to SuccessorBB.
406 MainBB->getInstList().back().eraseFromParent();
407 QuotRemWithBB Long;
408 Long.BB = MainBB;
409 Long.Quotient = ConstantInt::get(getSlowType(), 0);
410 Long.Remainder = Dividend;
411 QuotRemWithBB Fast = createFastBB(SuccessorBB);
412 QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
413 IRBuilder<> Builder(MainBB, MainBB->end());
414 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
415 Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
416 return Result;
417 } else {
418 // General case. Create both slow and fast div/rem pairs and choose one of
419 // them at runtime.
420
421 // Split the basic block before the div/rem.
422 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
423 // Remove the unconditional branch from MainBB to SuccessorBB.
424 MainBB->getInstList().back().eraseFromParent();
425 QuotRemWithBB Fast = createFastBB(SuccessorBB);
426 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
427 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
428 Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
429 DivisorShort ? nullptr : Divisor);
430 IRBuilder<> Builder(MainBB, MainBB->end());
431 Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
432 return Result;
433 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000434}
435
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000436/// This optimization identifies DIV/REM instructions in a BB that can be
437/// profitably bypassed and carried out with a shorter, faster divide.
438bool llvm::bypassSlowDivision(BasicBlock *BB,
439 const BypassWidthsTy &BypassWidths) {
440 DivCacheTy PerBBDivCache;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000441
442 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000443 Instruction* Next = &*BB->begin();
444 while (Next != nullptr) {
445 // We may add instructions immediately after I, but we want to skip over
446 // them.
447 Instruction* I = Next;
448 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000449
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000450 FastDivInsertionTask Task(I, BypassWidths);
451 if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
452 I->replaceAllUsesWith(Replacement);
453 I->eraseFromParent();
454 MadeChange = true;
455 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000456 }
457
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000458 // Above we eagerly create divs and rems, as pairs, so that we can efficiently
459 // create divrem machine instructions. Now erase any unused divs / rems so we
460 // don't leave extra instructions sitting around.
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000461 for (auto &KV : PerBBDivCache)
462 for (Value *V : {KV.second.Quotient, KV.second.Remainder})
463 RecursivelyDeleteTriviallyDeadInstructions(V);
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000464
Preston Gurdcdf540d2012-09-04 18:22:17 +0000465 return MadeChange;
466}