blob: df299f673f651b17b5e7706aa9269c621eaa59c2 [file] [log] [blame]
Eugene Zelenko6cadde72017-10-17 21:27:42 +00001//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
Preston Gurdcdf540d2012-09-04 18:22:17 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Preston Gurdcdf540d2012-09-04 18:22:17 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file contains an optimization for div and rem on architectures that
10// execute short instructions significantly faster than longer instructions.
11// For example, on Intel Atom 32-bit divides are slow enough that during
12// runtime it is profitable to check the value of the operands, and if they are
13// positive and less than 256 use an unsigned 8-bit divide.
14//
15//===----------------------------------------------------------------------===//
16
Chandler Carruthed0881b2012-12-03 16:50:05 +000017#include "llvm/Transforms/Utils/BypassSlowDivision.h"
18#include "llvm/ADT/DenseMap.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000019#include "llvm/ADT/None.h"
20#include "llvm/ADT/Optional.h"
21#include "llvm/ADT/STLExtras.h"
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000022#include "llvm/ADT/SmallPtrSet.h"
David Blaikie31b98d22018-06-04 21:23:21 +000023#include "llvm/Transforms/Utils/Local.h"
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000024#include "llvm/Analysis/ValueTracking.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000025#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DerivedTypes.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000028#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000030#include "llvm/IR/Instruction.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000031#include "llvm/IR/Instructions.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000032#include "llvm/IR/Module.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Value.h"
35#include "llvm/Support/Casting.h"
Craig Topperb45eabc2017-04-26 16:39:58 +000036#include "llvm/Support/KnownBits.h"
Eugene Zelenko6cadde72017-10-17 21:27:42 +000037#include <cassert>
38#include <cstdint>
Preston Gurdcdf540d2012-09-04 18:22:17 +000039
40using namespace llvm;
41
Chandler Carruth964daaa2014-04-22 02:55:47 +000042#define DEBUG_TYPE "bypass-slow-division"
43
Benjamin Kramer1f66f882012-09-10 11:52:08 +000044namespace {
Eugene Zelenko6cadde72017-10-17 21:27:42 +000045
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000046 struct QuotRemPair {
47 Value *Quotient;
48 Value *Remainder;
Preston Gurdcdf540d2012-09-04 18:22:17 +000049
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000050 QuotRemPair(Value *InQuotient, Value *InRemainder)
51 : Quotient(InQuotient), Remainder(InRemainder) {}
52 };
53
54 /// A quotient and remainder, plus a BB from which they logically "originate".
55 /// If you use Quotient or Remainder in a Phi node, you should use BB as its
56 /// corresponding predecessor.
57 struct QuotRemWithBB {
58 BasicBlock *BB = nullptr;
59 Value *Quotient = nullptr;
60 Value *Remainder = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000061 };
62
Eugene Zelenko6cadde72017-10-17 21:27:42 +000063using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
64using BypassWidthsTy = DenseMap<unsigned, unsigned>;
65using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
Preston Gurdcdf540d2012-09-04 18:22:17 +000066
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000067enum ValueRange {
68 /// Operand definitely fits into BypassType. No runtime checks are needed.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000069 VALRNG_KNOWN_SHORT,
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000070 /// A runtime check is required, as value range is unknown.
71 VALRNG_UNKNOWN,
72 /// Operand is unlikely to fit into BypassType. The bypassing should be
73 /// disabled.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000074 VALRNG_LIKELY_LONG
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000075};
76
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000077class FastDivInsertionTask {
78 bool IsValidTask = false;
79 Instruction *SlowDivOrRem = nullptr;
80 IntegerType *BypassType = nullptr;
81 BasicBlock *MainBB = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000082
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +000083 bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
84 ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000085 QuotRemWithBB createSlowBB(BasicBlock *Successor);
86 QuotRemWithBB createFastBB(BasicBlock *Successor);
87 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
88 BasicBlock *PhiBB);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000089 Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000090 Optional<QuotRemPair> insertFastDivAndRem();
91
92 bool isSignedOp() {
93 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
94 SlowDivOrRem->getOpcode() == Instruction::SRem;
95 }
Eugene Zelenko6cadde72017-10-17 21:27:42 +000096
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000097 bool isDivisionOp() {
98 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
99 SlowDivOrRem->getOpcode() == Instruction::UDiv;
100 }
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000101
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000102 Type *getSlowType() { return SlowDivOrRem->getType(); }
103
104public:
105 FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000106
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000107 Value *getReplacement(DivCacheTy &Cache);
108};
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000109
110} // end anonymous namespace
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000111
112FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
113 const BypassWidthsTy &BypassWidths) {
114 switch (I->getOpcode()) {
115 case Instruction::UDiv:
116 case Instruction::SDiv:
117 case Instruction::URem:
118 case Instruction::SRem:
119 SlowDivOrRem = I;
120 break;
121 default:
122 // I is not a div/rem operation.
123 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000124 }
125
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000126 // Skip division on vector types. Only optimize integer instructions.
127 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
128 if (!SlowType)
129 return;
Justin Lebar28605732016-11-16 00:44:47 +0000130
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000131 // Skip if this bitwidth is not bypassed.
132 auto BI = BypassWidths.find(SlowType->getBitWidth());
133 if (BI == BypassWidths.end())
134 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000135
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000136 // Get type for div/rem instruction with bypass bitwidth.
137 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
138 BypassType = BT;
139
140 // The original basic block.
141 MainBB = I->getParent();
142
143 // The instruction is indeed a slow div or rem operation.
144 IsValidTask = true;
145}
146
147/// Reuses previously-computed dividend or remainder from the current BB if
148/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
149/// perform the optimization and caches the resulting dividend and remainder.
150/// If no replacement can be generated, nullptr is returned.
151Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
152 // First, make sure that the task is valid.
153 if (!IsValidTask)
154 return nullptr;
155
156 // Then, look for a value in Cache.
157 Value *Dividend = SlowDivOrRem->getOperand(0);
158 Value *Divisor = SlowDivOrRem->getOperand(1);
Sanjay Patel5d67d892017-08-24 14:43:33 +0000159 DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000160 auto CacheI = Cache.find(Key);
161
162 if (CacheI == Cache.end()) {
163 // If previous instance does not exist, try to insert fast div.
164 Optional<QuotRemPair> OptResult = insertFastDivAndRem();
165 // Bail out if insertFastDivAndRem has failed.
166 if (!OptResult)
167 return nullptr;
168 CacheI = Cache.insert({Key, *OptResult}).first;
169 }
170
171 QuotRemPair &Value = CacheI->second;
172 return isDivisionOp() ? Value.Quotient : Value.Remainder;
173}
174
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000175/// Check if a value looks like a hash.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000176///
177/// The routine is expected to detect values computed using the most common hash
178/// algorithms. Typically, hash computations end with one of the following
179/// instructions:
180///
181/// 1) MUL with a constant wider than BypassType
182/// 2) XOR instruction
183///
184/// And even if we are wrong and the value is not a hash, it is still quite
185/// unlikely that such values will fit into BypassType.
186///
187/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
188/// It is implemented as a depth-first search for values that look neither long
189/// nor hash-like.
190bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
191 Instruction *I = dyn_cast<Instruction>(V);
192 if (!I)
193 return false;
194
195 switch (I->getOpcode()) {
196 case Instruction::Xor:
197 return true;
198 case Instruction::Mul: {
199 // After Constant Hoisting pass, long constants may be represented as
200 // bitcast instructions. As a result, some constants may look like an
201 // instruction at first, and an additional check is necessary to find out if
202 // an operand is actually a constant.
203 Value *Op1 = I->getOperand(1);
204 ConstantInt *C = dyn_cast<ConstantInt>(Op1);
205 if (!C && isa<BitCastInst>(Op1))
206 C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
207 return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
208 }
Eugene Zelenko6cadde72017-10-17 21:27:42 +0000209 case Instruction::PHI:
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000210 // Stop IR traversal in case of a crazy input code. This limits recursion
211 // depth.
212 if (Visited.size() >= 16)
213 return false;
214 // Do not visit nodes that have been visited already. We return true because
215 // it means that we couldn't find any value that doesn't look hash-like.
216 if (Visited.find(I) != Visited.end())
217 return true;
218 Visited.insert(I);
219 return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
220 // Ignore undef values as they probably don't affect the division
221 // operands.
222 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
223 isa<UndefValue>(V);
224 });
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000225 default:
226 return false;
227 }
228}
229
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000230/// Check if an integer value fits into our bypass type.
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000231ValueRange FastDivInsertionTask::getValueRange(Value *V,
232 VisitedSetTy &Visited) {
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000233 unsigned ShortLen = BypassType->getBitWidth();
234 unsigned LongLen = V->getType()->getIntegerBitWidth();
235
236 assert(LongLen > ShortLen && "Value type must be wider than BypassType");
237 unsigned HiBits = LongLen - ShortLen;
238
239 const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
Craig Topperb45eabc2017-04-26 16:39:58 +0000240 KnownBits Known(LongLen);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000241
Craig Topperb45eabc2017-04-26 16:39:58 +0000242 computeKnownBits(V, Known, DL);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000243
Craig Topper8df66c62017-05-12 17:20:30 +0000244 if (Known.countMinLeadingZeros() >= HiBits)
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000245 return VALRNG_KNOWN_SHORT;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000246
Craig Topper8df66c62017-05-12 17:20:30 +0000247 if (Known.countMaxLeadingZeros() < HiBits)
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000248 return VALRNG_LIKELY_LONG;
249
250 // Long integer divisions are often used in hashtable implementations. It's
251 // not worth bypassing such divisions because hash values are extremely
252 // unlikely to have enough leading zeros. The call below tries to detect
253 // values that are unlikely to fit BypassType (including hashes).
254 if (isHashLikeValue(V, Visited))
255 return VALRNG_LIKELY_LONG;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000256
257 return VALRNG_UNKNOWN;
258}
259
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000260/// Add new basic block for slow div and rem operations and put it before
261/// SuccessorBB.
262QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
263 QuotRemWithBB DivRemPair;
264 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
265 MainBB->getParent(), SuccessorBB);
266 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
267
268 Value *Dividend = SlowDivOrRem->getOperand(0);
269 Value *Divisor = SlowDivOrRem->getOperand(1);
270
271 if (isSignedOp()) {
272 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
273 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000274 } else {
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000275 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
276 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000277 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000278
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000279 Builder.CreateBr(SuccessorBB);
280 return DivRemPair;
281}
Preston Gurdcdf540d2012-09-04 18:22:17 +0000282
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000283/// Add new basic block for fast div and rem operations and put it before
284/// SuccessorBB.
285QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
286 QuotRemWithBB DivRemPair;
287 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
288 MainBB->getParent(), SuccessorBB);
289 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000290
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000291 Value *Dividend = SlowDivOrRem->getOperand(0);
292 Value *Divisor = SlowDivOrRem->getOperand(1);
293 Value *ShortDivisorV =
294 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
295 Value *ShortDividendV =
296 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000297
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000298 // udiv/urem because this optimization only handles positive numbers.
299 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
300 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
301 DivRemPair.Quotient =
302 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
303 DivRemPair.Remainder =
304 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
305 Builder.CreateBr(SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000306
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000307 return DivRemPair;
308}
309
310/// Creates Phi nodes for result of Div and Rem.
311QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
312 QuotRemWithBB &RHS,
313 BasicBlock *PhiBB) {
314 IRBuilder<> Builder(PhiBB, PhiBB->begin());
315 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
316 QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
317 QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
318 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
319 RemPhi->addIncoming(LHS.Remainder, LHS.BB);
320 RemPhi->addIncoming(RHS.Remainder, RHS.BB);
321 return QuotRemPair(QuoPhi, RemPhi);
322}
323
324/// Creates a runtime check to test whether both the divisor and dividend fit
325/// into BypassType. The check is inserted at the end of MainBB. True return
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000326/// value means that the operands fit. Either of the operands may be NULL if it
327/// doesn't need a runtime check.
328Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
329 assert((Op1 || Op2) && "Nothing to check");
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000330 IRBuilder<> Builder(MainBB, MainBB->end());
Justin Lebar28605732016-11-16 00:44:47 +0000331
332 Value *OrV;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000333 if (Op1 && Op2)
334 OrV = Builder.CreateOr(Op1, Op2);
Justin Lebar28605732016-11-16 00:44:47 +0000335 else
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000336 OrV = Op1 ? Op1 : Op2;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000337
338 // BitMask is inverted to check if the operands are
339 // larger than the bypass type
340 uint64_t BitMask = ~BypassType->getBitMask();
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000341 Value *AndV = Builder.CreateAnd(OrV, BitMask);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000342
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000343 // Compare operand values
344 Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
345 return Builder.CreateICmpEQ(AndV, ZeroV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000346}
347
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000348/// Substitutes the div/rem instruction with code that checks the value of the
349/// operands and uses a shorter-faster div/rem instruction when possible.
350Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
351 Value *Dividend = SlowDivOrRem->getOperand(0);
352 Value *Divisor = SlowDivOrRem->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000353
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000354 VisitedSetTy SetL;
355 ValueRange DividendRange = getValueRange(Dividend, SetL);
356 if (DividendRange == VALRNG_LIKELY_LONG)
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000357 return None;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000358
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000359 VisitedSetTy SetR;
360 ValueRange DivisorRange = getValueRange(Divisor, SetR);
361 if (DivisorRange == VALRNG_LIKELY_LONG)
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000362 return None;
363
Nikolai Bozhenovfca527a2017-04-02 13:14:30 +0000364 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
365 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000366
367 if (DividendShort && DivisorShort) {
368 // If both operands are known to be short then just replace the long
Sanjoy Dasaa92cae2017-12-04 19:21:58 +0000369 // division with a short one in-place. Since we're not introducing control
370 // flow in this case, narrowing the division is always a win, even if the
371 // divisor is a constant (and will later get replaced by a multiplication).
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000372
373 IRBuilder<> Builder(SlowDivOrRem);
374 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
375 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
376 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
377 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
378 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
379 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
380 return QuotRemPair(ExtDiv, ExtRem);
Sanjoy Dasaa92cae2017-12-04 19:21:58 +0000381 }
382
383 if (isa<ConstantInt>(Divisor)) {
384 // If the divisor is not a constant, DAGCombiner will convert it to a
385 // multiplication by a magic constant. It isn't clear if it is worth
386 // introducing control flow to get a narrower multiply.
387 return None;
388 }
389
Craig Topperb172b882018-08-21 17:15:33 +0000390 // After Constant Hoisting pass, long constants may be represented as
391 // bitcast instructions. As a result, some constants may look like an
392 // instruction at first, and an additional check is necessary to find out if
393 // an operand is actually a constant.
394 if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
395 if (BCI->getParent() == SlowDivOrRem->getParent() &&
396 isa<ConstantInt>(BCI->getOperand(0)))
397 return None;
398
Sanjoy Dasaa92cae2017-12-04 19:21:58 +0000399 if (DividendShort && !isSignedOp()) {
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000400 // If the division is unsigned and Dividend is known to be short, then
401 // either
402 // 1) Divisor is less or equal to Dividend, and the result can be computed
403 // with a short division.
404 // 2) Divisor is greater than Dividend. In this case, no division is needed
405 // at all: The quotient is 0 and the remainder is equal to Dividend.
406 //
407 // So instead of checking at runtime whether Divisor fits into BypassType,
408 // we emit a runtime check to differentiate between these two cases. This
409 // lets us entirely avoid a long div.
410
411 // Split the basic block before the div/rem.
412 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
413 // Remove the unconditional branch from MainBB to SuccessorBB.
414 MainBB->getInstList().back().eraseFromParent();
415 QuotRemWithBB Long;
416 Long.BB = MainBB;
417 Long.Quotient = ConstantInt::get(getSlowType(), 0);
418 Long.Remainder = Dividend;
419 QuotRemWithBB Fast = createFastBB(SuccessorBB);
420 QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
421 IRBuilder<> Builder(MainBB, MainBB->end());
422 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
423 Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
424 return Result;
425 } else {
426 // General case. Create both slow and fast div/rem pairs and choose one of
427 // them at runtime.
428
429 // Split the basic block before the div/rem.
430 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
431 // Remove the unconditional branch from MainBB to SuccessorBB.
432 MainBB->getInstList().back().eraseFromParent();
433 QuotRemWithBB Fast = createFastBB(SuccessorBB);
434 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
435 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
436 Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
437 DivisorShort ? nullptr : Divisor);
438 IRBuilder<> Builder(MainBB, MainBB->end());
439 Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
440 return Result;
441 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000442}
443
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000444/// This optimization identifies DIV/REM instructions in a BB that can be
445/// profitably bypassed and carried out with a shorter, faster divide.
446bool llvm::bypassSlowDivision(BasicBlock *BB,
447 const BypassWidthsTy &BypassWidths) {
448 DivCacheTy PerBBDivCache;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000449
450 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000451 Instruction* Next = &*BB->begin();
452 while (Next != nullptr) {
453 // We may add instructions immediately after I, but we want to skip over
454 // them.
455 Instruction* I = Next;
456 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000457
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000458 FastDivInsertionTask Task(I, BypassWidths);
459 if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
460 I->replaceAllUsesWith(Replacement);
461 I->eraseFromParent();
462 MadeChange = true;
463 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000464 }
465
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000466 // Above we eagerly create divs and rems, as pairs, so that we can efficiently
467 // create divrem machine instructions. Now erase any unused divs / rems so we
468 // don't leave extra instructions sitting around.
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000469 for (auto &KV : PerBBDivCache)
470 for (Value *V : {KV.second.Quotient, KV.second.Remainder})
471 RecursivelyDeleteTriviallyDeadInstructions(V);
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000472
Preston Gurdcdf540d2012-09-04 18:22:17 +0000473 return MadeChange;
474}