blob: 9d8cb3187eec9c2526277a050129d1c5b649b00d [file] [log] [blame]
Preston Gurdcdf540d2012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthed0881b2012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000020#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000021#include "llvm/IR/Function.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Instructions.h"
Justin Lebar0ede5fb2016-10-28 21:43:54 +000024#include "llvm/Transforms/Utils/Local.h"
Preston Gurdcdf540d2012-09-04 18:22:17 +000025
26using namespace llvm;
27
Chandler Carruth964daaa2014-04-22 02:55:47 +000028#define DEBUG_TYPE "bypass-slow-division"
29
Benjamin Kramer1f66f882012-09-10 11:52:08 +000030namespace {
Preston Gurdcdf540d2012-09-04 18:22:17 +000031 struct DivOpInfo {
32 bool SignedOp;
33 Value *Dividend;
34 Value *Divisor;
35
36 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
37 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
38 };
39
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000040 struct QuotRemPair {
41 Value *Quotient;
42 Value *Remainder;
Preston Gurdcdf540d2012-09-04 18:22:17 +000043
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000044 QuotRemPair(Value *InQuotient, Value *InRemainder)
45 : Quotient(InQuotient), Remainder(InRemainder) {}
46 };
47
48 /// A quotient and remainder, plus a BB from which they logically "originate".
49 /// If you use Quotient or Remainder in a Phi node, you should use BB as its
50 /// corresponding predecessor.
51 struct QuotRemWithBB {
52 BasicBlock *BB = nullptr;
53 Value *Quotient = nullptr;
54 Value *Remainder = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +000055 };
Alexander Kornienkof00654e2015-06-23 09:49:53 +000056}
Preston Gurdcdf540d2012-09-04 18:22:17 +000057
Benjamin Kramer1f66f882012-09-10 11:52:08 +000058namespace llvm {
Preston Gurdcdf540d2012-09-04 18:22:17 +000059 template<>
60 struct DenseMapInfo<DivOpInfo> {
61 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
62 return Val1.SignedOp == Val2.SignedOp &&
63 Val1.Dividend == Val2.Dividend &&
64 Val1.Divisor == Val2.Divisor;
65 }
66
67 static DivOpInfo getEmptyKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000068 return DivOpInfo(false, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000069 }
70
71 static DivOpInfo getTombstoneKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000072 return DivOpInfo(true, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000073 }
74
75 static unsigned getHashValue(const DivOpInfo &Val) {
76 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
77 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak46beca62012-09-04 20:48:24 +000078 (unsigned)Val.SignedOp;
Preston Gurdcdf540d2012-09-04 18:22:17 +000079 }
80 };
81
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000082 typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
83 typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
Alexander Kornienkof00654e2015-06-23 09:49:53 +000084}
Preston Gurdcdf540d2012-09-04 18:22:17 +000085
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000086namespace {
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +000087enum ValueRange {
88 /// Operand definitely fits into BypassType. No runtime checks are needed.
89 VALRNG_SHORT,
90 /// A runtime check is required, as value range is unknown.
91 VALRNG_UNKNOWN,
92 /// Operand is unlikely to fit into BypassType. The bypassing should be
93 /// disabled.
94 VALRNG_LONG
95};
96
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +000097class FastDivInsertionTask {
98 bool IsValidTask = false;
99 Instruction *SlowDivOrRem = nullptr;
100 IntegerType *BypassType = nullptr;
101 BasicBlock *MainBB = nullptr;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000102
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000103 ValueRange getValueRange(Value *Op);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000104 QuotRemWithBB createSlowBB(BasicBlock *Successor);
105 QuotRemWithBB createFastBB(BasicBlock *Successor);
106 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
107 BasicBlock *PhiBB);
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000108 Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000109 Optional<QuotRemPair> insertFastDivAndRem();
110
111 bool isSignedOp() {
112 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
113 SlowDivOrRem->getOpcode() == Instruction::SRem;
114 }
115 bool isDivisionOp() {
116 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
117 SlowDivOrRem->getOpcode() == Instruction::UDiv;
118 }
119 Type *getSlowType() { return SlowDivOrRem->getType(); }
120
121public:
122 FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
123 Value *getReplacement(DivCacheTy &Cache);
124};
125} // anonymous namespace
126
127FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
128 const BypassWidthsTy &BypassWidths) {
129 switch (I->getOpcode()) {
130 case Instruction::UDiv:
131 case Instruction::SDiv:
132 case Instruction::URem:
133 case Instruction::SRem:
134 SlowDivOrRem = I;
135 break;
136 default:
137 // I is not a div/rem operation.
138 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000139 }
140
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000141 // Skip division on vector types. Only optimize integer instructions.
142 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
143 if (!SlowType)
144 return;
Justin Lebar28605732016-11-16 00:44:47 +0000145
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000146 // Skip if this bitwidth is not bypassed.
147 auto BI = BypassWidths.find(SlowType->getBitWidth());
148 if (BI == BypassWidths.end())
149 return;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000150
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000151 // Get type for div/rem instruction with bypass bitwidth.
152 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
153 BypassType = BT;
154
155 // The original basic block.
156 MainBB = I->getParent();
157
158 // The instruction is indeed a slow div or rem operation.
159 IsValidTask = true;
160}
161
162/// Reuses previously-computed dividend or remainder from the current BB if
163/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
164/// perform the optimization and caches the resulting dividend and remainder.
165/// If no replacement can be generated, nullptr is returned.
166Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
167 // First, make sure that the task is valid.
168 if (!IsValidTask)
169 return nullptr;
170
171 // Then, look for a value in Cache.
172 Value *Dividend = SlowDivOrRem->getOperand(0);
173 Value *Divisor = SlowDivOrRem->getOperand(1);
174 DivOpInfo Key(isSignedOp(), Dividend, Divisor);
175 auto CacheI = Cache.find(Key);
176
177 if (CacheI == Cache.end()) {
178 // If previous instance does not exist, try to insert fast div.
179 Optional<QuotRemPair> OptResult = insertFastDivAndRem();
180 // Bail out if insertFastDivAndRem has failed.
181 if (!OptResult)
182 return nullptr;
183 CacheI = Cache.insert({Key, *OptResult}).first;
184 }
185
186 QuotRemPair &Value = CacheI->second;
187 return isDivisionOp() ? Value.Quotient : Value.Remainder;
188}
189
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000190/// Check if an integer value fits into our bypass type.
191ValueRange FastDivInsertionTask::getValueRange(Value *V) {
192 unsigned ShortLen = BypassType->getBitWidth();
193 unsigned LongLen = V->getType()->getIntegerBitWidth();
194
195 assert(LongLen > ShortLen && "Value type must be wider than BypassType");
196 unsigned HiBits = LongLen - ShortLen;
197
198 const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
199 APInt Zeros(LongLen, 0), Ones(LongLen, 0);
200
201 computeKnownBits(V, Zeros, Ones, DL);
202
203 if (Zeros.countLeadingOnes() >= HiBits)
204 return VALRNG_SHORT;
205
206 if (Ones.countLeadingZeros() < HiBits)
207 return VALRNG_LONG;
208
209 return VALRNG_UNKNOWN;
210}
211
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000212/// Add new basic block for slow div and rem operations and put it before
213/// SuccessorBB.
214QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
215 QuotRemWithBB DivRemPair;
216 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
217 MainBB->getParent(), SuccessorBB);
218 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
219
220 Value *Dividend = SlowDivOrRem->getOperand(0);
221 Value *Divisor = SlowDivOrRem->getOperand(1);
222
223 if (isSignedOp()) {
224 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
225 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000226 } else {
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000227 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
228 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000229 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000230
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000231 Builder.CreateBr(SuccessorBB);
232 return DivRemPair;
233}
Preston Gurdcdf540d2012-09-04 18:22:17 +0000234
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000235/// Add new basic block for fast div and rem operations and put it before
236/// SuccessorBB.
237QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
238 QuotRemWithBB DivRemPair;
239 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
240 MainBB->getParent(), SuccessorBB);
241 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000242
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000243 Value *Dividend = SlowDivOrRem->getOperand(0);
244 Value *Divisor = SlowDivOrRem->getOperand(1);
245 Value *ShortDivisorV =
246 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
247 Value *ShortDividendV =
248 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000249
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000250 // udiv/urem because this optimization only handles positive numbers.
251 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
252 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
253 DivRemPair.Quotient =
254 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
255 DivRemPair.Remainder =
256 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
257 Builder.CreateBr(SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000258
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000259 return DivRemPair;
260}
261
262/// Creates Phi nodes for result of Div and Rem.
263QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
264 QuotRemWithBB &RHS,
265 BasicBlock *PhiBB) {
266 IRBuilder<> Builder(PhiBB, PhiBB->begin());
267 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
268 QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
269 QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
270 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
271 RemPhi->addIncoming(LHS.Remainder, LHS.BB);
272 RemPhi->addIncoming(RHS.Remainder, RHS.BB);
273 return QuotRemPair(QuoPhi, RemPhi);
274}
275
276/// Creates a runtime check to test whether both the divisor and dividend fit
277/// into BypassType. The check is inserted at the end of MainBB. True return
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000278/// value means that the operands fit. Either of the operands may be NULL if it
279/// doesn't need a runtime check.
280Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
281 assert((Op1 || Op2) && "Nothing to check");
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000282 IRBuilder<> Builder(MainBB, MainBB->end());
Justin Lebar28605732016-11-16 00:44:47 +0000283
284 Value *OrV;
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000285 if (Op1 && Op2)
286 OrV = Builder.CreateOr(Op1, Op2);
Justin Lebar28605732016-11-16 00:44:47 +0000287 else
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000288 OrV = Op1 ? Op1 : Op2;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000289
290 // BitMask is inverted to check if the operands are
291 // larger than the bypass type
292 uint64_t BitMask = ~BypassType->getBitMask();
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000293 Value *AndV = Builder.CreateAnd(OrV, BitMask);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000294
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000295 // Compare operand values
296 Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
297 return Builder.CreateICmpEQ(AndV, ZeroV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000298}
299
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000300/// Substitutes the div/rem instruction with code that checks the value of the
301/// operands and uses a shorter-faster div/rem instruction when possible.
302Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
303 Value *Dividend = SlowDivOrRem->getOperand(0);
304 Value *Divisor = SlowDivOrRem->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000305
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000306 if (isa<ConstantInt>(Divisor)) {
307 // Keep division by a constant for DAGCombiner.
308 return None;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000309 }
310
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000311 ValueRange DividendRange = getValueRange(Dividend);
312 if (DividendRange == VALRNG_LONG)
313 return None;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000314
Nikolai Bozhenov4a04fb9e2017-03-02 22:12:15 +0000315 ValueRange DivisorRange = getValueRange(Divisor);
316 if (DivisorRange == VALRNG_LONG)
317 return None;
318
319 bool DividendShort = (DividendRange == VALRNG_SHORT);
320 bool DivisorShort = (DivisorRange == VALRNG_SHORT);
321
322 if (DividendShort && DivisorShort) {
323 // If both operands are known to be short then just replace the long
324 // division with a short one in-place.
325
326 IRBuilder<> Builder(SlowDivOrRem);
327 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
328 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
329 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
330 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
331 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
332 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
333 return QuotRemPair(ExtDiv, ExtRem);
334 } else if (DividendShort && !isSignedOp()) {
335 // If the division is unsigned and Dividend is known to be short, then
336 // either
337 // 1) Divisor is less or equal to Dividend, and the result can be computed
338 // with a short division.
339 // 2) Divisor is greater than Dividend. In this case, no division is needed
340 // at all: The quotient is 0 and the remainder is equal to Dividend.
341 //
342 // So instead of checking at runtime whether Divisor fits into BypassType,
343 // we emit a runtime check to differentiate between these two cases. This
344 // lets us entirely avoid a long div.
345
346 // Split the basic block before the div/rem.
347 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
348 // Remove the unconditional branch from MainBB to SuccessorBB.
349 MainBB->getInstList().back().eraseFromParent();
350 QuotRemWithBB Long;
351 Long.BB = MainBB;
352 Long.Quotient = ConstantInt::get(getSlowType(), 0);
353 Long.Remainder = Dividend;
354 QuotRemWithBB Fast = createFastBB(SuccessorBB);
355 QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
356 IRBuilder<> Builder(MainBB, MainBB->end());
357 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
358 Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
359 return Result;
360 } else {
361 // General case. Create both slow and fast div/rem pairs and choose one of
362 // them at runtime.
363
364 // Split the basic block before the div/rem.
365 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
366 // Remove the unconditional branch from MainBB to SuccessorBB.
367 MainBB->getInstList().back().eraseFromParent();
368 QuotRemWithBB Fast = createFastBB(SuccessorBB);
369 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
370 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
371 Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
372 DivisorShort ? nullptr : Divisor);
373 IRBuilder<> Builder(MainBB, MainBB->end());
374 Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
375 return Result;
376 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000377}
378
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000379/// This optimization identifies DIV/REM instructions in a BB that can be
380/// profitably bypassed and carried out with a shorter, faster divide.
381bool llvm::bypassSlowDivision(BasicBlock *BB,
382 const BypassWidthsTy &BypassWidths) {
383 DivCacheTy PerBBDivCache;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000384
385 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000386 Instruction* Next = &*BB->begin();
387 while (Next != nullptr) {
388 // We may add instructions immediately after I, but we want to skip over
389 // them.
390 Instruction* I = Next;
391 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000392
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000393 FastDivInsertionTask Task(I, BypassWidths);
394 if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
395 I->replaceAllUsesWith(Replacement);
396 I->eraseFromParent();
397 MadeChange = true;
398 }
Preston Gurdcdf540d2012-09-04 18:22:17 +0000399 }
400
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000401 // Above we eagerly create divs and rems, as pairs, so that we can efficiently
402 // create divrem machine instructions. Now erase any unused divs / rems so we
403 // don't leave extra instructions sitting around.
Nikolai Bozhenovd4b12b32017-03-02 22:05:07 +0000404 for (auto &KV : PerBBDivCache)
405 for (Value *V : {KV.second.Quotient, KV.second.Remainder})
406 RecursivelyDeleteTriviallyDeadInstructions(V);
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000407
Preston Gurdcdf540d2012-09-04 18:22:17 +0000408 return MadeChange;
409}