blob: 41a854362c92e9be340202edc278e529b720ecfd [file] [log] [blame]
Preston Gurdcdf540d2012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthed0881b2012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000020#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
Preston Gurdcdf540d2012-09-04 18:22:17 +000023
24using namespace llvm;
25
Chandler Carruth964daaa2014-04-22 02:55:47 +000026#define DEBUG_TYPE "bypass-slow-division"
27
Benjamin Kramer1f66f882012-09-10 11:52:08 +000028namespace {
Preston Gurdcdf540d2012-09-04 18:22:17 +000029 struct DivOpInfo {
30 bool SignedOp;
31 Value *Dividend;
32 Value *Divisor;
33
34 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
36 };
37
38 struct DivPhiNodes {
39 PHINode *Quotient;
40 PHINode *Remainder;
41
42 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43 : Quotient(InQuotient), Remainder(InRemainder) {}
44 };
Alexander Kornienkof00654e2015-06-23 09:49:53 +000045}
Preston Gurdcdf540d2012-09-04 18:22:17 +000046
Benjamin Kramer1f66f882012-09-10 11:52:08 +000047namespace llvm {
Preston Gurdcdf540d2012-09-04 18:22:17 +000048 template<>
49 struct DenseMapInfo<DivOpInfo> {
50 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51 return Val1.SignedOp == Val2.SignedOp &&
52 Val1.Dividend == Val2.Dividend &&
53 Val1.Divisor == Val2.Divisor;
54 }
55
56 static DivOpInfo getEmptyKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000057 return DivOpInfo(false, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000058 }
59
60 static DivOpInfo getTombstoneKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000061 return DivOpInfo(true, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000062 }
63
64 static unsigned getHashValue(const DivOpInfo &Val) {
65 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak46beca62012-09-04 20:48:24 +000067 (unsigned)Val.SignedOp;
Preston Gurdcdf540d2012-09-04 18:22:17 +000068 }
69 };
70
71 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
Alexander Kornienkof00654e2015-06-23 09:49:53 +000072}
Preston Gurdcdf540d2012-09-04 18:22:17 +000073
74// insertFastDiv - Substitutes the div/rem instruction with code that checks the
75// value of the operands and uses a shorter-faster div/rem instruction when
76// possible and the longer-slower div/rem instruction otherwise.
Eric Christopher49a7d6c2016-01-04 23:18:58 +000077static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
78 bool UseDivOp, bool UseSignedOp,
Jakub Staszak46beca62012-09-04 20:48:24 +000079 DivCacheTy &PerBBDivCache) {
Eric Christopher49a7d6c2016-01-04 23:18:58 +000080 Function *F = I->getParent()->getParent();
Preston Gurdcdf540d2012-09-04 18:22:17 +000081 // Get instruction operands
Eric Christopher49a7d6c2016-01-04 23:18:58 +000082 Value *Dividend = I->getOperand(0);
83 Value *Divisor = I->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +000084
Jakub Staszak46beca62012-09-04 20:48:24 +000085 if (isa<ConstantInt>(Divisor) ||
86 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
Preston Gurdcdf540d2012-09-04 18:22:17 +000087 // Operations with immediate values should have
88 // been solved and replaced during compile time.
Jakub Staszak46beca62012-09-04 20:48:24 +000089 return false;
Preston Gurdcdf540d2012-09-04 18:22:17 +000090 }
91
92 // Basic Block is split before divide
Eric Christopher49a7d6c2016-01-04 23:18:58 +000093 BasicBlock *MainBB = &*I->getParent();
94 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
Preston Gurdcdf540d2012-09-04 18:22:17 +000095
96 // Add new basic block for slow divide operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +000097 BasicBlock *SlowBB =
98 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +000099 SlowBB->moveBefore(SuccessorBB);
100 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
101 Value *SlowQuotientV;
102 Value *SlowRemainderV;
103 if (UseSignedOp) {
104 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
105 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
106 } else {
107 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
108 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
109 }
110 SlowBuilder.CreateBr(SuccessorBB);
111
112 // Add new basic block for fast divide operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000113 BasicBlock *FastBB =
114 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000115 FastBB->moveBefore(SlowBB);
116 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Jakub Staszak46beca62012-09-04 20:48:24 +0000117 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
118 BypassType);
119 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
120 BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000121
122 // udiv/urem because optimization only handles positive numbers
Justin Lebar468bf732016-10-28 21:43:51 +0000123 Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000124 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
125 ShortDivisorV);
126 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
Jakub Staszak46beca62012-09-04 20:48:24 +0000127 ShortQuotientV,
128 Dividend->getType());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000129 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
130 ShortRemainderV,
131 Dividend->getType());
132 FastBuilder.CreateBr(SuccessorBB);
133
134 // Phi nodes for result of div and rem
135 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000136 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000137 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
138 QuoPhi->addIncoming(FastQuotientV, FastBB);
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000139 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000140 RemPhi->addIncoming(SlowRemainderV, SlowBB);
141 RemPhi->addIncoming(FastRemainderV, FastBB);
142
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000143 // Replace I with appropriate phi node
Jakub Staszak46beca62012-09-04 20:48:24 +0000144 if (UseDivOp)
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000145 I->replaceAllUsesWith(QuoPhi);
Jakub Staszak46beca62012-09-04 20:48:24 +0000146 else
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000147 I->replaceAllUsesWith(RemPhi);
148 I->eraseFromParent();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000149
150 // Combine operands into a single value with OR for value testing below
151 MainBB->getInstList().back().eraseFromParent();
152 IRBuilder<> MainBuilder(MainBB, MainBB->end());
153 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
154
155 // BitMask is inverted to check if the operands are
156 // larger than the bypass type
157 uint64_t BitMask = ~BypassType->getBitMask();
158 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
159
160 // Compare operand values and branch
Preston Gurd485296d2013-03-04 18:13:57 +0000161 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000162 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
163 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
164
Preston Gurdcdf540d2012-09-04 18:22:17 +0000165 // Cache phi nodes to be used later in place of other instances
166 // of div or rem with the same sign, dividend, and divisor
167 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
168 DivPhiNodes Value(QuoPhi, RemPhi);
169 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
Jakub Staszak46beca62012-09-04 20:48:24 +0000170 return true;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000171}
172
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000173// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
174// the current BB if operands and operation are identical. Otherwise calls
175// insertFastDiv to perform the optimization and caches the resulting dividend
176// and remainder.
177static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
178 bool UseDivOp, bool UseSignedOp,
Jakub Staszak46beca62012-09-04 20:48:24 +0000179 DivCacheTy &PerBBDivCache) {
Preston Gurdcdf540d2012-09-04 18:22:17 +0000180 // Get instruction operands
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000181 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
Jakub Staszake535c1a2012-09-04 23:11:11 +0000182 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000183
184 if (CacheI == PerBBDivCache.end()) {
185 // If previous instance does not exist, insert fast div
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000186 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000187 }
188
189 // Replace operation value with previously generated phi node
Jakub Staszake535c1a2012-09-04 23:11:11 +0000190 DivPhiNodes &Value = CacheI->second;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000191 if (UseDivOp) {
192 // Replace all uses of div instruction with quotient phi node
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000193 I->replaceAllUsesWith(Value.Quotient);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000194 } else {
195 // Replace all uses of rem instruction with remainder phi node
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000196 I->replaceAllUsesWith(Value.Remainder);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000197 }
198
Preston Gurdcdf540d2012-09-04 18:22:17 +0000199 // Remove redundant operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000200 I->eraseFromParent();
Jakub Staszak46beca62012-09-04 20:48:24 +0000201 return true;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000202}
203
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000204// bypassSlowDivision - This optimization identifies DIV instructions in a BB
205// that can be profitably bypassed and carried out with a shorter, faster
206// divide.
207bool llvm::bypassSlowDivision(
208 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
Preston Gurdcdf540d2012-09-04 18:22:17 +0000209 DivCacheTy DivCache;
210
211 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000212 Instruction* Next = &*BB->begin();
213 while (Next != nullptr) {
214 // We may add instructions immediately after I, but we want to skip over
215 // them.
216 Instruction* I = Next;
217 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000218
219 // Get instruction details
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000220 unsigned Opcode = I->getOpcode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000221 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
222 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
Jakub Staszak46beca62012-09-04 20:48:24 +0000223 bool UseSignedOp = Opcode == Instruction::SDiv ||
224 Opcode == Instruction::SRem;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000225
226 // Only optimize div or rem ops
Jakub Staszak46beca62012-09-04 20:48:24 +0000227 if (!UseDivOp && !UseRemOp)
Preston Gurdcdf540d2012-09-04 18:22:17 +0000228 continue;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000229
Preston Gurd5509e3d2012-10-03 16:11:44 +0000230 // Skip division on vector types, only optimize integer instructions
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000231 if (!I->getType()->isIntegerTy())
Jakub Staszak46beca62012-09-04 20:48:24 +0000232 continue;
233
Preston Gurd0d67f512012-10-04 21:33:40 +0000234 // Get bitwidth of div/rem instruction
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000235 IntegerType *T = cast<IntegerType>(I->getType());
Preston Gurd485296d2013-03-04 18:13:57 +0000236 unsigned int bitwidth = T->getBitWidth();
Preston Gurd5509e3d2012-10-03 16:11:44 +0000237
Preston Gurd0d67f512012-10-04 21:33:40 +0000238 // Continue if bitwidth is not bypassed
239 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
240 if (BI == BypassWidths.end())
Preston Gurd5509e3d2012-10-03 16:11:44 +0000241 continue;
242
Preston Gurd0d67f512012-10-04 21:33:40 +0000243 // Get type for div/rem instruction with bypass bitwidth
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000244 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
Preston Gurd5509e3d2012-10-03 16:11:44 +0000245
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000246 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000247 }
248
249 return MadeChange;
250}