blob: bc2cef26edcbc8f3578afd6aa52070876d6aa345 [file] [log] [blame]
Preston Gurdcdf540d2012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthed0881b2012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000020#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
Justin Lebar0ede5fb2016-10-28 21:43:54 +000023#include "llvm/Transforms/Utils/Local.h"
Preston Gurdcdf540d2012-09-04 18:22:17 +000024
25using namespace llvm;
26
Chandler Carruth964daaa2014-04-22 02:55:47 +000027#define DEBUG_TYPE "bypass-slow-division"
28
Benjamin Kramer1f66f882012-09-10 11:52:08 +000029namespace {
Preston Gurdcdf540d2012-09-04 18:22:17 +000030 struct DivOpInfo {
31 bool SignedOp;
32 Value *Dividend;
33 Value *Divisor;
34
35 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
36 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
37 };
38
39 struct DivPhiNodes {
40 PHINode *Quotient;
41 PHINode *Remainder;
42
43 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
44 : Quotient(InQuotient), Remainder(InRemainder) {}
45 };
Alexander Kornienkof00654e2015-06-23 09:49:53 +000046}
Preston Gurdcdf540d2012-09-04 18:22:17 +000047
Benjamin Kramer1f66f882012-09-10 11:52:08 +000048namespace llvm {
Preston Gurdcdf540d2012-09-04 18:22:17 +000049 template<>
50 struct DenseMapInfo<DivOpInfo> {
51 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
52 return Val1.SignedOp == Val2.SignedOp &&
53 Val1.Dividend == Val2.Dividend &&
54 Val1.Divisor == Val2.Divisor;
55 }
56
57 static DivOpInfo getEmptyKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000058 return DivOpInfo(false, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000059 }
60
61 static DivOpInfo getTombstoneKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000062 return DivOpInfo(true, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000063 }
64
65 static unsigned getHashValue(const DivOpInfo &Val) {
66 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
67 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak46beca62012-09-04 20:48:24 +000068 (unsigned)Val.SignedOp;
Preston Gurdcdf540d2012-09-04 18:22:17 +000069 }
70 };
71
72 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
Alexander Kornienkof00654e2015-06-23 09:49:53 +000073}
Preston Gurdcdf540d2012-09-04 18:22:17 +000074
75// insertFastDiv - Substitutes the div/rem instruction with code that checks the
76// value of the operands and uses a shorter-faster div/rem instruction when
77// possible and the longer-slower div/rem instruction otherwise.
Eric Christopher49a7d6c2016-01-04 23:18:58 +000078static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
79 bool UseDivOp, bool UseSignedOp,
Jakub Staszak46beca62012-09-04 20:48:24 +000080 DivCacheTy &PerBBDivCache) {
Eric Christopher49a7d6c2016-01-04 23:18:58 +000081 Function *F = I->getParent()->getParent();
Preston Gurdcdf540d2012-09-04 18:22:17 +000082 // Get instruction operands
Eric Christopher49a7d6c2016-01-04 23:18:58 +000083 Value *Dividend = I->getOperand(0);
84 Value *Divisor = I->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +000085
Justin Lebar583b8682016-11-16 00:44:43 +000086 if (isa<ConstantInt>(Divisor)) {
87 // Division by a constant should have been been solved and replaced earlier
88 // in the pipeline.
Jakub Staszak46beca62012-09-04 20:48:24 +000089 return false;
Preston Gurdcdf540d2012-09-04 18:22:17 +000090 }
91
Justin Lebar28605732016-11-16 00:44:47 +000092 // If the numerator is a constant, bail if it doesn't fit into BypassType.
93 if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
94 if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
95 return false;
96
Preston Gurdcdf540d2012-09-04 18:22:17 +000097 // Basic Block is split before divide
Eric Christopher49a7d6c2016-01-04 23:18:58 +000098 BasicBlock *MainBB = &*I->getParent();
99 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000100
101 // Add new basic block for slow divide operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000102 BasicBlock *SlowBB =
103 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000104 SlowBB->moveBefore(SuccessorBB);
105 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
106 Value *SlowQuotientV;
107 Value *SlowRemainderV;
108 if (UseSignedOp) {
109 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
110 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
111 } else {
112 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
113 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
114 }
115 SlowBuilder.CreateBr(SuccessorBB);
116
117 // Add new basic block for fast divide operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000118 BasicBlock *FastBB =
119 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000120 FastBB->moveBefore(SlowBB);
121 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Jakub Staszak46beca62012-09-04 20:48:24 +0000122 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
123 BypassType);
124 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
125 BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000126
127 // udiv/urem because optimization only handles positive numbers
Justin Lebar468bf732016-10-28 21:43:51 +0000128 Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000129 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130 ShortDivisorV);
131 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
Jakub Staszak46beca62012-09-04 20:48:24 +0000132 ShortQuotientV,
133 Dividend->getType());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000134 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135 ShortRemainderV,
136 Dividend->getType());
137 FastBuilder.CreateBr(SuccessorBB);
138
139 // Phi nodes for result of div and rem
140 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000141 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000142 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143 QuoPhi->addIncoming(FastQuotientV, FastBB);
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000144 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000145 RemPhi->addIncoming(SlowRemainderV, SlowBB);
146 RemPhi->addIncoming(FastRemainderV, FastBB);
147
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000148 // Replace I with appropriate phi node
Jakub Staszak46beca62012-09-04 20:48:24 +0000149 if (UseDivOp)
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000150 I->replaceAllUsesWith(QuoPhi);
Jakub Staszak46beca62012-09-04 20:48:24 +0000151 else
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000152 I->replaceAllUsesWith(RemPhi);
153 I->eraseFromParent();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000154
155 // Combine operands into a single value with OR for value testing below
156 MainBB->getInstList().back().eraseFromParent();
157 IRBuilder<> MainBuilder(MainBB, MainBB->end());
Justin Lebar28605732016-11-16 00:44:47 +0000158
159 // We should have bailed out above if the divisor is a constant, but the
160 // dividend may still be a constant. Set OrV to our non-constant operands
161 // OR'ed together.
162 assert(!isa<ConstantInt>(Divisor));
163
164 Value *OrV;
165 if (!isa<ConstantInt>(Dividend))
166 OrV = MainBuilder.CreateOr(Dividend, Divisor);
167 else
168 OrV = Divisor;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000169
170 // BitMask is inverted to check if the operands are
171 // larger than the bypass type
172 uint64_t BitMask = ~BypassType->getBitMask();
173 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
174
175 // Compare operand values and branch
Preston Gurd485296d2013-03-04 18:13:57 +0000176 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000177 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
178 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
179
Preston Gurdcdf540d2012-09-04 18:22:17 +0000180 // Cache phi nodes to be used later in place of other instances
181 // of div or rem with the same sign, dividend, and divisor
182 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
183 DivPhiNodes Value(QuoPhi, RemPhi);
184 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
Jakub Staszak46beca62012-09-04 20:48:24 +0000185 return true;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000186}
187
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000188// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
189// the current BB if operands and operation are identical. Otherwise calls
190// insertFastDiv to perform the optimization and caches the resulting dividend
191// and remainder.
192static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
193 bool UseDivOp, bool UseSignedOp,
Jakub Staszak46beca62012-09-04 20:48:24 +0000194 DivCacheTy &PerBBDivCache) {
Preston Gurdcdf540d2012-09-04 18:22:17 +0000195 // Get instruction operands
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000196 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
Jakub Staszake535c1a2012-09-04 23:11:11 +0000197 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000198
199 if (CacheI == PerBBDivCache.end()) {
200 // If previous instance does not exist, insert fast div
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000201 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000202 }
203
204 // Replace operation value with previously generated phi node
Jakub Staszake535c1a2012-09-04 23:11:11 +0000205 DivPhiNodes &Value = CacheI->second;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000206 if (UseDivOp) {
207 // Replace all uses of div instruction with quotient phi node
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000208 I->replaceAllUsesWith(Value.Quotient);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000209 } else {
210 // Replace all uses of rem instruction with remainder phi node
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000211 I->replaceAllUsesWith(Value.Remainder);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000212 }
213
Preston Gurdcdf540d2012-09-04 18:22:17 +0000214 // Remove redundant operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000215 I->eraseFromParent();
Jakub Staszak46beca62012-09-04 20:48:24 +0000216 return true;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000217}
218
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000219// bypassSlowDivision - This optimization identifies DIV instructions in a BB
220// that can be profitably bypassed and carried out with a shorter, faster
221// divide.
222bool llvm::bypassSlowDivision(
223 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
Preston Gurdcdf540d2012-09-04 18:22:17 +0000224 DivCacheTy DivCache;
225
226 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000227 Instruction* Next = &*BB->begin();
228 while (Next != nullptr) {
229 // We may add instructions immediately after I, but we want to skip over
230 // them.
231 Instruction* I = Next;
232 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000233
234 // Get instruction details
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000235 unsigned Opcode = I->getOpcode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000236 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
237 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
Jakub Staszak46beca62012-09-04 20:48:24 +0000238 bool UseSignedOp = Opcode == Instruction::SDiv ||
239 Opcode == Instruction::SRem;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000240
241 // Only optimize div or rem ops
Jakub Staszak46beca62012-09-04 20:48:24 +0000242 if (!UseDivOp && !UseRemOp)
Preston Gurdcdf540d2012-09-04 18:22:17 +0000243 continue;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000244
Preston Gurd5509e3d2012-10-03 16:11:44 +0000245 // Skip division on vector types, only optimize integer instructions
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000246 if (!I->getType()->isIntegerTy())
Jakub Staszak46beca62012-09-04 20:48:24 +0000247 continue;
248
Preston Gurd0d67f512012-10-04 21:33:40 +0000249 // Get bitwidth of div/rem instruction
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000250 IntegerType *T = cast<IntegerType>(I->getType());
Preston Gurd485296d2013-03-04 18:13:57 +0000251 unsigned int bitwidth = T->getBitWidth();
Preston Gurd5509e3d2012-10-03 16:11:44 +0000252
Preston Gurd0d67f512012-10-04 21:33:40 +0000253 // Continue if bitwidth is not bypassed
254 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
255 if (BI == BypassWidths.end())
Preston Gurd5509e3d2012-10-03 16:11:44 +0000256 continue;
257
Preston Gurd0d67f512012-10-04 21:33:40 +0000258 // Get type for div/rem instruction with bypass bitwidth
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000259 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
Preston Gurd5509e3d2012-10-03 16:11:44 +0000260
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000261 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000262 }
263
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000264 // Above we eagerly create divs and rems, as pairs, so that we can efficiently
265 // create divrem machine instructions. Now erase any unused divs / rems so we
266 // don't leave extra instructions sitting around.
267 for (auto &KV : DivCache)
268 for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
269 RecursivelyDeleteTriviallyDeadInstructions(Phi);
270
Preston Gurdcdf540d2012-09-04 18:22:17 +0000271 return MadeChange;
272}