blob: 0e2a46533532dfba3c85eb6044142ed867ba5ac7 [file] [log] [blame]
Preston Gurdcdf540d2012-09-04 18:22:17 +00001//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains an optimization for div and rem on architectures that
11// execute short instructions significantly faster than longer instructions.
12// For example, on Intel Atom 32-bit divides are slow enough that during
13// runtime it is profitable to check the value of the operands, and if they are
14// positive and less than 256 use an unsigned 8-bit divide.
15//
16//===----------------------------------------------------------------------===//
17
Chandler Carruthed0881b2012-12-03 16:50:05 +000018#include "llvm/Transforms/Utils/BypassSlowDivision.h"
19#include "llvm/ADT/DenseMap.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000020#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instructions.h"
Justin Lebar0ede5fb2016-10-28 21:43:54 +000023#include "llvm/Transforms/Utils/Local.h"
Preston Gurdcdf540d2012-09-04 18:22:17 +000024
25using namespace llvm;
26
Chandler Carruth964daaa2014-04-22 02:55:47 +000027#define DEBUG_TYPE "bypass-slow-division"
28
Benjamin Kramer1f66f882012-09-10 11:52:08 +000029namespace {
Preston Gurdcdf540d2012-09-04 18:22:17 +000030 struct DivOpInfo {
31 bool SignedOp;
32 Value *Dividend;
33 Value *Divisor;
34
35 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
36 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
37 };
38
39 struct DivPhiNodes {
40 PHINode *Quotient;
41 PHINode *Remainder;
42
43 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
44 : Quotient(InQuotient), Remainder(InRemainder) {}
45 };
Alexander Kornienkof00654e2015-06-23 09:49:53 +000046}
Preston Gurdcdf540d2012-09-04 18:22:17 +000047
Benjamin Kramer1f66f882012-09-10 11:52:08 +000048namespace llvm {
Preston Gurdcdf540d2012-09-04 18:22:17 +000049 template<>
50 struct DenseMapInfo<DivOpInfo> {
51 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
52 return Val1.SignedOp == Val2.SignedOp &&
53 Val1.Dividend == Val2.Dividend &&
54 Val1.Divisor == Val2.Divisor;
55 }
56
57 static DivOpInfo getEmptyKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000058 return DivOpInfo(false, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000059 }
60
61 static DivOpInfo getTombstoneKey() {
Craig Topperf40110f2014-04-25 05:29:35 +000062 return DivOpInfo(true, nullptr, nullptr);
Preston Gurdcdf540d2012-09-04 18:22:17 +000063 }
64
65 static unsigned getHashValue(const DivOpInfo &Val) {
66 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
67 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
Jakub Staszak46beca62012-09-04 20:48:24 +000068 (unsigned)Val.SignedOp;
Preston Gurdcdf540d2012-09-04 18:22:17 +000069 }
70 };
71
72 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
Alexander Kornienkof00654e2015-06-23 09:49:53 +000073}
Preston Gurdcdf540d2012-09-04 18:22:17 +000074
75// insertFastDiv - Substitutes the div/rem instruction with code that checks the
76// value of the operands and uses a shorter-faster div/rem instruction when
77// possible and the longer-slower div/rem instruction otherwise.
Eric Christopher49a7d6c2016-01-04 23:18:58 +000078static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
79 bool UseDivOp, bool UseSignedOp,
Jakub Staszak46beca62012-09-04 20:48:24 +000080 DivCacheTy &PerBBDivCache) {
Eric Christopher49a7d6c2016-01-04 23:18:58 +000081 Function *F = I->getParent()->getParent();
Preston Gurdcdf540d2012-09-04 18:22:17 +000082 // Get instruction operands
Eric Christopher49a7d6c2016-01-04 23:18:58 +000083 Value *Dividend = I->getOperand(0);
84 Value *Divisor = I->getOperand(1);
Preston Gurdcdf540d2012-09-04 18:22:17 +000085
Jakub Staszak46beca62012-09-04 20:48:24 +000086 if (isa<ConstantInt>(Divisor) ||
87 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
Preston Gurdcdf540d2012-09-04 18:22:17 +000088 // Operations with immediate values should have
89 // been solved and replaced during compile time.
Jakub Staszak46beca62012-09-04 20:48:24 +000090 return false;
Preston Gurdcdf540d2012-09-04 18:22:17 +000091 }
92
93 // Basic Block is split before divide
Eric Christopher49a7d6c2016-01-04 23:18:58 +000094 BasicBlock *MainBB = &*I->getParent();
95 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
Preston Gurdcdf540d2012-09-04 18:22:17 +000096
97 // Add new basic block for slow divide operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +000098 BasicBlock *SlowBB =
99 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000100 SlowBB->moveBefore(SuccessorBB);
101 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
102 Value *SlowQuotientV;
103 Value *SlowRemainderV;
104 if (UseSignedOp) {
105 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
106 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
107 } else {
108 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
109 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
110 }
111 SlowBuilder.CreateBr(SuccessorBB);
112
113 // Add new basic block for fast divide operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000114 BasicBlock *FastBB =
115 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000116 FastBB->moveBefore(SlowBB);
117 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
Jakub Staszak46beca62012-09-04 20:48:24 +0000118 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
119 BypassType);
120 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
121 BypassType);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000122
123 // udiv/urem because optimization only handles positive numbers
Justin Lebar468bf732016-10-28 21:43:51 +0000124 Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000125 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
126 ShortDivisorV);
127 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
Jakub Staszak46beca62012-09-04 20:48:24 +0000128 ShortQuotientV,
129 Dividend->getType());
Preston Gurdcdf540d2012-09-04 18:22:17 +0000130 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
131 ShortRemainderV,
132 Dividend->getType());
133 FastBuilder.CreateBr(SuccessorBB);
134
135 // Phi nodes for result of div and rem
136 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000137 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000138 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
139 QuoPhi->addIncoming(FastQuotientV, FastBB);
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000140 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000141 RemPhi->addIncoming(SlowRemainderV, SlowBB);
142 RemPhi->addIncoming(FastRemainderV, FastBB);
143
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000144 // Replace I with appropriate phi node
Jakub Staszak46beca62012-09-04 20:48:24 +0000145 if (UseDivOp)
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000146 I->replaceAllUsesWith(QuoPhi);
Jakub Staszak46beca62012-09-04 20:48:24 +0000147 else
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000148 I->replaceAllUsesWith(RemPhi);
149 I->eraseFromParent();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000150
151 // Combine operands into a single value with OR for value testing below
152 MainBB->getInstList().back().eraseFromParent();
153 IRBuilder<> MainBuilder(MainBB, MainBB->end());
154 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
155
156 // BitMask is inverted to check if the operands are
157 // larger than the bypass type
158 uint64_t BitMask = ~BypassType->getBitMask();
159 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
160
161 // Compare operand values and branch
Preston Gurd485296d2013-03-04 18:13:57 +0000162 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000163 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
164 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
165
Preston Gurdcdf540d2012-09-04 18:22:17 +0000166 // Cache phi nodes to be used later in place of other instances
167 // of div or rem with the same sign, dividend, and divisor
168 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
169 DivPhiNodes Value(QuoPhi, RemPhi);
170 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
Jakub Staszak46beca62012-09-04 20:48:24 +0000171 return true;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000172}
173
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000174// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
175// the current BB if operands and operation are identical. Otherwise calls
176// insertFastDiv to perform the optimization and caches the resulting dividend
177// and remainder.
178static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
179 bool UseDivOp, bool UseSignedOp,
Jakub Staszak46beca62012-09-04 20:48:24 +0000180 DivCacheTy &PerBBDivCache) {
Preston Gurdcdf540d2012-09-04 18:22:17 +0000181 // Get instruction operands
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000182 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
Jakub Staszake535c1a2012-09-04 23:11:11 +0000183 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000184
185 if (CacheI == PerBBDivCache.end()) {
186 // If previous instance does not exist, insert fast div
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000187 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000188 }
189
190 // Replace operation value with previously generated phi node
Jakub Staszake535c1a2012-09-04 23:11:11 +0000191 DivPhiNodes &Value = CacheI->second;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000192 if (UseDivOp) {
193 // Replace all uses of div instruction with quotient phi node
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000194 I->replaceAllUsesWith(Value.Quotient);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000195 } else {
196 // Replace all uses of rem instruction with remainder phi node
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000197 I->replaceAllUsesWith(Value.Remainder);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000198 }
199
Preston Gurdcdf540d2012-09-04 18:22:17 +0000200 // Remove redundant operation
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000201 I->eraseFromParent();
Jakub Staszak46beca62012-09-04 20:48:24 +0000202 return true;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000203}
204
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000205// bypassSlowDivision - This optimization identifies DIV instructions in a BB
206// that can be profitably bypassed and carried out with a shorter, faster
207// divide.
208bool llvm::bypassSlowDivision(
209 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
Preston Gurdcdf540d2012-09-04 18:22:17 +0000210 DivCacheTy DivCache;
211
212 bool MadeChange = false;
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000213 Instruction* Next = &*BB->begin();
214 while (Next != nullptr) {
215 // We may add instructions immediately after I, but we want to skip over
216 // them.
217 Instruction* I = Next;
218 Next = Next->getNextNode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000219
220 // Get instruction details
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000221 unsigned Opcode = I->getOpcode();
Preston Gurdcdf540d2012-09-04 18:22:17 +0000222 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
223 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
Jakub Staszak46beca62012-09-04 20:48:24 +0000224 bool UseSignedOp = Opcode == Instruction::SDiv ||
225 Opcode == Instruction::SRem;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000226
227 // Only optimize div or rem ops
Jakub Staszak46beca62012-09-04 20:48:24 +0000228 if (!UseDivOp && !UseRemOp)
Preston Gurdcdf540d2012-09-04 18:22:17 +0000229 continue;
Preston Gurdcdf540d2012-09-04 18:22:17 +0000230
Preston Gurd5509e3d2012-10-03 16:11:44 +0000231 // Skip division on vector types, only optimize integer instructions
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000232 if (!I->getType()->isIntegerTy())
Jakub Staszak46beca62012-09-04 20:48:24 +0000233 continue;
234
Preston Gurd0d67f512012-10-04 21:33:40 +0000235 // Get bitwidth of div/rem instruction
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000236 IntegerType *T = cast<IntegerType>(I->getType());
Preston Gurd485296d2013-03-04 18:13:57 +0000237 unsigned int bitwidth = T->getBitWidth();
Preston Gurd5509e3d2012-10-03 16:11:44 +0000238
Preston Gurd0d67f512012-10-04 21:33:40 +0000239 // Continue if bitwidth is not bypassed
240 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
241 if (BI == BypassWidths.end())
Preston Gurd5509e3d2012-10-03 16:11:44 +0000242 continue;
243
Preston Gurd0d67f512012-10-04 21:33:40 +0000244 // Get type for div/rem instruction with bypass bitwidth
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000245 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
Preston Gurd5509e3d2012-10-03 16:11:44 +0000246
Eric Christopher49a7d6c2016-01-04 23:18:58 +0000247 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
Preston Gurdcdf540d2012-09-04 18:22:17 +0000248 }
249
Justin Lebar0ede5fb2016-10-28 21:43:54 +0000250 // Above we eagerly create divs and rems, as pairs, so that we can efficiently
251 // create divrem machine instructions. Now erase any unused divs / rems so we
252 // don't leave extra instructions sitting around.
253 for (auto &KV : DivCache)
254 for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
255 RecursivelyDeleteTriviallyDeadInstructions(Phi);
256
Preston Gurdcdf540d2012-09-04 18:22:17 +0000257 return MadeChange;
258}