Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 1 | //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file contains an optimization for div and rem on architectures that |
| 11 | // execute short instructions significantly faster than longer instructions. |
| 12 | // For example, on Intel Atom 32-bit divides are slow enough that during |
| 13 | // runtime it is profitable to check the value of the operands, and if they are |
| 14 | // positive and less than 256 use an unsigned 8-bit divide. |
| 15 | // |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | |
Chandler Carruth | ed0881b | 2012-12-03 16:50:05 +0000 | [diff] [blame] | 18 | #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
| 19 | #include "llvm/ADT/DenseMap.h" |
Chandler Carruth | 9fb823b | 2013-01-02 11:36:10 +0000 | [diff] [blame] | 20 | #include "llvm/IR/Function.h" |
| 21 | #include "llvm/IR/IRBuilder.h" |
| 22 | #include "llvm/IR/Instructions.h" |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 23 | |
| 24 | using namespace llvm; |
| 25 | |
Chandler Carruth | 964daaa | 2014-04-22 02:55:47 +0000 | [diff] [blame] | 26 | #define DEBUG_TYPE "bypass-slow-division" |
| 27 | |
Benjamin Kramer | 1f66f88 | 2012-09-10 11:52:08 +0000 | [diff] [blame] | 28 | namespace { |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 29 | struct DivOpInfo { |
| 30 | bool SignedOp; |
| 31 | Value *Dividend; |
| 32 | Value *Divisor; |
| 33 | |
| 34 | DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) |
| 35 | : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} |
| 36 | }; |
| 37 | |
| 38 | struct DivPhiNodes { |
| 39 | PHINode *Quotient; |
| 40 | PHINode *Remainder; |
| 41 | |
| 42 | DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) |
| 43 | : Quotient(InQuotient), Remainder(InRemainder) {} |
| 44 | }; |
Alexander Kornienko | f00654e | 2015-06-23 09:49:53 +0000 | [diff] [blame] | 45 | } |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 46 | |
Benjamin Kramer | 1f66f88 | 2012-09-10 11:52:08 +0000 | [diff] [blame] | 47 | namespace llvm { |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 48 | template<> |
| 49 | struct DenseMapInfo<DivOpInfo> { |
| 50 | static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { |
| 51 | return Val1.SignedOp == Val2.SignedOp && |
| 52 | Val1.Dividend == Val2.Dividend && |
| 53 | Val1.Divisor == Val2.Divisor; |
| 54 | } |
| 55 | |
| 56 | static DivOpInfo getEmptyKey() { |
Craig Topper | f40110f | 2014-04-25 05:29:35 +0000 | [diff] [blame] | 57 | return DivOpInfo(false, nullptr, nullptr); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 58 | } |
| 59 | |
| 60 | static DivOpInfo getTombstoneKey() { |
Craig Topper | f40110f | 2014-04-25 05:29:35 +0000 | [diff] [blame] | 61 | return DivOpInfo(true, nullptr, nullptr); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 62 | } |
| 63 | |
| 64 | static unsigned getHashValue(const DivOpInfo &Val) { |
| 65 | return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ |
| 66 | reinterpret_cast<uintptr_t>(Val.Divisor)) ^ |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 67 | (unsigned)Val.SignedOp; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 68 | } |
| 69 | }; |
| 70 | |
| 71 | typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; |
Alexander Kornienko | f00654e | 2015-06-23 09:49:53 +0000 | [diff] [blame] | 72 | } |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 73 | |
| 74 | // insertFastDiv - Substitutes the div/rem instruction with code that checks the |
| 75 | // value of the operands and uses a shorter-faster div/rem instruction when |
| 76 | // possible and the longer-slower div/rem instruction otherwise. |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 77 | static bool insertFastDiv(Instruction *I, IntegerType *BypassType, |
| 78 | bool UseDivOp, bool UseSignedOp, |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 79 | DivCacheTy &PerBBDivCache) { |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 80 | Function *F = I->getParent()->getParent(); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 81 | // Get instruction operands |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 82 | Value *Dividend = I->getOperand(0); |
| 83 | Value *Divisor = I->getOperand(1); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 84 | |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 85 | if (isa<ConstantInt>(Divisor) || |
| 86 | (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 87 | // Operations with immediate values should have |
| 88 | // been solved and replaced during compile time. |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 89 | return false; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 90 | } |
| 91 | |
| 92 | // Basic Block is split before divide |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 93 | BasicBlock *MainBB = &*I->getParent(); |
| 94 | BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 95 | |
| 96 | // Add new basic block for slow divide operation |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 97 | BasicBlock *SlowBB = |
| 98 | BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 99 | SlowBB->moveBefore(SuccessorBB); |
| 100 | IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); |
| 101 | Value *SlowQuotientV; |
| 102 | Value *SlowRemainderV; |
| 103 | if (UseSignedOp) { |
| 104 | SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); |
| 105 | SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); |
| 106 | } else { |
| 107 | SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); |
| 108 | SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); |
| 109 | } |
| 110 | SlowBuilder.CreateBr(SuccessorBB); |
| 111 | |
| 112 | // Add new basic block for fast divide operation |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 113 | BasicBlock *FastBB = |
| 114 | BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 115 | FastBB->moveBefore(SlowBB); |
| 116 | IRBuilder<> FastBuilder(FastBB, FastBB->begin()); |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 117 | Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, |
| 118 | BypassType); |
| 119 | Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, |
| 120 | BypassType); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 121 | |
| 122 | // udiv/urem because optimization only handles positive numbers |
Justin Lebar | 468bf73 | 2016-10-28 21:43:51 +0000 | [diff] [blame^] | 123 | Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 124 | Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, |
| 125 | ShortDivisorV); |
| 126 | Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 127 | ShortQuotientV, |
| 128 | Dividend->getType()); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 129 | Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, |
| 130 | ShortRemainderV, |
| 131 | Dividend->getType()); |
| 132 | FastBuilder.CreateBr(SuccessorBB); |
| 133 | |
| 134 | // Phi nodes for result of div and rem |
| 135 | IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 136 | PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 137 | QuoPhi->addIncoming(SlowQuotientV, SlowBB); |
| 138 | QuoPhi->addIncoming(FastQuotientV, FastBB); |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 139 | PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 140 | RemPhi->addIncoming(SlowRemainderV, SlowBB); |
| 141 | RemPhi->addIncoming(FastRemainderV, FastBB); |
| 142 | |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 143 | // Replace I with appropriate phi node |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 144 | if (UseDivOp) |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 145 | I->replaceAllUsesWith(QuoPhi); |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 146 | else |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 147 | I->replaceAllUsesWith(RemPhi); |
| 148 | I->eraseFromParent(); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 149 | |
| 150 | // Combine operands into a single value with OR for value testing below |
| 151 | MainBB->getInstList().back().eraseFromParent(); |
| 152 | IRBuilder<> MainBuilder(MainBB, MainBB->end()); |
| 153 | Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); |
| 154 | |
| 155 | // BitMask is inverted to check if the operands are |
| 156 | // larger than the bypass type |
| 157 | uint64_t BitMask = ~BypassType->getBitMask(); |
| 158 | Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); |
| 159 | |
| 160 | // Compare operand values and branch |
Preston Gurd | 485296d | 2013-03-04 18:13:57 +0000 | [diff] [blame] | 161 | Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 162 | Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); |
| 163 | MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); |
| 164 | |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 165 | // Cache phi nodes to be used later in place of other instances |
| 166 | // of div or rem with the same sign, dividend, and divisor |
| 167 | DivOpInfo Key(UseSignedOp, Dividend, Divisor); |
| 168 | DivPhiNodes Value(QuoPhi, RemPhi); |
| 169 | PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 170 | return true; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 171 | } |
| 172 | |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 173 | // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from |
| 174 | // the current BB if operands and operation are identical. Otherwise calls |
| 175 | // insertFastDiv to perform the optimization and caches the resulting dividend |
| 176 | // and remainder. |
| 177 | static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, |
| 178 | bool UseDivOp, bool UseSignedOp, |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 179 | DivCacheTy &PerBBDivCache) { |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 180 | // Get instruction operands |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 181 | DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); |
Jakub Staszak | e535c1a | 2012-09-04 23:11:11 +0000 | [diff] [blame] | 182 | DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 183 | |
| 184 | if (CacheI == PerBBDivCache.end()) { |
| 185 | // If previous instance does not exist, insert fast div |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 186 | return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 187 | } |
| 188 | |
| 189 | // Replace operation value with previously generated phi node |
Jakub Staszak | e535c1a | 2012-09-04 23:11:11 +0000 | [diff] [blame] | 190 | DivPhiNodes &Value = CacheI->second; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 191 | if (UseDivOp) { |
| 192 | // Replace all uses of div instruction with quotient phi node |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 193 | I->replaceAllUsesWith(Value.Quotient); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 194 | } else { |
| 195 | // Replace all uses of rem instruction with remainder phi node |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 196 | I->replaceAllUsesWith(Value.Remainder); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 197 | } |
| 198 | |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 199 | // Remove redundant operation |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 200 | I->eraseFromParent(); |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 201 | return true; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 202 | } |
| 203 | |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 204 | // bypassSlowDivision - This optimization identifies DIV instructions in a BB |
| 205 | // that can be profitably bypassed and carried out with a shorter, faster |
| 206 | // divide. |
| 207 | bool llvm::bypassSlowDivision( |
| 208 | BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 209 | DivCacheTy DivCache; |
| 210 | |
| 211 | bool MadeChange = false; |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 212 | Instruction* Next = &*BB->begin(); |
| 213 | while (Next != nullptr) { |
| 214 | // We may add instructions immediately after I, but we want to skip over |
| 215 | // them. |
| 216 | Instruction* I = Next; |
| 217 | Next = Next->getNextNode(); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 218 | |
| 219 | // Get instruction details |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 220 | unsigned Opcode = I->getOpcode(); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 221 | bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; |
| 222 | bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 223 | bool UseSignedOp = Opcode == Instruction::SDiv || |
| 224 | Opcode == Instruction::SRem; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 225 | |
| 226 | // Only optimize div or rem ops |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 227 | if (!UseDivOp && !UseRemOp) |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 228 | continue; |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 229 | |
Preston Gurd | 5509e3d | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 230 | // Skip division on vector types, only optimize integer instructions |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 231 | if (!I->getType()->isIntegerTy()) |
Jakub Staszak | 46beca6 | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 232 | continue; |
| 233 | |
Preston Gurd | 0d67f51 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 234 | // Get bitwidth of div/rem instruction |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 235 | IntegerType *T = cast<IntegerType>(I->getType()); |
Preston Gurd | 485296d | 2013-03-04 18:13:57 +0000 | [diff] [blame] | 236 | unsigned int bitwidth = T->getBitWidth(); |
Preston Gurd | 5509e3d | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 237 | |
Preston Gurd | 0d67f51 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 238 | // Continue if bitwidth is not bypassed |
| 239 | DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); |
| 240 | if (BI == BypassWidths.end()) |
Preston Gurd | 5509e3d | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 241 | continue; |
| 242 | |
Preston Gurd | 0d67f51 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 243 | // Get type for div/rem instruction with bypass bitwidth |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 244 | IntegerType *BT = IntegerType::get(I->getContext(), BI->second); |
Preston Gurd | 5509e3d | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 245 | |
Eric Christopher | 49a7d6c | 2016-01-04 23:18:58 +0000 | [diff] [blame] | 246 | MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); |
Preston Gurd | cdf540d | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 247 | } |
| 248 | |
| 249 | return MadeChange; |
| 250 | } |