Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 1 | //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file contains an optimization for div and rem on architectures that |
| 11 | // execute short instructions significantly faster than longer instructions. |
| 12 | // For example, on Intel Atom 32-bit divides are slow enough that during |
| 13 | // runtime it is profitable to check the value of the operands, and if they are |
| 14 | // positive and less than 256 use an unsigned 8-bit divide. |
| 15 | // |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | |
Chandler Carruth | d04a8d4 | 2012-12-03 16:50:05 +0000 | [diff] [blame] | 18 | #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
| 19 | #include "llvm/ADT/DenseMap.h" |
Chandler Carruth | 0b8c9a8 | 2013-01-02 11:36:10 +0000 | [diff] [blame] | 20 | #include "llvm/IR/Function.h" |
| 21 | #include "llvm/IR/IRBuilder.h" |
| 22 | #include "llvm/IR/Instructions.h" |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 23 | |
| 24 | using namespace llvm; |
| 25 | |
Stephen Hines | dce4a40 | 2014-05-29 02:49:00 -0700 | [diff] [blame^] | 26 | #define DEBUG_TYPE "bypass-slow-division" |
| 27 | |
Benjamin Kramer | 04142bc | 2012-09-10 11:52:08 +0000 | [diff] [blame] | 28 | namespace { |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 29 | struct DivOpInfo { |
| 30 | bool SignedOp; |
| 31 | Value *Dividend; |
| 32 | Value *Divisor; |
| 33 | |
| 34 | DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) |
| 35 | : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} |
| 36 | }; |
| 37 | |
| 38 | struct DivPhiNodes { |
| 39 | PHINode *Quotient; |
| 40 | PHINode *Remainder; |
| 41 | |
| 42 | DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) |
| 43 | : Quotient(InQuotient), Remainder(InRemainder) {} |
| 44 | }; |
Benjamin Kramer | 04142bc | 2012-09-10 11:52:08 +0000 | [diff] [blame] | 45 | } |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 46 | |
Benjamin Kramer | 04142bc | 2012-09-10 11:52:08 +0000 | [diff] [blame] | 47 | namespace llvm { |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 48 | template<> |
| 49 | struct DenseMapInfo<DivOpInfo> { |
| 50 | static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { |
| 51 | return Val1.SignedOp == Val2.SignedOp && |
| 52 | Val1.Dividend == Val2.Dividend && |
| 53 | Val1.Divisor == Val2.Divisor; |
| 54 | } |
| 55 | |
| 56 | static DivOpInfo getEmptyKey() { |
Stephen Hines | dce4a40 | 2014-05-29 02:49:00 -0700 | [diff] [blame^] | 57 | return DivOpInfo(false, nullptr, nullptr); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 58 | } |
| 59 | |
| 60 | static DivOpInfo getTombstoneKey() { |
Stephen Hines | dce4a40 | 2014-05-29 02:49:00 -0700 | [diff] [blame^] | 61 | return DivOpInfo(true, nullptr, nullptr); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 62 | } |
| 63 | |
| 64 | static unsigned getHashValue(const DivOpInfo &Val) { |
| 65 | return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ |
| 66 | reinterpret_cast<uintptr_t>(Val.Divisor)) ^ |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 67 | (unsigned)Val.SignedOp; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 68 | } |
| 69 | }; |
| 70 | |
| 71 | typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; |
| 72 | } |
| 73 | |
| 74 | // insertFastDiv - Substitutes the div/rem instruction with code that checks the |
| 75 | // value of the operands and uses a shorter-faster div/rem instruction when |
| 76 | // possible and the longer-slower div/rem instruction otherwise. |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 77 | static bool insertFastDiv(Function &F, |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 78 | Function::iterator &I, |
| 79 | BasicBlock::iterator &J, |
| 80 | IntegerType *BypassType, |
| 81 | bool UseDivOp, |
| 82 | bool UseSignedOp, |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 83 | DivCacheTy &PerBBDivCache) { |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 84 | // Get instruction operands |
| 85 | Instruction *Instr = J; |
| 86 | Value *Dividend = Instr->getOperand(0); |
| 87 | Value *Divisor = Instr->getOperand(1); |
| 88 | |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 89 | if (isa<ConstantInt>(Divisor) || |
| 90 | (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 91 | // Operations with immediate values should have |
| 92 | // been solved and replaced during compile time. |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 93 | return false; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 94 | } |
| 95 | |
| 96 | // Basic Block is split before divide |
| 97 | BasicBlock *MainBB = I; |
| 98 | BasicBlock *SuccessorBB = I->splitBasicBlock(J); |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 99 | ++I; //advance iterator I to successorBB |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 100 | |
| 101 | // Add new basic block for slow divide operation |
| 102 | BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", |
| 103 | MainBB->getParent(), SuccessorBB); |
| 104 | SlowBB->moveBefore(SuccessorBB); |
| 105 | IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); |
| 106 | Value *SlowQuotientV; |
| 107 | Value *SlowRemainderV; |
| 108 | if (UseSignedOp) { |
| 109 | SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); |
| 110 | SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); |
| 111 | } else { |
| 112 | SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); |
| 113 | SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); |
| 114 | } |
| 115 | SlowBuilder.CreateBr(SuccessorBB); |
| 116 | |
| 117 | // Add new basic block for fast divide operation |
| 118 | BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", |
| 119 | MainBB->getParent(), SuccessorBB); |
| 120 | FastBB->moveBefore(SlowBB); |
| 121 | IRBuilder<> FastBuilder(FastBB, FastBB->begin()); |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 122 | Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, |
| 123 | BypassType); |
| 124 | Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, |
| 125 | BypassType); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 126 | |
| 127 | // udiv/urem because optimization only handles positive numbers |
| 128 | Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 129 | ShortDivisorV); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 130 | Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, |
| 131 | ShortDivisorV); |
| 132 | Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 133 | ShortQuotientV, |
| 134 | Dividend->getType()); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 135 | Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, |
| 136 | ShortRemainderV, |
| 137 | Dividend->getType()); |
| 138 | FastBuilder.CreateBr(SuccessorBB); |
| 139 | |
| 140 | // Phi nodes for result of div and rem |
| 141 | IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); |
| 142 | PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); |
| 143 | QuoPhi->addIncoming(SlowQuotientV, SlowBB); |
| 144 | QuoPhi->addIncoming(FastQuotientV, FastBB); |
| 145 | PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); |
| 146 | RemPhi->addIncoming(SlowRemainderV, SlowBB); |
| 147 | RemPhi->addIncoming(FastRemainderV, FastBB); |
| 148 | |
| 149 | // Replace Instr with appropriate phi node |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 150 | if (UseDivOp) |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 151 | Instr->replaceAllUsesWith(QuoPhi); |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 152 | else |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 153 | Instr->replaceAllUsesWith(RemPhi); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 154 | Instr->eraseFromParent(); |
| 155 | |
| 156 | // Combine operands into a single value with OR for value testing below |
| 157 | MainBB->getInstList().back().eraseFromParent(); |
| 158 | IRBuilder<> MainBuilder(MainBB, MainBB->end()); |
| 159 | Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); |
| 160 | |
| 161 | // BitMask is inverted to check if the operands are |
| 162 | // larger than the bypass type |
| 163 | uint64_t BitMask = ~BypassType->getBitMask(); |
| 164 | Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); |
| 165 | |
| 166 | // Compare operand values and branch |
Preston Gurd | 9a2cfff | 2013-03-04 18:13:57 +0000 | [diff] [blame] | 167 | Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 168 | Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); |
| 169 | MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); |
| 170 | |
| 171 | // point iterator J at first instruction of successorBB |
| 172 | J = I->begin(); |
| 173 | |
| 174 | // Cache phi nodes to be used later in place of other instances |
| 175 | // of div or rem with the same sign, dividend, and divisor |
| 176 | DivOpInfo Key(UseSignedOp, Dividend, Divisor); |
| 177 | DivPhiNodes Value(QuoPhi, RemPhi); |
| 178 | PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 179 | return true; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 180 | } |
| 181 | |
| 182 | // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if |
| 183 | // operands and operation are identical. Otherwise call insertFastDiv to perform |
| 184 | // the optimization and cache the resulting dividend and remainder. |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 185 | static bool reuseOrInsertFastDiv(Function &F, |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 186 | Function::iterator &I, |
| 187 | BasicBlock::iterator &J, |
| 188 | IntegerType *BypassType, |
| 189 | bool UseDivOp, |
| 190 | bool UseSignedOp, |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 191 | DivCacheTy &PerBBDivCache) { |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 192 | // Get instruction operands |
| 193 | Instruction *Instr = J; |
| 194 | DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); |
Jakub Staszak | be11991 | 2012-09-04 23:11:11 +0000 | [diff] [blame] | 195 | DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 196 | |
| 197 | if (CacheI == PerBBDivCache.end()) { |
| 198 | // If previous instance does not exist, insert fast div |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 199 | return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, |
| 200 | PerBBDivCache); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 201 | } |
| 202 | |
| 203 | // Replace operation value with previously generated phi node |
Jakub Staszak | be11991 | 2012-09-04 23:11:11 +0000 | [diff] [blame] | 204 | DivPhiNodes &Value = CacheI->second; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 205 | if (UseDivOp) { |
| 206 | // Replace all uses of div instruction with quotient phi node |
| 207 | J->replaceAllUsesWith(Value.Quotient); |
| 208 | } else { |
| 209 | // Replace all uses of rem instruction with remainder phi node |
| 210 | J->replaceAllUsesWith(Value.Remainder); |
| 211 | } |
| 212 | |
| 213 | // Advance to next operation |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 214 | ++J; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 215 | |
| 216 | // Remove redundant operation |
| 217 | Instr->eraseFromParent(); |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 218 | return true; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 219 | } |
| 220 | |
| 221 | // bypassSlowDivision - This optimization identifies DIV instructions that can |
| 222 | // be profitably bypassed and carried out with a shorter, faster divide. |
Benjamin Kramer | 04142bc | 2012-09-10 11:52:08 +0000 | [diff] [blame] | 223 | bool llvm::bypassSlowDivision(Function &F, |
| 224 | Function::iterator &I, |
Preston Gurd | 8d662b5 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 225 | const DenseMap<unsigned int, unsigned int> &BypassWidths) { |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 226 | DivCacheTy DivCache; |
| 227 | |
| 228 | bool MadeChange = false; |
| 229 | for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { |
| 230 | |
| 231 | // Get instruction details |
| 232 | unsigned Opcode = J->getOpcode(); |
| 233 | bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; |
| 234 | bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 235 | bool UseSignedOp = Opcode == Instruction::SDiv || |
| 236 | Opcode == Instruction::SRem; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 237 | |
| 238 | // Only optimize div or rem ops |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 239 | if (!UseDivOp && !UseRemOp) |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 240 | continue; |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 241 | |
Preston Gurd | fcf0628 | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 242 | // Skip division on vector types, only optimize integer instructions |
| 243 | if (!J->getType()->isIntegerTy()) |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 244 | continue; |
| 245 | |
Preston Gurd | 8d662b5 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 246 | // Get bitwidth of div/rem instruction |
Preston Gurd | fcf0628 | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 247 | IntegerType *T = cast<IntegerType>(J->getType()); |
Preston Gurd | 9a2cfff | 2013-03-04 18:13:57 +0000 | [diff] [blame] | 248 | unsigned int bitwidth = T->getBitWidth(); |
Preston Gurd | fcf0628 | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 249 | |
Preston Gurd | 8d662b5 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 250 | // Continue if bitwidth is not bypassed |
| 251 | DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); |
| 252 | if (BI == BypassWidths.end()) |
Preston Gurd | fcf0628 | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 253 | continue; |
| 254 | |
Preston Gurd | 8d662b5 | 2012-10-04 21:33:40 +0000 | [diff] [blame] | 255 | // Get type for div/rem instruction with bypass bitwidth |
| 256 | IntegerType *BT = IntegerType::get(J->getContext(), BI->second); |
Preston Gurd | fcf0628 | 2012-10-03 16:11:44 +0000 | [diff] [blame] | 257 | |
| 258 | MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, |
Jakub Staszak | 7b2d20d | 2012-09-04 20:48:24 +0000 | [diff] [blame] | 259 | UseSignedOp, DivCache); |
Preston Gurd | 2e2efd9 | 2012-09-04 18:22:17 +0000 | [diff] [blame] | 260 | } |
| 261 | |
| 262 | return MadeChange; |
| 263 | } |