Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 1 | //===-- StraightLineStrengthReduce.cpp - ------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements straight-line strength reduction (SLSR). Unlike loop |
| 11 | // strength reduction, this algorithm is designed to reduce arithmetic |
| 12 | // redundancy in straight-line code instead of loops. It has proven to be |
| 13 | // effective in simplifying arithmetic statements derived from an unrolled loop. |
| 14 | // It can also simplify the logic of SeparateConstOffsetFromGEP. |
| 15 | // |
| 16 | // There are many optimizations we can perform in the domain of SLSR. This file |
| 17 | // for now contains only an initial step. Specifically, we look for strength |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 18 | // reduction candidates in two forms: |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 19 | // |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 20 | // Form 1: (B + i) * S |
| 21 | // Form 2: &B[i * S] |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 22 | // |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 23 | // where S is an integer variable, and i is a constant integer. If we found two |
| 24 | // candidates |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 25 | // |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 26 | // S1: X = (B + i) * S |
| 27 | // S2: Y = (B + i') * S |
| 28 | // |
| 29 | // or |
| 30 | // |
| 31 | // S1: X = &B[i * S] |
| 32 | // S2: Y = &B[i' * S] |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 33 | // |
| 34 | // and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with |
| 35 | // |
| 36 | // Y = X + (i' - i) * S |
| 37 | // |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 38 | // or |
| 39 | // |
| 40 | // Y = &X[(i' - i) * S] |
| 41 | // |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 42 | // where (i' - i) * S is folded to the extent possible. When S2 has multiple |
| 43 | // bases, we pick the one that is closest to S2, or S2's "immediate" basis. |
| 44 | // |
| 45 | // TODO: |
| 46 | // |
| 47 | // - Handle candidates in the form of B + i * S |
| 48 | // |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 49 | // - Floating point arithmetics when fast math is enabled. |
| 50 | // |
| 51 | // - SLSR may decrease ILP at the architecture level. Targets that are very |
| 52 | // sensitive to ILP may want to disable it. Making SLSR take ILP into |
| 53 | // account is left as future work. |
| 54 | #include <vector> |
| 55 | |
| 56 | #include "llvm/ADT/DenseSet.h" |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 57 | #include "llvm/ADT/FoldingSet.h" |
| 58 | #include "llvm/Analysis/ScalarEvolution.h" |
| 59 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 60 | #include "llvm/IR/DataLayout.h" |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 61 | #include "llvm/IR/Dominators.h" |
| 62 | #include "llvm/IR/IRBuilder.h" |
| 63 | #include "llvm/IR/Module.h" |
| 64 | #include "llvm/IR/PatternMatch.h" |
| 65 | #include "llvm/Support/raw_ostream.h" |
| 66 | #include "llvm/Transforms/Scalar.h" |
| 67 | |
| 68 | using namespace llvm; |
| 69 | using namespace PatternMatch; |
| 70 | |
| 71 | namespace { |
| 72 | |
| 73 | class StraightLineStrengthReduce : public FunctionPass { |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 74 | public: |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 75 | // SLSR candidate. Such a candidate must be in the form of |
| 76 | // (Base + Index) * Stride |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 77 | // or |
| 78 | // Base[..][Index * Stride][..] |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 79 | struct Candidate : public ilist_node<Candidate> { |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 80 | enum Kind { |
| 81 | Invalid, // reserved for the default constructor |
| 82 | Mul, // (B + i) * S |
| 83 | GEP, // &B[..][i * S][..] |
| 84 | }; |
| 85 | |
| 86 | Candidate() |
| 87 | : CandidateKind(Invalid), Base(nullptr), Index(nullptr), |
| 88 | Stride(nullptr), Ins(nullptr), Basis(nullptr) {} |
| 89 | Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, |
| 90 | Instruction *I) |
| 91 | : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I), |
| 92 | Basis(nullptr) {} |
| 93 | Kind CandidateKind; |
| 94 | const SCEV *Base; |
| 95 | // Note that Index and Stride of a GEP candidate may not have the same |
| 96 | // integer type. In that case, during rewriting, Stride will be |
| 97 | // sign-extended or truncated to Index's type. |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 98 | ConstantInt *Index; |
| 99 | Value *Stride; |
| 100 | // The instruction this candidate corresponds to. It helps us to rewrite a |
| 101 | // candidate with respect to its immediate basis. Note that one instruction |
| 102 | // can corresponds to multiple candidates depending on how you associate the |
| 103 | // expression. For instance, |
| 104 | // |
| 105 | // (a + 1) * (b + 2) |
| 106 | // |
| 107 | // can be treated as |
| 108 | // |
| 109 | // <Base: a, Index: 1, Stride: b + 2> |
| 110 | // |
| 111 | // or |
| 112 | // |
| 113 | // <Base: b, Index: 2, Stride: a + 1> |
| 114 | Instruction *Ins; |
| 115 | // Points to the immediate basis of this candidate, or nullptr if we cannot |
| 116 | // find any basis for this candidate. |
| 117 | Candidate *Basis; |
| 118 | }; |
| 119 | |
| 120 | static char ID; |
| 121 | |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 122 | StraightLineStrengthReduce() |
| 123 | : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) { |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 124 | initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry()); |
| 125 | } |
| 126 | |
| 127 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 128 | AU.addRequired<DominatorTreeWrapperPass>(); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 129 | AU.addRequired<ScalarEvolution>(); |
| 130 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 131 | // We do not modify the shape of the CFG. |
| 132 | AU.setPreservesCFG(); |
| 133 | } |
| 134 | |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 135 | bool doInitialization(Module &M) override { |
| 136 | DL = &M.getDataLayout(); |
| 137 | return false; |
| 138 | } |
| 139 | |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 140 | bool runOnFunction(Function &F) override; |
| 141 | |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 142 | private: |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 143 | // Returns true if Basis is a basis for C, i.e., Basis dominates C and they |
| 144 | // share the same base and stride. |
| 145 | bool isBasisFor(const Candidate &Basis, const Candidate &C); |
| 146 | // Checks whether I is in a candidate form. If so, adds all the matching forms |
| 147 | // to Candidates, and tries to find the immediate basis for each of them. |
| 148 | void allocateCandidateAndFindBasis(Instruction *I); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 149 | // Allocate candidates and find bases for Mul instructions. |
| 150 | void allocateCandidateAndFindBasisForMul(Instruction *I); |
| 151 | // Splits LHS into Base + Index and, if succeeds, calls |
| 152 | // allocateCandidateAndFindBasis. |
| 153 | void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, |
| 154 | Instruction *I); |
| 155 | // Allocate candidates and find bases for GetElementPtr instructions. |
| 156 | void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); |
| 157 | // A helper function that scales Idx with ElementSize before invoking |
| 158 | // allocateCandidateAndFindBasis. |
| 159 | void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, |
| 160 | Value *S, uint64_t ElementSize, |
| 161 | Instruction *I); |
| 162 | // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate |
| 163 | // basis. |
| 164 | void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, |
| 165 | ConstantInt *Idx, Value *S, |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 166 | Instruction *I); |
| 167 | // Rewrites candidate C with respect to Basis. |
| 168 | void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 169 | // A helper function that factors ArrayIdx to a product of a stride and a |
| 170 | // constant index, and invokes allocateCandidateAndFindBasis with the |
| 171 | // factorings. |
| 172 | void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, |
| 173 | GetElementPtrInst *GEP); |
| 174 | // Emit code that computes the "bump" from Basis to C. If the candidate is a |
| 175 | // GEP and the bump is not divisible by the element size of the GEP, this |
| 176 | // function sets the BumpWithUglyGEP flag to notify its caller to bump the |
| 177 | // basis using an ugly GEP. |
| 178 | static Value *emitBump(const Candidate &Basis, const Candidate &C, |
| 179 | IRBuilder<> &Builder, const DataLayout *DL, |
| 180 | bool &BumpWithUglyGEP); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 181 | |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 182 | const DataLayout *DL; |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 183 | DominatorTree *DT; |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 184 | ScalarEvolution *SE; |
| 185 | TargetTransformInfo *TTI; |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 186 | ilist<Candidate> Candidates; |
| 187 | // Temporarily holds all instructions that are unlinked (but not deleted) by |
| 188 | // rewriteCandidateWithBasis. These instructions will be actually removed |
| 189 | // after all rewriting finishes. |
| 190 | DenseSet<Instruction *> UnlinkedInstructions; |
| 191 | }; |
| 192 | } // anonymous namespace |
| 193 | |
// Definition of the static pass identifier declared in the class; the
// constructor hands it to the FunctionPass base.
char StraightLineStrengthReduce::ID = 0;
// Pass initialization boilerplate for "slsr". The dependencies listed here are
// the same three analyses that getAnalysisUsage() marks as required.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of the INITIALIZE_PASS macros -- confirm
// against PassSupport.h.
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
                      "Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
                    "Straight line strength reduction", false, false)
| 202 | |
| 203 | FunctionPass *llvm::createStraightLineStrengthReducePass() { |
| 204 | return new StraightLineStrengthReduce(); |
| 205 | } |
| 206 | |
| 207 | bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, |
| 208 | const Candidate &C) { |
| 209 | return (Basis.Ins != C.Ins && // skip the same instruction |
| 210 | // Basis must dominate C in order to rewrite C with respect to Basis. |
| 211 | DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) && |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 212 | // They share the same base, stride, and candidate kind. |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 213 | Basis.Base == C.Base && |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 214 | Basis.Stride == C.Stride && |
| 215 | Basis.CandidateKind == C.CandidateKind); |
| 216 | } |
| 217 | |
| 218 | static bool isCompletelyFoldable(GetElementPtrInst *GEP, |
| 219 | const TargetTransformInfo *TTI, |
| 220 | const DataLayout *DL) { |
| 221 | GlobalVariable *BaseGV = nullptr; |
| 222 | int64_t BaseOffset = 0; |
| 223 | bool HasBaseReg = false; |
| 224 | int64_t Scale = 0; |
| 225 | |
| 226 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand())) |
| 227 | BaseGV = GV; |
| 228 | else |
| 229 | HasBaseReg = true; |
| 230 | |
| 231 | gep_type_iterator GTI = gep_type_begin(GEP); |
| 232 | for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) { |
| 233 | if (isa<SequentialType>(*GTI)) { |
| 234 | int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); |
| 235 | if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) { |
| 236 | BaseOffset += ConstIdx->getSExtValue() * ElementSize; |
| 237 | } else { |
| 238 | // Needs scale register. |
| 239 | if (Scale != 0) { |
| 240 | // No addressing mode takes two scale registers. |
| 241 | return false; |
| 242 | } |
| 243 | Scale = ElementSize; |
| 244 | } |
| 245 | } else { |
| 246 | StructType *STy = cast<StructType>(*GTI); |
| 247 | uint64_t Field = cast<ConstantInt>(*I)->getZExtValue(); |
| 248 | BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field); |
| 249 | } |
| 250 | } |
| 251 | return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV, |
| 252 | BaseOffset, HasBaseReg, Scale); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 253 | } |
| 254 | |
| 255 | // TODO: We currently implement an algorithm whose time complexity is linear to |
| 256 | // the number of existing candidates. However, a better algorithm exists. We |
| 257 | // could depth-first search the dominator tree, and maintain a hash table that |
| 258 | // contains all candidates that dominate the node being traversed. This hash |
| 259 | // table is indexed by the base and the stride of a candidate. Therefore, |
| 260 | // finding the immediate basis of a candidate boils down to one hash-table look |
| 261 | // up. |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 262 | void StraightLineStrengthReduce::allocateCandidateAndFindBasis( |
| 263 | Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, |
| 264 | Instruction *I) { |
| 265 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { |
| 266 | // If &B[Idx * S] fits into an addressing mode, do not turn it into |
| 267 | // non-free computation. |
| 268 | if (isCompletelyFoldable(GEP, TTI, DL)) |
| 269 | return; |
| 270 | } |
| 271 | |
| 272 | Candidate C(CT, B, Idx, S, I); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 273 | // Try to compute the immediate basis of C. |
| 274 | unsigned NumIterations = 0; |
| 275 | // Limit the scan radius to avoid running forever. |
Aaron Ballman | 34c325e | 2015-02-04 14:01:08 +0000 | [diff] [blame] | 276 | static const unsigned MaxNumIterations = 50; |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 277 | for (auto Basis = Candidates.rbegin(); |
| 278 | Basis != Candidates.rend() && NumIterations < MaxNumIterations; |
| 279 | ++Basis, ++NumIterations) { |
| 280 | if (isBasisFor(*Basis, C)) { |
| 281 | C.Basis = &(*Basis); |
| 282 | break; |
| 283 | } |
| 284 | } |
| 285 | // Regardless of whether we find a basis for C, we need to push C to the |
| 286 | // candidate list. |
| 287 | Candidates.push_back(C); |
| 288 | } |
| 289 | |
| 290 | void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) { |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 291 | switch (I->getOpcode()) { |
| 292 | case Instruction::Mul: |
| 293 | allocateCandidateAndFindBasisForMul(I); |
| 294 | break; |
| 295 | case Instruction::GetElementPtr: |
| 296 | allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I)); |
| 297 | break; |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( |
| 302 | Value *LHS, Value *RHS, Instruction *I) { |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 303 | Value *B = nullptr; |
| 304 | ConstantInt *Idx = nullptr; |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 305 | // Only handle the canonical operand ordering. |
| 306 | if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { |
| 307 | // If LHS is in the form of "Base + Index", then I is in the form of |
| 308 | // "(Base + Index) * RHS". |
| 309 | allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); |
| 310 | } else { |
| 311 | // Otherwise, at least try the form (LHS + 0) * RHS. |
| 312 | ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); |
| 313 | allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, |
| 314 | I); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 315 | } |
| 316 | } |
| 317 | |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 318 | void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( |
| 319 | Instruction *I) { |
| 320 | // Try matching (B + i) * S. |
| 321 | // TODO: we could extend SLSR to float and vector types. |
| 322 | if (!isa<IntegerType>(I->getType())) |
| 323 | return; |
| 324 | |
| 325 | Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); |
| 326 | allocateCandidateAndFindBasisForMul(LHS, RHS, I); |
| 327 | if (LHS != RHS) { |
| 328 | // Symmetrically, try to split RHS to Base + Index. |
| 329 | allocateCandidateAndFindBasisForMul(RHS, LHS, I); |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( |
| 334 | const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, |
| 335 | Instruction *I) { |
Jingyue Wu | 99a6bed | 2015-04-02 21:18:32 +0000 | [diff] [blame^] | 336 | // I = B + sext(Idx *nsw S) * ElementSize |
| 337 | // = B + (sext(Idx) * sext(S)) * ElementSize |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 338 | // = B + (sext(Idx) * ElementSize) * sext(S) |
| 339 | // Casting to IntegerType is safe because we skipped vector GEPs. |
| 340 | IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); |
| 341 | ConstantInt *ScaledIdx = ConstantInt::get( |
| 342 | IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); |
| 343 | allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); |
| 344 | } |
| 345 | |
| 346 | void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx, |
| 347 | const SCEV *Base, |
| 348 | uint64_t ElementSize, |
| 349 | GetElementPtrInst *GEP) { |
| 350 | // At least, ArrayIdx = ArrayIdx *s 1. |
| 351 | allocateCandidateAndFindBasisForGEP( |
| 352 | Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1), |
| 353 | ArrayIdx, ElementSize, GEP); |
| 354 | Value *LHS = nullptr; |
| 355 | ConstantInt *RHS = nullptr; |
| 356 | // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4). |
| 357 | // |
| 358 | // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx |
| 359 | // itself. This would allow us to handle the shl case for free. However, |
| 360 | // matching SCEVs has two issues: |
| 361 | // |
| 362 | // 1. this would complicate rewriting because the rewriting procedure |
| 363 | // would have to translate SCEVs back to IR instructions. This translation |
| 364 | // is difficult when LHS is further evaluated to a composite SCEV. |
| 365 | // |
| 366 | // 2. ScalarEvolution is designed to be control-flow oblivious. It tends |
| 367 | // to strip nsw/nuw flags which are critical for SLSR to trace into |
| 368 | // sext'ed multiplication. |
| 369 | if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) { |
| 370 | // SLSR is currently unsafe if i * S may overflow. |
Jingyue Wu | 99a6bed | 2015-04-02 21:18:32 +0000 | [diff] [blame^] | 371 | // GEP = Base + sext(LHS *nsw RHS) * ElementSize |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 372 | allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( |
| 377 | GetElementPtrInst *GEP) { |
| 378 | // TODO: handle vector GEPs |
| 379 | if (GEP->getType()->isVectorTy()) |
| 380 | return; |
| 381 | |
| 382 | const SCEV *GEPExpr = SE->getSCEV(GEP); |
| 383 | Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); |
| 384 | |
| 385 | gep_type_iterator GTI = gep_type_begin(GEP); |
| 386 | for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { |
| 387 | if (!isa<SequentialType>(*GTI++)) |
| 388 | continue; |
| 389 | Value *ArrayIdx = *I; |
| 390 | // Compute the byte offset of this index. |
| 391 | uint64_t ElementSize = DL->getTypeAllocSize(*GTI); |
| 392 | const SCEV *ElementSizeExpr = SE->getSizeOfExpr(IntPtrTy, *GTI); |
| 393 | const SCEV *ArrayIdxExpr = SE->getSCEV(ArrayIdx); |
| 394 | ArrayIdxExpr = SE->getTruncateOrSignExtend(ArrayIdxExpr, IntPtrTy); |
| 395 | const SCEV *LocalOffset = |
| 396 | SE->getMulExpr(ArrayIdxExpr, ElementSizeExpr, SCEV::FlagNSW); |
| 397 | // The base of this candidate equals GEPExpr less the byte offset of this |
| 398 | // index. |
| 399 | const SCEV *Base = SE->getMinusSCEV(GEPExpr, LocalOffset); |
| 400 | factorArrayIndex(ArrayIdx, Base, ElementSize, GEP); |
| 401 | // When ArrayIdx is the sext of a value, we try to factor that value as |
| 402 | // well. Handling this case is important because array indices are |
| 403 | // typically sign-extended to the pointer size. |
| 404 | Value *TruncatedArrayIdx = nullptr; |
| 405 | if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx)))) |
| 406 | factorArrayIndex(TruncatedArrayIdx, Base, ElementSize, GEP); |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | // A helper function that unifies the bitwidth of A and B. |
| 411 | static void unifyBitWidth(APInt &A, APInt &B) { |
| 412 | if (A.getBitWidth() < B.getBitWidth()) |
| 413 | A = A.sext(B.getBitWidth()); |
| 414 | else if (A.getBitWidth() > B.getBitWidth()) |
| 415 | B = B.sext(A.getBitWidth()); |
| 416 | } |
| 417 | |
| 418 | Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, |
| 419 | const Candidate &C, |
| 420 | IRBuilder<> &Builder, |
| 421 | const DataLayout *DL, |
| 422 | bool &BumpWithUglyGEP) { |
| 423 | APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); |
| 424 | unifyBitWidth(Idx, BasisIdx); |
| 425 | APInt IndexOffset = Idx - BasisIdx; |
| 426 | |
| 427 | BumpWithUglyGEP = false; |
| 428 | if (Basis.CandidateKind == Candidate::GEP) { |
| 429 | APInt ElementSize( |
| 430 | IndexOffset.getBitWidth(), |
| 431 | DL->getTypeAllocSize( |
| 432 | cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType())); |
| 433 | APInt Q, R; |
| 434 | APInt::sdivrem(IndexOffset, ElementSize, Q, R); |
| 435 | if (R.getSExtValue() == 0) |
| 436 | IndexOffset = Q; |
| 437 | else |
| 438 | BumpWithUglyGEP = true; |
| 439 | } |
| 440 | // Compute Bump = C - Basis = (i' - i) * S. |
| 441 | // Common case 1: if (i' - i) is 1, Bump = S. |
| 442 | if (IndexOffset.getSExtValue() == 1) |
| 443 | return C.Stride; |
| 444 | // Common case 2: if (i' - i) is -1, Bump = -S. |
| 445 | if (IndexOffset.getSExtValue() == -1) |
| 446 | return Builder.CreateNeg(C.Stride); |
| 447 | // Otherwise, Bump = (i' - i) * sext/trunc(S). |
| 448 | ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset); |
| 449 | Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType()); |
| 450 | return Builder.CreateMul(ExtendedStride, Delta); |
| 451 | } |
| 452 | |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 453 | void StraightLineStrengthReduce::rewriteCandidateWithBasis( |
| 454 | const Candidate &C, const Candidate &Basis) { |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 455 | assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && |
| 456 | C.Stride == Basis.Stride); |
| 457 | |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 458 | // An instruction can correspond to multiple candidates. Therefore, instead of |
| 459 | // simply deleting an instruction when we rewrite it, we mark its parent as |
| 460 | // nullptr (i.e. unlink it) so that we can skip the candidates whose |
| 461 | // instruction is already rewritten. |
| 462 | if (!C.Ins->getParent()) |
| 463 | return; |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 464 | |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 465 | IRBuilder<> Builder(C.Ins); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 466 | bool BumpWithUglyGEP; |
| 467 | Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); |
| 468 | Value *Reduced = nullptr; // equivalent to but weaker than C.Ins |
| 469 | switch (C.CandidateKind) { |
| 470 | case Candidate::Mul: |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 471 | Reduced = Builder.CreateAdd(Basis.Ins, Bump); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 472 | break; |
| 473 | case Candidate::GEP: |
| 474 | { |
| 475 | Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); |
Jingyue Wu | 99a6bed | 2015-04-02 21:18:32 +0000 | [diff] [blame^] | 476 | bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 477 | if (BumpWithUglyGEP) { |
| 478 | // C = (char *)Basis + Bump |
| 479 | unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); |
| 480 | Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); |
| 481 | Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); |
Jingyue Wu | 99a6bed | 2015-04-02 21:18:32 +0000 | [diff] [blame^] | 482 | if (InBounds) |
| 483 | Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); |
| 484 | else |
| 485 | Reduced = Builder.CreateGEP(Reduced, Bump); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 486 | Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); |
| 487 | } else { |
| 488 | // C = gep Basis, Bump |
| 489 | // Canonicalize bump to pointer size. |
| 490 | Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); |
Jingyue Wu | 99a6bed | 2015-04-02 21:18:32 +0000 | [diff] [blame^] | 491 | if (InBounds) |
| 492 | Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); |
| 493 | else |
| 494 | Reduced = Builder.CreateGEP(Basis.Ins, Bump); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 495 | } |
| 496 | } |
| 497 | break; |
| 498 | default: |
| 499 | llvm_unreachable("C.CandidateKind is invalid"); |
| 500 | }; |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 501 | Reduced->takeName(C.Ins); |
| 502 | C.Ins->replaceAllUsesWith(Reduced); |
| 503 | C.Ins->dropAllReferences(); |
| 504 | // Unlink C.Ins so that we can skip other candidates also corresponding to |
| 505 | // C.Ins. The actual deletion is postponed to the end of runOnFunction. |
| 506 | C.Ins->removeFromParent(); |
| 507 | UnlinkedInstructions.insert(C.Ins); |
| 508 | } |
| 509 | |
| 510 | bool StraightLineStrengthReduce::runOnFunction(Function &F) { |
| 511 | if (skipOptnoneFunction(F)) |
| 512 | return false; |
| 513 | |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 514 | TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 515 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 516 | SE = &getAnalysis<ScalarEvolution>(); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 517 | // Traverse the dominator tree in the depth-first order. This order makes sure |
| 518 | // all bases of a candidate are in Candidates when we process it. |
| 519 | for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); |
| 520 | node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { |
Jingyue Wu | 177a815 | 2015-03-26 16:49:24 +0000 | [diff] [blame] | 521 | for (auto &I : *node->getBlock()) |
| 522 | allocateCandidateAndFindBasis(&I); |
Jingyue Wu | d7966ff | 2015-02-03 19:37:06 +0000 | [diff] [blame] | 523 | } |
| 524 | |
| 525 | // Rewrite candidates in the reverse depth-first order. This order makes sure |
| 526 | // a candidate being rewritten is not a basis for any other candidate. |
| 527 | while (!Candidates.empty()) { |
| 528 | const Candidate &C = Candidates.back(); |
| 529 | if (C.Basis != nullptr) { |
| 530 | rewriteCandidateWithBasis(C, *C.Basis); |
| 531 | } |
| 532 | Candidates.pop_back(); |
| 533 | } |
| 534 | |
| 535 | // Delete all unlink instructions. |
| 536 | for (auto I : UnlinkedInstructions) { |
| 537 | delete I; |
| 538 | } |
| 539 | bool Ret = !UnlinkedInstructions.empty(); |
| 540 | UnlinkedInstructions.clear(); |
| 541 | return Ret; |
| 542 | } |